In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from bs4 import BeautifulSoup as soup
import time
import os
import csv

### Set up Chrome options to bypass two-factor authentication. Use the Chrome WebDriver to log in. Open the transactions view and click "show more transactions". Parse all the transactions into a soup using BeautifulSoup

In [2]:
# Reuse a persistent Chrome user profile: after logging into Clarity Money once and entering the
# access code sent by email, the profile is remembered and the access code is not requested again.
# The Chrome user profile has to be created beforehand in the Google Chrome settings.
chrome_options = Options()
for chrome_flag in (
    "--user-data-dir=C:\\Users\\Charles\\AppData\\Local\\Google\\Chrome\\User Data\\Profile 1",
    "--disable-extensions",
    "--profile-directory=Profile 1",
):
    chrome_options.add_argument(chrome_flag)

# chromedriver.exe must be downloaded separately; its version has to match the installed Google Chrome.
driver = webdriver.Chrome(options=chrome_options, executable_path="chromedriver.exe")

# Read the Clarity Money credentials from environment variables on the local machine
email = os.getenv("CLARITY_MONEY_USER")
password = os.getenv("CLARITY_MONEY_PASS")
# os.getenv returns None for an unset variable; stop here with a clear message instead of
# failing later inside send_keys().
if not email or not password:
    raise RuntimeError(
        "Set the CLARITY_MONEY_USER and CLARITY_MONEY_PASS environment variables before running this cell."
    )

# URLs used for Clarity Money
url = 'https://app.claritymoney.com'
# Dashboard URL, used to detect that the login has completed and the page has loaded
dash_url = 'https://app.claritymoney.com/dashboard'

# Open the login page and type in the email and password.
# find_element(By...) is used because the find_element_by_* helpers are deprecated.
driver.get(url)
driver.find_element(By.NAME, 'email').send_keys(email)
driver.find_element(By.NAME, 'password').send_keys(password)

# Click the log in button. The button is identified by several classes at once, so it is
# located with an explicit CSS selector rather than a single class name.
driver.find_element(By.CSS_SELECTOR, '.btn-deepblue.mb-1.btn.btn-full').click()

# Wait (up to 200 seconds) for the dashboard URL before anything else is clicked.
WebDriverWait(driver, 200).until(EC.url_to_be(dash_url))

# Open the full transactions view from the dashboard page.
# Wait until the header is clickable instead of assuming it is already rendered.
WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CLASS_NAME, 'transactions-header-title'))
).click()
time.sleep(1)

# Click "show more transactions" three times to load additional transactions.
show_more_selector = '#modal-transactions > div.ta-c.p-1 > div'
for _ in range(3):
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, show_more_selector))
    ).click()
    # Short pause after each click so the newly requested rows have time to appear
    # before the next click / before the page source is read.
    time.sleep(1)

# Snapshot the HTML currently rendered in the browser (includes the loaded transactions)
html_source = driver.page_source
# Alternative selector noted during development:
#   modal-undefined > div.ta-c.p-1 > div
#   document.querySelector("#modal-undefined > div.ta-c.p-1 > div")

# Parse the HTML snapshot into a BeautifulSoup tree
page_soup = soup(markup=html_source, features="html.parser")

### Check that the soup contains the expected information by inspecting a sample transaction

In [3]:
# Select the <div> elements that are direct children of the transactions modal (CSS selector)
transactions_soup = page_soup.select(selector="#modal-transactions>div")

In [4]:
# Collect every <div class="transactions"> element in the Clarity Money page
transaction_container = page_soup.find_all("div", class_="transactions")

In [5]:
# Displays the first element of transactions_soup (the first group of transactions) as the cell output
transactions_soup[0]

<div><div class="transaction-bar"><span class="date">May 19, 2020</span></div><button class="border-b transaction-item --clickable" data-id="6525945063"><!-- react-text: 2680 --> <!-- /react-text --><div class="f-aligncenter mw-100"><div class="transaction-img f-cvja"><span class="category-icon ff-taxonomy-icons transaction-icon" style="background-image: -webkit-linear-gradient(top, rgb(17, 166, 102), rgb(26, 163, 115)); color: rgb(17, 166, 102); background-clip: text; -webkit-text-fill-color: transparent;"><!-- react-text: 2684 --> <!-- /react-text --><!-- react-text: 2685 --><!-- /react-text --><!-- react-text: 2686 --> <!-- /react-text --></span></div><div class="transaction-name">ORIG CO NAME:CARDMEMBER SERV CO ENTRY DESCR:WEB PYMT SEC:WEB IND ID:***********7440 ORIG ID:5911111111 (pending)</div><div class="transaction-info f-jcv"><span><span class=""><span aria-hidden="false" aria-label="$2732.59" class="currency"><span aria-hidden="true"><!-- react-text: 2693 -->$<!-- /react-tex

In [6]:
# Extract the text of the date label (<span class="date">) of the first group, without the HTML tags
transactions_soup[0].find("span", class_="date").get_text()

'May 19, 2020'

In [7]:
# List every transaction-name <div> found inside the first group
transactions_soup[0].find_all("div", class_="transaction-name")

[<div class="transaction-name">ORIG CO NAME:CARDMEMBER SERV CO ENTRY DESCR:WEB PYMT SEC:WEB IND ID:***********7440 ORIG ID:5911111111 (pending)</div>,
 <div class="transaction-name">Amazon (pending)</div>,
 <div class="transaction-name">Popeyes</div>,
 <div class="transaction-name">DOORDASH*PETERS HOT BA (pending)</div>,
 <div class="transaction-name">ORIG CO NAME:NYS DOL UI DD CO ENTRY DESCR:UI DD SEC:PPD ORIG ID:146013200U (pending)</div>,
 <div class="transaction-name">ORIG CO NAME:NYS DOL UI DD CO ENTRY DESCR:UI DD SEC:PPD ORIG ID:146013200U (pending)</div>,
 <div class="transaction-name">Amazon</div>,
 <div class="transaction-name">Qdoba</div>,
 <div class="transaction-name">Amazon.com (pending)</div>,
 <div class="transaction-name">Amazon</div>,
 <div class="transaction-name">Amazon (pending)</div>,
 <div class="transaction-name">Amazon</div>,
 <div class="transaction-name">Amazon (pending)</div>]

In [8]:
# Take only the first transaction-name <div> of the first group and return its text nodes (HTML tags removed)
first_transaction_name = transactions_soup[0].find("div", class_="transaction-name")
first_transaction_name.find_all(text=True)

['ORIG CO NAME:CARDMEMBER SERV CO ENTRY DESCR:WEB PYMT SEC:WEB IND ID:***********7440 ORIG ID:5911111111 (pending)']

In [9]:
# Collect every transaction-info <div> on the page (these hold the transaction prices)
html_transaction_info = page_soup.find_all("div", class_="transaction-info")

# Show the text of the first one with the HTML tags stripped
html_transaction_info[0].get_text()

'$2,732.59'

In [25]:
# Find the date belonging to a transaction: start from the first transaction name of the first
# group and search backwards through the document for the nearest preceding date label.
first_group_name = transactions_soup[0].find("div", class_="transaction-name")
first_group_name.find_previous("span", class_="date").get_text()

'May 13, 2020'

### Combine all the collected transactions into one list using a for loop

In [58]:
# Start with an empty list that will hold the collected data values
transactions_list = list()

In [59]:
# Flatten all transactions into one list laid out as repeating
# [date, name, info, date, name, info, ...] values.
# The list is (re)created here so that re-running this cell does not append duplicate entries.
transactions_list = []

# The last element of transactions_soup is skipped
# (presumably it is the "show more" control rather than a group of transactions - TODO confirm).
for group in transactions_soup[:-1]:
    names = group.find_all("div", {"class": "transaction-name"})
    infos = group.find_all("div", {"class": "transaction-info f-jcv"})

    # A group without any transaction has nothing to contribute; skip it instead of
    # failing on a missing transaction-name element.
    if not names:
        continue
    # Every name needs a matching info entry; report a mismatch explicitly rather than
    # running into a bare IndexError part-way through the group.
    if len(infos) < len(names):
        raise ValueError(
            "Found %d transaction names but only %d transaction-info entries in one group"
            % (len(names), len(infos))
        )

    # Date of the group: the nearest date label preceding its first transaction name.
    current_date = names[0].find_previous("span", {"class": "date"}).text

    # Append date, name and info for each transaction. Commas are stripped from every
    # value because the list is later written out as comma-separated text.
    for name_tag, info_tag in zip(names, infos):
        transactions_list.extend([
            current_date.replace(',', ''),
            name_tag.text.replace(',', ""),
            info_tag.text.replace(',', ""),
        ])

In [60]:
# Sanity check: print the first 50 entries of the flattened list
print(transactions_list[:50])

['May 19 2020', 'ORIG CO NAME:CARDMEMBER SERV CO ENTRY DESCR:WEB PYMT SEC:WEB IND ID:***********7440 ORIG ID:5911111111 (pending)', '$2732.59', 'May 19 2020', 'Amazon (pending)', '$107.79', 'May 19 2020', 'Popeyes', '$19.58', 'May 19 2020', 'DOORDASH*PETERS HOT BA (pending)', '$20.78', 'May 19 2020', 'ORIG CO NAME:NYS DOL UI DD CO ENTRY DESCR:UI DD SEC:PPD ORIG ID:146013200U (pending)', '$525.00', 'May 19 2020', 'ORIG CO NAME:NYS DOL UI DD CO ENTRY DESCR:UI DD SEC:PPD ORIG ID:146013200U (pending)', '$441.00', 'May 19 2020', 'Amazon', '$43.52', 'May 19 2020', 'Qdoba', '$22.09', 'May 19 2020', 'Amazon.com (pending)', '$57.69', 'May 19 2020', 'Amazon', '$61.51', 'May 19 2020', 'Amazon (pending)', '$76.75', 'May 19 2020', 'Amazon', '$13.01', 'May 19 2020', 'Amazon (pending)', '$57.55', 'May 18 2020', 'DOORDASH*LONG ISLAND B', '$14.44', 'May 18 2020', 'Internet Payment Thank You', '$2732.59', 'May 18 2020', 'DOORDASH', '$2.00', 'May 18 2020', 'MACYS .COM']


In [61]:
# Sample first step towards turning a price string into a float: take the text of the first
# transaction-info <div> and strip the dollar sign. The thousands separator is still present
# in the result, so it is not yet a value float() would accept.
html_transaction_info = page_soup.find_all("div", class_="transaction-info")
price = html_transaction_info[0].get_text()
price.replace("$", "")

'2,732.59'

In [62]:
# Write the flat [date, name, info, ...] list to a CSV file, one transaction (three values) per row.
# csv.writer takes care of separators, quoting and row terminators, so rows no longer end with a
# trailing comma (which produced an empty fourth column) and the last row is properly terminated.
FIELDS_PER_ROW = 3
# newline='' is what the csv module expects for files it writes; utf-8 keeps the export
# independent of the platform's default encoding.
with open('transactions_may142020.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    for start in range(0, len(transactions_list), FIELDS_PER_ROW):
        writer.writerow(transactions_list[start:start + FIELDS_PER_ROW])
# Used to reference and view html gathered.
# print(page_soup)