In [197]:
# import the required libraries 
from googleapiclient.discovery import build 
from google_auth_oauthlib.flow import InstalledAppFlow 
from google.auth.transport.requests import Request 
import pickle 
import os.path 
import base64 
import email 
import re
import datetime
import pandas as pd
from tqdm import tqdm
from dateutil import parser
from bs4 import BeautifulSoup

In [158]:
def buscar_key(diccionario, key_buscada):
    """Depth-first lookup of ``key_buscada`` inside a nested dict structure.

    Args:
        diccionario: Dictionary to search. Its values may be further
            dictionaries or lists that contain dictionaries.
        key_buscada: Key to look for.

    Returns:
        The value stored under ``key_buscada``. A hit at the current level is
        returned as-is (even when it is ``None``); otherwise nested dicts and
        dicts that are direct elements of list values are searched in order
        and the first non-``None`` hit is returned. ``None`` when nothing is
        found.
    """
    # A match at this level wins over anything nested deeper.
    if key_buscada in diccionario:
        return diccionario[key_buscada]

    for value in diccionario.values():
        # Work out which nested dictionaries (if any) this value exposes.
        if isinstance(value, dict):
            candidates = [value]
        elif isinstance(value, list):
            # Only dict elements are searched; lists nested in lists are not.
            candidates = [item for item in value if isinstance(item, dict)]
        else:
            continue

        for candidate in candidates:
            found = buscar_key(candidate, key_buscada)
            if found is not None:
                return found

    return None


def convert_to_float(amount):
    """Convert an amount string using '.'/',' separators into a float.

    Args:
        amount: Amount text such as ``"1.234,56"``, ``"1,234.56"``,
            ``"10.880"`` or ``"600"``.

    Returns:
        The amount as a float.

    Raises:
        ValueError: If the remaining text is not a valid number.
    """
    amount = amount.strip()

    # An amount made only of thousands groups ("10.880", "1,234,567") has no
    # decimal part. Without this check the last separator would be taken as
    # the decimal point and "10.880" would become 10.88.
    # A leading "0" (e.g. "0.125") is not a thousands group, so it is excluded.
    only_thousands = re.fullmatch(
        r'[1-9][0-9]{0,2}(?:\.[0-9]{3})+|[1-9][0-9]{0,2}(?:,[0-9]{3})+', amount
    )
    if only_thousands:
        return float(amount.replace('.', '').replace(',', ''))

    # Replace commas with dots so only one separator character remains.
    normalized = amount.replace(',', '.')
    # The last dot is the decimal point; every earlier dot is a thousands
    # separator and is dropped.
    integer_part, dot, fraction = normalized.rpartition('.')
    return float(integer_part.replace('.', '') + dot + fraction)

In [76]:
# OAuth scopes requested from Gmail. After changing them, delete the cached
# token.pickle file so new credentials are created.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

# Holds the user's credentials; stays None until a token is loaded or created.
creds = None

# Reuse the token cached by a previous run, if there is one.
# NOTE: pickle.load can execute code embedded in the file, so only load a
# token file that this notebook wrote.
if os.path.exists('utils/token.pickle'):
    with open('utils/token.pickle', 'rb') as token:
        creds = pickle.load(token)

# Missing or invalid credentials: refresh them when possible, otherwise log in.
if not (creds and creds.valid):
    if not (creds and creds.expired and creds.refresh_token):
        # Nothing refreshable: run the installed-app login flow.
        flow = InstalledAppFlow.from_client_secrets_file('utils/credentials.json', SCOPES)
        creds = flow.run_local_server(port=0)
    else:
        # Expired credentials with a refresh token can be renewed in place.
        creds.refresh(Request())

    # Cache the credentials in token.pickle for the next run.
    with open('utils/token.pickle', 'wb') as token:
        pickle.dump(creds, token)

# Gmail API client used by the following cells.
service = build('gmail', 'v1', credentials=creds)

In [169]:
# Get the list of emails.
# Gmail search query: only messages sent from this address.
query = "from: prismahome@prismasystems.com.ar"

# Fetch up to 50 matching message references; maxResults controls how many
# are returned.
result = service.users().messages().list(maxResults=50, userId='me', q = query).execute()
# Fall back to an empty list when the response has no 'messages' entry (e.g.
# nothing matched the query), so iterating over `messages` does not fail on None.
messages = result.get('messages', [])

In [235]:
# One (purchase_date, amount) tuple per processed message.
results = []

# Matches an amount such as "$ 1.234,56": optional thousands groups followed
# by an optional two-digit decimal part. Defined once, outside the loop.
regex = r'\$\s*([0-9]+(?:[.,][0-9]{3})*(?:[.,][0-9]{2})?)'

# `messages or []` keeps the loop safe when no messages were listed (None).
for message in tqdm(messages or []):
    # Get info from mail
    txt = service.users().messages().get(userId='me', id=message['id']).execute()

    temp = pd.DataFrame(txt['payload']['headers'])
    # Use an empty subject when the Subject header is missing instead of
    # failing with IndexError.
    subject_values = temp.loc[temp.name == 'Subject', 'value'].values
    subject = subject_values[0] if len(subject_values) else ''

    # Skip debit notifications. The previous check joined two "not in" tests
    # with "or", which is true for any subject lacking either spelling, so
    # nothing was ever skipped. The comparison is also case-insensitive now.
    subject_lower = subject.lower()
    if 'débito' in subject_lower or 'debito' in subject_lower:
        continue

    # Get amount spent: the first amount found in the message snippet.
    texto = txt['snippet']
    match = re.search(regex, texto)
    if match is None:
        # Report and skip messages without a recognisable amount instead of
        # aborting the whole run with IndexError.
        tqdm.write(f"No amount found in message {message['id']}; skipped")
        continue
    amount = convert_to_float(match.group(1))

    # Get date: taken from the email's Date header.
    purchase_date = temp.loc[temp.name == 'Date', 'value'].values[0]
    purchase_date = parser.parse(purchase_date).date()

    # Save results
    results.append((purchase_date, amount))

100%|██████████| 50/50 [00:17<00:00,  2.83it/s]


In [236]:
# Show the collected (purchase_date, amount) tuples.
results

[(datetime.date(2024, 5, 27), 10880.0),
 (datetime.date(2024, 5, 26), 34999.3),
 (datetime.date(2024, 5, 26), 38432.37),
 (datetime.date(2024, 5, 25), 14400.0),
 (datetime.date(2024, 5, 24), 10999.0),
 (datetime.date(2024, 5, 23), 600.0),
 (datetime.date(2024, 5, 23), 50000.0),
 (datetime.date(2024, 5, 23), 50000.0),
 (datetime.date(2024, 5, 23), 50000.0),
 (datetime.date(2024, 5, 20), 1761.02),
 (datetime.date(2024, 5, 20), 28843.84),
 (datetime.date(2024, 5, 20), 31238.36),
 (datetime.date(2024, 5, 16), 23530.63),
 (datetime.date(2024, 5, 16), 14675.5),
 (datetime.date(2024, 5, 15), 119999.0),
 (datetime.date(2024, 5, 14), 1588.41),
 (datetime.date(2024, 5, 12), 97495.0),
 (datetime.date(2024, 5, 10), 3782.0),
 (datetime.date(2024, 5, 6), 6142.04),
 (datetime.date(2024, 5, 5), 5075.0),
 (datetime.date(2024, 5, 4), 16240.0),
 (datetime.date(2024, 5, 4), 2539.79),
 (datetime.date(2024, 5, 4), 4300.0),
 (datetime.date(2024, 5, 4), 1326.78),
 (datetime.date(2024, 4, 30), 1300.0),
 (datet