First run the installs:

In [1]:
%pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib

Collecting google-api-python-client
  Downloading google_api_python_client-2.151.0-py2.py3-none-any.whl.metadata (6.7 kB)
Collecting google-auth-httplib2
  Using cached google_auth_httplib2-0.2.0-py2.py3-none-any.whl.metadata (2.2 kB)
Collecting google-auth-oauthlib
  Downloading google_auth_oauthlib-1.2.1-py2.py3-none-any.whl.metadata (2.7 kB)
Collecting protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0.dev0,>=3.19.5 (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0.dev0,>=1.31.5->google-api-python-client)
  Downloading protobuf-4.25.5-cp37-abi3-macosx_10_9_universal2.whl.metadata (541 bytes)
Downloading google_api_python_client-2.151.0-py2.py3-none-any.whl (12.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.5/12.5 MB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hUsing cached google_auth_httplib2-0.2.0-py2.py3-none-any.whl (9.3 kB)
Downloading google_auth_oauthlib-1.2.1-py2.py3-none-any.whl (24

In [12]:
import os.path
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import base64
from bs4 import BeautifulSoup
import pandas as pd
import re
import json

# If modifying these scopes, delete the file token.json.
SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"]

# Define a file path to store previous email IDs
PREVIOUS_EMAILS_FILE = "previous_emails.json"

def clean_text(text):
    text = re.sub(r'http\S+|www\.\S+', '', text)
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r'\b(unsubscribe|click here|preferences|privacy)\b', '', text, flags=re.IGNORECASE)
    text = re.sub(r'<[^>]+>', '', text)
    return text.strip()

def get_email_content(payload):
    if 'parts' in payload:
        for part in payload['parts']:
            if part['mimeType'] == 'text/plain':
                text_content = base64.urlsafe_b64decode(part['body']['data']).decode('utf-8')
                return clean_text(text_content)
            elif part['mimeType'] == 'text/html':
                html_content = base64.urlsafe_b64decode(part['body']['data']).decode('utf-8')
                text_content = BeautifulSoup(html_content, "html.parser").get_text()
                return clean_text(text_content)
    elif 'body' in payload:
        text_content = base64.urlsafe_b64decode(payload['body']['data']).decode('utf-8')
        return clean_text(text_content)
    return "No content available"

def get_email_category(message):
    if 'labelIds' in message:
        labels = message['labelIds']
        if 'CATEGORY_PROMOTIONS' in labels:
            return 'Promotions'
        elif 'CATEGORY_SOCIAL' in labels:
            return 'Social'
        elif 'CATEGORY_PERSONAL' in labels:
            return 'Primary'
    return 'Unknown'

def load_previous_emails():
    if os.path.exists(PREVIOUS_EMAILS_FILE):
        with open(PREVIOUS_EMAILS_FILE, 'r') as f:
            return set(json.load(f))
    return set()

def save_current_emails(email_ids):
    with open(PREVIOUS_EMAILS_FILE, 'w') as f:
        json.dump(list(email_ids), f)

def get_latest_emails(service, num_emails=10):
    results = service.users().messages().list(userId='me', maxResults=num_emails, q="is:inbox").execute()
    messages = results.get('messages', [])

    # Load previous email IDs to identify new emails
    previous_email_ids = load_previous_emails()
    current_email_ids = set()
    email_data = []

    for msg in messages:
        email_id = msg['id']
        current_email_ids.add(email_id)
        
        # Flag as "New" if not seen before, else "Seen"
        is_new = "New" if email_id not in previous_email_ids else "Seen"
        
        message = service.users().messages().get(userId='me', id=email_id, format='full').execute()
        headers = message['payload']['headers']
        date = next((h['value'] for h in headers if h['name'] == 'Date'), "No Date")
        sender = next((h['value'] for h in headers if h['name'] == 'From'), "Unknown Sender")
        subject = next((h['value'] for h in headers if h['name'] == 'Subject'), "No Subject")
        
        # Determine category based on labels
        category = get_email_category(message)
        
        # Get the email content
        email_content = get_email_content(message['payload'])

        # Append data to list with Category and Status before Message Content and Subject
        email_data.append([date, sender, category, is_new, email_content, subject])

    # Save current email IDs for the next run
    save_current_emails(current_email_ids)

    # Create DataFrame with the modified column order
    df = pd.DataFrame(email_data, columns=["Date Received", "Sender", "Category", "Status", "Message Content", "Subject"])
    return df

def main():
    creds = None
    if os.path.exists("token.json"):
        creds = Credentials.from_authorized_user_file("token.json", SCOPES)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file("credentials.json", SCOPES)
            creds = flow.run_local_server(port=0)
        with open("token.json", "w") as token:
            token.write(creds.to_json())

    try:
        # Call the Gmail API
        service = build("gmail", "v1", credentials=creds)
        
        # Fetch the latest 10 emails in a DataFrame
        df = get_latest_emails(service, num_emails=10)
        df.to_csv("latest_emails.csv", index=False)  # Save to CSV
        print(df)

    except HttpError as error:
        print(f"An error occurred: {error}")

if __name__ == "__main__":
    main()


                           Date Received  \
0        Sat, 09 Nov 2024 10:49:02 +0000   
1         Sat, 9 Nov 2024 09:20:00 +0000   
2         Fri, 8 Nov 2024 17:03:44 +0400   
3        Sat, 09 Nov 2024 09:11:59 +0000   
4        Sat, 09 Nov 2024 03:02:12 -0600   
5        Sat, 09 Nov 2024 08:42:28 +0000   
6        Sat, 09 Nov 2024 08:05:34 +0000   
7  Sat, 9 Nov 2024 13:40:54 +1300 (NZDT)   
8         Sat, 9 Nov 2024 04:46:49 +0100   
9  Sat, 09 Nov 2024 03:00:00 +0000 (UTC)   

                                              Sender    Category Status  \
0                   Glovo <toktok@info.glovoapp.com>  Promotions    New   
1  Marriott Bonvoy <marriottbonvoy@email-marriott...     Unknown   Seen   
2  "First Abu Dhabi Bank – FAB" <donotreply@crm.b...  Promotions   Seen   
3    Emma from Bolt <france@rides-marketing.bolt.eu>  Promotions   Seen   
4            Boggi Milano <no-reply@email.boggi.com>  Promotions   Seen   
5                 Veepee <newsletter@news.veepee.fr>  Promotions 

In [13]:
import pandas as pd

# Load the CSV file
df = pd.read_csv("latest_emails.csv")

# Filter for new emails only
new_emails_df = df[df['Status'] == 'New']

# Create a list of concatenated strings: "Sender - Subject - Message Content"
email_strings = new_emails_df.apply(lambda row: f"{row['Sender']} - {row['Subject']} - {row['Message Content']}", axis=1).tolist()

# Print or use the list as needed
print(email_strings)


['Glovo <toktok@info.glovoapp.com> - Vuelve a Glovo Prime y te regalamos el 50% de tus suscripción - Y envíos GRATIS ilimitados 🤑 Hero ( ) ********************************** Glovo Prime te espera con un 50% de descuento en tu próximo mes ********************************** Utiliza el código: B2PRIMEESPITTLJO Suscríbete ( ) Suscríbete ( ) Hero ( ) ( ) ¡Hola Hugo! Podrías haber ahorrado 81.3€ con PrimeSegún los gastos de envío que has pagado desde que usas Glovo. Son todo ventajas, Hugo. Además de recuperar tus envíos gratis ilimitados en lo que quieras, vas a pagar un 50% menos por tu suscripción en el próximo mes. Para que le saques todavía más provecho a Glovo Prime👇 Utiliza el código: B2PRIMEESPITTLJO ---------------------------------- Más de 10.000 restaurantes seleccionados. ( ) ---------------------------------- Muchísimo más que comida. Pide la compra del súper, regalos de última hora y lo que necesites de la parafarmacia. ( ) Suscríbete ( ) Suscríbete ( ) Tu plan trae todas estas