In [3]:
from googleapiclient.discovery import build
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
import pandas as pd

# OAuth scope requested from Google. NOTE: gmail.modify allows reading,
# labelling and trashing messages, but not immediate permanent deletion
# (users.messages.delete needs the full https://mail.google.com/ scope).
SCOPES = ['https://www.googleapis.com/auth/gmail.modify']
# Step 1. Authenticate and build Gmail service
def get_gmail_service(token_path="token.json", creds_path="credentials.json"):
    """Return an authenticated Gmail API (v1) service object.

    Credentials are loaded from ``token_path`` when that file exists. If they
    are missing or invalid they are either refreshed (expired credentials that
    carry a refresh token) or obtained through the interactive OAuth flow
    configured by ``creds_path``. Whenever new or refreshed credentials are
    produced they are written back to ``token_path``.

    Args:
        token_path: Path of the JSON file used to cache the user's token.
        creds_path: Path of the OAuth client-secrets JSON file.

    Returns:
        The object returned by ``build("gmail", "v1", credentials=creds)``.
    """
    # Imported here so the function works regardless of which other cells have
    # been run: `Request` was never imported in this notebook (NameError on the
    # refresh path) and `os` is only imported in a later cell.
    import os
    from google.auth.transport.requests import Request

    creds = None

    # Load existing token if available
    if os.path.exists(token_path):
        creds = Credentials.from_authorized_user_file(token_path, SCOPES)

    # If no valid token, go through OAuth flow
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            # Silent refresh: no browser interaction needed.
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(creds_path, SCOPES)
            creds = flow.run_local_server(port=0)  # opens a browser for login
        # Save token for next time
        with open(token_path, 'w') as token:
            token.write(creds.to_json())

    return build("gmail", "v1", credentials=creds)

# Step 2. Fetch emails
def fetch_emails(service, max_results=100):
    """List up to ``max_results`` message stubs from the authenticated mailbox.

    The Gmail API returns at most 500 messages per ``messages.list`` call, so
    larger requests are satisfied by following ``nextPageToken`` until enough
    messages have been collected or the mailbox is exhausted.

    Args:
        service: Gmail API service object (as returned by ``get_gmail_service``).
        max_results: Maximum total number of messages to return.

    Returns:
        A list of the ``messages`` entries returned by the API (each entry is
        used elsewhere in this notebook via its ``"id"`` key). The list is
        empty when the API reports no messages.
    """
    page_limit = 500  # documented per-request ceiling for maxResults
    collected = []
    page_token = None

    while len(collected) < max_results:
        request_kwargs = {
            "userId": "me",
            "maxResults": min(max_results - len(collected), page_limit),
        }
        if page_token:
            request_kwargs["pageToken"] = page_token

        response = service.users().messages().list(**request_kwargs).execute()
        collected.extend(response.get("messages", []))

        page_token = response.get("nextPageToken")
        if not page_token:
            # No further pages: the mailbox has fewer messages than requested.
            break

    return collected[:max_results]

# Step 3. Classify emails
def classify_email(subject, sender):
    """Label an email "DELETE", "KEEP" or "REVIEW" from keywords in its subject.

    Matching is a case-insensitive substring test. Delete keywords are checked
    before keep keywords, so a subject containing both is labelled "DELETE".
    ``sender`` is accepted but not consulted.
    """
    # Ordered rule table: the first label with a matching keyword wins.
    rules = (
        ("DELETE", ("unsubscribe", "promo", "sale", "offer", "newsletter")),
        ("KEEP", ("invoice", "bank", "tax", "contract", "family")),
    )

    normalized = subject.lower()
    for label, keywords in rules:
        for keyword in keywords:
            if keyword in normalized:
                return label

    # Nothing matched: leave the decision to a human.
    return "REVIEW"

# Step 4. Export to CSV
def export_emails(service, emails, filename="emails_review.csv"):
    """Fetch subject/sender for each message, classify it and write a CSV.

    Args:
        service: Gmail API service object.
        emails: Iterable of message stubs, each with an ``"id"`` key.
        filename: Destination CSV path.

    The CSV has the columns ``Message_ID``, ``Subject``, ``Sender`` and
    ``Decision``. ``Message_ID`` is included so that rows can later be mapped
    back to Gmail messages (e.g. for deletion). A header row is written even
    when ``emails`` is empty, so the file can always be read back.
    """
    columns = ["Message_ID", "Subject", "Sender", "Decision"]
    email_data = []
    for e in emails:
        # Only the two headers are needed, so ask for metadata instead of
        # downloading the full message body.
        msg = service.users().messages().get(
            userId="me",
            id=e["id"],
            format="metadata",
            metadataHeaders=["Subject", "From"],
        ).execute()

        # Header names are case-insensitive in email, so normalise them before
        # lookup; tolerate a response without payload/headers.
        raw_headers = msg.get("payload", {}).get("headers", [])
        headers = {h["name"].lower(): h["value"] for h in raw_headers}
        subject = headers.get("subject", "")
        sender = headers.get("from", "")

        decision = classify_email(subject, sender)
        email_data.append({
            "Message_ID": e["id"],
            "Subject": subject,
            "Sender": sender,
            "Decision": decision,
        })

    # Explicit columns keep the header row present for an empty export.
    df = pd.DataFrame(email_data, columns=columns)
    df.to_csv(filename, index=False)
    print(f"Exported {len(df)} emails to {filename}")

In [4]:
import os

# service = get_gmail_service()

In [5]:
# emails = fetch_emails(service, max_results=2000)  # adjust number if you want more
# len(emails)

In [6]:
# export_emails(service, emails, filename="emails_review.csv")

In [7]:
import pandas as pd

# Load the previously exported review sheet from disk.
df = pd.read_csv(filepath_or_buffer="emails_review.csv")

In [8]:
from gpt4all import GPT4All

# Instantiate the local LLM used below to classify emails.
model = GPT4All(model_name="Meta-Llama-3.1-8B-Instruct-128k-Q4_0.gguf")

In [9]:

import asyncio

async def process_dataframe(df: pd.DataFrame):
    """Run ``process_row`` on every row of ``df`` and return the results.

    Rows are taken from ``df.itertuples()``; the returned list holds one
    result per row, in row order.
    """
    pending = []
    for record in df.itertuples():
        pending.append(process_row(record))

    return await asyncio.gather(*pending)

async def process_row(row) -> str:
    """Ask the local model whether one email is important.

    Args:
        row: Any object exposing ``Subject`` and ``Sender`` attributes, e.g. a
            namedtuple produced by ``DataFrame.itertuples()``.

    Returns:
        The text returned by ``model.generate`` for the classification prompt.
    """
    # Empty CSV cells are read back by pandas as NaN; convert them to empty
    # strings so the prompt does not contain the literal text "nan".
    subject = "" if pd.isna(row.Subject) else str(row.Subject)
    sender = "" if pd.isna(row.Sender) else str(row.Sender)

    print(f"DEBUG: {sender}")
    print(f"DEBUG: {subject}")

    prompt = (
        "You are an intelligent email-filtering assistant. Your task is to help me declutter "
        "my Gmail inbox by identifying and filtering out spam, promotional, and non-essential emails.\n\n"
        "### Classification Rules (in order of priority)\n"
        "1. Promotional, marketing, or advertisement emails are NOT important.\n"
        "2. Social justice, political, or advocacy-related content is NOT important, "
        "   even if it includes dates, events, or calls to action.\n"
        "3. Emails about credit reports, credit scores, or credit monitoring are NOT important.\n"
        "4. Newsletters or community announcements (e.g., libraries, local events) are NOT important "
        "   unless they contain explicit requests that affect your personal or professional obligations.\n"
        "5. Only mark an email as IMPORTANT if it clearly relates to personal, work-related, or time-sensitive matters "
        "   that require your direct action (e.g., meeting confirmations, invoices, urgent requests, deadlines).\n\n"
        "### Email Information\n"
        f"Subject: {subject}\n"
        f"Sender: {sender}\n\n"
        "### Response Format\n"
        "IMPORTANT: <Yes or No>\n"
        "REASON: <Brief explanation>\n"
        "CONFIDENCE: <High / Medium / Low>\n"
    )

    # A new chat session is opened for every email.
    with model.chat_session():
        response = model.generate(prompt)

        return response

df_results = await process_dataframe(df[0:1])

DEBUG: Credit Karma <notifications@creditkarma.ca>
DEBUG: Has your score changed Tamim?


In [10]:
# Display the raw model responses. Despite the name, this is a plain list of
# strings (one per processed row), not a DataFrame.
df_results

['Based on the provided classification rules and email information, here\'s my response:\n\n**IMPORTANT:** NO\n**REASON:** The subject line "Has your score changed Tamim?" suggests a query about credit report changes, which falls under rule 3 (emails about credit reports, scores, or monitoring are NOT important).\n**CONFIDENCE:** HIGH\n\nThis classification is based on the clear indication that the email pertains to Credit Karma\'s services and not an urgent matter requiring your direct action.']

In [None]:
# import pandas as pd

# def delete_emails_from_csv(service, filename="emails_review.csv"):
#     # Load the reviewed CSV
#     df = pd.read_csv(filename)
# 
#     # Ensure we only pick rows marked DELETE
#     to_delete = df[df["Decision"].str.upper() == "DELETE"]
# 
#     print(f"Found {len(to_delete)} emails marked for deletion.")
# 
#     for _, row in to_delete.iterrows():
#         msg_id = row.get("Message_ID")  # This must be included in the export step
#         if pd.isna(msg_id):
#             continue
# 
#         try:
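#             # Gmail API call to permanently delete the message. NOTE: messages().delete
#             # requires the full https://mail.google.com/ scope; with gmail.modify use
#             # messages().trash() instead.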
#             service.users().messages().delete(userId="me", id=msg_id).execute()
#             print(f"Deleted email: {row['Subject']} from {row['Sender']}")
#         except Exception as e:
#             print(f"Error deleting {row['Subject']}: {e}")



In [None]:
# def export_emails(service, emails, filename="emails_review.csv"):
#     email_data = []
#     for e in emails:
#         msg = service.users().messages().get(userId="me", id=e["id"]).execute()
#         headers = {h["name"]: h["value"] for h in msg["payload"]["headers"]}
#         subject = headers.get("Subject", "")
#         sender = headers.get("From", "")
#         decision = classify_email(subject, sender)
#         email_data.append({
#             "Message_ID": e["id"],   # Add this line
#             "Subject": subject,
#             "Sender": sender,
#             "Decision": decision
#         })
# 
#     df = pd.DataFrame(email_data)
#     df.to_csv(filename, index=False)
#     print(f"Exported {len(df)} emails to {filename}")
