In [1]:
import os
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

# Gmail API OAuth scopes (read, send, modify). authenticate_gmail() passes this
# list both when loading cached credentials from token.json and when starting
# a new consent flow from credentials.json.
# NOTE(review): a token.json created under a different scope list presumably
# needs to be deleted before changing these — confirm.
SCOPES = [
    'https://www.googleapis.com/auth/gmail.readonly',
    'https://www.googleapis.com/auth/gmail.send',
    'https://www.googleapis.com/auth/gmail.modify'
]

def authenticate_gmail():
    """
    Obtain Gmail API credentials, reusing the local token cache when possible.

    Cached credentials are read from 'token.json' if that file exists. When
    they are missing or not valid, they are either refreshed (expired and a
    refresh token is available) or re-created through the local-server OAuth
    flow configured by 'credentials.json'. Any refreshed or newly created
    credentials are written back to 'token.json'.

    Returns:
        The credentials object to hand to ``build(...)``.
    """
    token_path = 'token.json'

    credentials = None
    if os.path.exists(token_path):
        credentials = Credentials.from_authorized_user_file(token_path, SCOPES)

    # Fast path: the cached token is still usable, so nothing is rewritten.
    if credentials and credentials.valid:
        return credentials

    refreshable = credentials and credentials.expired and credentials.refresh_token
    if refreshable:
        credentials.refresh(Request())
    else:
        oauth_flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
        # port=0 is forwarded unchanged to run_local_server.
        credentials = oauth_flow.run_local_server(port=0)

    with open(token_path, 'w') as token_file:
        token_file.write(credentials.to_json())
    return credentials

# Authenticate and build the Gmail service.
# `creds` and `service` are module-level names; the cells below pass `service`
# to the helper functions, so this cell must run first.
creds = authenticate_gmail()
service = build('gmail', 'v1', credentials=creds)

Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=885913184552-86iog4pkulojgr4r46vjse61ukaa3ds8.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A59359%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fgmail.readonly+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fgmail.send+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fgmail.modify&state=lwa6v5eXSHzXY6II9nNHnVphMcqln8&access_type=offline


In [2]:
def get_last_unread_emails(service, max_results=5):
    """
    Look up unread inbox messages through the Gmail API.

    Issues one ``users().messages().list`` request for the authenticated user
    ('me') with the search query "is:unread is:inbox", capped at
    ``max_results`` entries.

    Returns:
        The 'messages' list from the response (callers read each entry's
        'id'), or an empty list when the response has no 'messages' key.
    """
    list_request = service.users().messages().list(
        userId='me',
        q="is:unread is:inbox",
        maxResults=max_results,
    )
    return list_request.execute().get('messages', [])

# Fetch up to 5 unread inbox messages. `unread_emails` is a module-level list
# that the next cell iterates over to download each message.
unread_emails = get_last_unread_emails(service, max_results=5)

# Example: print the ID of every returned entry (one line per message).
for msg in unread_emails:
    print(f"Email ID: {msg['id']}")

Email ID: 1992e64cfce773d5
Email ID: 1992e3b96bf7c350
Email ID: 1992e34587b134ea
Email ID: 1992df8114139fb5
Email ID: 1992dcf0049d811d


In [3]:
def get_email_data(service, message_id):
    """
    Fetch one Gmail message and return its key headers and plain-text body.

    Args:
        service: Gmail API client exposing ``users().messages().get(...)``.
        message_id: ID of the message to download (requested with format='full').

    Returns:
        dict with 'id' and 'body' always present, plus 'from', 'date' and
        'subject' for each of those headers found on the message (header names
        are matched case-insensitively; if a header repeats, the last one wins).
        For multipart messages 'body' is the first non-empty text/plain part,
        searched depth-first through nested containers, or '' when none
        exists. For single-part messages it is the decoded payload body
        whatever its MIME type.
    """
    import base64  # local import: this cell has no module-level base64 import

    def first_plain_text(parts):
        # Depth-first search so text/plain nested inside multipart/alternative
        # (typical for mail with attachments) is still found.
        for part in parts:
            if part.get('mimeType') == 'text/plain':
                found = part.get('body', {}).get('data')
                if found:
                    return found
            nested = first_plain_text(part.get('parts', []))
            if nested:
                return nested
        return None

    msg = service.users().messages().get(userId='me', id=message_id, format='full').execute()
    payload = msg['payload']
    email_data = {'id': message_id}

    # Extract headers; header field names are case-insensitive.
    for header in payload['headers']:
        key = header['name'].lower()
        if key in ('from', 'date', 'subject'):
            email_data[key] = header['value']

    # Extract plain text body
    if 'parts' in payload:
        data = first_plain_text(payload['parts'])
    else:
        data = payload['body'].get('data')

    body = ""
    if data:
        # Restore any stripped '=' padding so the decoder does not raise, and
        # replace undecodable bytes instead of failing on non-UTF-8 mail.
        padded = data + '=' * (-len(data) % 4)
        body = base64.urlsafe_b64decode(padded).decode('utf-8', errors='replace')

    email_data['body'] = body
    return email_data

# Fetch essential info for the last 5 unread emails.
# Uses `unread_emails` and `service` from the cells above. Every parsed message
# is kept in `emails_info`; the print shows sender, subject, date and the first
# 200 characters of the body followed by a 40-character '=' separator.
emails_info = []
for msg in unread_emails:
    info = get_email_data(service, msg['id'])
    emails_info.append(info)
    print(f"From: {info.get('from')}\nSubject: {info.get('subject')}\nDate: {info.get('date')}\n---\n{info.get('body')[:200]}...\n{'='*40}")

From: GTBank Ltd <gtbank@gtbank.com>
Subject: Daily FX Rate For International Transactions on Your GTBank Naira Card
Date: 09 Sep 2025 12:13:02 -0000
---
<!DOCTYPE html><html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" lang="en"><head><title></title><meta http-equiv="Content-Type" content="text/html; charse...
From: Akintunde Adedolapo Abigail <invitations@linkedin.com>
Subject: You have an invitation
Date: Tue, 9 Sep 2025 11:28:01 +0000 (UTC)
---
Akintunde is waiting for your response
        
Hi Akintobi, I’d like to join your professional network

Akintunde Adedolapo Abigail
Administrative Virtual Assistant | Project Management Enthusia...
From: OpenAI Developer Community <notifications@openai1.discoursemail.com>
Subject: [OpenAI Developer Community] Summary
Date: Tue, 09 Sep 2025 11:20:06 +0000
---
A brief summary of [OpenAI Developer Community][1] since 2025-09-02 11:03:44 UTC

66 New Topics
1 Unread Notifications
5027 New Users


  
 

In [4]:
!pip install beautifulsoup4

Collecting beautifulsoup4
  Downloading beautifulsoup4-4.13.4-py3-none-any.whl.metadata (3.8 kB)
Collecting soupsieve>1.2 (from beautifulsoup4)
  Downloading soupsieve-2.7-py3-none-any.whl.metadata (4.6 kB)
Downloading beautifulsoup4-4.13.4-py3-none-any.whl (187 kB)
Downloading soupsieve-2.7-py3-none-any.whl (36 kB)
Installing collected packages: soupsieve, beautifulsoup4

   ---------------------------------------- 0/2 [soupsieve]
   ---------------------------------------- 0/2 [soupsieve]
   ---------------------------------------- 0/2 [soupsieve]
   -------------------- ------------------- 1/2 [beautifulsoup4]
   -------------------- ------------------- 1/2 [beautifulsoup4]
   -------------------- ------------------- 1/2 [beautifulsoup4]
   -------------------- ------------------- 1/2 [beautifulsoup4]
   -------------------- ------------------- 1/2 [beautifulsoup4]
   -------------------- ------------------- 1/2 [beautifulsoup4]
   -------------------- ------------------- 1/2 [beaut

In [4]:
import base64
from bs4 import BeautifulSoup

def remove_hyperlinks(text):
    """Return ``text`` with every 'http' followed by non-whitespace characters deleted."""
    import re

    link_pattern = re.compile(r'http\S+')
    return link_pattern.sub('', text)

def get_email_data(service, message_id):
    """
    Fetch one Gmail message and return its key headers and cleaned text.

    Args:
        service: Gmail API client exposing ``users().messages().get(...)``.
        message_id: ID of the message to download (requested with format='full').

    Returns:
        dict with 'id' and 'text' always present, plus 'from', 'date' and
        'subject' for each of those headers found on the message (header names
        are matched case-insensitively; if a header repeats, the last one wins).
        'text' prefers the first non-empty text/plain part and falls back to
        the first non-empty text/html part; both are searched depth-first
        through nested multipart containers. The decoded content is run
        through BeautifulSoup's ``get_text()`` and ``remove_hyperlinks`` and
        then stripped. It is '' when no body data is found.
    """
    def first_body_data(parts, mime_type):
        # Depth-first search so bodies nested inside multipart/alternative
        # (typical for mail with attachments) are still found.
        for part in parts:
            if part.get('mimeType') == mime_type:
                found = part.get('body', {}).get('data')
                if found:
                    return found
            nested = first_body_data(part.get('parts', []), mime_type)
            if nested:
                return nested
        return None

    msg = service.users().messages().get(userId='me', id=message_id, format='full').execute()
    payload = msg['payload']
    email_data = {'id': message_id}

    # Extract headers; header field names are case-insensitive.
    for header in payload['headers']:
        key = header['name'].lower()
        if key in ('from', 'date', 'subject'):
            email_data[key] = header['value']

    # Extract text content, prefer plain text, fallback to HTML
    if 'parts' in payload:
        data = (
            first_body_data(payload['parts'], 'text/plain')
            or first_body_data(payload['parts'], 'text/html')
        )
    else:
        data = payload['body'].get('data')

    if data:
        # Restore any stripped '=' padding so the decoder does not raise, and
        # replace undecodable bytes instead of failing on non-UTF-8 mail.
        padded = data + '=' * (-len(data) % 4)
        text = base64.urlsafe_b64decode(padded.encode('UTF-8')).decode('UTF-8', errors='replace')
        # The parser runs on plain text too; get_text() drops any markup.
        soup = BeautifulSoup(text, 'html.parser')
        clean_text = remove_hyperlinks(soup.get_text())
        email_data['text'] = clean_text.strip()
    else:
        email_data['text'] = ""

    return email_data

# Usage example:
# email_data = get_email_data(service, message['id'])

In [10]:
!pip install openai



In [34]:
import openai
from dotenv import load_dotenv
import os
# Load settings via python-dotenv so the key does not have to be hardcoded here.
load_dotenv()
# os.getenv returns None when OPENAI_API_KEY is unset, so a missing key is not
# reported at this point.
openai.api_key = os.getenv("OPENAI_API_KEY")

In [5]:
import os
import re
import base64
from bs4 import BeautifulSoup
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
import openai

# --- CONFIGURATION ---
# Settings for the consolidated pipeline: OpenAI key from the environment and
# the Gmail OAuth scopes used by authenticate_gmail().
from dotenv import load_dotenv
import os
load_dotenv()
# os.getenv returns None when OPENAI_API_KEY is unset, so a missing key is not
# reported at this point.
openai.api_key = os.getenv("OPENAI_API_KEY")
# Gmail scopes requested for both the cached token and a fresh consent flow.
SCOPES = [
    'https://www.googleapis.com/auth/gmail.readonly',
    'https://www.googleapis.com/auth/gmail.send',
    'https://www.googleapis.com/auth/gmail.modify'
]

# --- AUTHENTICATION ---
def authenticate_gmail():
    """
    Obtain Gmail API credentials, reusing the local token cache when possible.

    Cached credentials are read from 'token.json' if that file exists. When
    they are missing or not valid, they are either refreshed (expired and a
    refresh token is available) or re-created through the local-server OAuth
    flow configured by 'credentials.json'. Any refreshed or newly created
    credentials are written back to 'token.json'.

    Returns:
        The credentials object to hand to ``build(...)``.
    """
    token_path = 'token.json'

    credentials = None
    if os.path.exists(token_path):
        credentials = Credentials.from_authorized_user_file(token_path, SCOPES)

    # Fast path: the cached token is still usable, so nothing is rewritten.
    if credentials and credentials.valid:
        return credentials

    refreshable = credentials and credentials.expired and credentials.refresh_token
    if refreshable:
        credentials.refresh(Request())
    else:
        oauth_flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
        # port=0 is forwarded unchanged to run_local_server.
        credentials = oauth_flow.run_local_server(port=0)

    with open(token_path, 'w') as token_file:
        token_file.write(credentials.to_json())
    return credentials

# Authenticate once and build the Gmail client; `service` is the module-level
# handle passed to process_unread_emails() at the bottom of this cell.
creds = authenticate_gmail()
service = build('gmail', 'v1', credentials=creds)

# --- UTILITY FUNCTIONS ---
def remove_hyperlinks(text):
    """
    Strip link-like tokens from ``text``.

    Applies four substitutions in order, each deleting the matched run of
    non-whitespace characters: anything starting at 'http', then any token
    containing '.com', '.net' or '.org'.
    """
    link_patterns = (
        r'http\S+',
        r'\S+\.com\S*',
        r'\S+\.net\S*',
        r'\S+\.org\S*',
    )
    for pattern in link_patterns:
        text = re.sub(pattern, '', text)
    return text

def chunk_text(text, max_chars):
    """
    Split ``text`` into chunks of at most ``max_chars`` characters.

    The text is split on runs of newlines and paragraphs are packed greedily
    into chunks, joined by single newlines. A paragraph longer than
    ``max_chars`` is cut into consecutive ``max_chars``-sized slices so that
    no chunk exceeds the limit (previously such a paragraph was emitted as
    one oversized chunk).

    Args:
        text: Text to split.
        max_chars: Maximum chunk length. For values <= 0 no slicing is done
            and every non-empty paragraph becomes its own chunk.

    Returns:
        List of stripped, non-empty chunk strings.
    """
    chunks, current_chunk = [], ''
    for paragraph in re.split(r'\n+', text):
        if max_chars > 0 and len(paragraph) > max_chars:
            # Hard-split an oversized paragraph; it cannot fit in any chunk whole.
            pieces = [
                paragraph[start:start + max_chars]
                for start in range(0, len(paragraph), max_chars)
            ]
        else:
            pieces = [paragraph]
        for piece in pieces:
            # current_chunk carries a trailing '\n' that strip() removes later,
            # so the stripped chunk stays within max_chars.
            if len(current_chunk) + len(piece) <= max_chars:
                current_chunk += piece + '\n'
            else:
                chunks.append(current_chunk.strip())
                current_chunk = piece + '\n'
    if current_chunk:
        chunks.append(current_chunk.strip())
    # Drop empties, e.g. the placeholder appended when the first piece overflows.
    return [c for c in chunks if c.strip()]

# --- EMAIL FETCHING AND CLEANING ---
def get_email_data(service, message_id):
    """
    Fetch one Gmail message and return its key headers, cleaned text and raw payload.

    Args:
        service: Gmail API client exposing ``users().messages().get(...)``.
        message_id: ID of the message to download (requested with format='full').

    Returns:
        Tuple ``(email_data, message)``. ``email_data`` has 'subject', 'from'
        and 'date' (first matching header, matched case-insensitively, '' when
        absent) and 'text'. 'text' prefers the first non-empty text/plain part
        and falls back to the first non-empty text/html part; both are
        searched depth-first through nested multipart containers. The decoded
        content is run through BeautifulSoup's ``get_text()`` and
        ``remove_hyperlinks``. It is '' when no body data is found.
        ``message`` is the unmodified API response.
    """
    def first_body_data(parts, mime_type):
        # Depth-first search so bodies nested inside multipart/alternative
        # (typical for mail with attachments) are still found.
        for part in parts:
            if part.get('mimeType') == mime_type:
                found = part.get('body', {}).get('data')
                if found:
                    return found
            nested = first_body_data(part.get('parts', []), mime_type)
            if nested:
                return nested
        return None

    message = service.users().messages().get(userId='me', id=message_id, format='full').execute()
    payload = message['payload']
    headers = payload['headers']

    def header_value(wanted):
        # Header field names are case-insensitive; first match wins.
        return next((h['value'] for h in headers if h['name'].lower() == wanted), '')

    email_data = {
        'subject': header_value('subject'),
        'from': header_value('from'),
        'date': header_value('date'),
    }

    if 'parts' in payload:
        data = (
            first_body_data(payload['parts'], 'text/plain')
            or first_body_data(payload['parts'], 'text/html')
        )
    else:
        data = payload['body'].get('data')

    if data:
        # Restore any stripped '=' padding so the decoder does not raise, and
        # replace undecodable bytes instead of failing on non-UTF-8 mail.
        padded = data + '=' * (-len(data) % 4)
        decoded = base64.urlsafe_b64decode(padded.encode('UTF-8')).decode('UTF-8', errors='replace')
        soup = BeautifulSoup(decoded, 'html.parser')
        email_data['text'] = remove_hyperlinks(soup.get_text())
    else:
        email_data['text'] = ''
    return email_data, message

# --- SUMMARIZATION ---
def summarize_email(email_text, model="gpt-3.5-turbo"):
    """
    Summarize ``email_text`` with an OpenAI chat model.

    The system prompt asks for a short narrative paragraph followed by 2–4
    bullet points, under 120 words. The request is attempted up to three
    times. Failed attempts are printed and followed by an exponentially
    growing pause (1s, then 2s) so transient errors such as rate limits are
    not retried immediately.

    Args:
        email_text: Text to summarize.
        model: Chat model name passed to the API.

    Returns:
        The summary text ('' if the API returns no content), or the fixed
        message "Summary could not be generated due to an error." when every
        attempt fails. Always a string, so callers can concatenate it.
    """
    import time  # local import: only needed for the retry backoff

    system_prompt = (
        "You are a professional email summarizer that creates hybrid summaries. "
        "Use a short narrative paragraph to introduce or explain the context, "
        "then follow up with 2–4 bullet points for specific details. "
        "Avoid repeating information. Keep it under 120 words."
    )

    user_input = f"Summarize this email with a hybrid style: {email_text}"

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input}
    ]

    max_retries = 3
    for attempt in range(max_retries):
        try:
            response = openai.chat.completions.create(
                model=model,
                messages=messages,
                temperature=0.8,
                top_p=1,
                presence_penalty=0.4,
                frequency_penalty=0.3
            )
            # content may be None; normalise so callers always receive a str.
            return response.choices[0].message.content or ""
        except Exception as e:
            # Deliberately broad: any failure is reported and retried.
            print(f"OpenAI error: {e}")
            if attempt == max_retries - 1:
                return "Summary could not be generated due to an error."
            # Back off before the next attempt instead of retrying immediately.
            time.sleep(2 ** attempt)

# --- MAIN LOGIC TO PROCESS UNREAD EMAILS ---
def process_unread_emails(service, max_chars=3000, max_results=3):
    """
    Build a text report summarizing unread inbox messages.

    Lists up to ``max_results`` messages carrying the INBOX and UNREAD labels,
    downloads each one, and skips those whose cleaned text is blank. The
    remaining text is split into chunks of ``max_chars`` and each chunk is
    summarized. A combined summary of 125 words or more is re-summarized, at
    most twice.

    Returns:
        One string with a From / Subject / Timestamp / Link / Summary section
        per processed message ('' when nothing was processed).
    """
    listing = service.users().messages().list(
        userId='me',
        labelIds=['INBOX', 'UNREAD'],
        maxResults=max_results,
    ).execute()

    report_sections = []
    for message in listing.get('messages', []):
        # The raw API message returned alongside the parsed data is not needed here.
        email_data, _raw_message = get_email_data(service, message['id'])
        text = email_data.get('text', '')
        if not text.strip():
            continue

        summary = "".join(
            summarize_email(chunk) + "\n"
            for chunk in chunk_text(text, max_chars)
            if chunk.strip()
        )

        # Optional refinement: condense an over-long summary, two passes at most.
        for _pass in range(2):
            if len(summary.split()) < 125:
                break
            summary = summarize_email(summary)

        report_sections.append(
            f"From: {email_data['from']}\n"
            f"Subject: {email_data['subject']}\n"
            f"Timestamp: {email_data['date']}\n"
            f"Link: https://mail.google.com/mail/u/0/#inbox/{message['id']}\n"
            f"Summary:\n{summary}\n\n"
        )
    return "".join(report_sections)

# --- RUN ---
# Summarize unread inbox mail with the defaults (max_results=3, max_chars=3000)
# and print the combined report. This calls the Gmail and OpenAI APIs.
summaries = process_unread_emails(service)
print(summaries)

From: GTBank Ltd <gtbank@gtbank.com>
Subject: Daily FX Rate For International Transactions on Your GTBank Naira Card
Timestamp: 09 Sep 2025 12:13:02 -0000
Link: https://mail.google.com/mail/u/0/#inbox/1992e64cfce773d5
Summary:
The email promotes a payment service for purchasing favorite items and offers an option to unsubscribe from receiving further emails.

- Payment service for all purchases
- Easy way to pay for favorite items
- Unsubscribe option available


From: Akintunde Adedolapo Abigail <invitations@linkedin.com>
Subject: You have an invitation
Timestamp: Tue, 9 Sep 2025 11:28:01 +0000 (UTC)
Link: https://mail.google.com/mail/u/0/#inbox/1992e3b96bf7c350
Summary:
Akintobi received a LinkedIn invitation from Akintunde seeking to connect professionally.
- Akintunde is an Administrative Virtual Assistant interested in project management and client relationships.
- They have 4 common connections in Lagos.
- LinkedIn suggests using Premium InMail for better communication effectiven