In [None]:
import os.path
import pickle
import base64
import json
import re
from bs4 import BeautifulSoup

from google.auth.transport.requests import Request
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

from openai import OpenAI

client = OpenAI(
  api_key=""
)

In [4]:
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

def get_gmail_service():
    creds = None
    #use your own path
    #use your own path
    #use your own path
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)

    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:

            flow = InstalledAppFlow.from_client_secrets_file(
                'client_secret_349717392152-a31crp53p3kkaq5imdm5q41vdfka5e7n.apps.googleusercontent.com.json', SCOPES)
            creds = flow.run_local_server(port=0)

        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)
    
    service = build('gmail', 'v1', credentials=creds)
    print("Gmail Service Created")
    return service

In [5]:
service = get_gmail_service()

Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=349717392152-a31crp53p3kkaq5imdm5q41vdfka5e7n.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A57869%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fgmail.readonly&state=nDq7bG0gOnA7rL1masIg7lnKrIOj5T&access_type=offline
Gmail Service Created


In [6]:
def list_messages(service, user_id='me', query='in:inbox'):
    """
    List all messages in the user's inbox.
    """
    messages = []
    results = service.users().messages().list(userId=user_id, q=query).execute()
    if 'messages' in results:
        messages.extend(results['messages'])
    #Next page
    while 'nextPageToken' in results:
        page_token = results['nextPageToken']
        results = service.users().messages().list(userId=user_id, q=query, pageToken=page_token).execute()
        if 'messages' in results:
            messages.extend(results['messages'])
    return messages

def get_message(service, msg_id, user_id='me'):
    """
    Get a message by id.
    """
    message = service.users().messages().get(userId=user_id, id=msg_id, format='full').execute()
    return message

In [7]:
messages = list_messages(service, query='in:inbox')
for msg_meta in messages[:3]:
    print(msg_meta)
    print('-----------------------------------')

{'id': '19548b2158ec5d93', 'threadId': '19548b2158ec5d93'}
-----------------------------------
{'id': '195474f449e1f1f9', 'threadId': '195474f449e1f1f9'}
-----------------------------------
{'id': '195432eff3686e1e', 'threadId': '195432eff3686e1e'}
-----------------------------------


In [11]:
messages = list_messages(service, query='in:inbox')
unread_ids = []
for msg_meta in messages[:10]:
    msg = get_message(service, msg_meta['id'])
    #print(msg)
    #print('-----------------------------------')
    
    if 'UNREAD' in msg['labelIds']:
        unread_ids.append(msg['id'])
    
print(unread_ids)

['19548b2158ec5d93', '195474f449e1f1f9', '195432eff3686e1e', '19542cec33f29fa1', '195374d51dbed939', '1951a0cffb053199']


In [12]:
print(type(unread_ids))

<class 'list'>


In [15]:
def process_unread_emails(service, unread_ids):
    processed_emails = []  # Final list of processed emails
    for msg_id in unread_ids:
        try:
            # Get message by ID
            message = service.users().messages().get(userId='me', id=msg_id, format='full').execute()
        except Exception as e:
            # Skip any errors and continue to the next email
            print(f"Failed to fetch email {msg_id}: {e}")
            continue

        # Initialize email info dict
        email_info = {"ID": msg_id, "From": None, "Subject": None, "Date": None, "Content": None}

        # Decode email headers
        headers = message.get('payload', {}).get('headers', [])
        for header in headers:
            name = header.get('name')
            value = header.get('value', '')
            if name == 'From':
                email_info["From"] = value
            elif name == 'Subject':
                email_info["Subject"] = value
            elif name == 'Date':
                email_info["Date"] = value

        # Decode email body
        payload = message.get('payload', {})
        body_content = ""

        def _get_part_content(part):
            
            mime_type = part.get('mimeType')
            body = part.get('body', {})
            data = body.get('data')
            if data:
                # Decode base64 data
                try:
                    text = base64.urlsafe_b64decode(data).decode('utf-8', errors='ignore')
                except Exception as e:
                    text = ""  # Skip any decoding errors
                return text
            # If no data key, try to get parts recursively
            for subpart in part.get('parts', []):
                text = _get_part_content(subpart)
                if text:
                    return text
            return ""

        # Check if the email is multipart
        if 'parts' in payload:
            # Iterate over parts of the multipart email
            for part in payload['parts']:
                if part.get('mimeType') == 'text/plain':
                    body_content = _get_part_content(part)
                    if body_content:
                        break
            # If plain text part is not found, try to get HTML part
            if not body_content:
                for part in payload['parts']:
                    if part.get('mimeType') == 'text/html':
                        body_content = _get_part_content(part)
                        if body_content:
                            # Remove HTML tags
                            body_content = re.sub(r'<[^>]+>', '', body_content)
                            break
            # If both plain text and HTML parts are not found, get the first part
            if not body_content:
                for part in payload['parts']:
                    body_content = _get_part_content(part)
                    if body_content:
                        break
        else:
            # If the email is not multipart, get the email body
            body_content = _get_part_content(payload)

        # Clean up the email content 
        if body_content:
            body_content = re.sub(r'http\S+', '', body_content)
            body_content = re.sub(r'\s+', ' ', body_content)
            body_content = body_content.strip()

        email_info["Content"] = body_content if body_content is not None else ""
        processed_emails.append(email_info)
    return processed_emails


In [17]:
result = process_unread_emails(service, unread_ids)

In [18]:
print(result)
print(type(result))

[{'ID': '19548b2158ec5d93', 'From': 'Kaggle <noreply@kaggle.com>', 'Subject': 'Introducing Kaggle Packages: Enhance Your MLOps Skills in the Drawing with LLMs Competition', 'Date': 'Thu, 27 Feb 2025 10:36:24 -0800', 'Content': "Kaggle Hi Rubiriya, Starting today, you can use a new Kaggle feature that brings real-world MLOps skills into your competition solutions. Kaggle Packages make your specialized AI solutions reusable, a valuable element of production ML engineering. Get ready to level up how you build solutions for Kaggle Competitions, starting with the Drawing with LLMs Competition. This competition challenges you to unleash your inner artist and coding wizard! Given a text prompt describing an image, your task is to generate code that renders it as closely as possible. Your solution must include a reusable model built with Kaggle Packages. Total Prizes: $50,000 Entry Deadline: May 20, 2025 Learn More Your feedback will help shape the future of Kaggle Packages. We're considering 

In [26]:
print(result[2])
print(type(result[2]))
print(result[2]['Content'])

{'ID': '195432eff3686e1e', 'From': 'Kaggle <noreply@kaggle.com>', 'Subject': 'Register for our 5-Day Gen AI Intensive Course with Google', 'Date': 'Wed, 26 Feb 2025 08:48:34 -0800', 'Content': "Kaggle Hi Rubiriya, We are excited to bring back the Gen AI Intensive course with Google on March 31 - April 4 and would love for you to join! Last year's course was a success with over 140,000 participants! This no-cost course, designed by Google ML researchers and engineers, explores key Generative AI technologies through theory, hands-on learning, and community engagement. Conclude this 5-day course with a practical GenAI challenge through a capstone project. Register for the Course Everyday, participants will receive the following in their inbox: 📚 Daily Assignments This includes newly published white papers, a companion podcast (generated by NotebookLM), and companion code labs in AI Studio. 💬 Discord Discussion Threads Kaggle's Discord server will have a dedicated channel for focused discu

In [37]:
def generate_summary(email_msg):
    prompt = f"You are a Email Digest Agent which can accurately grasp the content of an email by analyzing the sender's address, subject, and body, and generate insightful email summaries within 50 words.So consider the requirement and generate the summary of this email:It is from {email_msg['From']}, the subject is {email_msg['Subject']}, date is {email_msg['Date']}, and the eamil body is {email_msg['Content']}.The summary should include the main points and be concise.The summary should directly tell the user what they need to know, what to pay attention to, or what actions to take."
    
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        store=True
    )
    # Extract the summary from the response
    summary = completion.choices[0].message
    return summary.content

In [None]:
for email in result:
    summary = generate_summary(email)
    print(summary)

In [41]:
def classify_email(summary):
    system_message = (
        "You are an email classifier. You will be provided with the content of an email. "
        "Your task is to classify the email into one of the following categories: "
        "Receipt, Promotion, Ads, Notice, Career Development, Education, News. "
        "Respond with only the single category name and no other text."
    )

    user_message = f"Email content:\n{summary}\nCategory?"

    response = client.chat.completions.create(
        model="gpt-4-turbo", 
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_message}
        ],
        temperature=0
    )

    
    category = response.choices[0].message.content
    allowed_categories = ["Receipt", "Promotion", "Ads", "Notice", "Career Development", "Education", "News"]
    if category not in allowed_categories:
        return "Uncategorized"
    return category

In [38]:
summary_test = generate_summary(result[2])

In [39]:
print(summary_test)

Kaggle invites you to register for the free 5-Day Gen AI Intensive Course with Google from March 31 - April 4. The course includes daily assignments, livestream seminars, and a capstone project. Don't miss the opportunity to engage with experts and win prizes. Register now!


In [42]:
print(classify_email(summary_test))

Education
