# Staging Code

In [1]:
from openai import OpenAI
from dotenv import load_dotenv
import os

# Load variables from a local .env file into the process environment.
load_dotenv()

# The key is looked up at runtime so it never has to appear in the notebook.
api_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

In [2]:
user_input = "Hello, suggest a trip plan in Tokyo"

# The system message constrains the reply; the user message carries the request.
messages = [
    {
        "role": "system",
        "content": "Only suggest 1 activity for the trip plan",
    },
    {
        "role": "user",
        "content": user_input,
    },
]

completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=messages,
)

In [None]:
# Only the first returned choice is read; its message content is the reply text.
first_choice = completion.choices[0]
re_json = first_choice.message.content
print(re_json)

# GMAIL API

In [None]:
import os.path

from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

### Documentation Code

In [117]:
# Scope requested from Google: read-only access to Gmail.
SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"]

creds = None
# The file token.json stores the user's access and refresh tokens, and is
# created automatically when the authorization flow completes for the first
# time.
if os.path.exists("token.json"):
    creds = Credentials.from_authorized_user_file("token.json", SCOPES)
# If there are no (valid) credentials available, let the user log in.
if not creds or not creds.valid:
    if creds and creds.expired and creds.refresh_token:
        # An expired token that carries a refresh token is renewed in place.
        creds.refresh(Request())
    else:
        # Otherwise run the local-server consent flow using credentials.json.
        flow = InstalledAppFlow.from_client_secrets_file(
            "credentials.json", SCOPES
        )
        creds = flow.run_local_server(port=0)
    # Save the credentials for the next run (reached after either branch above).
    with open("token.json", "w") as token:
        token.write(creds.to_json())

# Build the Gmail v1 client and list the account's labels.
service = build("gmail", "v1", credentials=creds)
results = service.users().labels().list(userId="me").execute()
# Falls back to an empty list when the response has no "labels" key.
labels = results.get("labels", [])

### Refactored Code

In [None]:
# Read by get_credentials(): when truthy, the "installed" (desktop) OAuth
# client configuration is used for the login flow.
local = True

def get_credentials(token_path="token.json", scopes=None):
    '''
    Gets Google OAuth credentials for the user, reusing a cached token if possible

    A token stored at token_path is loaded first. If it is missing or not
    valid, it is refreshed (when it is expired and carries a refresh token) or
    replaced by running the local-server consent flow. Credentials obtained
    that way are written back to token_path.

    Args:
        token_path: (str) the token.json file unique to local users
        scopes: (list[str] | None) OAuth scopes to request; defaults to
            read-only Gmail access when None
    Returns:
        The credentials object (loaded, refreshed, or newly authorized)
    Raises:
        NotImplementedError: the module-level ``local`` flag is falsy; only the
            desktop ("installed") client flow is implemented here
        RuntimeError: GOOGLE_CLIENT_ID_DESK or GOOGLE_CLIENT_SECRET_DESK is
            not set in the environment
    '''
    if scopes is None:
        scopes = ["https://www.googleapis.com/auth/gmail.readonly"]

    creds = None
    if os.path.exists(token_path):
        creds = Credentials.from_authorized_user_file(token_path, scopes)

    # A cached token that is still valid needs no refresh and no rewrite.
    if creds and creds.valid:
        return creds

    if creds and creds.expired and creds.refresh_token:
        creds.refresh(Request())
    else:
        # Previously a falsy `local` left client_config unbound and crashed
        # with an UnboundLocalError; fail with an explicit message instead.
        if not local:
            raise NotImplementedError(
                "get_credentials only supports the local desktop OAuth flow; "
                "set local = True"
            )
        client_id = os.environ.get("GOOGLE_CLIENT_ID_DESK")
        client_secret = os.environ.get("GOOGLE_CLIENT_SECRET_DESK")
        if not client_id or not client_secret:
            raise RuntimeError(
                "GOOGLE_CLIENT_ID_DESK and GOOGLE_CLIENT_SECRET_DESK must be "
                "set in the environment"
            )
        client_config = {
            "installed": {
                "client_id": client_id,
                "client_secret": client_secret,
                "auth_uri": "https://accounts.google.com/o/oauth2/auth",
                "token_uri": "https://oauth2.googleapis.com/token"
            }
        }
        flow = InstalledAppFlow.from_client_config(client_config, scopes)
        creds = flow.run_local_server(port=0)

    # Persist refreshed or newly issued credentials for the next run.
    with open(token_path, "w") as token_file:
        token_file.write(creds.to_json())
    return creds

def build_gmail_service(creds):
    '''
    Builds a Gmail API v1 client
    Args:
        creds: credentials object handed to the client builder
    Returns:
        The object returned by build("gmail", "v1", ...)
    '''
    gmail_service = build("gmail", "v1", credentials=creds)
    return gmail_service

# Obtain credentials with the default token path and scopes, then build the
# Gmail client that the cells below use as `service`.
creds = get_credentials()
service = build_gmail_service(creds=creds)

# Doubly Refactored

In [12]:
from flask import redirect, url_for, request, session
from google_auth_oauthlib.flow import Flow
from pip._vendor import cachecontrol
from google.oauth2 import id_token
import google.auth.transport.requests
import os
import requests
from dotenv import load_dotenv

# Load the OAuth client settings from a local .env file.
load_dotenv()

web_redirect = "https://zhangtaskscheduler-45d23d6f047a.herokuapp.com/oauth2callback"
local_redirect = "http://localhost:5000/oauth2callback"

# Lets the OAuth library run over plain HTTP, which the http://localhost
# redirect needs. NOTE(review): this should not be set in a deployed app.
os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

# "web" client settings; id and secret come from the environment.
web_client_settings = {
    "client_id": os.environ.get("GOOGLE_CLIENT_ID_WEB"),
    "client_secret": os.environ.get("GOOGLE_CLIENT_SECRET_WEB"),
    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
    "token_uri": "https://oauth2.googleapis.com/token",
    "redirect_uris": [web_redirect, local_redirect],
}

# Profile, email and OpenID identity scopes.
requested_scopes = [
    "https://www.googleapis.com/auth/userinfo.profile",
    "https://www.googleapis.com/auth/userinfo.email",
    "openid",
]

flow = Flow.from_client_config(
    client_config={"web": web_client_settings},
    scopes=requested_scopes,
    redirect_uri=local_redirect,
)

# The call returns the URL to send the user to, plus the generated state value.
authorization_url, state = flow.authorization_url()
print("Authorization URL:", authorization_url)

Authorization URL: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=137875663308-0qel8djda7mbimqj4kdpll89f8kkobpd.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A5000%2Foauth2callback&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.profile+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email+openid&state=OY479pyUbQJS3hTQp2TeXGSnP6ynPl&access_type=offline


### Lists Past Messages

In [9]:
# Id's for messages
# The "messages" key may be missing from the response (e.g. nothing to list),
# so fall back to an empty list instead of raising KeyError. This mirrors how
# the labels response is read with .get("labels", []).
results = service.users().messages().list(userId="me").execute().get("messages", [])

print(results)

[{'id': '195de2c7843b2ee9', 'threadId': '195dcc60a537d914'}, {'id': '195de2c37e195b0f', 'threadId': '195dcf7e22f35651'}, {'id': '195de2c03ff2a559', 'threadId': '195dcf7e22f35651'}, {'id': '195de0daae4431bd', 'threadId': '195dcf7e22f35651'}, {'id': '195dde09a05a5c59', 'threadId': '195dde09a05a5c59'}, {'id': '195ddd25d831a0c2', 'threadId': '195dcf7e22f35651'}, {'id': '195ddd224a32d9e1', 'threadId': '195dcf7e22f35651'}, {'id': '195ddaf3cc27fff5', 'threadId': '195dcf7e22f35651'}, {'id': '195ddae28609c436', 'threadId': '195dcf7e22f35651'}, {'id': '195ddadf5efe933d', 'threadId': '195dcf7e22f35651'}, {'id': '195dd331fe5ecc50', 'threadId': '195dcf7e22f35651'}, {'id': '195dd32eb1981e4b', 'threadId': '195dcf7e22f35651'}, {'id': '195dd21133facdaf', 'threadId': '195dd21133facdaf'}, {'id': '195dd1792bde6b86', 'threadId': '195dd1792bde6b86'}, {'id': '195dcf7e22f35651', 'threadId': '195dcf7e22f35651'}, {'id': '195dce02f93849f7', 'threadId': '195dce02f93849f7'}, {'id': '195dcd144a4f8c24', 'threadId': 

### Gets messages based on found ID

In [120]:
# Fetch one message in "full" format by a known id and keep it in a list.
fetched_message = (
    service.users()
    .messages()
    .get(userId="me", id="195d36cc119281ab", format="full")
    .execute()
)
res_messages = [fetched_message]
# found snippet

### Narrowing down the Return Message

All the information I need for this project is in the 'payload'.

In [127]:
first_message = res_messages[0]

# base, contains headers and the data in ["parts"]
# (.get yields None if the message has no "payload" key)
res = first_message.get("payload")
res

{'partId': '',
 'mimeType': 'multipart/alternative',
 'filename': '',
 'headers': [{'name': 'Delivered-To', 'value': 'mickey.zhang@colby.edu'},
  {'name': 'Received',
   'value': 'by 2002:a05:6a10:d1d5:b0:5ec:e349:56a1 with SMTP id m21csp3453854pxv;        Wed, 26 Mar 2025 10:07:55 -0700 (PDT)'},
  {'name': 'X-Google-Smtp-Source',
   'value': 'AGHT+IGJUgXh3E+OcnCAHJ9ekYMPMq2qZEZ9XimXmC3lVPCoF8O0Mvb9l8j6abYMz2M9B2dKJR9P'},
  {'name': 'X-Received',
   'value': 'by 2002:a05:6512:3e2a:b0:549:91c6:970d with SMTP id 2adb3069b0e04-54b011ce9a8mr196727e87.5.1743008874766;        Wed, 26 Mar 2025 10:07:54 -0700 (PDT)'},
  {'name': 'ARC-Seal',
   'value': 'i=1; a=rsa-sha256; t=1743008874; cv=none;        d=google.com; s=arc-20240605;        b=NR9NOFQIpH8gwHSon/pibibGu4PioQI78aa83xGUptIKfA6ZYJd8xI4ZQC3l28SYgh         B6H/uXl3qfkavq0hzG/OGA9TWSVAT00aX/MU5mnhN3PxEBUP0J6FscTFtcCq22C+Ol71         jr8gsyx782jy5TKp81Y72J3yks28Hl6/sAYat4Hwra26OYW8HS+0FRdTt/LCUDUl1pHB         enHvLG5SZST8L/Zmz+tFpIEL52+Da

### Retrieved the Subject Line for Gmail Messages

In [108]:
# Print the value of every header whose name is exactly "Subject".
for header in res.get("headers"):
    if header['name'] == "Subject":
        print(header.get('value'))

Re: Cookie Recipe/Ingredients for Cookies & Cocoa Thursday


In [109]:
# Display the payload's "parts" entry (None if the key is absent).
res.get("parts")

[{'partId': '0',
  'mimeType': 'text/plain',
  'filename': '',
  'headers': [{'name': 'Content-Type', 'value': 'text/plain; charset="UTF-8"'},
   {'name': 'Content-Transfer-Encoding', 'value': 'quoted-printable'}],
  'body': {'size': 3013,
   'data': 'SGkgSmVubmlmZXIsIEhpIE1pY2tleSwNCg0KSSBqdXN0IHdhbnRlZCB0byBsZXQgeW91IGtub3cgdGhhdCBJIGhhdmUgYWxsIHRoZSBpdGVtcyBvbiB5b3VyIGxpc3QgZm9yDQp0aGUgQ29va2llIGFuZCBDb2NvYSBldmVudC4NCg0KSSBsZWZ0IHRoZSBpbmdyZWRpZW50cyBpbiB0aGUgUHVnaCBraXRjaGVuIG9uIHRoZSB0YWJsZSAgaW4gYSBXYWxtYXJ0IGJhZw0KYW5kIHRoZSBtaWxrICh3aG9sZSBhbmQgb2F0KSwgYnV0dGVyLCBhbmQgd2hpcHBlZCBjcmVhbSBpbiB0aGUgZnJpZGdlLg0KDQpUYWtlIGNhcmUsDQpLaW0NCg0KT24gTW9uLCBNYXIgMjQsIDIwMjUgYXQgNDo1MeKAr1BNIEplbm5pZmVyIExpYW5nIDxqbGlhbmcyNUBjb2xieS5lZHU-IHdyb3RlOg0KDQo-IENvcnJlY3QsIEkgY2hlY2tlZCBhbGwgdGhlIGV4cGlyYXRpb24gZGF0ZXMgZm9yIHRoZSBpbmdyZWRpZW50cyB3ZeKAmXJlDQo-IHVzaW5nIDopDQo-DQo-IFRoYW5rIHlvdSEhDQo-DQo-IEtpbmQgcmVnYXJkcywNCj4gSmVubmlmZXINCj4NCj4gT24gTWFyIDI0LCAyMDI1LCBhdCAxNjoxOCwgS2ltYmVybHkgV2Fs

In [129]:
# Print the still-encoded body data string of the first part.
print(res.get("parts")[0]['body']['data'])

SGkgSmVubmlmZXIsIEhpIE1pY2tleSwNCg0KSSBqdXN0IHdhbnRlZCB0byBsZXQgeW91IGtub3cgdGhhdCBJIGhhdmUgYWxsIHRoZSBpdGVtcyBvbiB5b3VyIGxpc3QgZm9yDQp0aGUgQ29va2llIGFuZCBDb2NvYSBldmVudC4NCg0KSSBsZWZ0IHRoZSBpbmdyZWRpZW50cyBpbiB0aGUgUHVnaCBraXRjaGVuIG9uIHRoZSB0YWJsZSAgaW4gYSBXYWxtYXJ0IGJhZw0KYW5kIHRoZSBtaWxrICh3aG9sZSBhbmQgb2F0KSwgYnV0dGVyLCBhbmQgd2hpcHBlZCBjcmVhbSBpbiB0aGUgZnJpZGdlLg0KDQpUYWtlIGNhcmUsDQpLaW0NCg0KT24gTW9uLCBNYXIgMjQsIDIwMjUgYXQgNDo1MeKAr1BNIEplbm5pZmVyIExpYW5nIDxqbGlhbmcyNUBjb2xieS5lZHU-IHdyb3RlOg0KDQo-IENvcnJlY3QsIEkgY2hlY2tlZCBhbGwgdGhlIGV4cGlyYXRpb24gZGF0ZXMgZm9yIHRoZSBpbmdyZWRpZW50cyB3ZeKAmXJlDQo-IHVzaW5nIDopDQo-DQo-IFRoYW5rIHlvdSEhDQo-DQo-IEtpbmQgcmVnYXJkcywNCj4gSmVubmlmZXINCj4NCj4gT24gTWFyIDI0LCAyMDI1LCBhdCAxNjoxOCwgS2ltYmVybHkgV2FsdG9uLVRyYWprb3Zza2kgPGt3YWx0b250QGNvbGJ5LmVkdT4NCj4gd3JvdGU6DQo-DQo-IO-7vw0KPiBUaGFua3MgSmVubmlmZXIhDQo-DQo-IEFuZCBub25lIG9mIHRoZSBpdGVtcyB0aGF0IHlvdSBmb3VuZCBpbiB0aGUga2l0Y2hlbiBhcmUgZXhwaXJlZCByaWdodD8_Pw0KPiBKdXN0IGRvdWJsZSBjaGVja2luZy4NCj4NCj4gSSds

In [12]:
import base64

data_str = "VGhpcyBpcyB3aHkgSSBsaWtlIENTLg0KDQpNZSB0ZXN0aW5nIG91dCBHb29nbGUncyBHbWFpbCBBUEkNCltpbWFnZTogaW1hZ2UucG5nXQ0KW2ltYWdlOiBpbWFnZS5wbmddDQotLSANCltpbWFnZTogQ29sYnldIDxodHRwczovL3d3dy5jb2xieS5lZHUvPg0KDQpNaWNrZXkgWmhhbmcNCg0KKEhlL0hpbSkNCg0KKlByZXNpZGVudCBvZiBDb2xieSBRdWVzdEJyaWRnZSBDaGFwdGVyKg0KDQoqQWZmaWxpYXRlIG9mIEZMSSBAIENvbGJ5Kg0KDQpDb21wdXRlciBTY2llbmNlOiBBSQ0KDQoyMDctNjgwLTU2NTANCg0KTWVldD8g8J"

# ChatGPTed
def decode_base64_data(data_str):
    """
    Decode a URL-safe base64 string into text.

    Args:
        data_str: (str) URL-safe base64 text, with or without trailing "=" padding
    Returns:
        (str) the decoded bytes read as UTF-8; bytes that are not valid UTF-8
        become the U+FFFD replacement character
    """
    # Top the input up to a multiple of four characters; -len % 4 is 0 when
    # the length is already aligned, so nothing is appended in that case.
    pad_width = -len(data_str) % 4
    padded = data_str + "=" * pad_width
    raw = base64.urlsafe_b64decode(padded.encode("utf-8"))
    return raw.decode("utf-8", errors="replace")


# Sanity check: decode the sample string defined above and print the text.
print(decode_base64_data(data_str))

This is why I like CS.

Me testing out Google's Gmail API
[image: image.png]
[image: image.png]
-- 
[image: Colby] <https://www.colby.edu/>

Mickey Zhang

(He/Him)

*President of Colby QuestBridge Chapter*

*Affiliate of FLI @ Colby*

Computer Science: AI

207-680-5650

Meet? �


In [10]:
from bs4 import BeautifulSoup

class EmailData:
    """
    Holds the subject and body text of a single email.

    A body that parses into more HTML tags, or more <img> tags, than the given
    thresholds is replaced by a short placeholder instead of being stored.
    """

    # Stored in place of a body that exceeds the tag thresholds.
    FILTERED_PLACEHOLDER = "[Filtered: Message contains too many HTML elements or images]"

    def __init__(self, subject, message, html_tag_threshold=100, img_tag_threshold=10):
        """
        Initialize an EmailData instance. Optionally, filter out messages
        that contain too many HTML or image tags.

        Args:
            subject: subject line of the email
            message: (str) decoded body text of the email
            html_tag_threshold: (int) maximum number of HTML tags tolerated
            img_tag_threshold: (int) maximum number of <img> tags tolerated
        """
        self.subject = subject
        # Filter the message if it appears to be "massive" in HTML content.
        if self._is_massive_html(message, html_tag_threshold, img_tag_threshold):
            self.message = self.FILTERED_PLACEHOLDER
        else:
            self.message = message

    def _is_massive_html(self, message, html_threshold, img_threshold):
        """
        Check if a message has too many HTML tags or <img> tags.
        Returns True if the message exceeds the thresholds.
        """
        # An empty or missing body has no tags, so there is nothing to parse.
        if not message:
            return False

        # Parse the message as HTML. If it's plain text, BeautifulSoup will have few tags.
        soup = BeautifulSoup(message, "html.parser")
        html_tag_count = len(soup.find_all())
        img_tag_count = len(soup.find_all('img'))

        # Exceeding either threshold marks the message as "massive".
        return html_tag_count > html_threshold or img_tag_count > img_threshold

    def get_subject(self):
        '''
        getter for subject
        '''
        return self.subject

    def get_message(self):
        '''
        getter for message

        This was previously a second definition of get_subject that returned
        the subject, so no accessor for the message existed.
        '''
        return self.message

    def to_string(self):
        '''
        to_string method for EmailData
        '''
        return f">>>>>>>START OF '{self.subject}' Email <<<<<<<\nSUBJECT: {self.subject} \nMESSAGE: {self.message}\n >>>>>>>END OF '{self.subject}' Email <<<<<<<"

    def __str__(self):
        '''
        Lets print(email) and str(email) produce the same text as to_string()
        '''
        return self.to_string()

In [13]:
MAX_EMAILS = 10  # upper bound on how many listed messages are fetched


def _find_subject(headers):
    """Return the value of the first Subject header, or None when there is none."""
    for header in headers or []:
        # Compare case-insensitively so "subject"/"SUBJECT" are found too.
        if header.get("name", "").lower() == "subject":
            return header.get("value")
    return None


def _first_body_data(part):
    """Return the first non-empty body data found in part or its sub-parts, else ""."""
    body = part.get("body") or {}
    if body.get("size") and body.get("data"):
        return body["data"]
    # Container parts have an empty body; their content sits in nested parts.
    for child in part.get("parts") or []:
        found = _first_body_data(child)
        if found:
            return found
    return ""


# A response without a "messages" key is treated as an empty listing.
results = service.users().messages().list(userId="me").execute().get("messages", [])

# creates a list of messages found with id
res_messages = []
for message_ref in results[:MAX_EMAILS]:
    extracted_data = (
        service.users()
        .messages()
        .get(userId="me", id=message_ref.get("id"), format="full")
        .execute()
    )
    payload = extracted_data.get("payload") or {}
    subject = _find_subject(payload.get("headers"))
    # Payloads with no "parts" keep their text in the payload's own body; the
    # helper checks that first, so such messages no longer raise.
    decoded_message = decode_base64_data(_first_body_data(payload))
    res_messages.append(EmailData(subject, decoded_message))

In [14]:
# Print each collected email in its delimited text form for a visual check.
for collected_email in res_messages:
    rendered = collected_email.to_string()
    print(rendered)

>>>>>>>START OF 'Fwd: Spreadsheet shared with you: "INT Program Spring Break Portland Trip (Responses)"' Email <<<<<<<
SUBJECT: Fwd: Spreadsheet shared with you: "INT Program Spring Break Portland Trip (Responses)" 
MESSAGE: ---------- Forwarded message ---------
From: Michael Freese (via Google Sheets) <drive-shares-dm-noreply@google.com
>
Date: Fri, Mar 28, 2025, 08:42
Subject: Spreadsheet shared with you: "INT Program Spring Break Portland
Trip (Responses)"
To: <qzhang27@colby.edu>


Michael Freese shared a spreadsheet
[image: Header profile photo]
Michael Freese (mfreese@colby.edu) has invited you to *edit* the following
spreadsheet:
INT Program Spring Break Portland Trip (Responses)
<https://urldefense.com/v3/__https://docs.google.com/spreadsheets/d/1XEGYTIEGmwrt6vqud0UpAK9tZUJntC21BUep1Gb2sEg/edit?usp=sharing_eil&ts=67e69917__;!!P_zEGVH0kSMiWA!AC7rPTncgF-HzE_2VYLhoVofHhZN4fpWQ-nbmkhC7vjHqWtcQJ0mPmlIhHzyHkgD0L55kDmEgMOErl3AfYt_UsG1Fm8_auK0$>

Open
<https://urldefense.com/v3/__http

In [15]:
user_input = [email.to_string() for email in res_messages]

# Adjacent string literals are concatenated with nothing in between, so every
# sentence ends with its own punctuation and a trailing space. The original
# pieces ran together ("...next emailMake the summaries...").
system_prompt = (
    "Take a list of strings as input; each string in the list is an instance of an "
    "email with a SUBJECT field and a MESSAGE field. "
    "Make sense of each email and provide a summary of each. "
    "If the SUBJECT or MESSAGE of an email string is nonsensical, i.e. not "
    "understandable human language, remove it from the output completely and "
    "continue to the next email. "
    "Make the summaries short and digestible while maintaining integrity. "
    "Output a STRING of a list of JSON objects containing the SUBJECT and MESSAGE summaries. "
    "Remove any excessive styling or syntax from your return."
)

messages = [
    {
        "role": "system",
        "content": system_prompt,
    },
    {
        "role": "user",
        # The whole list is sent as its Python string representation.
        "content": str(user_input),
    },
]

completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=messages,
)

In [16]:
import json

re_json = completion.choices[0].message.content

# The reply is free text that should contain JSON. Tolerate a missing reply and
# a surrounding Markdown code fence (```json ... ```), either of which would
# otherwise make json.loads fail.
json_text = (re_json or "").strip()
if json_text.startswith("```"):
    json_text = json_text.strip("`").strip()
    if json_text.lower().startswith("json"):
        json_text = json_text[len("json"):]
print(json.loads(json_text))

[{'SUBJECT': 'Fwd: Spreadsheet shared with you: "INT Program Spring Break Portland Trip (Responses)"', 'MESSAGE': 'Michael Freese shared a spreadsheet regarding responses for the INT Program Spring Break trip to Portland.'}, {'SUBJECT': 'Re: Portland trip itinerary today and contact info for Mickey and Michael', 'MESSAGE': 'Google Maps location for pickup is confirmed for 5 PM at the Visitors Center on the edge of Old Port.'}, {'SUBJECT': 'Re: Portland trip itinerary today and contact info for Mickey and Michael', 'MESSAGE': 'Mai requests clarification on the pickup location for the trip.'}, {'SUBJECT': 'Re: Portland trip itinerary today and contact info for Mickey and Michael', 'MESSAGE': 'Michelle Phan and Thu Nguyen are already near the Maine port, so no need to wait for them.'}, {'SUBJECT': 'Re: Portland trip itinerary today and contact info for Mickey and Michael', 'MESSAGE': 'Mickey suggests the best exit from JCPENNEY for the trip pickup and confirms the time.'}]
