In [1]:
import os
import base64
import json
import psycopg2
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from datetime import datetime, timedelta

In [14]:
# Time range to fetch, as YYYY-MM-DD strings. get_emails() converts both to
# Unix timestamps and uses them in the Gmail `after:` / `before:` search query.
START_DATE = "2025-02-10"  # Change to your desired start date (YYYY-MM-DD)
END_DATE = "2025-02-18"    # Change to your desired end date (YYYY-MM-DD)

# Gmail API scope: read-only access to the mailbox.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

# PostgreSQL connection settings, passed as keyword arguments to
# psycopg2.connect(). Each value is taken from the matching libpq-style
# environment variable when it is set, so real credentials never have to be
# written into (or committed with) the notebook. The fallbacks are the
# original placeholder values.
DB_CONFIG = {
    "dbname": os.environ.get("PGDATABASE", "emails_db"),
    "user": os.environ.get("PGUSER", "your_user"),
    "password": os.environ.get("PGPASSWORD", "your_password"),
    "host": os.environ.get("PGHOST", "localhost"),
    "port": os.environ.get("PGPORT", "5432"),
}

# Connect to Gmail API
def authenticate_gmail():
    """Return an authorized Gmail API (v1) client.

    Credentials are looked up in this order:

    1. Cached credentials in ``token.json`` that are still valid.
    2. Cached credentials that have expired but carry a refresh token: they
       are refreshed without user interaction.
    3. Otherwise (no cache, or the refresh failed) the interactive OAuth flow
       is run from ``google_creds.json`` via a local server on a free port.

    Whenever new or refreshed credentials are obtained they are written back
    to ``token.json`` in the current working directory.

    Returns:
        The client object produced by ``build("gmail", "v1", ...)``.
    """
    creds = None
    if os.path.exists("token.json"):
        creds = Credentials.from_authorized_user_file("token.json", SCOPES)

    if not creds or not creds.valid:
        refreshed = False
        if creds and creds.expired and creds.refresh_token:
            # Imported here because they are only needed on the refresh path.
            from google.auth.exceptions import RefreshError
            from google.auth.transport.requests import Request

            try:
                creds.refresh(Request())
                refreshed = True
            except RefreshError:
                # Refresh was rejected (e.g. revoked token): fall back to the
                # interactive flow, which is what happened before this path
                # existed.
                refreshed = False

        if not refreshed:
            flow = InstalledAppFlow.from_client_secrets_file("google_creds.json", SCOPES)
            creds = flow.run_local_server(port=0)

        # Cache the new/refreshed credentials for the next run.
        with open("token.json", "w") as token:
            token.write(creds.to_json())

    return build("gmail", "v1", credentials=creds)

def date_to_unix(date_str):
    """Convert a ``YYYY-MM-DD`` date string to a Unix timestamp in whole seconds.

    The string is parsed into a naive ``datetime`` at midnight, so
    ``timestamp()`` interprets it in the machine's local time zone.

    Raises:
        ValueError: if ``date_str`` does not match ``%Y-%m-%d``.
    """
    midnight = datetime.strptime(date_str, "%Y-%m-%d")
    epoch_seconds = midnight.timestamp()
    # Drop the (always zero here) fractional part to get an integer.
    return int(epoch_seconds)

# Get emails within the time range
def get_emails(service, max_results=10):
    """Fetch messages between START_DATE and END_DATE from the Gmail API.

    Args:
        service: Gmail API client, as returned by ``authenticate_gmail()``.
        max_results: Value sent as ``maxResults`` to the list call. Defaults
            to 10, the previously hard-coded value. Only the first page of
            results is read; there is no pagination.

    Returns:
        A list of ``(sender, subject, date, body)`` tuples, one per message.
        ``sender``, ``subject`` and ``date`` are the raw ``From``, ``Subject``
        and ``Date`` header values (with ``"Unknown Sender"``, ``"No Subject"``
        and ``"Unknown Date"`` as fallbacks). ``body`` is the decoded
        text/plain content, or ``""`` when none is available.
    """
    query = f"after:{date_to_unix(START_DATE)} before:{date_to_unix(END_DATE)}"
    listing = service.users().messages().list(
        userId="me", q=query, maxResults=max_results
    ).execute()
    messages = listing.get("messages", [])

    emails = []
    for msg in messages:
        msg_data = service.users().messages().get(userId="me", id=msg["id"]).execute()
        payload = msg_data.get("payload", {})

        # Header names are case-insensitive, so normalise them before lookup.
        headers = {
            header["name"].lower(): header["value"]
            for header in payload.get("headers", [])
        }
        sender = headers.get("from", "Unknown Sender")
        subject = headers.get("subject", "No Subject")
        date = headers.get("date", "Unknown Date")

        if "parts" in payload:
            # Multipart message: look for a text/plain part.
            body = _find_plain_text(payload["parts"])
        else:
            # Single-part message: decode whatever inline body it carries.
            body = _decode_body_data(payload.get("body", {}).get("data"))

        emails.append((sender, subject, date, body))
    return emails


def _decode_body_data(data):
    """Decode a base64url body string to text; return "" when there is no data."""
    if not data:
        return ""
    # Re-add any stripped '=' padding so the decoder does not reject the input.
    padded = data + "=" * (-len(data) % 4)
    # errors="replace" keeps one non-UTF-8 message from aborting the whole fetch.
    return base64.urlsafe_b64decode(padded).decode("utf-8", errors="replace")


def _find_plain_text(parts):
    """Return the decoded text/plain body found in ``parts``, or "" if none.

    Direct text/plain parts are preferred and the last one with inline data
    wins (the behaviour of the original flat loop). Parts without inline data
    are skipped instead of raising KeyError. Only when no direct part matches
    are nested multipart containers searched, depth-first.
    """
    text = ""
    for part in parts:
        data = part.get("body", {}).get("data")
        if part.get("mimeType") == "text/plain" and data:
            text = _decode_body_data(data)
    if text:
        return text

    for part in parts:
        nested_text = _find_plain_text(part.get("parts", []))
        if nested_text:
            return nested_text
    return ""

# Store emails in PostgreSQL
def store_emails_in_db(emails):
    """Insert emails into the PostgreSQL ``emails`` table, creating it if needed.

    Args:
        emails: Iterable of ``(sender, subject, date, body)`` tuples, as
            produced by ``get_emails()``. The third element is stored as-is in
            the TEXT column ``timestamp``.

    The table creation and the inserts are committed as two separate
    transactions. If either fails, that transaction is rolled back, the
    exception propagates to the caller, and the connection is still closed.

    Note: rows are appended with a plain INSERT and the table has no
    uniqueness constraint, so calling this twice with the same emails stores
    them twice.
    """
    conn = psycopg2.connect(**DB_CONFIG)
    try:
        # `with conn` commits on success / rolls back on error (it does not
        # close the connection); `with conn.cursor()` closes the cursor.
        with conn, conn.cursor() as cursor:
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS emails (
                    id SERIAL PRIMARY KEY,
                    sender TEXT,
                    subject TEXT,
                    timestamp TEXT,
                    body TEXT
                );
            """)

        with conn, conn.cursor() as cursor:
            # Parameterized insert; values are never interpolated into the SQL.
            cursor.executemany(
                "INSERT INTO emails (sender, subject, timestamp, body) VALUES (%s, %s, %s, %s)",
                emails,
            )
    finally:
        conn.close()

# Run the pipeline: authenticate, then fetch the emails for the configured
# date range. These statements run at cell level (the __main__ guard below is
# disabled), so `emails` remains available to the following cells.
# if __name__ == "__main__":
service = authenticate_gmail()
emails = get_emails(service)
# Persisting to PostgreSQL is currently disabled; uncomment the two lines
# below to store the fetched emails and report how many were written.
#store_emails_in_db(emails)
#print(f"Stored {len(emails)} emails in the database.")


In [18]:
# Fetching works at this point; the next step is to persist the emails in a
# SQL or vector database.
# Inspect one fetched message as a (sender, subject, date, body) tuple.
# Index 5 needs at least six fetched messages; otherwise this raises IndexError.
emails[5]

('LinkedIn Job Alerts <jobalerts-noreply@linkedin.com>',
 '“data scientist”: Eczacıbaşı Bilişim - DATA SCIENTIST and more',
 'Tue, 18 Feb 2025 04:57:26 +0000 (UTC)',
 'Your job alert for data scientist\r\n30+ new jobs match your preferences.\r\n          \r\nDATA SCIENTIST\r\nEczacıbaşı Bilişim\r\nTürkiye\r\nView job: https://www.linkedin.com/comm/jobs/view/4152720719/?trackingId=Xz%2FAp4wX%2BzG6a4eksFbUdw%3D%3D&refId=ByteString%28length%3D16%2Cbytes%3De16ba638...9079663b%29&lipi=urn%3Ali%3Apage%3Aemail_email_job_alert_digest_01%3BXzBVG9w3R8Wg5eP%2FBYaagg%3D%3D&midToken=AQEsx4nyGTXpHQ&midSig=0EKQFWn239tXE1&trk=eml-email_job_alert_digest_01-job_card-0-view_job&trkEmail=eml-email_job_alert_digest_01-job_card-0-view_job-null-5bx38z~m7a0j5c6~6p-null-null&eid=5bx38z-m7a0j5c6-6p&otpToken=MTEwNzFjZTMxNzJlY2NjNWIxMjQwNGVkNDUxN2VmYjE4YmM4ZDQ0NDkwYWQ4ODYxNzBjNTA5NmM0NjUzNWZmNmY2ZDJkZmEwNmRmOWYwZmI1MGIzY2NiNDJkM2RmOWNkNjJlNWJmZGYyMzI4YTkyMzNkYjQzOSwxLDE%3D\r\n\r\n---------------------------------