In [None]:
import os, re, json
from google_auth_oauthlib.flow import InstalledAppFlow
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
from google.auth.transport.requests import Request
from email.utils import parsedate_to_datetime
import base64, os, re
import email


In [4]:


# OAuth scopes requested from Google: read-only access to the Gmail mailbox.
SCOPES = [
    "https://www.googleapis.com/auth/gmail.readonly",
]

# Relative directory in which the per-account credential files are stored.
TOKENS_DIR = "tokens"


In [5]:
def _ensure_dir() -> None:
    """Create the ``TOKENS_DIR`` directory; do nothing if it already exists."""
    os.makedirs(name=TOKENS_DIR, exist_ok=True)

def _safe(name: str) -> str:
    """Return ``name`` with every character outside ``[a-zA-Z0-9_.-]`` replaced by ``_``.

    The result is used as a file or directory name, e.g.
    ``"absar@example.com"`` becomes ``"absar_example.com"``.
    """
    disallowed = re.compile(r"[^a-zA-Z0-9_.-]")
    return disallowed.sub("_", name)


In [6]:
def login_once_and_save_tokens():
    """
    Run the interactive Google login and store the resulting credentials.

    Meant for the first run per user: starts the OAuth flow described by
    ``credentials.json``, asks Gmail for the address of the signed-in account
    and writes the credentials to ``<TOKENS_DIR>/<_safe(email)>.json``.

    Returns the Gmail address of the account that logged in.
    """
    oauth_flow = InstalledAppFlow.from_client_secrets_file("credentials.json", SCOPES)
    # access_type="offline" together with prompt="consent" ensures that a
    # refresh_token is issued on this first login.
    creds = oauth_flow.run_local_server(port=0, access_type="offline", prompt="consent")

    # Ask Gmail which address these credentials belong to.
    gmail = build("gmail", "v1", credentials=creds)
    email_addr = gmail.users().getProfile(userId="me").execute()["emailAddress"]

    _ensure_dir()
    token_name = f"{_safe(email_addr)}.json"
    token_path = os.path.join(TOKENS_DIR, token_name)
    with open(token_path, "w", encoding="utf-8") as fh:
        fh.write(creds.to_json())

    print(f"✅ Saved credentials for {email_addr} -> {token_path}")
    return email_addr

In [7]:
def load_creds_for(email_addr: str) -> Credentials:
    """
    Load the stored credentials for ``email_addr``.

    Valid credentials are returned as they are. Expired credentials that carry
    a refresh_token are refreshed and written back to the same file first.

    Raises:
        FileNotFoundError: no token file exists for ``email_addr``.
        RuntimeError: the credentials are invalid and cannot be refreshed.
    """
    token_path = os.path.join(TOKENS_DIR, f"{_safe(email_addr)}.json")
    if not os.path.exists(token_path):
        raise FileNotFoundError(f"No saved tokens for {email_addr}. Run login_once_and_save_tokens() first.")

    creds = Credentials.from_authorized_user_file(token_path, SCOPES)
    if creds.valid:
        return creds

    refreshable = creds.expired and creds.refresh_token
    if not refreshable:
        # Nothing to refresh with: the interactive login has to be repeated.
        raise RuntimeError("Saved creds invalid and no refresh_token present. Run login_once_and_save_tokens().")

    creds.refresh(Request())
    # Store the new access token and expiry so the next run can reuse them.
    with open(token_path, "w", encoding="utf-8") as fh:
        fh.write(creds.to_json())
    return creds


In [8]:
def fetch_and_save_emails(email_addr: str, max_emails=10):
    """Fetch messages for ``email_addr`` and save each one as a raw ``.eml`` file.

    Args:
        email_addr: Account whose stored credentials are loaded with
            ``load_creds_for``.
        max_emails: Passed to the Gmail ``messages.list`` call as ``maxResults``.

    Every listed message is downloaded in ``raw`` format and written unchanged
    to ``emails/<message id>.eml``. When the listing is empty, "No emails
    found." is printed and nothing is written.
    """
    creds = load_creds_for(email_addr)
    service = build("gmail", "v1", credentials=creds)

    # Fetch email IDs
    results = service.users().messages().list(userId="me", maxResults=max_emails).execute()
    messages = results.get("messages", [])

    if not messages:
        print("No emails found.")
        return

    os.makedirs("emails", exist_ok=True)

    for msg in messages:
        msg_id = msg["id"]
        msg_data = service.users().messages().get(userId="me", id=msg_id, format="raw").execute()
        # "raw" holds the complete RFC 822 message, base64url-encoded.
        raw_data = base64.urlsafe_b64decode(msg_data["raw"].encode("ASCII"))

        # The decoded bytes already are a valid .eml file; store them as-is.
        eml_path = os.path.join("emails", f"{_safe(msg_id)}.eml")
        with open(eml_path, "wb") as f:
            f.write(raw_data)

        print(f"✅ Saved email: {eml_path}")


In [9]:
def fetch_attachments(service, user_email, msg_id, save_dir):
    """Download every attachment of one Gmail message into ``save_dir``.

    Args:
        service: Gmail API service object used for the requests.
        user_email: Not used; kept so existing callers keep working.
        msg_id: ID of the message whose attachments are fetched.
        save_dir: Directory the files are written to (created if missing).

    Returns:
        List of the file paths that were written, in the order they were found.
    """
    message = service.users().messages().get(userId="me", id=msg_id, format="full").execute()

    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    saved_paths = []

    # Attachments may sit inside nested multipart containers, so walk the
    # whole part tree (depth-first, in document order) instead of only the
    # top-level parts.
    pending = [message.get("payload") or {}]
    while pending:
        part = pending.pop()
        pending.extend(reversed(part.get("parts") or []))

        filename = part.get("filename")
        body = part.get("body") or {}
        if not filename:
            continue
        if "attachmentId" not in body and not body.get("data"):
            continue

        print(f"Found attachment: {filename}")
        if "attachmentId" in body:
            att = service.users().messages().attachments().get(
                userId="me", messageId=msg_id, id=body["attachmentId"]
            ).execute()
            encoded = att["data"]
        else:
            # Without an attachmentId the content is carried inline in body.data.
            encoded = body["data"]

        # Tolerate base64url data whose trailing "=" padding was stripped.
        encoded += "=" * (-len(encoded) % 4)
        file_data = base64.urlsafe_b64decode(encoded.encode("UTF-8"))

        # The filename comes from the email and is untrusted: keep only its
        # last path component so it cannot escape save_dir.
        safe_name = os.path.basename(filename.replace("\\", "/"))
        if safe_name in ("", ".", ".."):
            safe_name = f"attachment_{msg_id}_{len(saved_paths) + 1}"

        filepath = os.path.join(save_dir, safe_name)
        with open(filepath, "wb") as f:
            f.write(file_data)

        print(f"📎 Attachment saved: {filepath}")
        saved_paths.append(filepath)

    return saved_paths

In [10]:
def _decode_part(part):
    """Decode one message part's payload to text using the charset it declares."""
    payload = part.get_payload(decode=True) or b""
    charset = part.get_content_charset() or "utf-8"
    try:
        return payload.decode(charset, errors="replace")
    except LookupError:
        # The part names a charset Python does not know; fall back to UTF-8.
        return payload.decode("utf-8", errors="replace")


def formated_save_data(raw_data , headers, sender,date_, subject, date_str, subject_name, sender_dir):
    """Write a readable text version of one email to ``sender_dir``.

    Args:
        raw_data: Complete raw message as bytes.
        headers: List of ``{"name": ..., "value": ...}`` header dicts; only the
            ``To`` header is read from it (matched case-insensitively).
        sender: Value written on the "From:" line.
        date_: Value written on the "Date:" line.
        subject: Value written on the "Subject:" line.
        date_str: Date fragment used in the file name.
        subject_name: Subject fragment used in the file name.
        sender_dir: Target directory (created if missing).

    Returns:
        Path of the written file, ``<sender_dir>/<date_str>__<subject_name>.txt``.

    The body is the first non-attachment ``text/plain`` part; if a multipart
    message has none, the first non-attachment ``text/html`` part is used.
    """
    # parse into email object
    msg_obj = email.message_from_bytes(raw_data)

    # extract the text body, honouring each part's declared charset
    body = ""
    if msg_obj.is_multipart():
        html_fallback = None
        for part in msg_obj.walk():
            disp = str(part.get("Content-Disposition")).lower()
            if "attachment" in disp:
                continue
            ctype = part.get_content_type()
            if ctype == "text/plain":
                body = _decode_part(part)
                break
            if ctype == "text/html" and html_fallback is None:
                html_fallback = part
        else:
            # No plain-text part found: better HTML than an empty body.
            if html_fallback is not None:
                body = _decode_part(html_fallback)
    else:
        body = _decode_part(msg_obj)

    recipient = next((h["value"] for h in headers if h["name"].lower() == "to"), "")
    separator = "-" * 50

    # formatted content (layout kept identical to earlier output files)
    formatted = f"""
    From: {sender}
    To: {recipient}
    Date: {date_}
    Subject: {subject}

    {separator}
    {body}
    """

    # save as readable text
    if sender_dir:
        os.makedirs(sender_dir, exist_ok=True)
    filepath = os.path.join(sender_dir, f"{date_str}__{subject_name}.txt")
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(formatted)
    return filepath

In [11]:
def check_sync_mail(msg_id,msgs):
    """Report whether ``msg_id`` is already in the sync index and record new IDs.

    Args:
        msg_id: Message ID to look up.
        msgs: Iterable of message dicts with an ``"id"`` key; every ID that is
            not yet in the index is added to it.

    Returns:
        True if ``msg_id`` was in ``emails/index.json`` before this call,
        otherwise False.
    """
    INDEX_FILE = "emails/index.json"

    # load already saved IDs
    if os.path.exists(INDEX_FILE):
        with open(INDEX_FILE, "r") as f:
            saved_ids = set(json.load(f))
    else:
        saved_ids = set()

    # Answer the lookup against the index as it was before this call.
    already_saved = msg_id in saved_ids

    new_ids = {m["id"] for m in (msgs or [])} - saved_ids

    # update index.json
    if new_ids:
        saved_ids |= new_ids
        os.makedirs(os.path.dirname(INDEX_FILE), exist_ok=True)
        with open(INDEX_FILE, "w") as f:
            # sorted() keeps the file content stable between runs
            json.dump(sorted(saved_ids), f)

    return already_saved

In [None]:












try:
    # Account to sync. Leave this empty to run the interactive first-time
    # login instead, which returns the address of the account that signed in.
    USER_EMAIL = "absarqureshi88@gmail.com"
    if USER_EMAIL:
        creds = load_creds_for(USER_EMAIL)
    else:
        # First time for this user:
        USER_EMAIL = login_once_and_save_tokens()
        creds = load_creds_for(USER_EMAIL)

    # Per-account list of message IDs that have already been saved.
    INDEX_FILE = f"emails/{_safe(USER_EMAIL)}/index.json"

    # load already saved IDs
    if os.path.exists(INDEX_FILE):
        with open(INDEX_FILE, "r") as f:
            saved_ids = set(json.load(f))
    else:
        saved_ids = set()
    new_ids = []

    # List message IDs without re-login. Only the first result page is
    # requested; no pageToken follow-up is made.
    svc = build("gmail", "v1", credentials=creds)
    res = svc.users().messages().list(userId="me").execute()

    msgs = res.get("messages", [])
    try:
        for m in msgs:
            # skip if already saved
            msg_id = m["id"]
            if msg_id in saved_ids:
                continue

            full = svc.users().messages().get(userId="me", id=msg_id).execute()
            headers = full["payload"]["headers"]
            subject = next((h["value"] for h in headers if h["name"] == "Subject"), "No Subject")
            sender  = next((h["value"] for h in headers if h["name"] == "From"), "Unknown")
            date_   = next((h["value"] for h in headers if h["name"] == "Date"), "Unknown")

            # parse date; a malformed or missing Date header must not stop the
            # sync, but KeyboardInterrupt/SystemExit are no longer swallowed
            try:
                parsed_date = parsedate_to_datetime(date_)
                date_str = parsed_date.strftime("%Y-%m-%d_%H-%M-%S")
            except Exception:
                date_str = "unknown_date"

            # safe names
            sender_dir = f"emails/{_safe(USER_EMAIL)}/{_safe(sender)}"
            os.makedirs(sender_dir, exist_ok=True)
            subject_name = _safe(subject)

            # download the raw mail and save it as readable text
            raw = svc.users().messages().get(userId="me", id=msg_id, format="raw").execute()
            raw_data = base64.urlsafe_b64decode(raw["raw"].encode("UTF-8"))
            # date_ (the original Date header) goes on the "Date:" line,
            # date_str (the filename-friendly timestamp) into the file name.
            formated_save_data(raw_data , headers, sender, date_, subject, date_str, subject_name, sender_dir)
            fetch_attachments(svc, USER_EMAIL, msg_id, sender_dir)

            new_ids.append(msg_id)
    finally:
        # update index.json even when the loop aborted, so messages that were
        # already saved in this run are not downloaded again next time
        if new_ids:
            saved_ids.update(new_ids)
            os.makedirs(os.path.dirname(INDEX_FILE), exist_ok=True)
            with open(INDEX_FILE, "w") as f:
                json.dump(list(saved_ids), f)

except Exception as e:
    print("❌", e)