In [6]:
import os
import base64
import email
import pandas as pd


from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']


In [10]:
def get_gmail_service():
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    else:
        flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
        creds = flow.run_local_server(port=0)
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
    return build('gmail', 'v1', credentials=creds)

def get_label_id(service, label_name):
    labels = service.users().labels().list(userId='me').execute().get('labels', [])
    for label in labels:
        if label['name'].lower() == label_name.lower():
            return label['id']
    raise Exception(f"Label '{label_name}' not found in your Gmail.")


In [30]:
def classify_email(text):
    text = text.lower()

    applied_phrases = [
        "thank you for applying",
        "your application has been received",
        "your application was sent to",
        "we received your job application",
        "has been submitted successfully",
        "thank you for your interest in"
    ]

    rejection_phrases = [
        "we regret to inform you that we will not be",
        "unfortunately",
        "we won’t be proceeding further with your application",
        "we decided to move forward with other applicants",
        "not selected",
        "we are unable to move forward with your application",
        "your application to"
    ]

    interview_phrases = [
        "interview",
        "invite",
        "schedule a call"
    ]

    found_applied = any(phrase in text for phrase in applied_phrases)
    found_rejected = any(phrase in text for phrase in rejection_phrases)
    found_interview = any(phrase in text for phrase in interview_phrases)

    # Smart logic: Applied + Rejection = Rejected
    if found_applied and found_rejected:
        return "Rejected"
    elif found_rejected:
        return "Rejected"
    elif found_applied or found_interview:
        return "Applied"

    return "Others"


In [31]:
def extract_emails_and_save(label_name='IMPORTANT'):
    service = get_gmail_service()
    try:
        label_id = get_label_id(service, label_name)
    except Exception as e:
        print(str(e))
        return
    messages = service.users().messages().list(userId='me', labelIds=[label_id], q="newer_than:60d").execute().get('messages', [])
    response = service.users().messages().list(userId='me', labelIds=[label_id], q="newer_than:90d").execute()
    messages.extend(response.get('messages', []))

    while 'nextPageToken' in response:
        response = service.users().messages().list(userId='me', labelIds=[label_id], q="newer_than:90d", pageToken=response['nextPageToken']).execute()
        messages.extend(response.get('messages', []))

    print(f"✅ Total messages fetched: {len(messages)}")


    rows = []

    for msg in messages:
        msg_data = service.users().messages().get(userId='me', id=msg['id'], format='raw').execute()
        raw_msg = base64.urlsafe_b64decode(msg_data['raw'].encode('ASCII'))
        mime_msg = email.message_from_bytes(raw_msg)

        subject = mime_msg['subject'] or "No Subject"
        sender = mime_msg['from'] or "Unknown"
        body = ""

        if mime_msg.is_multipart():
            for part in mime_msg.walk():
                if part.get_content_type() == 'text/plain':
                    body += part.get_payload(decode=True).decode(errors='ignore')
        else:
            body = mime_msg.get_payload(decode=True).decode(errors='ignore')

         
        status = classify_email(subject + " " + body)

        rows.append({
            "From": sender,
            "Subject": subject,
            "Status": status
        })

    df = pd.DataFrame(rows)
    df.to_csv("data_new_2.csv", index=False)
    print("✅ Data saved to data_new_2.csv")

extract_emails_and_save()


✅ Total messages fetched: 367
✅ Data saved to data_new_2.csv
