In [1]:
import os
import base64
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import pickle

# OAuth scope requested during login. gmail.modify is used because this
# notebook reads messages and attachments and also removes the UNREAD label.
SCOPES = ['https://www.googleapis.com/auth/gmail.modify']

# Authenticate and initialize the Gmail API
def authenticate_gmail(token_path='token.pickle', credentials_path='credentials.json', port=8080):
    """
    Authenticate with Google and return a Gmail API service client.

    Cached credentials are loaded from `token_path` when that file exists.
    If they are missing or not valid, they are refreshed (when expired and a
    refresh token is available) or obtained through the interactive local-server
    OAuth flow, and the resulting credentials are written back to `token_path`.

    Args:
        token_path: Pickle file used to cache credentials between runs.
        credentials_path: OAuth client secrets file passed to the login flow.
        port: Local port the OAuth flow's redirect server listens on.

    Returns:
        The Gmail API ('gmail', 'v1') service object built with the credentials.
    """
    creds = None
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only point token_path at a file written by this function.
    if os.path.exists(token_path):
        with open(token_path, 'rb') as token:
            creds = pickle.load(token)

    # No usable credentials: refresh silently if possible, otherwise log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                credentials_path, SCOPES)
            creds = flow.run_local_server(port=port)
        # Persist the new/refreshed credentials for the next run.
        with open(token_path, 'wb') as token:
            pickle.dump(creds, token)

    return build('gmail', 'v1', credentials=creds)

In [2]:
import base64
import os
import re
from googleapiclient.discovery import build

def sanitize_folder_name(folder_name):
    """
    Turn an arbitrary string into a safe single path component.

    Characters that are invalid in file or folder names (< > : " / \\ | ? *)
    and ASCII control characters (including NUL) are replaced with '_'.
    Names that would be empty or consist only of dots and spaces ('', '.',
    '..') are replaced with '_' so the result can never refer to the current
    or parent directory when joined onto a base path.

    Args:
        folder_name: The raw name (e.g. an email subject or attachment filename).

    Returns:
        The sanitized, non-empty name.
    """
    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', folder_name)
    # '', '.', '..' and similar would collapse or escape the target directory.
    if not cleaned.strip(' .'):
        return '_'
    return cleaned

def _get_header(headers, name, default):
    """Return the value of the first header named `name` (case-insensitive), else `default`."""
    wanted = name.lower()
    return next(
        (header['value'] for header in headers if header['name'].lower() == wanted),
        default,
    )


def _iter_attachment_parts(part):
    """Yield `part` and every nested sub-part that carries a non-empty filename."""
    if part.get('filename'):
        yield part
    for child in part.get('parts', []):
        yield from _iter_attachment_parts(child)


def _list_message_refs(service, user_id, query):
    """Collect the message references matching `query` across all result pages."""
    refs = []
    request = service.users().messages().list(userId=user_id, q=query)
    while request is not None:
        response = request.execute()
        refs.extend(response.get('messages', []))
        # list_next returns None once there are no further pages.
        request = service.users().messages().list_next(
            previous_request=request, previous_response=response
        )
    return refs


def get_new_email_attachments(service, user_id='me'):
    """
    Fetch attachments from unread emails, organize them by sender email and subject,
    and mark emails as read after processing.

    Every page of matching messages is processed. Attachments are located
    anywhere in the MIME tree (including nested multipart containers and the
    root payload), and both externally stored attachments (`attachmentId`) and
    attachments whose bytes are inline in the part body are saved. Files are
    written to attachments/<sender email>/<subject>/<filename>.

    Args:
        service: The Gmail API service instance.
        user_id: The email address of the user. Default is 'me' (current authenticated user).

    Returns:
        None. Progress is reported with print().
    """
    query = 'is:unread has:attachment'
    # Collect all references first: marking messages as read while paging
    # would change the result set of the 'is:unread' query.
    messages = _list_message_refs(service, user_id, query)

    if not messages:
        print("No new emails with attachments.")
        return

    base_folder = 'attachments'

    for message in messages:
        # Get email details
        msg = service.users().messages().get(userId=user_id, id=message['id']).execute()
        payload = msg.get('payload', {})

        # Extract sender, date, and subject; fall back to placeholders so a
        # message lacking a header does not abort the whole run.
        headers = payload.get('headers', [])
        sender = _get_header(headers, 'From', 'Unknown Sender')
        subject = _get_header(headers, 'Subject', 'No Subject')
        date = _get_header(headers, 'Date', 'Unknown Date')

        # Extract email address from sender ("Name <addr>" or bare "addr")
        sender_email = sender.split('<')[-1].strip('>')

        print(f"Processing email from: {sender} (Email: {sender_email}), Date: {date}, Subject: {subject}")

        # Create folder structure
        full_path = os.path.join(
            base_folder,
            sanitize_folder_name(sender_email),
            sanitize_folder_name(subject),
        )
        os.makedirs(full_path, exist_ok=True)

        # Extract attachments from the whole MIME tree
        for part in _iter_attachment_parts(payload):
            body = part.get('body', {})
            if 'attachmentId' in body:
                attachment = service.users().messages().attachments().get(
                    userId=user_id, messageId=message['id'], id=body['attachmentId']
                ).execute()
                encoded = attachment['data']
            elif body.get('data'):
                # No attachmentId: the content is carried inline in the body.
                encoded = body['data']
            else:
                continue

            file_data = base64.urlsafe_b64decode(encoded)
            file_name = sanitize_folder_name(part['filename'])

            # Save the file
            file_path = os.path.join(full_path, file_name)
            with open(file_path, 'wb') as f:
                f.write(file_data)
            print(f"Saved attachment: {file_name} to {full_path}")

        # Mark email as read only after all of its attachments were written
        service.users().messages().modify(
            userId=user_id,
            id=message['id'],
            body={'removeLabelIds': ['UNREAD']}
        ).execute()


In [3]:
# Build the Gmail API client: reuses the cached token file when present,
# otherwise runs the OAuth login flow.
service = authenticate_gmail()

In [4]:
# Save attachments from unread emails under ./attachments and mark those
# emails as read afterwards.
print("Fetching new email attachments...")
get_new_email_attachments(service)

Fetching new email attachments...
Processing email from: Bisma Ijaz <bismaijaz@gosaas.io> (Email: bismaijaz@gosaas.io), Date: Wed, 18 Dec 2024 09:30:57 +0000, Subject: Invitation: FDI Status Update @ Wed Dec 18, 2024 7pm - 8pm (GMT+5) (Haseeb Mahmood)
Saved attachment: invite.ics to attachments/bismaijaz@gosaas.io/Invitation_ FDI Status Update @ Wed Dec 18, 2024 7pm - 8pm (GMT+5) (Haseeb Mahmood)
