In [None]:
pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib

# importation

In [39]:
import os.path
import base64
import json
import pandas as pd
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

In [3]:
# Défénir les permissons demandées pour acceder a gmail
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

In [5]:
def authenticate_gmail_api():
    """Authentifie l'utilisateur et renvoie un service API Gmail."""
    creds = None
    # Le fichier token.json stocke les jetons d'accès et d'actualisation de l'utilisateur
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    # Si il n'y a pas de (valide) jetons disponibles, laissez l'utilisateur se connecter.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file('../credentials.json', SCOPES)
            creds = flow.run_local_server(port=8081)
        # Enregistrer les jetons pour la prochaine exécution
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
    # Construire le service Gmail
    service = build('gmail', 'v1', credentials=creds)
    return service

In [11]:
service = authenticate_gmail_api()

Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=455190904108-7neb8q48h71eg7jqclts7n6kopf6hsi6.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8081%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fgmail.readonly&state=GTUapnSkiQMogylIE2FxDkdL5HTA7o&access_type=offline


In [85]:
def get_email_json(service, max_results=1):
    results = service.users().messages().list(
         userId='me', labelIds=['INBOX'], maxResults=max_results
    ).execute()
    messages = results.get('messages', [])
    
    for message in messages:
            msg = service.users().messages().get(userId='me', id=message['id']).execute()
            print(json.dumps(msg, indent=2))
    
    return messages
    

In [None]:
json = get_email_json(service)

In [125]:
def get_message_body(payload):
    """Récupère et décode le corps du message."""
    body = ''
    if 'body' in payload and 'data' in payload['body']:
        body = base64.urlsafe_b64decode(payload['body']['data'].encode('ASCII')).decode('UTF-8')
    elif 'parts' in payload:
        for part in payload['parts']:
            body += get_message_body(part)
    return body


In [155]:
def get_email_info(service, label_id, max_results=400):
    """Récupère les informations des emails pour un label donné."""
    try:
        email_infos = []
        next_page_token = None

        while len(email_infos) < max_results:
            results = service.users().messages().list(
                userId='me', labelIds=[label_id], maxResults=100, pageToken=next_page_token
            ).execute()
            messages = results.get('messages', [])

            if not messages:
                print(f'No more messages found.')
                break

            for message in messages:
                msg = service.users().messages().get(userId='me', id=message['id']).execute()

                # Récupération des informations du message
                headers = msg['payload']['headers']
                snippet = msg.get('snippet', '')

                from_email = next(header['value'] for header in headers if header['name'] == 'From')
                subject = next((header['value'] for header in headers if header['name'] == 'Subject'), '(No Subject)')
                labels = msg.get('labelIds', [])
                has_attachment = 'parts' in msg['payload']
                body = get_message_body(msg['payload'])  
                message_length = len(body)

                email_info = {
                    'From': from_email,
                    'Subject': subject,
                    'Snippet': snippet,
                    'Labels': labels,
                    'Has Attachments': has_attachment,                    
                    'Message Length': message_length
                    
                }

                email_infos.append(email_info)
                if len(email_infos) >= max_results:
                    break

            next_page_token = results.get('nextPageToken')
            if not next_page_token:
                break

        return email_infos

    except Exception as error:
        print(f'An error occurred: {error}')

In [157]:
inbox_emails = get_email_info(service, 'INBOX')

In [141]:
spam_emails = get_email_info(service, 'SPAM')

In [159]:
df_inbox = pd.DataFrame(inbox_emails)

In [145]:
df_spam = pd.DataFrame(spam_emails)

In [147]:
df = pd.concat([df_inbox, df_spam])

In [151]:
df.index=range(len(df))

In [153]:
df

Unnamed: 0,From,Subject,Snippet,Labels,Has Attachments,Message Length
0,Typology Paris <no-reply@typology.com>,Votre prescription personnalisée,Découvrez vos résultats ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ...,"[UNREAD, CATEGORY_UPDATES, INBOX]",True,38172
1,LinkedIn <notifications-noreply@linkedin.com>,Vous apparaissez dans 9 recherches,Des personnes recherchent vos compétences ͏ ͏ ...,"[UNREAD, CATEGORY_SOCIAL, INBOX]",True,30467
2,Typology Paris <no-reply@typology.com>,Votre prescription est prête,Découvrez votre routine personnalisée ͏ ͏ ͏ ͏ ...,"[UNREAD, IMPORTANT, CATEGORY_UPDATES, INBOX]",True,36211
3,France travail <Service-candidat@pole-emploi.fr>,"Vous avez reçu un courrier "" Relevé de situati...","France Travail Candidat Bonjour, Vous avez reç...","[CATEGORY_UPDATES, INBOX]",False,6846
4,Typology Paris <no-reply@typology.com>,Typologie AE(-) : Ce qu’ils pensent de nos pro...,Leurs avis valent toutes les recommandations. ...,"[CATEGORY_PROMOTIONS, UNREAD, INBOX]",True,41375
...,...,...,...,...,...,...
229,"""'GLS-France'"" <uMLdooGT@rumzg.fr>",melanie.picot01: 📦 Commande en attente (1). Vo...,𝗡𝗼𝘁𝗶𝗳𝗶𝗰𝗮𝘁𝗶𝗼𝗻 𝗱𝗲 𝘀𝘂𝗶𝘃𝗶 𝗱𝗲 𝗹𝗮 𝗹𝗶𝘃𝗿𝗮𝗶𝘀𝗼𝗻 𝗱𝗲 𝘃𝗼𝘁𝗿𝗲...,"[UNREAD, CATEGORY_PERSONAL, SPAM]",False,28050
230,"""ᴛᴇɴᴛᴀᴛɪᴠᴇ ᴅᴇ ʟɪᴠʀᴀɪsᴏɴ"" <iiEdXqjW@iiedxqjw.us>",ᴍɪsᴇ ᴀ̀ ᴊᴏᴜʀ ᴅᴇ ʟᴀ ʟɪᴠʀᴀɪsᴏɴ : ᴠᴏᴛʀᴇ ʟɪᴠʀᴀɪsᴏɴ...,"GLS. Bonjour melanie.picot01, Vous avez (1) co...","[UNREAD, CATEGORY_PERSONAL, SPAM]",True,25732
231,chrono-poste-express <MfKqNMmy@gdnoclslu.us>,"melanie.picot01, Vous avez (1) message de notr...",𝗡𝗼𝘁𝗶𝗳𝗶𝗰𝗮𝘁𝗶𝗼𝗻 𝗱𝗲 𝘀𝘂𝗶𝘃𝗶 𝗱𝗲 𝗹𝗮 𝗹𝗶𝘃𝗿𝗮𝗶𝘀𝗼𝗻 𝗱𝗲 𝘃𝗼𝘁𝗿𝗲...,"[UNREAD, CATEGORY_PERSONAL, SPAM]",False,26103
232,"""Emma de Gsm55.com"" <emma@newsletter.gsm55.com>",🌞 Soldes sur notre TOP de l'été,"Coques cordons, étuis waterproof, powerbank, a...","[CATEGORY_PROMOTIONS, UNREAD, SPAM]",True,97254
