In [6]:
import os
from base64 import urlsafe_b64decode

from google.oauth2.credentials import Credentials
from google.auth.transport.requests import Request
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build


# ============================================================
# CONFIG
# ============================================================

# OAuth scopes requested for the Gmail API; only gmail.readonly is listed.
# Passed to both the cached-credential loader and the OAuth flow in
# get_authenticated_service().
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']


# ============================================================
# AUTHENTICATION
# ============================================================

def get_authenticated_service(token_path="token.json",
                              client_secrets_path="gmail_credentials.json",
                              scopes=None):
    """Return a Gmail API v1 client, reusing cached OAuth credentials when possible.

    Credentials are loaded from ``token_path`` if that file exists. When they
    are missing or not valid, they are either refreshed (expired credentials
    that carry a refresh token) or obtained through the installed-app OAuth
    flow built from ``client_secrets_path``. Whenever credentials were
    refreshed or newly obtained, they are written back to ``token_path``.

    Args:
        token_path: File used to cache the authorized user credentials.
        client_secrets_path: OAuth client secrets file used to start the
            interactive flow.
        scopes: OAuth scopes to request. Defaults to the module-level
            ``SCOPES`` when ``None``.

    Returns:
        The object returned by ``build("gmail", "v1", credentials=creds)``.
    """
    if scopes is None:
        scopes = SCOPES

    creds = None

    if os.path.exists(token_path):
        creds = Credentials.from_authorized_user_file(token_path, scopes)

    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                client_secrets_path,
                scopes
            )
            # port=0 lets the local redirect server pick a free port.
            creds = flow.run_local_server(port=0)

        # The cache holds a refresh token, so create it owner-read/write only.
        # The mode is applied only when the file is created (and is subject to
        # the process umask); an existing file keeps its current permissions.
        fd = os.open(token_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as token:
            token.write(creds.to_json())

    return build("gmail", "v1", credentials=creds)


# ============================================================
# BODY EXTRACTION (raw)
# ============================================================

def extract_all_body_parts(payload, collected):
    """Gather every text/plain and text/html body found in a payload tree.

    Visits ``payload`` and everything nested under its ``parts`` key
    depth-first: a node is examined before its children, and children are
    examined in their listed order. For each node whose ``mimeType`` is
    ``text/plain`` or ``text/html`` and whose ``body`` holds a non-empty
    ``data`` value, the tuple ``(mimeType, data)`` is appended to
    ``collected``. The data is stored exactly as found; nothing is decoded.

    Args:
        payload: Mapping with optional ``mimeType``, ``body`` and ``parts`` keys.
        collected: List that is extended in place with the matches.

    Returns:
        None. Results are delivered through ``collected``.
    """
    text_types = ("text/plain", "text/html")
    pending = [payload]

    while pending:
        node = pending.pop()
        kind = node.get("mimeType")

        if kind in text_types:
            raw = node.get("body", {}).get("data")
            if raw:
                collected.append((kind, raw))

        # Push children reversed so the LIFO pop yields them in listed order.
        pending.extend(reversed(node.get("parts", [])))


def _decode_body_data(data):
    """Decode one base64url-encoded body payload into text."""
    # urlsafe_b64decode rejects input whose length is not a multiple of 4,
    # so restore any trailing '=' padding that may have been stripped.
    pad_char = "=" if isinstance(data, str) else b"="
    padded = data + pad_char * (-len(data) % 4)
    # Undecodable bytes are dropped rather than raising.
    return urlsafe_b64decode(padded).decode("utf-8", errors="ignore")


def get_full_body_from_subject(subject):
    """Print and return the text bodies of the first message matching a subject.

    Searches the authenticated user's mailbox with the query
    ``subject:"<subject>"`` (at most one result), fetches that message in
    ``full`` format, collects its text/plain and text/html parts, decodes
    them, and prints each one.

    Args:
        subject: Subject text to search for.
            NOTE(review): a double quote inside ``subject`` is interpolated
            into the query unescaped — confirm how the search handles it.

    Returns:
        A list of ``(mime_type, decoded_text)`` tuples in the order the parts
        were found, or ``None`` when no message matched or the message had no
        text body.
    """
    service = get_authenticated_service()

    print("Recherche du mail :", subject)

    results = service.users().messages().list(
        userId="me",
        q=f'subject:"{subject}"',
        maxResults=1
    ).execute()

    messages = results.get("messages", [])

    if not messages:
        print("Aucun mail trouvé.")
        return None

    msg = service.users().messages().get(
        userId="me",
        id=messages[0]["id"],
        format="full"
    ).execute()

    parts = []
    extract_all_body_parts(msg["payload"], parts)

    if not parts:
        print("Aucun body trouvé.")
        return None

    print("\n================ BODY BRUT =================\n")

    decoded_parts = []
    for i, (mime, data) in enumerate(parts, start=1):
        decoded = _decode_body_data(data)
        decoded_parts.append((mime, decoded))

        print(f"\n---------- PART {i} ({mime}) ----------\n")
        print(decoded)

    print("\n========================================\n")

    # Hand the decoded parts back so callers can parse them, not just read stdout.
    return decoded_parts




## CASE FOR A MAIL LABELED 5 -> DECLINED 

In [None]:
# Search Gmail for this subject, take the first message returned, and print
# its text/plain and text/html body parts.
contenu = get_full_body_from_subject("Votre candidature : Data Engineer H/F chez LCL")


Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=663636271514-b7ojcvgafgs5otb35jp5lmfoe78k054r.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A49810%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fgmail.readonly&state=AtVC1sLVXbxLhVwonPgHag6G3uansa&access_type=offline


I need to extract: 
"Votre candidature : Data Engineer H/F chez LCL" from `<h2 class="text-xl font-semibold font-sans">…</h2>` <br>
    "Data engineer H/F" from `<a href="…">…</a>` (together with the href value) <br>
    "LCL Villejuif, IDF, France" from `<p class="text-system-gray-100 text-sm leading-[20px]">` <br>
    "Candidature envoyée le 12 juillet" from `<p class="text-system-gray-70 text-sm leading-[20px]">` <br>
    "Nous vous remercions de l'intérêt que vous manifestez pour le poste de DE [...] Cordialement LCL" from `<p class="text-base font-sans">`