In [17]:
import os
from base64 import urlsafe_b64decode

from google.auth.exceptions import RefreshError
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build


# ============================================================
# CONFIG
# ============================================================

# OAuth scope list handed to both the stored-credential loader and the
# interactive OAuth flow below; its single entry is the gmail.readonly scope URL.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']


# ============================================================
# AUTHENTIFICATION
# ============================================================

def get_authenticated_service(token_path="token.json",
                              client_secrets_path="gmail_credentials.json"):
    """Build a Gmail API client, obtaining or renewing OAuth credentials as needed.

    Credentials are looked up in this order:

    1. the cached token file at ``token_path`` (if it exists),
    2. a refresh of those cached credentials when they are expired and carry
       a refresh token,
    3. the interactive local-server OAuth flow driven by ``client_secrets_path``.

    Whenever step 2 or 3 ran, the resulting credentials are written back to
    ``token_path`` so the next call can reuse them.

    Args:
        token_path: Path of the cached user token file (read and rewritten).
        client_secrets_path: Path of the OAuth client secrets file used by
            the interactive flow.

    Returns:
        The object returned by ``build("gmail", "v1", credentials=creds)``.
    """
    creds = None

    if os.path.exists(token_path):
        creds = Credentials.from_authorized_user_file(token_path, SCOPES)

    if not creds or not creds.valid:
        refreshed = False

        if creds and creds.expired and creds.refresh_token:
            try:
                creds.refresh(Request())
                refreshed = True
            except RefreshError:
                # The stored refresh token was rejected; fall back to the
                # interactive flow instead of aborting.
                refreshed = False

        if not refreshed:
            flow = InstalledAppFlow.from_client_secrets_file(
                client_secrets_path,
                SCOPES
            )
            creds = flow.run_local_server(port=0)

        # Persist the new or renewed credentials for later calls.
        with open(token_path, "w") as token:
            token.write(creds.to_json())

    return build("gmail", "v1", credentials=creds)


# ============================================================
# BODY EXTRACTION (brut)
# ============================================================

def extract_all_body_parts(payload, collected):
    """Gather every text body found in a message payload tree.

    The payload and all nested ``parts`` are visited depth-first, parents
    before children and siblings in their listed order. For each node whose
    ``mimeType`` is ``text/plain`` or ``text/html`` and whose ``body`` has a
    non-empty ``data`` value, a ``(mimeType, data)`` tuple is appended to
    ``collected``.

    Args:
        payload: Dict with optional ``mimeType``, ``body`` and ``parts`` keys.
        collected: List that receives the ``(mimeType, data)`` tuples in place.

    Returns:
        None. Results are delivered through ``collected``.
    """
    pending = [payload]

    while pending:
        node = pending.pop()
        mime_type = node.get("mimeType")

        if mime_type == "text/plain" or mime_type == "text/html":
            encoded = node.get("body", {}).get("data")
            if encoded:
                collected.append((mime_type, encoded))

        # Push children in reverse so they are popped in their listed order.
        pending.extend(reversed(list(node.get("parts", []))))


def _decode_body_data(data):
    """Decode one base64url-encoded body payload into text.

    Missing ``=`` padding is restored first: base64url data may be sent
    without padding, but ``urlsafe_b64decode`` rejects unpadded input.
    Bytes that are not valid UTF-8 are dropped (``errors="ignore"``).
    """
    pad_char = "=" if isinstance(data, str) else b"="
    padded = data + pad_char * (-len(data) % 4)
    return urlsafe_b64decode(padded).decode("utf-8", errors="ignore")


def get_full_body_from_subject(subject):
    """Print every text/plain and text/html part of the first mail matching a subject.

    The search uses the query ``subject:"<subject>"`` with ``maxResults=1``,
    so only the first message returned by the API is fetched and displayed.

    Args:
        subject: Subject text inserted into the search query.

    Returns:
        None. All results are printed; a message is also printed when no
        mail or no text body is found.
    """
    service = get_authenticated_service()

    print("Recherche du mail :", subject)

    results = service.users().messages().list(
        userId="me",
        q=f'subject:"{subject}"',
        maxResults=1
    ).execute()

    messages = results.get("messages", [])

    if not messages:
        print("Aucun mail trouv√©.")
        return

    msg = service.users().messages().get(
        userId="me",
        id=messages[0]["id"],
        format="full"
    ).execute()

    parts = []
    # A response without a payload is treated like a message with no body
    # instead of raising KeyError.
    extract_all_body_parts(msg.get("payload", {}), parts)

    if not parts:
        print("Aucun body trouv√©.")
        return

    print("\n================ BODY BRUT =================\n")

    for i, (mime, data) in enumerate(parts, start=1):
        decoded = _decode_body_data(data)

        print(f"\n---------- PART {i} ({mime}) ----------\n")
        print(decoded)

    print("\n========================================\n")


# ============================================================
# EXEMPLE
# ============================================================

# Example invocation: search for one subject line and print the text parts
# of the first matching message.
if __name__ == "__main__":
    get_full_body_from_subject("Derni√®re nouvelle de LCL")


Recherche du mail : Derni√®re nouvelle de LCL
Aucun mail trouv√©.


In [18]:
# NOTE(review): get_mail_body_from_subject is not defined in the cell above
# (that cell defines get_full_body_from_subject); presumably it comes from an
# earlier cell. Confirm this cell still works after Restart Kernel -> Run All.
contenu = get_mail_body_from_subject("Votre candidature¬†: Data Engineer H/F chez LCL ")


Recherche du mail avec le sujet : Votre candidature¬†: Data Engineer H/F chez LCL 
Mail trouv√©, id : 1980f07e2f634bee



---------- PART 1 ----------

Derni√®re nouvelle de LCL

----------------------------------------

Cet e-mail est destin√© √† Julien Ohana (üëî Apprenti Ing√©nieur Data - Thales
üìö √âtudiant - ECE Paris, √âcole d'Ing√©nieur)
D√©couvrez pourquoi nous pr√©cisons ceci¬†: https://www.linkedin.com/help/linkedin/answer/4788?lang=fr&lipi=urn%3Ali%3Apage%3Aemail_email_jobs_application_rejected_01%3B58COZutNT6yM7IiudSVksA%3D%3D&midToken=AQHMKonoCAnG1w&midSig=0zTN8yLmlYJrQ1&trk=eml-email_jobs_application_rejected_01-SecurityHelp-0-textfooterglimmer&trkEmail=eml-email_jobs_application_rejected_01-SecurityHelp-0-textfooterglimmer-null-fqd7q9~md4s39jp~ow-null-null&eid=fqd7q9-md4s39jp-ow&otpToken=MWIwMDFmZTIxNTI3Y2JjNGI1MjQwNGVkNDMxY2UyYmQ4N2M2ZDI0NzkwYWI4ODYxNzljNTA5NmI0ODVkNTRmNmY3ZDJkZmI2NmFlZmQwODQ1OGJkYWQxZGJjYWI1YjI2MGI4MjdhNzdjMTQxNTYxZmRkOWZiZCwxLDE%3D
Vous recevez des

I need to extract:
"Votre candidature : Data Engineer H/F chez LCL" in <h2 class="text-xl font-semibold font-sans"> </h2> <br>
    "Data engineer H/F" in <a href="..."> </a> (together with the href value) <br>
    "LCL Villejuif, IDF, France" in <p class="text-system-gray-70 text-sm leading-[20px]"> </p> <br>
    "Candidature envoyée le 12 juillet" in <p class="text-system-gray-70 text-sm leading-[20px]"> </p> <br>
    "Nous vous remercions de l'intérêt que vous manifestez pour le poste de DE [...] Cordialement LCL" in <p class="text-base font-sans"> </p>