> # Downloading Resumes from Emails into `Azure Data Lake Gen 2`
***
In this section we extract a batch of resumes (CVs) attached to emails and store them in Azure Data Lake Gen2.

The dependencies needed for this notebook:
- **`google-auth-oauthlib`**
- **`google-api-python-client`**
- **`azure-storage-blob`**


In [None]:
from google_auth_oauthlib.flow import InstalledAppFlow
from azure.storage.blob import BlobServiceClient
from googleapiclient.discovery import build
import base64
import os

In [None]:
# Connection string for the Azure Data Lake Gen2 storage account.
# It is read from the AZURE_STORAGE_CONNECTION_STRING environment variable so
# the secret never has to be saved inside the notebook. The placeholder below
# is only a fallback and must be replaced (or the variable set) for a real run.
Azure_Gen2_conn_string = os.environ.get(
    "AZURE_STORAGE_CONNECTION_STRING", "votre connecting string"
)

In [None]:
def _list_message_refs(service, query):
    """Return every Gmail message reference matching ``query``.

    ``messages.list`` is paginated, so a single ``execute()`` only returns the
    first page. ``list_next`` builds the request for the following page and
    returns ``None`` once there are no more pages.
    """
    messages_api = service.users().messages()
    refs = []
    request = messages_api.list(userId='me', q=query)
    while request is not None:
        response = request.execute()
        refs.extend(response.get('messages', []))
        request = messages_api.list_next(request, response)
    return refs


def _iter_parts(payload):
    """Yield ``payload`` and all of its nested MIME parts, in document order.

    Attachments may sit inside nested ``multipart/*`` containers, so scanning
    only the top-level ``parts`` list can miss them.
    """
    stack = [payload]
    while stack:
        part = stack.pop()
        yield part
        # Reversed so that popping from the end keeps the original part order.
        stack.extend(reversed(part.get('parts') or []))


def extract_mails_with_cv():
    """Upload the PDF attachments of matching Gmail messages to Azure storage.

    Runs the OAuth installed-app flow (local server on port 8080), searches the
    authenticated mailbox for messages matching ``subject:stage_2025``, and
    uploads every attachment whose filename ends with ``.pdf`` (any case) to
    the ``resumecontainner`` container under the ``raw-data/`` prefix.
    Non-PDF attachments are reported and skipped without being downloaded.

    Reads the module-level ``Azure_Gen2_conn_string`` for the storage account.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Placeholder: replace with the scope(s) your app needs; listing and
    # reading messages requires a Gmail scope that grants read access.
    SCOPES = ["votre scope"]

    # Replace 'credentials.json' with the path to your own OAuth client file.
    flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
    creds = flow.run_local_server(port=8080)

    service = build('gmail', 'v1', credentials=creds)

    query = 'subject:stage_2025'
    messages = _list_message_refs(service, query)

    if not messages:
        print("No emails found with 'stage_2025' in the subject.")
        return

    print(f"{len(messages)} email(s) found with 'stage_2025' in the subject.")

    container_name = "resumecontainner"
    folder_name = "raw-data"

    blob_service_client = BlobServiceClient.from_connection_string(Azure_Gen2_conn_string)
    container_client = blob_service_client.get_container_client(container_name)

    for msg in messages:
        msg_id = msg['id']
        message = service.users().messages().get(userId='me', id=msg_id).execute()

        for part in _iter_parts(message.get('payload', {})):
            filename = part.get('filename')
            body = part.get('body', {})
            attachment_id = body.get('attachmentId')
            inline_data = body.get('data')

            # Parts without a filename or without any content are not attachments.
            if not filename or not (attachment_id or inline_data):
                continue

            # Check the extension before downloading so non-PDF files cost no API call.
            if not filename.lower().endswith('.pdf'):
                print(f"File ignored (not a PDF): {filename}")
                continue

            if attachment_id:
                attachment = service.users().messages().attachments().get(
                    userId='me', messageId=msg_id, id=attachment_id).execute()
                encoded = attachment['data']
            else:
                # The part body is carried inline instead of as a separate attachment.
                encoded = inline_data

            file_data = base64.urlsafe_b64decode(encoded.encode('UTF-8'))

            # The filename comes from the email, so keep only its base name to
            # stop it from escaping the target folder.
            safe_name = os.path.basename(filename.replace('\\', '/'))

            # NOTE(review): two emails with the same attachment name (e.g. "CV.pdf")
            # overwrite each other because overwrite=True; consider adding msg_id
            # to the blob name if downstream consumers allow it.
            azure_blob_name = f"{folder_name}/{safe_name}"
            blob_client = container_client.get_blob_client(azure_blob_name)

            blob_client.upload_blob(file_data, overwrite=True)
            print(f"CV uploaded and saved in Azure Blob: {azure_blob_name}")


In [None]:

# Entry point: run the extraction when this code executes as "__main__"
# (the case when the cell is run in a notebook kernel or the file as a script).
if __name__ == "__main__":
    extract_mails_with_cv()