# Liaison avec le Google Drive

## Installation des dépendances

## Importation des dépendances

In [1]:
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload, MediaIoBaseDownload
from google.oauth2 import service_account
import io
import os

## Code

### Environnement

In [2]:
# Drive ID used as the default `parent` by the folder/upload helpers below.
# NOTE(review): despite the name, confirm whether this is the ID of a shared
# drive or of a shared folder.
SHARED_DRIVE_ID = "1y7Xik1Wj7yzqLtRMfABDSzwxWewRlbiK"

### Initialisation de la connexion au drive

In [3]:
# Load the service-account key from "credentials.json" with the
# `auth/drive` scope, then build the Drive v3 client that every helper in
# this notebook uses through the module-level name `drive`.
credentials = service_account.Credentials.from_service_account_file(
    "credentials.json",
    scopes=["https://www.googleapis.com/auth/drive"],
)
drive = build("drive", "v3", credentials=credentials)

### Fonctions principales

In [4]:
def get_or_create_folder_in_shared_drive(folder_name: str, parent: str = SHARED_DRIVE_ID) -> dict:
    """
    Return the folder named ``folder_name`` under ``parent``, creating it if absent.

    :param folder_name: Name of the folder to look up or create
    :param parent: ID of the parent folder (defaults to ``SHARED_DRIVE_ID``)
    :return: Drive file resource of the folder, as returned by the API
        (callers in this file only read its ``"id"`` key)
    """

    # Query values are single-quoted: escape backslashes and quotes so that a
    # name such as "l'été" does not produce an invalid query.
    escaped_name = folder_name.replace("\\", "\\\\").replace("'", "\\'")

    query = (
        "mimeType='application/vnd.google-apps.folder'"
        f" and name='{escaped_name}'"
        f" and '{parent}' in parents"
        # Skip folders that are in the trash, otherwise a trashed folder
        # could be returned and receive the uploads.
        " and trashed = false"
    )

    results = drive.files().list(q=query, spaces="drive").execute().get("files", [])

    if results:
        # Several folders may share the same name; the first match is used.
        return results[0]

    folder_metadata = {
        "name": folder_name,
        "parents": [parent],
        "mimeType": "application/vnd.google-apps.folder",
    }

    # Only the ID of the new folder is requested back from the API.
    return drive.files().create(body=folder_metadata, fields="id").execute()

In [5]:
def upload_file_or_folder(local_path: str, parent: str = SHARED_DRIVE_ID) -> None:
    """
    Upload a file, or a folder and all of its content, under ``parent``.

    A folder is mirrored recursively: the remote folder is reused when it
    already exists, otherwise it is created. A file replaces the existing
    remote file of the same name in ``parent``, if there is one.

    :param local_path: Path of the local file or folder to upload
    :param parent: ID of the remote parent folder
    """

    # normpath drops a trailing separator, so "some/dir/" yields "dir"
    # instead of an empty name.
    name = os.path.basename(os.path.normpath(local_path))

    if os.path.isdir(local_path):
        folder = get_or_create_folder_in_shared_drive(name, parent)

        for item in os.listdir(local_path):
            upload_file_or_folder(os.path.join(local_path, item), folder["id"])
        return

    # Prepare the upload before touching the remote side, so that a missing or
    # unreadable local file fails here rather than after the remote copy has
    # already been deleted.
    media = MediaFileUpload(local_path, resumable=True)

    # Query values are single-quoted: escape backslashes and quotes.
    escaped_name = name.replace("\\", "\\\\").replace("'", "\\'")
    # Trashed entries are ignored so the live file is the one being replaced.
    query = f"'{parent}' in parents and name = '{escaped_name}' and trashed = false"
    files = drive.files().list(q=query, fields="files(id, name)").execute().get("files", [])

    if files:
        # Replace semantics: remove the first existing match before re-creating it.
        drive.files().delete(fileId=files[0]["id"]).execute()

    file_metadata = {
        "name": name,
        "parents": [parent],
    }
    drive.files().create(body=file_metadata, media_body=media, fields="id").execute()

    print(f"Fichier uploadé : {local_path}")

In [6]:
def list_files_in_folder(folder_id: str) -> list:
    """
    List the direct children (files and folders) of a Drive folder.

    All result pages are fetched, and entries that are in the trash are
    left out.

    :param folder_id: ID of the folder
    :return: List of dicts with the ``id``, ``name`` and ``mimeType`` of each child
    """

    query = f"'{folder_id}' in parents and trashed = false"
    items = []
    page_token = None

    # The API returns results page by page; keep requesting pages until no
    # `nextPageToken` is returned, otherwise large folders get truncated.
    while True:
        response = drive.files().list(
            q=query,
            fields="nextPageToken, files(id, name, mimeType)",
            pageToken=page_token,
        ).execute()
        items.extend(response.get("files", []))

        page_token = response.get("nextPageToken")
        if not page_token:
            break

    return items

In [7]:
def download_file(file_id: str, file_name: str, download_path: str) -> None:
    """
    Download one file from the drive into a local directory.

    Progress is printed after each downloaded chunk.

    :param file_id: ID of the file on the drive
    :param file_name: Name given to the local file
    :param download_path: Existing local directory in which the file is written
    """

    request = drive.files().get_media(fileId=file_id)
    target = os.path.join(download_path, file_name)

    # The context manager guarantees the local file is closed (and flushed)
    # even when a chunk download raises.
    with io.FileIO(target, "wb") as fh:
        downloader = MediaIoBaseDownload(fh, request)

        done = False
        while not done:
            status, done = downloader.next_chunk()
            print(f"Download {file_name}: {int(status.progress() * 100)}%.")

    print(f"Downloaded {file_name} to {download_path}")

In [8]:
def download_folder(folder_id: str, download_path: str) -> None:
    """
    Recursively download the content of a Drive folder.

    :param folder_id: ID of the Drive folder to download
    :param download_path: Local destination directory (created when missing)
    """

    folder_mime_type = "application/vnd.google-apps.folder"

    if not os.path.exists(download_path):
        os.makedirs(download_path)

    for entry in list_files_in_folder(folder_id):
        if entry["mimeType"] != folder_mime_type:
            # Regular file: written directly into the current directory.
            download_file(entry["id"], entry["name"], download_path)
            continue

        # Sub-folder: mirror it locally under the same name and recurse.
        nested_path = os.path.join(download_path, entry["name"])
        download_folder(entry["id"], nested_path)

In [9]:
def upload_dataset(dataset: str) -> None:
    """
    Upload a local dataset to the ``data/<dataset>`` folder of the drive.

    The dataset is read from ``../data/<dataset>``. Its ``data.yaml`` file and
    ``images_labels`` folder are uploaded, and from ``images_converted`` only
    the ``test`` sub-folder.

    :param dataset: Name of the dataset to upload
    """

    local_root = f"../data/{dataset}"

    # Remote layout: data/<dataset>/
    data_folder = get_or_create_folder_in_shared_drive("data")
    dataset_folder = get_or_create_folder_in_shared_drive(dataset, data_folder["id"])

    for entry in ("data.yaml", "images_labels"):
        upload_file_or_folder(f"{local_root}/{entry}", dataset_folder["id"])

    # Remote layout: data/<dataset>/images_converted/test
    converted_folder = get_or_create_folder_in_shared_drive("images_converted", dataset_folder["id"])
    upload_file_or_folder(f"{local_root}/images_converted/test", converted_folder["id"])
    

### Lancement du code

Il ne faut pas être pressé avec les vérifications / suppressions. Je les fais une par une pour ne pas déstabiliser mon Google Colab durant son entraînement / test.

In [10]:
# Upload the local dataset "dataset1" (read from ../data/dataset1) to the drive.
upload_dataset("dataset1")

Fichier uploadé : ../data/dataset1/data.yaml
Fichier uploadé : ../data/dataset1/images_labels/train/labels/1efa5e73-30_028.txt
Fichier uploadé : ../data/dataset1/images_labels/train/labels/98b3f60c-50_009.txt
Fichier uploadé : ../data/dataset1/images_labels/train/labels/99292374-30_038.txt
Fichier uploadé : ../data/dataset1/images_labels/train/labels/6e9ee7e8-30_021.txt
Fichier uploadé : ../data/dataset1/images_labels/train/labels/292d1eae-50_014.txt
Fichier uploadé : ../data/dataset1/images_labels/train/labels/3c316cb1-30_006.txt
Fichier uploadé : ../data/dataset1/images_labels/train/labels/e65cc218-50_006.txt
Fichier uploadé : ../data/dataset1/images_labels/train/labels/c740a55a-50_012.txt
Fichier uploadé : ../data/dataset1/images_labels/train/labels/77675b55-50_016.txt
Fichier uploadé : ../data/dataset1/images_labels/train/labels/095a68ec-50_026.txt
Fichier uploadé : ../data/dataset1/images_labels/train/labels/aff1de5c-30_015.txt
Fichier uploadé : ../data/dataset1/images_labels/trai