In [0]:
import os
import subprocess
import zipfile

# Ensure the /dbfs/tmp scratch directory exists (no error if it is already there)
os.makedirs(name="/dbfs/tmp", exist_ok=True)

# (year, download URL) pairs, one per year from 2016 to 2025.
# Every URL is the shared data.gouv.fr resource endpoint followed by the
# resource id registered for that year.
_RESOURCE_ENDPOINT = "https://www.data.gouv.fr/api/1/datasets/r/"
_RESOURCE_ID_BY_YEAR = {
    "2016": "0c83108b-f87b-470d-8980-6207ac93f4eb",
    "2017": "5785427b-3167-49fa-a581-aef835f0fb04",
    "2018": "e7514726-19ec-47dc-bcc3-a59c9bfa5f7f",
    "2019": "a6f74cfd-b4f7-44fb-8772-7884775b35e1",
    "2020": "1913d0d6-d650-409d-a19e-b7c7f09e09a0",
    "2021": "3c5ebbd9-f6b5-4837-a194-12bfeda7f38e",
    "2022": "77d3151a-739e-4aab-8c34-7a15d7fea55d",
    "2023": "96452cf0-329a-4908-8adb-8f061adcca4c",
    "2024": "c0350599-a041-4724-9942-ad4c2ba9a7b3",
    "2025": "6994a9f1-3f4b-4e15-a4dc-0e358a6aac13",
}
# Dicts keep insertion order, so the list stays sorted by year as declared above
links = [
    (year_label, _RESOURCE_ENDPOINT + resource_id)
    for year_label, resource_id in _RESOURCE_ID_BY_YEAR.items()
]

# For every (year, URL) pair: download the archive to /dbfs/tmp, then unpack
# only its .txt members into /dbfs/tmp/extracted_files/<year>.
for year, url in links:
    destination_path = f"/dbfs/tmp/dis-{year}-dept.zip"
    print(f"Téléchargement de {url} dans {destination_path}")

    # Run wget with an argument list (no shell, so nothing in the URL or path
    # is interpreted by one) and fail fast on a non-zero exit status.
    # `wget -O` creates the output file even when the download fails, so an
    # unchecked failure would only show up below as a confusing zip error.
    subprocess.run(["wget", url, "-O", destination_path], check=True)

    extract_path = f"/dbfs/tmp/extracted_files/{year}"
    os.makedirs(extract_path, exist_ok=True)

    print(f"Extraction de {destination_path} dans {extract_path}")
    with zipfile.ZipFile(destination_path, 'r') as zip_ref:
        # Extract text files only; the suffix is matched case-insensitively so
        # that members named ".TXT" are not silently skipped.
        for file_info in zip_ref.infolist():
            if file_info.filename.lower().endswith('.txt'):
                zip_ref.extract(file_info, extract_path)
                print(f"Fichier extrait : {file_info.filename}")

In [0]:
import os
from pathlib import Path
from azure.storage.blob import BlobServiceClient

# --- Upload settings ---
# Secret scope and key name under which the storage access key is stored
scope_name = "my-scope"
key_name = "adls-key"

# Target storage account endpoint and container
account_url = "https://datalakequaliteeau.blob.core.windows.net/"
container_name = "source"

# Local root directory; the upload loop looks for one sub-directory per year here
local_base_path = "/dbfs/tmp/extracted_files"

# Fetch the access key from the secret store instead of hardcoding it
credential = dbutils.secrets.get(key=key_name, scope=scope_name)

# Service-level client for the storage account
blob_service_client = BlobServiceClient(
    credential=credential,
    account_url=account_url,
)

def _destination_folder(filename):
    """Return the container folder for ``filename``, chosen from its name.

    The match is case-insensitive. Markers are tested in the order
    "PLV", "COM", "RESULT" and the first one found in the name wins;
    a name containing none of them is sent to "AUTRES".
    """
    upper_name = filename.upper()
    for marker in ("PLV", "COM", "RESULT"):
        if marker in upper_name:
            return marker
    return "AUTRES"


# The container client does not depend on the file being uploaded:
# build it once instead of once per file.
container_client = blob_service_client.get_container_client(container_name)

# Walk every year directory (2016 through 2025) and upload each file found in it
for year in range(2016, 2026):
    year_path = os.path.join(local_base_path, str(year))
    if not os.path.exists(year_path):
        continue  # no directory for this year, nothing to upload

    for root, _dirs, files in os.walk(year_path):
        for filename in files:
            file_path = os.path.join(root, filename)

            # Blob path is "<folder>/<file name>"
            # NOTE(review): the blob name has no year component and uploads use
            # overwrite=True, so identically named files from different years
            # would replace each other — confirm file names are unique across years.
            folder_dest = _destination_folder(filename)
            blob_name = f"{folder_dest}/{filename}"
            blob_client = container_client.get_blob_client(blob=blob_name)

            # Stream the file contents to the blob, replacing any existing blob
            with open(file_path, "rb") as data:
                blob_client.upload_blob(data, overwrite=True)
            print(f"Fichier {filename} uploadé dans {blob_name}")