
# UN Comtrade – Téléchargement CSV (1 fichier par pays, toutes années)

Ce notebook utilise l'**API officielle** de UN Comtrade (dataset HS) pour récupérer les données en **CSV** :  
- 1 CSV **par pays (reporter)**  
- **Toutes les années** disponibles (ps=all)  
- **Tous flux** (rg=all : import/export/re-export)  
- **Partenaire = Monde** (p=0) par défaut, configurable

> Avantage : plus robuste et plus rapide que le scraping Selenium de l'UI.


## 1) Dépendances

In [1]:

# Si 'requests' n'est pas installé, décommente la ligne suivante :
# import sys, subprocess; subprocess.check_call([sys.executable, "-m", "pip", "install", "requests"])

import os
import time
import sys
from typing import List, Tuple
import requests

# Report the interpreter and library versions so a run can be reproduced.
python_version = sys.version.split()[0]  # first token of sys.version, e.g. "3.12.1"
requests_version = requests.__version__

print("Versions :")
print("  Python :", python_version)
print("  requests :", requests_version)


Versions :
  Python : 3.12.1
  requests : 2.32.3


## 2) Configuration

In [2]:

# === Main parameters ===
# What to query.
HS_CODE = "85"  # HS commodity code (e.g. "85", "840", "TOTAL")
PARTNER = "0"  # "0" = World; "all" = every partner (warning: very large files)

# Where and how fast to download.
OUT_DIR = "data_comtrade"  # Output folder (created if it does not exist)
SLEEP_SEC = 1.2  # Pause between API calls (respects the rate limit)
MAX_COUNTRIES = None  # For tests: cap the number of countries (e.g. 10); None = all

# === Quick check: echo the active configuration ===
print(f"HS={HS_CODE} | PARTNER={PARTNER} | out={OUT_DIR} | sleep={SLEEP_SEC} | max_countries={MAX_COUNTRIES}")


HS=85 | PARTNER=0 | out=data_comtrade | sleep=1.2 | max_countries=None


## 3) Fonctions utilitaires

In [3]:

# NOTE(review): the recorded output of the download cell in this notebook is a
# JSONDecodeError, which suggests this host may no longer answer with JSON --
# confirm both endpoints are still live before relying on them.

# JSON document listing the reporter areas (countries).
REPORTERS_URL = "https://comtrade.un.org/Data/cache/reporterAreas.json"

# Data-extraction URL template. {partner}, {reporter} and {cc} are placeholders
# meant to be filled in with str.format() for each request.
COMTRADE_GET_URL = "https://comtrade.un.org/api/get?" + "&".join(
    [
        "type=C",  # presumably "commodities" -- TODO confirm against the API docs
        "freq=A",  # presumably "annual" -- TODO confirm against the API docs
        "px=HS",  # HS classification
        "ps=all",  # every available year
        "rg=all",  # every trade flow (import/export/re-export)
        "p={partner}",  # partner area
        "r={reporter}",  # reporter area
        "cc={cc}",  # commodity code
        "fmt=csv",  # CSV output
    ]
)

def get_reporters() -> List[Tuple[int, str]]:
    """Return the available reporters (countries) as ``(id, name)`` pairs.

    The list is downloaded from ``REPORTERS_URL`` and read from the
    ``"results"`` key of the JSON payload. Entries are skipped when their
    ``id`` is missing or not an integer (this covers the ``"all"``
    pseudo-entry), when the id is not strictly positive, or when the name is
    empty or exactly "all".

    Returns:
        ``(id, name)`` tuples sorted by name.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        Any other exception raised by ``requests.get`` or ``resp.json()``
        (for example when the body is not valid JSON) is propagated.
    """
    resp = requests.get(REPORTERS_URL, timeout=60)
    resp.raise_for_status()
    data = resp.json()
    items = data.get("results", [])
    reporters = []
    for it in items:
        try:
            _id = int(it["id"])
        except (KeyError, TypeError, ValueError):
            # TypeError covers a null id or a non-mapping entry.
            continue
        # Tolerate a null "text" value instead of crashing on .strip().
        name = str(it.get("text") or "").strip()
        # Compare the whole name with "all": a substring test would also
        # discard real countries such as "Marshall Isds" or
        # "Wallis and Futuna Isds".
        if _id > 0 and name and name.lower() != "all":
            reporters.append((_id, name))
    reporters.sort(key=lambda x: x[1])
    return reporters

def sanitize_filename(s: str) -> str:
    """Return ``s`` made safe for use as a file name.

    Each of the characters ``< > : " / \\ | ? *`` becomes an underscore, then
    every run of whitespace (including leading and trailing whitespace) is
    collapsed: the remaining words are joined with single underscores.
    """
    forbidden_to_underscore = str.maketrans({ch: "_" for ch in '<>:"/\\|?*'})
    words = s.translate(forbidden_to_underscore).split()
    return "_".join(words)

def ensure_dir(path: str) -> None:
    """Create directory ``path`` (with any missing parents) if needed.

    An already existing directory is left untouched. An empty ``path`` is
    treated as "the current directory" and is a no-op: ``os.makedirs("")``
    would raise ``FileNotFoundError``, which is what happens when the caller
    passes ``os.path.dirname()`` of a bare file name.
    """
    if not path:
        return
    os.makedirs(path, exist_ok=True)

def save_csv(content: str, path: str) -> None:
    """Write ``content`` to ``path`` as UTF-8 text, replacing any existing file.

    The parent directory of ``path`` is created first through ``ensure_dir``.
    """
    parent_dir = os.path.dirname(path)
    ensure_dir(parent_dir)
    # newline="" turns off newline translation, so the line endings already
    # present in ``content`` are written unchanged.
    with open(path, mode="w", encoding="utf-8", newline="") as out_file:
        out_file.write(content)

def destination_path(out_dir: str, hs_code: str, partner: str, reporter_id: int, reporter_name: str) -> str:
    """Build the CSV path for one reporter inside ``out_dir``.

    The file name has the form
    ``comtrade_HS<hs_code>_p<partner>_<reporter_id>_<sanitized name>.csv``.
    Only the reporter name goes through ``sanitize_filename``.
    """
    name_parts = [
        "comtrade",
        f"HS{hs_code}",
        f"p{partner}",
        f"{reporter_id}",
        sanitize_filename(reporter_name),
    ]
    return os.path.join(out_dir, "_".join(name_parts) + ".csv")

def download_country_csv(reporter_id: int, reporter_name: str, hs_code: str, partner: str, out_dir: str) -> bool:
    """Download the CSV for one reporter and store it under ``out_dir``.

    Args:
        reporter_id: Numeric id of the reporter, sent as the ``r`` parameter.
        reporter_name: Reporter name, used only to build the file name.
        hs_code: Commodity code, sent as the ``cc`` parameter.
        partner: Partner code, sent as the ``p`` parameter.
        out_dir: Folder that receives the CSV file.

    Returns:
        True if a file was written. False if the response carried no data:
        HTTP 204, a blank body, or a body whose first line lacks the
        "Classification" header and which contains "No data" or
        "Error Message".

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        Other exceptions raised by ``requests.get`` (timeouts, connection
        errors) are propagated.
    """
    url = COMTRADE_GET_URL.format(reporter=reporter_id, cc=hs_code, partner=partner)
    r = requests.get(url, timeout=120)
    # Fail loudly on HTTP errors. Without this, an error page would be saved
    # as if it were CSV and then skipped as "already downloaded" on resume.
    r.raise_for_status()

    text = r.text
    if r.status_code == 204 or not text.strip():
        return False

    # text is non-blank here, so splitlines() has at least one element.
    first_line = text.splitlines()[0]
    if "Classification" not in first_line:
        # No CSV header: treat the API's textual "no data"/error replies as
        # an empty result instead of writing them to disk.
        if "No data" in text or "Error Message" in text:
            return False

    path = destination_path(out_dir, hs_code, partner, reporter_id, reporter_name)
    save_csv(text, path)
    return True


## 4) Lancer le téléchargement

In [5]:
# Fetch the reporter list, optionally truncated for quick test runs.
reporters = get_reporters()
if MAX_COUNTRIES:
    reporters = reporters[:MAX_COUNTRIES]

print(f"{len(reporters)} pays à traiter.")
ok = 0  # files present on disk (already there or freshly written)
ko = 0  # reporters with no data or with an error

for idx, (rid, rname) in enumerate(reporters, start=1):
    # Resume support: a non-empty file from a previous run counts as done
    # and triggers neither a request nor a pause.
    out_path = destination_path(OUT_DIR, HS_CODE, PARTNER, rid, rname)
    already_downloaded = os.path.exists(out_path) and os.path.getsize(out_path) > 0
    if already_downloaded:
        ok += 1
        print(f"[{idx}/{len(reporters)}] ✓ (existant) {rname}")
        continue

    try:
        wrote = download_country_csv(rid, rname, HS_CODE, PARTNER, OUT_DIR)
        if not wrote:
            ko += 1
            print(f"[{idx}/{len(reporters)}] – {rname} (pas de données)")
        else:
            ok += 1
            print(f"[{idx}/{len(reporters)}] ✓ {rname}")
    except requests.HTTPError as e:
        # HTTP-level failure: show the status code when the response has one.
        ko += 1
        code = getattr(e.response, "status_code", "?")
        print(f"[{idx}/{len(reporters)}] ✗ {rname} (HTTP {code})")
    except Exception as e:
        # Any other failure: report it and move on to the next reporter.
        ko += 1
        print(f"[{idx}/{len(reporters)}] ✗ {rname} (erreur: {e})")
    # Pause after every attempted download, successful or not.
    time.sleep(SLEEP_SEC)

print(f"Terminé. Fichiers OK: {ok} | Sans données/erreurs: {ko}\nDossier: {OUT_DIR}")


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

## 5) Aperçu des fichiers générés

In [None]:

from pathlib import Path

# List up to 20 of the CSV files found in the output folder.
p = Path(OUT_DIR)
if not p.exists():
    print(f"Le dossier '{OUT_DIR}' n'existe pas encore.")
else:
    files = sorted(x.name for x in p.glob("*.csv"))
    print(f"{len(files)} fichiers trouvés dans '{OUT_DIR}'.")
    for name in files[:20]:
        print(" -", name)
    # Signal that the listing was cut short.
    if len(files) > 20:
        print(" ...")
