Nettoyage des données :
- Garder seulement le Maroc.

- Supprimer les colonnes inutiles (Country Code, Indicator Code, Unnamed: 69).

- Garder les années 1990–2023.

- Supprimer les valeurs manquantes (NaN).

- Convertir les colonnes d'années en deux colonnes « annee » et « valeur » (format long).

In [1]:
import pandas as pd
from pathlib import Path

# Input directory (raw downloads) and output directory (cleaned files).
RAW = Path("../data/raw")
BRONZE = Path("../data/bronze")
BRONZE.mkdir(parents=True, exist_ok=True)


def _find_raw(pattern: str) -> Path:
    """Return the first file in RAW matching *pattern* (sorted by name).

    Raises:
        FileNotFoundError: if no file in RAW matches the pattern. This
            replaces the bare ``StopIteration`` that ``next(glob(...))``
            would raise, which gives no hint about what is missing.
    """
    # glob() yields matches in arbitrary order; sorting makes the choice
    # deterministic when several versions of an export are present.
    matches = sorted(RAW.glob(pattern))
    if not matches:
        raise FileNotFoundError(
            f"No file matching {pattern!r} found in {RAW.resolve()}"
        )
    return matches[0]


# Short dataset key -> raw indicator export located in RAW.
files = {
    "fossiles": _find_raw("API_EG.ELC.FOSL.ZS_*.csv"),
    "petrole": _find_raw("API_EG.ELC.PETR.ZS_*.csv"),
    "renouvhx": _find_raw("API_EG.ELC.RNWX.KH_*.csv"),
}

# Year column labels to keep: "1990" .. "2023" (range end is exclusive).
YEARS = [str(year) for year in range(1990, 2024)]

def clean_one(path: Path, out_name: str, *, years=None, out_dir=None) -> pd.DataFrame:
    """Clean one indicator CSV for Morocco and save it in long format.

    The file is read skipping its first 4 lines, filtered to the Morocco
    row(s), reduced to the indicator name plus the requested year columns,
    reshaped to one row per year, stripped of missing values, and written to
    ``<out_dir>/<out_name>.csv``.

    Args:
        path: CSV file to read.
        out_name: Output file name without the ``.csv`` extension.
        years: Year column labels to keep. Defaults to the module-level
            ``YEARS`` list. Items are converted to ``str``.
        out_dir: Directory to write into. Defaults to the module-level
            ``BRONZE`` directory. Created if it does not exist.

    Returns:
        A DataFrame with columns ``indicateur``, ``annee`` (int) and
        ``valeur``.

    Raises:
        KeyError: if ``Country Name``, ``Indicator Name`` or one of the
            requested year columns is absent from the file.
    """
    years = YEARS if years is None else [str(y) for y in years]
    out_dir = BRONZE if out_dir is None else Path(out_dir)

    df = pd.read_csv(path, skiprows=4)

    # Match Morocco regardless of the language of the export: the country
    # name is localized ("Maroc" / "Morocco"), the ISO code "MAR" is not.
    is_morocco = df["Country Name"].isin(["Maroc", "Morocco"])
    if "Country Code" in df.columns:
        is_morocco = is_morocco | (df["Country Code"] == "MAR")
    df = df[is_morocco]

    # Wide -> long: one (indicator, year, value) row per year column.
    keep = ["Indicator Name"] + years
    df = df[keep].melt(id_vars=["Indicator Name"], var_name="annee", value_name="valeur")
    df = df.dropna(subset=["valeur"]).assign(annee=lambda d: d["annee"].astype(int))
    df = df.rename(columns={"Indicator Name": "indicateur"})

    out_dir.mkdir(parents=True, exist_ok=True)
    df.to_csv(out_dir / f"{out_name}.csv", index=False)
    return df

# Clean each indicator export; every call also writes its own long-format CSV.
d_foss = clean_one(files["fossiles"], "maroc_fossiles_1990_2023")
d_petr = clean_one(files["petrole"], "maroc_petrole_1990_2023")
d_renx = clean_one(files["renouvhx"], "maroc_renouvelables_hors_hydro_1990_2023")

# Merge into a single wide file keyed by year. Each frame's columns are
# tagged with its dataset name up front so the merges need no suffix handling.
foss_wide = d_foss.rename(
    columns={"indicateur": "indicateur_fossiles", "valeur": "valeur_fossiles"}
)
petr_wide = d_petr.rename(
    columns={"indicateur": "indicateur_petrole", "valeur": "valeur_petrole"}
)
renx_wide = d_renx.rename(
    columns={"indicateur": "indicateur_renouvhx", "valeur": "valeur_renouvhx"}
)

df_all = foss_wide.merge(petr_wide, on="annee", how="outer")
df_all = df_all.merge(renx_wide, on="annee", how="outer")

mix_path = BRONZE / "maroc_energie_mix_1990_2023.csv"
df_all.to_csv(mix_path, index=False)
print("OK -> data/bronze/*.csv")


OK -> data/bronze/*.csv


In [5]:
import pandas as pd

# Load the raw weather file.
weather = pd.read_csv("../data/raw/weather_maroc_1901_2024.csv")

# Rename the two useful columns, then keep only those.
column_map = {
    "Category": "annee",
    "Average Mean Surface Air Temperature (Annual Mean °C)": "temperature",
}
wanted = ["annee", "temperature"]
weather = weather.rename(columns=column_map)[wanted]

# Restrict to the 1990–2023 period (both bounds inclusive).
in_period = weather["annee"].between(1990, 2023)

# Drop rows with missing values.
df = weather[in_period].dropna()

# Save the cleaned table.
df.to_csv("../data/bronze/weather_maroc_1990_2023.csv", index=False)

print(df.head())


    annee  temperature
89   1990        18.04
90   1991        17.30
91   1992        17.40
92   1993        17.06
93   1994        17.93
