In [1]:
import unicodedata

import numpy as np
import pandas as pd

# Load the raw export and trim stray whitespace around the header names.
df = pd.read_csv("data/logistics-data.csv").rename(columns=str.strip)

def to_number(x):
    """Parse a French-formatted numeric cell into a float.

    Whitespace of any kind is treated as a thousands separator and removed,
    and a decimal comma is converted to a decimal point.

    Parameters
    ----------
    x : object
        Raw cell value (string, number, or missing value).

    Returns
    -------
    float
        The parsed value, or ``np.nan`` when ``x`` is missing or is not a
        valid number once cleaned.
    """
    if pd.isna(x):
        return np.nan
    # str.split() without arguments splits on every Unicode whitespace
    # character, so regular spaces, non-breaking spaces (U+00A0) and narrow
    # non-breaking spaces (U+202F) are all dropped.
    text = "".join(str(x).split()).replace(",", ".")
    try:
        return float(text)
    except ValueError:
        # Only a malformed number is a "not a number" case; anything else
        # (e.g. KeyboardInterrupt) must propagate.
        return np.nan

# Convert the three amount columns to floats, then order the rows by
# indicator, sub-indicator and year.
df = (
    df.assign(**{name: df[name].map(to_number) for name in ("PLF", "CFX", "TOTAL")})
      .sort_values(by=["INDICATEUR", "SOUS-INDICATEUR", "ANNEE"])
)

def interpolate_group(g, extra_years=1, fallback_ratio=0.5):
    """Fill missing PLF / CFX / TOTAL values for one indicator group.

    Intended to be passed to ``DataFrameGroupBy.apply``: the group key
    ``(INDICATEUR, SOUS-INDICATEUR)`` is read from ``g.name``.

    Processing order:

    1. Estimate the share of PLF within PLF + CFX (median over the rows where
       both are known and their sum is positive).
    2. Reindex on a continuous range of years, from the first observed year
       to the last observed year plus ``extra_years``.
    3. Where TOTAL is known, derive the missing component(s) from it.
    4. Linearly interpolate PLF and CFX; leading and trailing gaps are filled
       in both directions.
    5. Fill any missing TOTAL with PLF + CFX.

    Parameters
    ----------
    g : pandas.DataFrame
        Rows of a single group, with an integer ``ANNEE`` column and numeric
        ``PLF``, ``CFX`` and ``TOTAL`` columns. ``g.name`` must be the
        2-tuple group key.
    extra_years : int, default 1
        Number of years appended after the last observed year (expected to
        be non-negative). The default keeps the one-year extension that was
        previously hard-coded.
    fallback_ratio : float, default 0.5
        PLF share used to split TOTAL when the group has no row with both
        PLF and CFX known.

    Returns
    -------
    pandas.DataFrame
        One row per year of the continuous range, with ``ANNEE`` restored as
        a regular column.

    Raises
    ------
    ValueError
        If a year appears more than once in the group (the rows cannot be
        reindexed on the year).
    """
    indicateur, sous_indicateur = g.name

    # Share of PLF within (PLF + CFX) where both are known; used to split
    # TOTAL when neither component is available.
    ratio = np.nan
    mask_known = g["PLF"].notna() & g["CFX"].notna() & ((g["PLF"] + g["CFX"]) > 0)
    if mask_known.any():
        known = g.loc[mask_known]
        ratio = (known["PLF"] / (known["PLF"] + known["CFX"])).median()
    if np.isnan(ratio):
        ratio = fallback_ratio

    # reindex() refuses duplicate labels with a generic message; fail early
    # with one that names the offending group.
    if g["ANNEE"].duplicated().any():
        raise ValueError(
            f"Duplicate ANNEE values in group {g.name!r}: cannot reindex on years"
        )

    g = g.set_index("ANNEE")
    # range() excludes its stop value, hence the extra + 1.
    years = range(g.index.min(), g.index.max() + extra_years + 1)
    g = g.reindex(years)

    # Rows created by the reindex have no group labels yet.
    g["INDICATEUR"] = indicateur
    g["SOUS-INDICATEUR"] = sous_indicateur

    # 1) TOTAL and PLF known, CFX missing -> CFX = TOTAL - PLF
    m = g["TOTAL"].notna() & g["PLF"].notna() & g["CFX"].isna()
    g.loc[m, "CFX"] = g.loc[m, "TOTAL"] - g.loc[m, "PLF"]

    # 2) TOTAL and CFX known, PLF missing -> PLF = TOTAL - CFX
    m = g["TOTAL"].notna() & g["CFX"].notna() & g["PLF"].isna()
    g.loc[m, "PLF"] = g.loc[m, "TOTAL"] - g.loc[m, "CFX"]

    # 3) TOTAL known, both components missing -> split TOTAL using the ratio
    m = g["TOTAL"].notna() & g["PLF"].isna() & g["CFX"].isna()
    g.loc[m, "PLF"] = g.loc[m, "TOTAL"] * ratio
    g.loc[m, "CFX"] = g.loc[m, "TOTAL"] * (1 - ratio)

    # 4) Linear interpolation of the remaining gaps
    g["PLF"] = g["PLF"].interpolate(method="linear", limit_direction="both")
    g["CFX"] = g["CFX"].interpolate(method="linear", limit_direction="both")

    # 5) Fill TOTAL where it is still missing
    mt = g["TOTAL"].isna()
    g.loc[mt, "TOTAL"] = g.loc[mt, "PLF"] + g.loc[mt, "CFX"]

    # The rename is a safety net in case the index name was lost on reindex.
    return g.reset_index().rename(columns={"index": "ANNEE"})

# Interpolate each (indicator, sub-indicator) series independently.
grouped = df.groupby(["INDICATEUR", "SOUS-INDICATEUR"], group_keys=False)
df_interpolated = grouped.apply(interpolate_group)

# Report how many missing amounts are left, then persist the result.
remaining_nans = df_interpolated[["PLF", "CFX", "TOTAL"]].isna().sum()
print("NaN restants :\n", remaining_nans)
df_interpolated.to_csv("data/logistics-data-interpolated.csv", index=False)
print("Fichier sauvegardé ✅")

NaN restants :
 PLF      0
CFX      0
TOTAL    0
dtype: int64
Fichier sauvegardé ✅


In [2]:
# Reload the interpolated series written by the previous cell.
df = pd.read_csv("data/logistics-data-interpolated.csv")

# --- 1) Normal dataset: an independent copy tagged with its mode
df_normal = df.assign(MODE="Normal")

# --- 2) Coefficients crise (à adapter)
def coef_crise(row):
    """Return the crisis multiplier to apply to one row's amounts.

    Matching is done on substrings of the ``INDICATEUR`` and
    ``SOUS-INDICATEUR`` labels, ignoring case and accents, so that
    "Déchets", "dechets" and a decomposed (NFD) "De\u0301chets" are all
    recognised.

    Parameters
    ----------
    row : mapping
        Anything indexable by ``"INDICATEUR"`` and ``"SOUS-INDICATEUR"``
        (a DataFrame row or a plain dict).

    Returns
    -------
    float
        The multiplier of the first matching rule, or ``1.00`` when no rule
        matches.
    """
    def _fold(value):
        # Lower-case, decompose accented characters, then drop the combining
        # marks so comparisons are accent-insensitive.
        decomposed = unicodedata.normalize("NFKD", str(value).lower())
        return "".join(ch for ch in decomposed if not unicodedata.combining(ch))

    ind = _fold(row["INDICATEUR"])
    sous = _fold(row["SOUS-INDICATEUR"])

    if "restauration" in ind:
        return 1.15  # +15%
    if "lingerie" in ind:
        return 1.30  # +30%
    if "magasin" in ind and "hygiene" in sous:
        return 1.60  # +60%
    if "dechets" in ind:
        return 1.10  # +10%

    return 1.00  # no change

# --- 3) Crisis dataset: each row's amounts are scaled by its coefficient
df_crise = df.copy()
df_crise["MODE"] = "Crise"
df_crise["COEF_CRISE"] = df_crise.apply(coef_crise, axis=1)

df_crise["PLF"] = df_crise["PLF"].mul(df_crise["COEF_CRISE"])
df_crise["CFX"] = df_crise["CFX"].mul(df_crise["COEF_CRISE"])
# TOTAL is rebuilt from the scaled components rather than scaled itself.
df_crise["TOTAL"] = df_crise["PLF"].add(df_crise["CFX"])

# --- 4) Evolution (crisis vs normal)
merge_cols = ["ANNEE", "INDICATEUR", "SOUS-INDICATEUR"]
crisis_amounts = df_crise[[*merge_cols, "PLF", "CFX", "TOTAL"]]

df_compare = pd.merge(
    df_normal,
    crisis_amounts,
    on=merge_cols,
    suffixes=("_NORMAL", "_CRISE"),
)

total_normal = df_compare["TOTAL_NORMAL"]
df_compare["DELTA_TOTAL"] = df_compare["TOTAL_CRISE"].sub(total_normal)
df_compare["DELTA_TOTAL_PCT"] = df_compare["DELTA_TOTAL"].div(total_normal).mul(100)

# (optional) trend arrow: up, down, or flat when the delta is neither
# positive nor negative (zero or missing)
df_compare["EVOLUTION"] = df_compare["DELTA_TOTAL"].map(
    lambda delta: "→" if not (delta > 0 or delta < 0) else ("↑" if delta > 0 else "↓")
)

print(df_compare.head(10))

# Save the comparison table
df_compare.to_csv("data/logistics-crise-comparaison.csv", index=False)
print("Fichier comparaison sauvegardé ✅")

   ANNEE   PLF_NORMAL  CFX_NORMAL  TOTAL_NORMAL INDICATEUR  \
0   2012   333.570000   27.500000    361.070000    Déchets   
1   2013   346.380000   29.333333    375.713333    Déchets   
2   2014   359.190000   31.166667    390.356667    Déchets   
3   2015   372.000000   33.000000    405.000000    Déchets   
4   2016   372.000000   33.000000    405.000000    Déchets   
5   2015   130.000000    1.990000    131.990000    Déchets   
6   2016   130.000000    1.990000    131.990000    Déchets   
7   2012  1086.190000   48.000000   1134.190000    Déchets   
8   2013  1080.793333   49.666667   1130.460000    Déchets   
9   2014  1075.396667   51.333333   1126.730000    Déchets   

                                     SOUS-INDICATEUR    MODE    PLF_CRISE  \
0                                            Cartons  Normal   366.927000   
1                                            Cartons  Normal   381.018000   
2                                            Cartons  Normal   395.109000   
3        