## 1️⃣ Import des datasets

In [1]:
import pandas as pd
from pathlib import Path

In [2]:
# 📂 Load the cleaned CSV files.
# The data directory is declared once so that relocating the files is a
# one-line change; pandas accepts path objects as well as strings.
DATA_DIR = Path("../data")
df_fin = pd.read_csv(DATA_DIR / "df_mm_financial_clean.csv")
df_sto = pd.read_csv(DATA_DIR / "df_mm_stores_clean.csv")

In [None]:
# Confirm both frames are loaded by reporting their (rows, columns) shapes.
print(
    "✅ Fichiers chargés :",
    f"Finance : {df_fin.shape}",
    f"Stores  : {df_sto.shape}",
    sep="\n",
)


✅ Fichiers chargés :
Finance : (33856, 15)
Stores  : (33855, 9)


In [4]:
# 🔍 List the column names present in both frames: these are the
# candidate join keys.
print(
    "\nClés communes potentielles :",
    set(df_fin.columns).intersection(df_sto.columns),
    sep="\n",
)


Clés communes potentielles :
{'reportid'}


In [5]:
# 💡 Join key -> 'reportid'
key = "reportid"
# Stop immediately if either frame lacks the key column, rather than
# letting the merge fail later.
if not all(key in frame.columns for frame in (df_fin, df_sto)):
    raise KeyError(f"La clé '{key}' n'existe pas dans les deux fichiers !")

In [6]:
# 🔑 Count repeated key values in each frame. `duplicated()` flags every
# occurrence after the first, so the sum is the number of surplus rows.
print(
    "\nDoublons sur la clé :",
    f"Finance : {df_fin[key].duplicated().sum()}",
    f"Stores  : {df_sto[key].duplicated().sum()}",
    sep="\n",
)


Doublons sur la clé :
Finance : 1
Stores  : 0


In [8]:
# 🔗 Merge (INNER JOIN) of the financial and store frames on the shared key.
# Overlapping non-key column names would receive the "_fin" / "_sto" suffixes.
#
# `validate="many_to_one"` makes pandas verify that the key is unique in
# df_sto and raise a MergeError otherwise, so duplicated store rows cannot
# silently multiply financial rows. Repeated keys on the df_fin side remain
# allowed and carry through to the result unchanged.
df_join = pd.merge(
    df_fin,
    df_sto,
    on=key,
    how="inner",
    suffixes=("_fin", "_sto"),
    validate="many_to_one",
)

print("\n✅ Fusion réussie !")
print(f"Dimensions finales : {df_join.shape}")

# Preview the first rows of the joined frame.
display(df_join.head(5))


✅ Fusion réussie !
Dimensions finales : (33856, 23)


Unnamed: 0,unit_price,unit_cost,sold_quantity,sales,cogs,marketing,total_expenses,pre-sales_inventory,profit,margin,...,expected_margin,reportid,quarter,storeid,state,position,market,prodid,product,type
0,4.5,2.69,666.0,2997.0,1791.54,226.44,2017.98,948.0,979.02,,...,48.38,JB578CJ,,Los Angeles - Sunset Boulevard,California,West,Large Market,1_Co,Amaretto,Coffee
1,4.5,2.69,1161.0,5224.5,3123.09,394.74,3517.83,2012.0,1706.67,24.0,...,46.03,UZ650HV,2023-04-01,Los Angeles - Sunset Boulevard,California,West,Large Market,,Amaretto,Coffee
2,4.5,2.69,409.0,1840.5,1100.21,139.06,1239.27,581.0,601.23,25.56,...,42.86,VB278N7,2023-07-01,Los Angeles - Sunset Boulevard,California,West,Large Market,1_Co,Amaretto,Coffee
3,4.5,2.69,1071.0,4819.5,2880.99,364.14,,1629.0,1574.37,25.06,...,48.6,TL8282Y,2023-10-01,Los Angeles - Sunset Boulevard,California,West,Large Market,1_Co,Amaretto,Coffee
4,4.5,2.69,735.0,3307.5,1977.15,249.9,2227.05,1217.0,1080.45,24.39,...,48.38,FB512II,2023-01-01,Los Angeles - Hollywood Boulevard,California,West,Large Market,1_Co,Amaretto,Coffee
