In [78]:
import pandas as pd
import chardet
import os
import re
from datetime import datetime

# Render every column of wide DataFrames instead of truncating them.
pd.options.display.max_columns = None

## Fonctions

In [79]:
# 📌 Input / output directories (relative paths, resolved against the working directory)
# Folder listed by get_files() for the Vendor Central exports.
DOSSIER_VENDOR = "vendor_central_mois"
# Folder listed by get_files() for the AMVisor exports.
DOSSIER_AMVISOR = "amvisor_mois"
# Output folder name. NOTE(review): processing() has its own DOSSIER_DATA
# parameter with the same default, so this global is not read by the code below.
DOSSIER_DATA = "processed"

In [None]:
def extraire_mois_annee(nom_fichier):
    """Return the "YYYY-MM" period encoded in a file name, or None if there is none.

    Two naming schemes are recognised, tried in this order:

    * AMVisor: the name contains ``YYYY-MM``; the matched text is returned as is.
    * Vendor Central: the name contains a ``DD-MM-YYYY_DD-MM-YYYY`` range; the
      month and year of the first date of the range are used.
    """
    amvisor_hit = re.search(r"(\d{4})-(\d{2})", nom_fichier)
    if amvisor_hit is not None:
        return amvisor_hit.group(0)

    vendor_hit = re.search(r"(\d{2})-(\d{2})-(\d{4})_\d{2}-\d{2}-\d{4}", nom_fichier)
    if vendor_hit is None:
        return None

    # Only the first date of the range is captured: (day, month, year).
    _jour, mois, annee = vendor_hit.groups()
    return f"{annee}-{mois}"

def get_files(annee_mois):
    """Return the input files whose name encodes the period ``annee_mois`` ("YYYY-MM").

    The result lists the AMVisor matches first, then the Vendor Central matches.
    Callers index it positionally, so each directory listing is sorted:
    ``os.listdir`` returns entries in arbitrary order, which would otherwise make
    the positions depend on the platform / file system.

    Exactly three files are expected (per the original note: Export, Fabrication,
    Approvisionnement). Any other count only prints a warning; the list is
    returned regardless.
    """
    fichiers_vendor = sorted(os.listdir(DOSSIER_VENDOR))
    fichiers_amvisor = sorted(os.listdir(DOSSIER_AMVISOR))

    # Keep only the files of the requested month, AMVisor first.
    fichiers_trouves = [
        os.path.join(DOSSIER_AMVISOR, f) for f in fichiers_amvisor if extraire_mois_annee(f) == annee_mois
    ] + [
        os.path.join(DOSSIER_VENDOR, f) for f in fichiers_vendor if extraire_mois_annee(f) == annee_mois
    ]

    # Deliberately a warning rather than an exception (the original author left
    # "raise FileNotFoundError" as a commented-out alternative).
    if len(fichiers_trouves) != 3:
        print(f"❌ Fichiers manquants pour {annee_mois} : trouvés {len(fichiers_trouves)}/3.")

    return fichiers_trouves
    
def process_amvisor(path):
    """Load an AMVisor export and return its known columns with numeric fields as floats.

    Parameters
    ----------
    path : str
        Path of the AMVisor CSV file. The delimiter is sniffed by the python
        engine and the file is decoded as ISO-8859-1.

    Returns
    -------
    pandas.DataFrame
        Rows that have an ASIN, restricted to the whitelisted columns that exist
        in the file (in whitelist order). Ads, sales, stock and state columns are
        converted to float, except "Buy Box" which is left untouched.

    Raises
    ------
    KeyError
        If the file has no "ASIN" column.
    ValueError
        If a column to convert still holds non-numeric text after clean-up.
    """
    # EAN is read as text so it is not parsed as a number.
    df = pd.read_csv(path, sep=None, engine='python', encoding="ISO-8859-1", dtype={"EAN": str})
    df = df.dropna(subset=["ASIN"])

    # Column whitelist, grouped by theme
    columns_main = ["ASIN", 'Item no.', "EAN", "Item"]
    columns_overall = ["Extras", "Size", "Description Item", "Cat. 1", "Cat. 2", "Cat. 3", "Cat. 4", "Visible", "Title Content", "Variations", "Brand store URL", "Images", "Videos", "AI summary", "Code", "Catalogue"]
    columns_ads = ["Ads Impressions CM", "Ads Clicks CM", "Ads CTR CM", "Ads Units 14d CM", "Ads Costs CM", "Ads RoAS CM", "Ads CVR CM"]
    columns_sales = ["Sell-out CM", "Sell-out PM", 'Total sell-out CM', 'Total sell-out PM', "Revenue CM", "Revenue PM", "Units CM", "Units PM", "SRP", "Margin", "Replacements CM", "Replacements PM"]
    columns_stocks = ['Stock', 'Stock value', 'Total stock', 'Total stock value']
    columns_state = ["Reviews", "Stars", 'Coverage', 'Rank 1', 'Buy Box', 'Buy Box PM', 'Days not Buy Box', "Views CM", "Views PM", "CVR CM", "CVR PM"]

    all_columns = columns_main + columns_overall + columns_ads + columns_sales + columns_stocks + columns_state
    all_columns = [col for col in all_columns if col in df.columns]

    # Columns to convert: same presence filter as the selection above, so an
    # export lacking an optional column no longer raises KeyError here.
    colonnes_a_convertir = [
        col
        for col in columns_ads + columns_sales + columns_stocks + columns_state
        if col != "Buy Box" and col in df.columns
    ]
    if colonnes_a_convertir:
        # Strip currency / percent signs and narrow no-break spaces, and turn the
        # decimal comma into a dot, before casting to float.
        df[colonnes_a_convertir] = (
            df[colonnes_a_convertir]
            .replace({"€": "", "%": "", "\u202f": "", ",": "."}, regex=True)
            .astype(float)
        )

    return df[all_columns]

def _previous_period_value(current, evolution_pct):
    """Back out the previous-period value from a current value and its % change."""
    ratio = 1 + evolution_pct / 100
    # A -100 % change gives a ratio of 0: the previous value cannot be recovered,
    # so mask it to NaN instead of dividing by zero (which would yield +/-inf).
    return current / ratio.where(ratio != 0)


def process_vendor_central(path1, path2):
    """Combine two Vendor Central exports into one row per ASIN with short column names.

    Parameters
    ----------
    path1, path2 : str
        Paths of two Vendor Central CSV exports sharing the same columns. The
        first line of each file is skipped. (Presumably the "Fabrication" and
        "Approvisionnement" views - both are handled identically.)

    Returns
    -------
    pandas.DataFrame
        One row per ASIN (the row with the highest shipped COGS is kept), metric
        columns as floats and renamed to short labels, plus three derived
        columns: "COGS LM", "Unités LM" and "Retours LM" (previous-period values
        recomputed from the current value and its percentage change).
        "Unités LM" is an integer column in which unknown values become 0.
        The index is the one inherited from the source files.

    Raises
    ------
    KeyError
        If one of the expected columns is missing from a file.
    """
    df_fab = pd.read_csv(path1, skiprows=1)
    df_app = pd.read_csv(path2, skiprows=1)

    # Expected source columns: three identifiers followed by nine metrics.
    columns = ["ASIN", "Nom du produit", "Marque", "COGS expédié", "COGS expédié – Période antérieure (%)", "COGS expédié – Même période l'année dernière (%)", "Unités expédiées", "Unités expédiées – Période antérieure (%)", "Unités expédiées – Même période l'année dernière (%)", "Retours client", "Retours du client – Période antérieure (%)", "Retours du client – Même période l'année dernière (%)"]
    df = pd.concat([df_fab[columns], df_app[columns]])

    # Every column after the three identifiers is numeric text to clean up:
    # drop currency / percent signs and narrow no-break spaces, decimal comma -> dot.
    colonnes_a_convertir = columns[3:]
    df[colonnes_a_convertir] = (
        df[colonnes_a_convertir]
        .replace({"€": "", "%": "", "\u202f": "", ",": "."}, regex=True)
        .astype(float)
    )

    # One row per ASIN: highest COGS first (NaN sorts last), keep the first.
    df = df.sort_values(by=["ASIN", "COGS expédié"], ascending=[True, False])
    df = df.drop_duplicates(subset="ASIN", keep="first")

    # Rename through an explicit mapping rather than by position.
    short_names = ["ASIN", "Nom du produit", "Marque", "COGS", "COGS evol LM", "COGS evol SPLY", "Unités", "Unités evol LM", "Unités evol SPLY", "Retours", "Retours evol LM", "Retours evol SPLY"]
    df = df.rename(columns=dict(zip(columns, short_names)))

    # Previous-period ("LM") values derived from the current value and its % change.
    df["COGS LM"] = _previous_period_value(df["COGS"], df["COGS evol LM"]).round(2)
    df["Unités LM"] = _previous_period_value(df["Unités"], df["Unités evol LM"]).round().fillna(0).astype(int)
    df["Retours LM"] = _previous_period_value(df["Retours"], df["Retours evol LM"])

    return df

def process_amazon(df_amvisor, df_vendor_central):

    # Merge data
    df = pd.merge(df_vendor_central, df_amvisor, how="outer", left_on="ASIN", right_on="ASIN")
    print(f"amvisor : {len(df_amvisor)} | vendor_central : {len(df_vendor_central)} | merged : {len(df)}")
    
    return df


def processing(dates, DOSSIER_DATA = "processed"):
    """Run the full pipeline for each period and write the results as CSV files.

    Parameters
    ----------
    dates : iterable of str
        Periods to process, in the "YYYY-MM" form understood by get_files().
    DOSSIER_DATA : str
        Output directory. It is created if it does not exist yet (to_csv does
        not create missing directories).

    Returns
    -------
    pandas.DataFrame
        All periods concatenated, with an "extract_date" column holding the period.

    Side effects
    ------------
    Writes ``data_<period>.csv`` for every period and ``all_data.csv`` for the
    concatenation into DOSSIER_DATA. The per-period file is written before the
    "extract_date" column is added, so only ``all_data.csv`` contains it.

    Raises
    ------
    IndexError
        If get_files() returns fewer than three files for a period.
    ValueError
        If ``dates`` is empty (nothing to concatenate).
    """
    os.makedirs(DOSSIER_DATA, exist_ok=True)

    all_data = []

    for date in dates:
        print(f"-- Processing {date} ...")
        files = get_files(date)
        # get_files() lists the AMVisor match first, then the Vendor Central ones.
        amvisor = process_amvisor(files[0])
        vendor_central = process_vendor_central(files[1], files[2])
        data = process_amazon(amvisor, vendor_central)
        data.to_csv(f"{DOSSIER_DATA}/data_{date}.csv")

        data["extract_date"] = date
        all_data.append(data)

    final_df = pd.concat(all_data, ignore_index=True)
    final_df.to_csv(f"{DOSSIER_DATA}/all_data.csv")
    return final_df

## EDA

In [81]:
def process_amazon(df_amvisor, df_vendor_central):

    # Merge data
    df = pd.merge(df_vendor_central, df_amvisor, how="outer", left_on="ASIN", right_on="ASIN")
    print(f"amvisor : {len(df_amvisor)} | vendor_central : {len(df_vendor_central)} | merged : {len(df)}")
    
    return df

In [82]:
# Locate the input files for February 2025 (AMVisor matches are listed first,
# then the Vendor Central ones).
files = get_files("2025-02")

In [83]:
# files[0] is the first AMVisor match returned by get_files(); load it and
# preview the cleaned export.
amvisor = process_amvisor(files[0])
amvisor.head()

Unnamed: 0,ASIN,Item no.,EAN,Item,Extras,Size,Description Item,Cat. 1,Cat. 2,Cat. 3,Cat. 4,Visible,Title Content,Variations,Brand store URL,Images,Videos,AI summary,Code,Catalogue,Ads Impressions CM,Ads Clicks CM,Ads CTR CM,Ads Units 14d CM,Ads Costs CM,Ads RoAS CM,Ads CVR CM,Sell-out CM,Sell-out PM,Total sell-out CM,Total sell-out PM,Revenue CM,Revenue PM,Units CM,Units PM,SRP,Margin,Replacements CM,Replacements PM,Stock,Stock value,Total stock,Total stock value,Reviews,Stars,Coverage,Rank 1,Buy Box,Buy Box PM,Days not Buy Box,Views CM,Views PM,CVR CM,CVR PM
0,B01N6NQ73J,132469,4047443338518,Thomson Casque TV HED4407,Noir,8m,REGULAR,Audio,Headphone/Headsets | Thomson,Audio,Headphones/Headsets,Yes,Thomson Casque TV avec câble extra long (Casqu...,B00P7R9LMO B01MXGY1S1 B01N6NQ73J B077CQ48VS B0...,https://www.amazon.fr/stores/THOMSON/page/D23F...,7.0,2.0,Les clients apprécient la longueur du câble et...,PR,STO,7488.0,42.0,0.5609,8.0,5.37,35.27,19.05,599.4,1620.6,924.85,2731.49,988.33,2207.2,54.0,146.0,19.99,37.6,0.0,0.0,0.0,0.0,0.0,0.0,1573.0,6373.0,0.0,6268.0,Amazon,100.0,,1170.0,2489.0,7.18,9.96
1,B00006IVA8,44728,4007249447289,Hama Cassette vidéo de nettoyage VHS/S,,,REGULAR,Consumer Electronics,Storage,Universal,Cleaning,Yes,Hama Cassette vidéo de nettoyage (VHS / S-VHS ...,,,6.0,,Les clients apprécient l'efficacité de nettoya...,NP,STO,56760.0,521.0,0.9179,139.0,259.95,5.19,26.68,1738.1,673.4,1738.1,673.4,1512.72,586.08,191.0,74.0,14.99,-14.9,0.0,0.0,311.0,2830.1,311.0,2830.1,263.0,974.0,4.9,8.0,Amazon,100.0,,817.0,661.0,23.38,11.2
2,B00006JAXW,49593,4007249495938,Hama Plateau tournant universel,Noir,XL,REGULAR,Consumer Electronics,Wall Brackets,Television,Holders & Mounting,Yes,"Hama Plateau universel Hama rond (40 cm, taill...",B00006IVA7 B00006JAXW B00006JDEX B0DDVZ6BV8,,5.0,,Les clients apprécient la qualité du plateau t...,PR,STO,20503.0,184.0,0.8974,26.0,40.16,21.64,14.13,439.19,1163.26,593.5,2694.49,944.68,2492.73,37.0,98.0,29.99,62.0,0.0,0.0,228.0,2706.36,242.0,2872.54,390.0,1271.0,11.3,40.0,Amazon,100.0,,1144.0,1310.0,4.37,17.33
3,B0BNLNC6KX,54252,4007249542526,"Hama Tuner Hi-Fi ""DIT2105SBTX""",Argenté,"Couplage USB type A / Jack 3,52mm femelle / RC...",REGULAR,Audio,Digital Radio,Audio,Digital Radios,Yes,Hama iTuner Hi-FI DIT2105SBTX Dab+ Bluetooth (...,B0BNLLF6YV B0BNLNC6KX,,7.0,,,PR,STO,9601.0,55.0,0.5729,1.0,18.04,24.57,1.82,408.0,1020.0,408.0,1020.0,483.92,1037.15,2.0,5.0,279.0,-0.2,0.0,0.0,3.0,612.0,4.0,816.0,21.0,46.0,9.7,116813.0,Amazon,95.2,,239.0,584.0,0.84,0.86
4,B00D2S92L0,111514,4047443186188,Xavax Boîte de transport à gâteaux,Anthracite / Transparent,,REGULAR,Home & Office,Housewares,Cooking & Dining,Tableware,Yes,Xavax Hama Boîte de transport pour un gâteau (...,B00D2S92L0 B071YB8MFM B08Z6ZG9V7 B08Z85LS9K,,9.0,3.0,Les clients apprécient la solidité et la prati...,PR,STO,,,,,,,,544.92,172.08,1770.99,2660.07,1023.52,359.8,76.0,24.0,15.99,46.2,0.0,0.0,102.0,731.34,139.0,996.63,974.0,6500.0,1.4,1685.0,Amazon,100.0,,2504.0,3104.0,9.86,11.95


In [84]:
# files[1] and files[2] are taken as the two Vendor Central exports
# (assumes get_files() found exactly one AMVisor file before them).
vendor_central = process_vendor_central(files[1],files[2])
vendor_central.head()

Unnamed: 0,ASIN,Nom du produit,Marque,COGS,COGS evol LM,COGS evol SPLY,Unités,Unités evol LM,Unités evol SPLY,Retours,Retours evol LM,Retours evol SPLY,COGS LM,Unités LM,Retours LM
776,B00005K49U,Hama Filtre UV 58mm (Ultraviolet Filtre de Pro...,Hama,,,-100.0,,,-100.0,,,,,0,
472,B00005K49X,"Hama Filtre UV (anti-UV et protection, compens...",Hama,,,-100.0,,,-100.0,,,-100.0,,0,
484,B00005K4A4,"Hama Filtre polarisant (circulaire, Traité, 58...",Hama,24.72,0.0,,1.0,0.0,,,,,24.72,1,
825,B00005K4A5,"Hama Filtre polarisant (Circulaire, Traité, 72...",Hama,,,-100.0,,,-100.0,,,,,0,
949,B00005K4AH,Hama Pare-soleil caoutchouté pour objectifs st...,Hama,,,-100.0,,,-100.0,,,,,0,


In [85]:
# Sanity check: list the rows without an ASIN in each source.
# A cell only renders its last expression, so both frames are passed to
# display() (available as a built-in inside Jupyter) to make sure the AMVisor
# check is shown as well.
display(
    amvisor[amvisor["ASIN"].isna()],
    vendor_central[vendor_central["ASIN"].isna()],
)

Unnamed: 0,ASIN,Nom du produit,Marque,COGS,COGS evol LM,COGS evol SPLY,Unités,Unités evol LM,Unités evol SPLY,Retours,Retours evol LM,Retours evol SPLY,COGS LM,Unités LM,Retours LM


In [86]:
# Outer-join both sources on ASIN (process_amazon prints the row counts) and
# preview the merged frame.
data = process_amazon(amvisor, vendor_central)
data.head()

amvisor : 2583 | vendor_central : 1252 | merged : 2834


Unnamed: 0,ASIN,Nom du produit,Marque,COGS,COGS evol LM,COGS evol SPLY,Unités,Unités evol LM,Unités evol SPLY,Retours,Retours evol LM,Retours evol SPLY,COGS LM,Unités LM,Retours LM,Item no.,EAN,Item,Extras,Size,Description Item,Cat. 1,Cat. 2,Cat. 3,Cat. 4,Visible,Title Content,Variations,Brand store URL,Images,Videos,AI summary,Code,Catalogue,Ads Impressions CM,Ads Clicks CM,Ads CTR CM,Ads Units 14d CM,Ads Costs CM,Ads RoAS CM,Ads CVR CM,Sell-out CM,Sell-out PM,Total sell-out CM,Total sell-out PM,Revenue CM,Revenue PM,Units CM,Units PM,SRP,Margin,Replacements CM,Replacements PM,Stock,Stock value,Total stock,Total stock value,Reviews,Stars,Coverage,Rank 1,Buy Box,Buy Box PM,Days not Buy Box,Views CM,Views PM,CVR CM,CVR PM
0,B00005K49U,Hama Filtre UV 58mm (Ultraviolet Filtre de Pro...,Hama,,,-100.0,,,-100.0,,,,,0.0,,70158.0,4007249701589.0,"Hama Filtre anti-UV/de protection, revêtement ...",,,REGULAR,Photo & Accessories,Optics & Cleaning,Camera,Filters,Yes,"Hama Filtre UV (anti-UV et protection, compens...",B00005K49R B00005K49S B00005K49T B00005K49U B0...,,6.0,,Les clients sont satisfaits de la protection q...,PR,STO,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.99,42.5,0.0,0.0,0.0,0.0,0.0,0.0,3222.0,8653.0,,328.0,Amazon,100.0,,,,,
1,B00005K49X,"Hama Filtre UV (anti-UV et protection, compens...",Hama,,,-100.0,,,-100.0,,,-100.0,,0.0,,70172.0,4007249701725.0,Hama Filtre UV traité,Noir,72mm,NOT ACTIVE,Photo & Accessories,Optics & Cleaning,Camera,Filters,Yes,"Hama Filtre UV (anti-UV et protection, compens...",B00005K49R B00005K49S B00005K49T B00005K49U B0...,,7.0,,Les clients sont satisfaits de la protection q...,EOL,PUA,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.99,51.1,0.0,0.0,0.0,0.0,0.0,0.0,3222.0,8653.0,,508.0,Stock Bureau Maison Bien-être,,,,,,
2,B00005K4A4,"Hama Filtre polarisant (circulaire, Traité, 58...",Hama,24.72,0.0,,1.0,0.0,,,,,24.72,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,B00005K4A5,"Hama Filtre polarisant (Circulaire, Traité, 72...",Hama,,,-100.0,,,-100.0,,,,,0.0,,72572.0,4007249725721.0,Hama Filtre polarisant circulaire,,72mm,REGULAR,Photo & Accessories,Optics & Cleaning,Camera,Filters,Yes,"Hama Filtre polarisant (circulaire, Traité, 72...",B00005K4A4 B00005K4A5 B00005KHT8 B00005KHT9 B0...,,3.0,,Les clients apprécient la qualité du filtre. I...,PR,STO,18994.0,67.0,0.3527,0.0,42.39,1.08,,0.0,0.0,0.0,57.16,0.0,0.0,0.0,0.0,37.49,64.3,0.0,0.0,0.0,0.0,0.0,0.0,742.0,1419.0,,60.0,Amazon,100.0,,93.0,128.0,0.0,1.56
4,B00005K4AH,Hama Pare-soleil caoutchouté pour objectifs st...,Hama,,,-100.0,,,-100.0,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [87]:
# Products whose AMVisor total sell-out differs from the Vendor Central COGS,
# limited to rows where at least one of the two is positive. A NaN on either
# side (ASIN present in one source only) compares as "different".
sellout_differs = data["Total sell-out CM"] != data["COGS"]
has_activity = (data["Total sell-out CM"] > 0) | (data["COGS"] > 0)
data.loc[sellout_differs & has_activity, ["ASIN", "Item", "COGS", "Sell-out CM", "Total sell-out CM"]]

Unnamed: 0,ASIN,Item,COGS,Sell-out CM,Total sell-out CM
2,B00005K4A4,,24.72,,
5,B00005KHSU,,41.00,,
8,B00005KHTA,,35.20,,
9,B00005KHTB,,18.44,,
10,B00005KHTC,,20.22,,
...,...,...,...,...,...
1814,B0CXHQQ9J3,EzeeTabs Détartrant Machine à Café - 2 Pastilles,,0.0,10.98
1825,B0CXJHGBJH,EzeeTabs NETTOYANT SALLE DE BAIN - 4 PASTILL,,0.0,7.33
1828,B0CXJ11MM5,EzeeTabs NETTOYANT UNIVERSEL - 2 PASTILLES,,0.0,10.98
1829,B0CXHQ8J4Z,EzeeTabs NETTOYANT UNIVERSEL - 4 PASTILLES,,0.0,14.66


In [88]:
# Rows with a positive total sell-out, shown next to the Vendor Central COGS.
data.loc[data["Total sell-out CM"] > 0, ["ASIN", "COGS", "Sell-out CM", "Total sell-out CM"]]

Unnamed: 0,ASIN,COGS,Sell-out CM,Total sell-out CM
6,B00005KHSV,35.50,35.50,35.50
11,B00005KHTD,23.50,0.00,23.50
13,B00005QF9N,199.04,18.66,180.38
16,B00005QFAY,288.63,15.71,257.21
17,B00005QFAZ,115.08,0.00,115.08
...,...,...,...,...
1814,B0CXHQQ9J3,,0.00,10.98
1825,B0CXJHGBJH,,0.00,7.33
1828,B0CXJ11MM5,,0.00,10.98
1829,B0CXHQ8J4Z,,0.00,14.66


In [89]:
# Vendor Central products whose ASIN does not appear in the AMVisor export.
in_amvisor = vendor_central["ASIN"].isin(amvisor["ASIN"])
hors_catalog = vendor_central.loc[~in_amvisor]
hors_catalog

Unnamed: 0,ASIN,Nom du produit,Marque,COGS,COGS evol LM,COGS evol SPLY,Unités,Unités evol LM,Unités evol SPLY,Retours,Retours evol LM,Retours evol SPLY,COGS LM,Unités LM,Retours LM
484,B00005K4A4,"Hama Filtre polarisant (circulaire, Traité, 58...",Hama,24.72,0.00,,1.0,0.00,,,,,24.72,1,
949,B00005K4AH,Hama Pare-soleil caoutchouté pour objectifs st...,Hama,,,-100.00,,,-100.0,,,,,0,
391,B00005KHSU,"Hama Filtre UV (anti-UV et protection, compens...",Hama,41.00,,,5.0,,,,,,,0,
851,B00005KHT9,"Hama Filtre polarisant (circulaire, Traité, 49...",Hama,,,-100.00,,,-100.0,,,,,0,
390,B00005KHTA,"Hama Filtre polarisant (circulaire, Traité, 52...",Hama,35.20,100.00,45.09,2.0,100.00,100.0,,,,17.60,1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
729,B0CYNL66LP,Hama Coque pour Samsung Galaxy A35 5G Fantasti...,Hama,,-100.00,,,-100.00,,,,,,0,
786,B0CYNMJ86M,Hama 45W Chargeur USB C Rapide Compatible iPho...,Hama,,-100.00,,,-100.00,,,,,,0,
306,B0D5HHBJ6Y,Hama Cuir Synthétique Tapis de Souris - Tapis ...,Hama,62.29,-41.36,,7.0,-41.67,,,-100.0,,106.22,12,
1032,B0D6CPS3G3,Hama Coque pour Xiaomi Redmi Note 13 5G Daily ...,Hama,,-100.00,,,-100.00,,,,,,0,
