# 🧠 MODÈLE PRÉDICTIF - SILENT SURGE
# **Objectif** : Prédire les best-sellers avec interprétabilité

In [11]:
# Import every dependency used by the notebook in one place.
# If a package is missing, an installation hint is printed and the ImportError
# is re-raised, so that execution stops here instead of continuing and failing
# in a later cell with a NameError on the name that was never imported.
try:
    # Core
    import pandas as pd
    import numpy as np
    from pathlib import Path

    # ML
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import accuracy_score, classification_report
    from sklearn.preprocessing import OneHotEncoder
    from sklearn.compose import ColumnTransformer
    from sklearn.pipeline import make_pipeline

    # Visualization (safe version)
    import matplotlib.pyplot as plt
    import seaborn as sns
    plt.style.use('ggplot')  # Alternative, more stable style

    # Utilities
    import joblib

    print("‚úÖ Tous les imports r√©ussis")
except ImportError as e:
    print(f"‚ùå Erreur d'import : {e}")
    print("Solution : lancez dans le terminal :")
    print("pip install pandas scikit-learn matplotlib seaborn joblib")
    # Do not swallow the failure: every later cell needs these names.
    raise

‚úÖ Tous les imports r√©ussis


## 3. PRÉPARATION DES DONNÉES

In [13]:
# Features and target.
# NOTE(review): `df` is not created in this cell nor in any cell visible before
# it; on a fresh kernel this cell only works after the data-loading step has
# run -- confirm the intended cell order.
features = ["saison", "zone", "type_commerce"]
target = "produit"

# Fail early with an explicit message when an expected column is absent,
# instead of an opaque KeyError raised from the indexing below.
colonnes_manquantes = [col for col in features + [target] if col not in df.columns]
if colonnes_manquantes:
    raise ValueError(f"Colonnes manquantes : {colonnes_manquantes}")

# One-hot encoding of the categorical features (avoids the problematic
# LabelEncoder). handle_unknown='ignore' lets categories unseen during fit go
# through transform without raising an error.
# remainder='passthrough' has nothing to forward here, since X below contains
# exactly the columns listed in `features`.
preprocessor = ColumnTransformer(
    transformers=[('cat', OneHotEncoder(handle_unknown='ignore'), features)],
    remainder='passthrough'
)

X = df[features]
y = df[target]

# Stratified split: 80% train / 20% test, stratified on the target and seeded
# so that the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

In [14]:

# 1. IMPORTS OBLIGATOIRES
import pandas as pd
from pathlib import Path
import sys

# V√©rification des imports critiques
try:
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import OneHotEncoder
    from sklearn.compose import ColumnTransformer
    from sklearn.pipeline import make_pipeline
    import joblib
    print("‚úÖ Import ML r√©ussis")
except ImportError:
    !{sys.executable} -m pip install scikit-learn joblib
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import OneHotEncoder
    from sklearn.compose import ColumnTransformer
    from sklearn.pipeline import make_pipeline
    import joblib


# 2. DATA LOADING
import os

print("√âtape 1: Chargement des donn√©es...")

# Directory holding the sales CSV files. It can be overridden through the
# SILENT_SURGE_DATA_DIR environment variable so the notebook also runs on other
# machines; when the variable is unset the original absolute path is used.
DATA_PATH = Path(
    os.environ.get(
        "SILENT_SURGE_DATA_DIR",
        r"C:\Users\Sajed\Desktop\SilentSurgeFinal\data",
    )
)

# Required files, keyed by shop type. The key (upper-cased) becomes the value
# of the `type_commerce` column of the corresponding rows.
FICHIERS = {
    'pizzeria': DATA_PATH / "ventes_pizzeria.csv",
    'boucherie': DATA_PATH / "ventes_boucherie.csv",
    'epicerie': DATA_PATH / "ventes_epicerie.csv"
}

# Visual check: list the CSV files actually present in the data directory.
print("\nContenu du dossier data/:")
for f in DATA_PATH.glob("*.csv"):
    print(f"- {f.name}")

# Load each file independently. A file that cannot be read is reported and
# skipped, so the remaining files are still loaded.
dfs = []
for nom, chemin in FICHIERS.items():
    try:
        df_temp = pd.read_csv(chemin).assign(type_commerce=nom.upper())
    except Exception as e:
        print(f"\n‚ùå Erreur sur {nom} : {str(e)}")
    else:
        dfs.append(df_temp)
        print(f"\n‚úÖ {nom} charg√© : {len(df_temp)} lignes")

# Nothing could be loaded at all: stop here rather than concatenating nothing.
if not dfs:
    raise ValueError("Aucun fichier charg√© - v√©rifiez les donn√©es")

# Single frame with a fresh 0..n-1 index across all shops.
df = pd.concat(dfs, ignore_index=True)
print(f"\nDonn√©es finales : {len(df)} lignes")


# 3. DATA PREPARATION
print("\n√âtape 2: Pr√©paration des donn√©es...")

# Categorical input columns and the column to predict.
features = ["saison", "zone", "type_commerce"]
target = "produit"

# Column check: every feature and the target must exist in the loaded frame.
colonnes_manquantes = [nom for nom in (*features, target) if nom not in df.columns]
if len(colonnes_manquantes) > 0:
    raise ValueError(f"Colonnes manquantes : {colonnes_manquantes}")

# Preprocessing: one-hot encode the features; categories unseen during fit are
# tolerated at transform time (handle_unknown='ignore').
preprocessor = ColumnTransformer(
    [('cat', OneHotEncoder(handle_unknown='ignore'), features)],
    remainder='passthrough',
)

# Split: 80/20, stratified on the target, with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    df[features], df[target],
    stratify=df[target], test_size=0.2, random_state=42,
)


# 4. MODELLING
print("\n√âtape 3: Entra√Ænement du mod√®le...")

# Preprocessing and classifier are chained in a single pipeline, so the
# encoding is fitted on the training split only and is saved with the model.
model = make_pipeline(
    preprocessor,
    RandomForestClassifier(class_weight='balanced', n_estimators=100, random_state=42),
)

model.fit(X_train, y_train)
print("‚úÖ Mod√®le entra√Æn√©")

# Persist the fitted pipeline; the target directory is created on first run.
Path("models").mkdir(exist_ok=True)
joblib.dump(model, "models/modele_prod.joblib")
print("üíæ Mod√®le sauvegard√©")


# 5. EVALUATION
from sklearn.metrics import classification_report, accuracy_score

# Predictions on the held-out test split.
y_pred = model.predict(X_test)

print("\nüìä Performance du mod√®le:")
# Some classes are never predicted, which makes their precision undefined.
# zero_division=0 reports those scores as 0.00 (the same value the default
# setting reports) without emitting an UndefinedMetricWarning for each metric.
print(classification_report(y_test, y_pred, zero_division=0))
print(f"Pr√©cision globale: {accuracy_score(y_test, y_pred):.2%}")

‚úÖ Import ML r√©ussis
√âtape 1: Chargement des donn√©es...

Contenu du dossier data/:
- ventes_boucherie.csv
- ventes_epicerie.csv
- ventes_pizzeria.csv

‚úÖ pizzeria charg√© : 1646 lignes

‚úÖ boucherie charg√© : 1695 lignes

‚úÖ epicerie charg√© : 1668 lignes

Donn√©es finales : 5009 lignes

√âtape 2: Pr√©paration des donn√©es...

√âtape 3: Entra√Ænement du mod√®le...
‚úÖ Mod√®le entra√Æn√©
üíæ Mod√®le sauvegard√©

üìä Performance du mod√®le:
                   precision    recall  f1-score   support

       4 fromages       0.00      0.00      0.00        10
  Blanc de poulet       0.13      0.11      0.12        35
       Brochettes       0.08      0.03      0.05        59
          Calzone       0.10      0.18      0.13        11
            Chips       0.15      0.18      0.16        33
             Coca       0.00      0.00      0.00        37
Cuisses de canard       0.00      0.00      0.00        37
            Curly       0.00      0.00      0.00        35
     C√¥te de b≈

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
