In [5]:
# ===============================
# behavior_model_train.py
# ===============================

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib

# -------------------------------
# 1️⃣ Load the data
# -------------------------------
# Location of the raw driving dataset; point this at your own CSV file.
DATA_PATH = "driving_data_raw.csv"

# Columns that are prediction targets and therefore must not be used as features.
LABEL_COLS = ['style', 'hard_brake', 'aggressive_accel', 'sharp_turn', 'speeding']

try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError as err:
    # Same exception type as before, but with an actionable message.
    raise FileNotFoundError(
        f"Dataset '{DATA_PATH}' not found: set DATA_PATH to the location of your CSV file."
    ) from err

# Fail early (instead of later, at df['style'] / df[event_cols]) when a label is absent.
missing_labels = [col for col in LABEL_COLS if col not in df.columns]
if missing_labels:
    raise KeyError(f"Missing label column(s) in '{DATA_PATH}': {missing_labels}")

# Shared features: every column that is not a label.
# NOTE(review): the scalers applied downstream presumably require these columns
# to be numeric -- confirm against the actual CSV schema.
feature_cols = [col for col in df.columns if col not in LABEL_COLS]
if not feature_cols:
    raise ValueError(f"'{DATA_PATH}' contains no feature columns besides the labels.")
X = df[feature_cols]

# -------------------------------
# 2️⃣ Model 1: driving style
# -------------------------------
y_style = df['style']

# Stratified 80/20 train/test split so both parts keep the class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_style, test_size=0.2, random_state=42, stratify=y_style
)

# Standardisation: statistics are learned on the training part only and then
# re-applied unchanged to the test part.
# NOTE(review): the scaler is fitted before the grid search, so each CV
# validation fold is scaled with statistics that include that fold. Wrapping
# scaler + classifier in a Pipeline would remove this, but it would change the
# format of the saved artifacts, so it is left as is here.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Logistic regression tuned with GridSearchCV (only C actually varies).
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'penalty': ['l2'],
    'solver': ['lbfgs'],
    'max_iter': [1000]
}

# `multi_class='multinomial'` is deliberately NOT passed: the parameter is
# deprecated since scikit-learn 1.5 and scheduled for removal. With the lbfgs
# solver the default already fits a multinomial model for multiclass targets.
# NOTE(review): assumes 'style' has more than two classes -- confirm.
log_reg = GridSearchCV(
    LogisticRegression(),
    param_grid,
    cv=5,
    scoring='accuracy'
)
log_reg.fit(X_train_scaled, y_train)

print("✅ Style model - meilleurs hyperparamètres :", log_reg.best_params_)

# Evaluation on the held-out test set.
y_pred_style = log_reg.predict(X_test_scaled)
print("Matrice de confusion - Style:\n", confusion_matrix(y_test, y_pred_style))
print("Classification Report - Style:\n", classification_report(y_test, y_pred_style))

# Persist the fitted grid search object and the scaler that must be applied
# to inputs before calling it.
joblib.dump(log_reg, "style_model_logreg.pkl")
joblib.dump(scaler, "scaler.pkl")

# -------------------------------
# 3️⃣ Model 2: driving events (multi-label)
# -------------------------------
event_cols = ['hard_brake', 'aggressive_accel', 'sharp_turn', 'speeding']
y_events = df[event_cols]

# 80/20 train/test split (not stratified: the target has several columns).
X_train_ev, X_test_ev, y_train_ev, y_test_ev = train_test_split(
    X,
    y_events,
    test_size=0.2,
    random_state=42,
)

# Standardisation: learn the statistics on the training part, then apply
# them to both parts.
scaler_events = StandardScaler()
scaler_events.fit(X_train_ev)
X_train_ev_scaled = scaler_events.transform(X_train_ev)
X_test_ev_scaled = scaler_events.transform(X_test_ev)

# One random forest per event column, wrapped in a multi-output classifier.
rf = RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)
multi_rf = MultiOutputClassifier(rf)
multi_rf.fit(X_train_ev_scaled, y_train_ev)

# Evaluation: one classification report per event column.
y_pred_events = multi_rf.predict(X_test_ev_scaled)
for col, col_pred in zip(event_cols, y_pred_events.T):
    print(f"\n--- {col} ---")
    print(classification_report(y_test_ev[col], col_pred))

# Persist the multi-label model together with its scaler.
for artifact, filename in (
    (multi_rf, "event_model_rf.pkl"),
    (scaler_events, "scaler_events.pkl"),
):
    joblib.dump(artifact, filename)

print("\n✅ Tous les modèles sont entraînés et sauvegardés.")


FileNotFoundError: [Errno 2] No such file or directory: 'driving_data_raw.csv'