In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import joblib
import os

def find_project_root(folder_name="ProjetMobilite", start=None):
    """Walk up the directory tree until a directory matching *folder_name* is found.

    The match is a substring test on each directory's base name, so a
    directory called e.g. ``ProjetMobilite-main`` is accepted as well.

    Args:
        folder_name: Text that the project root's directory name must contain.
        start: Directory to start the search from. Defaults to the current
            working directory.

    Returns:
        The path of the closest directory (the start directory itself or one
        of its ancestors) whose base name contains *folder_name*.

    Raises:
        FileNotFoundError: If the filesystem root is reached without a match.
    """
    current = os.getcwd() if start is None else os.path.abspath(start)
    while folder_name not in os.path.basename(current):
        parent = os.path.dirname(current)
        if parent == current:
            # dirname() of the filesystem root is the root itself: nothing
            # is left to climb, so the project directory does not exist
            # above the starting point.
            raise FileNotFoundError(
                f"❌ Impossible de trouver la racine du projet '{folder_name}'"
            )
        current = parent
    return current

# --- Locate and load the training data --------------------------------------
project_root = find_project_root("ProjetMobilite")

file_path = os.path.join(project_root, "donnes_propre_pour_entrainement", "tomtom_dataset.csv")

if not os.path.exists(file_path):
    raise FileNotFoundError(f"❌ Fichier introuvable : {file_path}")

df = pd.read_csv(file_path)

# --- Feature engineering ----------------------------------------------------
# 'location' is presumably a "(lat, lon)" string (TODO confirm against the CSV):
# drop the parentheses, split on the comma and convert both parts to float.
# The regex flag is passed explicitly because the default of str.replace()
# differs between pandas versions.
df[['lat', 'lon']] = (
    df['location']
    .str.replace(r"[()]", "", regex=True)
    .str.split(",", expand=True)
    .astype(float)
)
# Parse the timestamp column and keep the hour component as a numeric feature.
df['hour'] = pd.to_datetime(df['hour'])
df['hour_num'] = df['hour'].dt.hour

# --- Target encoding and feature selection ----------------------------------
# Encode the textual congestion level as integers 0..n_classes-1.
le = LabelEncoder()
df['congestion_encoded'] = le.fit_transform(df['congestion_level'])
# NOTE: the model is fitted on exactly these columns, in this order.
features = ['current_speed', 'free_flow_speed', 'lat', 'lon', 'hour_num']
X = df[features]
y = df['congestion_encoded']

# --- Train / test split -----------------------------------------------------
# Stratify so that rare congestion classes keep the same share in both
# splits; fall back to a plain random split when some class has a single
# row, which train_test_split cannot stratify.
stratify_on = y if y.value_counts().min() >= 2 else None
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=stratify_on
)

# --- Training ---------------------------------------------------------------
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# --- Evaluation -------------------------------------------------------------
y_pred = model.predict(X_test)
# Pass the full list of encoded labels so the report still works (and keeps
# the names aligned) when a class is absent from the test split.
report = classification_report(
    y_test,
    y_pred,
    labels=np.arange(len(le.classes_)),
    target_names=le.classes_,
)
print(" Rapport de classification :\n", report)

# --- Persistence ------------------------------------------------------------
# Save the classifier together with its label encoder so predictions can be
# mapped back to the original congestion labels.
model_dir = os.path.join(project_root, "model")
os.makedirs(model_dir, exist_ok=True)
joblib.dump(model, os.path.join(model_dir, "congestion_predictor.pkl"))
joblib.dump(le, os.path.join(model_dir, "label_encoder.pkl"))

print("\nNouveau modèle avec l’heure incluse sauvegardé dans 'model/'")


📊 Rapport de classification :
               precision    recall  f1-score   support

      bouché       1.00      0.97      0.98        32
       dense       0.98      0.99      0.98       429
      fluide       1.00      1.00      1.00      5200

    accuracy                           1.00      5661
   macro avg       0.99      0.98      0.99      5661
weighted avg       1.00      1.00      1.00      5661


✅ Nouveau modèle avec l’heure incluse sauvegardé dans 'model/'
