In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Load the traffic CSV into `df`. The path is relative to the notebook's
# working directory, so the kernel must be started next to the SILVER folder.
df = pd.read_csv('./SILVER/futuristic_city_traffic.csv')

In [3]:
# Quartile cut points of the observed 'Traffic Density' column.
quartiles = df['Traffic Density'].quantile([0.25, 0.5, 0.75])
q25 = quartiles[0.25]
q50 = quartiles[0.5]
q75 = quartiles[0.75]


def categorize_density(density):
    """Return the quartile bucket label for a single density value.

    Values up to q25 map to "Low", up to q50 to "Medium", up to q75 to
    "High"; anything that matches none of these bounds is "Very High".
    """
    # Upper bounds are inclusive and tested from the lowest to the highest.
    for upper_bound, label in ((q25, "Low"), (q50, "Medium"), (q75, "High")):
        if density <= upper_bound:
            return label
    return "Very High"


# Label every row with the bucket of its current density.
df['Density_Category'] = df['Traffic Density'].apply(categorize_density)


In [4]:
# Count rows per density category to see how the buckets are populated.
df['Density_Category'].value_counts()

Density_Category
Low          302818
Very High    302354
High         302217
Medium       302178
Name: count, dtype: int64

In [5]:
# === PARAMS ===
# Number of rows to look ahead inside each city group when building the target.
X_hours = 1
# One-hot city indicator columns; together they identify a city group.
city_cols = ['Is_AquaCity','Is_Ecoopolis','Is_MetropolisX','Is_Neuroburg','Is_SolarisVille','Is_TechHaven']

# Sort by city, then day and hour, and build the future target by shifting
# the density upwards inside each city group.
# NOTE(review): the shift moves by rows, not by clock hours. It only means
# "X_hours later" if each city has exactly one row per (day, hour) — confirm
# against the dataset.
df_sorted = df.sort_values(city_cols + ['Day Of Week','Hour Of Day']).reset_index(drop=True)
df_sorted['Traffic_Density_Future'] = df_sorted.groupby(city_cols)['Traffic Density'].shift(-X_hours)
# The trailing rows of each city group have no future value; drop them.
df_sorted = df_sorted.dropna(subset=['Traffic_Density_Future'])

# Overwrite Density_Category with the quartile bucket of the *future* density:
# this is the classification target, not the current-density category.
df_sorted['Density_Category'] = pd.qcut(df_sorted['Traffic_Density_Future'], q=[0,0.25,0.5,0.75,1], labels=['Low','Medium','High','Very High'])

# Training features: every column except the target, the numeric value it is
# derived from, and the current density.
feature_cols = [c for c in df_sorted.columns if c not in ['Traffic_Density_Future', 'Density_Category', 'Traffic Density']]
X = df_sorted[feature_cols]
y = df_sorted['Density_Category']

# Train/test split (80/20, fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Classifier. n_jobs=-1 builds the trees on all available cores; with a fixed
# random_state the fitted forest is the same as the single-threaded one.
model = RandomForestClassifier(n_estimators=250, max_depth=20, random_state=42, n_jobs=-1)

mlflow.set_experiment("Traffic_Prediction_X_Hours")

<Experiment: artifact_location='file:///Users/antoinetheissen/Downloads/HACKATON/mlruns/699471715612415917', creation_time=1760446546090, experiment_id='699471715612415917', last_update_time=1760446546090, lifecycle_stage='active', name='Traffic_Prediction_X_Hours', tags={}>

In [None]:
with mlflow.start_run():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Simple metric: share of test rows whose predicted category is correct.
    accuracy = (y_pred == y_test).mean()

    # Read the hyperparameters from the estimator itself so the logged values
    # always match the model that was actually trained, instead of repeating
    # the constructor arguments by hand.
    hyperparams = model.get_params()
    mlflow.log_param("model_type", type(model).__name__)
    mlflow.log_param("n_estimators", hyperparams["n_estimators"])
    mlflow.log_param("max_depth", hyperparams["max_depth"])
    mlflow.log_param("X_hours", X_hours)

    mlflow.log_metric("accuracy", accuracy)
    mlflow.sklearn.log_model(model, artifact_path="traffic_model")

    print("✅ Modèle loggé dans MLflow")
    print(f"Accuracy: {accuracy:.4f}")

In [None]:
def prepare_input(city_name, hour, day_of_week, feature_columns=None, city_columns=None):
    """Build a one-row DataFrame of model inputs for a city, hour and day.

    Parameters
    ----------
    city_name : str
        City name without the ``Is_`` prefix (e.g. ``"AquaCity"``).
    hour : int
        Stored in ``Hour Of Day``; hours 7-9 and 17-19 set ``Is Peak Hour`` to 1.
    day_of_week
        Stored as-is in ``Day Of Week``.
    feature_columns : sequence of str, optional
        When given, the row is returned with exactly these columns in this
        order. Columns this helper does not set are filled with 0 and columns
        not listed are dropped, so the row can match a model's training layout.
    city_columns : sequence of str, optional
        One-hot city columns. Defaults to the notebook-level ``city_cols``.

    Returns
    -------
    pandas.DataFrame
        A single-row frame.

    Raises
    ------
    ValueError
        If ``Is_<city_name>`` is not one of the city columns. Without this
        check a typo would silently create a new column and leave every known
        city flag at 0.
    """
    if city_columns is None:
        city_columns = city_cols

    city_flag = f'Is_{city_name}'
    if city_flag not in city_columns:
        raise ValueError(
            f"Unknown city {city_name!r}; expected one of "
            f"{[c[len('Is_'):] for c in city_columns]}"
        )

    input_data = {c: 0 for c in city_columns}  # every city flag off
    input_data[city_flag] = 1                  # selected city on

    # Time features
    input_data['Hour Of Day'] = hour
    input_data['Day Of Week'] = day_of_week

    # Remaining features: fixed placeholder values
    input_data['Speed'] = 40  # arbitrary value
    input_data['Is Peak Hour'] = 1 if hour in [7,8,9,17,18,19] else 0
    input_data['Random Event Occurred'] = 0
    input_data['Energy Consumption'] = 200
    input_data['Is_Clear'] = 1  # default weather example
    input_data['Is_Autonomous Vehicle'] = 0
    # ... add further columns the same way

    row = pd.DataFrame([input_data])
    if feature_columns is not None:
        # Match the requested layout exactly (names and order).
        row = row.reindex(columns=list(feature_columns), fill_value=0)
    return row

# Example
df_input = prepare_input("AquaCity", 8, 2)
# The model was fitted on X_train, so the row must carry the same columns in
# the same order. Features the helper does not set are filled with 0 here —
# a placeholder; replace with realistic defaults where it matters.
df_input = df_input.reindex(columns=X_train.columns, fill_value=0)
predicted_category = model.predict(df_input)[0]
print(predicted_category)


In [None]:
import pandas as pd

# Example density thresholds for numeric predictions (adjust to your dataset)
SEUIL_FLUIDE = 1000   # below this = free-flowing traffic
SEUIL_CONGESTION = 2000  # above this = congested traffic

# Recommendation texts, shared by the numeric and the category code paths.
RECO_FLUIDE = "Trafic fluide — utiliser la voiture"
RECO_DENSE = "Trafic dense — privilégier les transports en commun"
RECO_MODERE = "Trafic modéré — prudence recommandée"

# Recommendation per predicted category label.
# NOTE(review): mapping the middle buckets to "moderate" is a choice, not
# something derived from the data — adjust if needed.
RECO_BY_CATEGORY = {
    "Very Low": RECO_FLUIDE,
    "Low": RECO_FLUIDE,
    "Medium": RECO_MODERE,
    "High": RECO_MODERE,
    "Very High": RECO_DENSE,
}


def traffic_recommendation(predicted_density):
    """Return a travel recommendation for a predicted traffic level.

    Parameters
    ----------
    predicted_density : str or number
        Either a category label as returned by the classifier (e.g. "Low",
        "Very High") or a numeric density, which is compared against
        SEUIL_FLUIDE and SEUIL_CONGESTION.

    Returns
    -------
    str
        One of the three recommendation messages.

    Raises
    ------
    ValueError
        If a string label is not a known category.
    """
    # The classifier predicts labels; comparing a string with the numeric
    # thresholds would raise TypeError, so labels go through the lookup table.
    if isinstance(predicted_density, str):
        if predicted_density not in RECO_BY_CATEGORY:
            raise ValueError(f"Unknown density category: {predicted_density!r}")
        return RECO_BY_CATEGORY[predicted_density]

    if predicted_density < SEUIL_FLUIDE:
        return RECO_FLUIDE
    elif predicted_density > SEUIL_CONGESTION:
        return RECO_DENSE
    else:
        return RECO_MODERE


# Example: apply to one row of the test set
sample = X_test.iloc[0:1]
predicted_density = model.predict(sample)[0]  # category label, not a float

reco = traffic_recommendation(predicted_density)

# Print the label as-is: a float format spec would fail on a string.
print(f"Predicted Traffic Density in {X_hours}h: {predicted_density}")
print(f"Recommendation: {reco}")


In [None]:

# Predict the density category for one row of the test set.
sample = X_test.iloc[0:1]
predicted_density = model.predict(sample)[0]
# The classifier returns a category label, so it is printed as-is; a float
# format spec such as ":.4f" would raise on a string.
print(f"Predicted Traffic Density in {X_hours}h: {predicted_density}")

In [None]:
import pandas as pd

# NOTE(review): this cell repeats the thresholds and helper defined in an
# earlier cell; consider keeping a single definition.

# Example density thresholds for numeric predictions (adjust to your dataset)
SEUIL_FLUIDE = 1000   # below this = free-flowing traffic
SEUIL_CONGESTION = 2000  # above this = congested traffic

# Recommendation texts, shared by the numeric and the category code paths.
RECO_FLUIDE = "Trafic fluide — utiliser la voiture"
RECO_DENSE = "Trafic dense — privilégier les transports en commun"
RECO_MODERE = "Trafic modéré — prudence recommandée"

# Recommendation per predicted category label.
# NOTE(review): mapping the middle buckets to "moderate" is a choice, not
# something derived from the data — adjust if needed.
RECO_BY_CATEGORY = {
    "Very Low": RECO_FLUIDE,
    "Low": RECO_FLUIDE,
    "Medium": RECO_MODERE,
    "High": RECO_MODERE,
    "Very High": RECO_DENSE,
}


def traffic_recommendation(predicted_density):
    """Return a travel recommendation for a predicted traffic level.

    Parameters
    ----------
    predicted_density : str or number
        Either a category label as returned by the classifier (e.g. "Low",
        "Very High") or a numeric density, which is compared against
        SEUIL_FLUIDE and SEUIL_CONGESTION.

    Returns
    -------
    str
        One of the three recommendation messages.

    Raises
    ------
    ValueError
        If a string label is not a known category.
    """
    # The classifier predicts labels; comparing a string with the numeric
    # thresholds would raise TypeError, so labels go through the lookup table.
    if isinstance(predicted_density, str):
        if predicted_density not in RECO_BY_CATEGORY:
            raise ValueError(f"Unknown density category: {predicted_density!r}")
        return RECO_BY_CATEGORY[predicted_density]

    if predicted_density < SEUIL_FLUIDE:
        return RECO_FLUIDE
    elif predicted_density > SEUIL_CONGESTION:
        return RECO_DENSE
    else:
        return RECO_MODERE


# Example: apply to one row of the test set
sample = X_test.iloc[0:1]
predicted_density = model.predict(sample)[0]  # category label, not a float

reco = traffic_recommendation(predicted_density)

# Print the label as-is: a float format spec would fail on a string.
print(f"Predicted Traffic Density in {X_hours}h: {predicted_density}")
print(f"Recommendation: {reco}")


In [None]:
# Relative path to a logged model's artifacts inside the local ./mlruns store.
# NOTE(review): the experiment and model IDs in this path belong to one
# specific run — update the path after logging a new model.
MODEL_PATH = "./mlruns/699471715612415917/models/m-a9849227b3ff4f78a468eb7c1629ddc2/artifacts"


In [None]:
# Ordered list of the column names the model was trained on.
feature_names = list(X_train.columns)


In [None]:
# Display the training feature names.
feature_names

In [None]:
# Print column names, dtypes and non-null counts of the loaded frame.
df.info()

# AUTRE Méthode


In [None]:
import pandas as pd
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

# === CONFIGURATION ===
# Number of rows to look ahead inside each city group when building the target.
X_HOURS = 1
# One-hot city indicator columns; together they identify a city group.
city_cols = ['Is_AquaCity', 'Is_Ecoopolis', 'Is_MetropolisX', 'Is_Neuroburg', 'Is_SolarisVille', 'Is_TechHaven']

# === 1. DATASET PREPARATION ===
df_sorted = df.sort_values(city_cols + ['Day Of Week','Hour Of Day']).reset_index(drop=True)

# Shift the density upwards inside each city group to get the future value.
# NOTE(review): the shift moves by rows, not by clock hours. It only means
# "X_HOURS later" if each city has exactly one row per (day, hour) — confirm
# against the dataset.
df_sorted['Traffic_Density_Future'] = df_sorted.groupby(city_cols)['Traffic Density'].shift(-X_HOURS)
# The trailing rows of each city group have no future value; drop them.
df_sorted = df_sorted.dropna(subset=['Traffic_Density_Future'])

# === 2. QUANTILE CATEGORIES ===
# Five equal-frequency buckets of the future density.
df_sorted['Density_Category'] = pd.qcut(df_sorted['Traffic_Density_Future'], q=5, labels=['Very Low','Low','Medium','High','Very High'])

# === 3. TARGET ENCODING ===
# LabelEncoder numbers the classes in sorted (alphabetical) label order, so
# the integer codes are identifiers, not an ordinal low-to-high scale.
label_encoder = LabelEncoder()
df_sorted['Density_Label'] = label_encoder.fit_transform(df_sorted['Density_Category'])

# === 4. FEATURES (hour + day + city only) ===
feature_cols = ['Day Of Week', 'Hour Of Day'] + city_cols
X = df_sorted[feature_cols]
y = df_sorted['Density_Label']

# === 5. TRAIN/TEST SPLIT ===
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# === 6. MODEL ===
# n_jobs=-1 builds the trees on all available cores; with a fixed random_state
# the fitted forest is the same as the single-threaded one.
model = RandomForestClassifier(n_estimators=300, max_depth=25, random_state=42, n_jobs=-1)


In [None]:

mlflow.set_experiment("Traffic_Density_Categorized")
with mlflow.start_run():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    # classes_ is in the same order as the integer codes, so it lines up with
    # the encoded labels in the report.
    report = classification_report(y_test, y_pred, target_names=label_encoder.classes_)

    # Read the hyperparameters from the estimator itself so the logged values
    # cannot drift from the constructor arguments.
    hyperparams = model.get_params()
    mlflow.log_param("model_type", type(model).__name__)
    mlflow.log_param("n_estimators", hyperparams["n_estimators"])
    mlflow.log_param("max_depth", hyperparams["max_depth"])
    # Also record the prediction horizon, as the first experiment does.
    mlflow.log_param("X_hours", X_HOURS)
    mlflow.log_metric("accuracy", acc)

    mlflow.sklearn.log_model(model, artifact_path="traffic_model")

    print("✅ Modèle loggé dans MLflow")
    print(f"Accuracy: {acc:.4f}")
    print(report)

# Save the label encoder so the API can turn predicted codes back into labels.
import joblib
joblib.dump(label_encoder, "label_encoder.pkl")
