In [67]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler
import joblib

In [68]:
sf = pd.read_csv("futuristic_city_traffic.csv")
sf.head()

Unnamed: 0,City,Vehicle Type,Weather,Economic Condition,Day Of Week,Hour Of Day,Speed,Is Peak Hour,Random Event Occurred,Energy Consumption,Traffic Density
0,SolarisVille,Drone,Snowy,Stable,Sunday,20,29.4268,0,0,14.7134,0.5241
1,AquaCity,Flying Car,Solar Flare,Recession,Wednesday,2,118.8,0,0,143.5682,0.3208
2,Neuroburg,Autonomous Vehicle,Solar Flare,Recession,Wednesday,16,100.3904,0,0,91.264,0.0415
3,Ecoopolis,Drone,Clear,Booming,Thursday,8,76.8,1,0,46.0753,0.1811
4,AquaCity,Autonomous Vehicle,Solar Flare,Stable,Saturday,16,45.2176,0,0,40.1934,0.4544


In [69]:
sf.shape

(1219567, 11)

In [70]:
df = sf.sample(12000, random_state=42)

In [71]:
print(df.shape)

(12000, 11)


In [72]:
df.to_csv("futuristic_city_traffic_sampled.csv", index=False)

In [73]:
df

Unnamed: 0,City,Vehicle Type,Weather,Economic Condition,Day Of Week,Hour Of Day,Speed,Is Peak Hour,Random Event Occurred,Energy Consumption,Traffic Density
847212,Ecoopolis,Drone,Clear,Booming,Thursday,23,45.8742,0,0,22.9371,0.1965
760122,TechHaven,Autonomous Vehicle,Solar Flare,Stable,Monday,8,43.4756,1,0,43.9147,0.2234
678988,MetropolisX,Autonomous Vehicle,Clear,Stable,Monday,14,66.3621,0,0,60.3292,0.3949
544510,MetropolisX,Autonomous Vehicle,Rainy,Stable,Thursday,15,46.4420,0,0,44.4421,0.4116
203363,MetropolisX,Autonomous Vehicle,Snowy,Recession,Thursday,5,42.2895,0,0,40.4684,0.0897
...,...,...,...,...,...,...,...,...,...,...,...
587356,MetropolisX,Autonomous Vehicle,Rainy,Recession,Tuesday,11,69.6072,0,0,66.6097,0.3314
611679,Ecoopolis,Autonomous Vehicle,Solar Flare,Recession,Monday,20,19.0563,0,0,16.9389,0.0811
647609,MetropolisX,Autonomous Vehicle,Snowy,Booming,Wednesday,20,25.2339,0,0,24.3912,0.6408
882671,MetropolisX,Autonomous Vehicle,Snowy,Recession,Thursday,12,93.4139,0,0,89.3913,0.1717


In [74]:
cat_cols = ["City","Vehicle Type","Weather","Economic Condition",
            "Day Of Week","Is Peak Hour","Random Event Occurred"]

In [75]:
X = df.drop("Energy Consumption", axis=1)
y = df["Energy Consumption"]

In [76]:
df.describe()

Unnamed: 0,Hour Of Day,Speed,Is Peak Hour,Random Event Occurred,Energy Consumption,Traffic Density
count,12000.0,12000.0,12000.0,12000.0,12000.0,12000.0
mean,11.548167,59.541135,0.14825,0.050583,49.150583,0.279987
std,6.966017,26.84388,0.355362,0.219154,25.375383,0.221637
min,0.0,9.1314,0.0,0.0,6.7026,0.0067
25%,5.0,36.9417,0.0,0.0,28.69755,0.1049
50%,12.0,57.64235,0.0,0.0,45.36615,0.21965
75%,18.0,80.133175,0.0,0.0,66.0167,0.3991
max,23.0,148.2624,1.0,1.0,175.988,1.8553


In [77]:
def preprocess_data(df, cat_cols):
    """Preprocess the data by encoding categorical variables and scaling features"""
    # Create feature matrix
    X = df.copy()
    le = LabelEncoder()
    
    # Encode categorical columns
    for col in cat_cols:
        X[col] = le.fit_transform(X[col])
    
    # Select features and target
    X = X.drop("Traffic Density", axis=1)
    y = df["Traffic Density"]
    
    return X, y

In [78]:
def train_evaluate_model(X, y):
    """Train and evaluate the model"""
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Train model
    model = LinearRegression()
    model.fit(X_train_scaled, y_train)
    
    # Make predictions
    y_pred = model.predict(X_test_scaled)
    
    # Cross-validation score
    cv_scores = cross_val_score(model, X_train_scaled, y_train, cv=5)
    
    return model, scaler, X_test_scaled, y_test, y_pred, cv_scores

In [79]:
def evaluate_model(model, X_test_scaled, y_test, y_pred, cv_scores):
    """Print model performance metrics and create visualization"""
    print("Model Performance Metrics")
    print("-" * 35)
    print(f"R² Score: {r2_score(y_test, y_pred):.4f}")
    print(f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred):.2f}")
    print(f"Root Mean Squared Error: {mean_squared_error(y_test, y_pred):.2f}")
    print(f"Cross-validation Score: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")
    print(f"Model Accuracy: {model.score(X_test_scaled, y_test)*100:.2f}%")
    
    # Visualization
    fig = px.scatter(x=y_test, y=y_pred, 
                    labels={'x':'Actual Traffic Density', 'y':'Predicted Traffic Density'},
                    title="Actual vs Predicted Traffic Density")
    fig.add_trace(go.Scatter(x=[y_test.min(), y_test.max()],
                            y=[y_test.min(), y_test.max()],
                            mode='lines', 
                            name='Perfect Prediction', 
                            line=dict(color='red', dash='dash')))
    fig.show()

In [None]:
# Process data
X, y = preprocess_data(df, cat_cols)

# Train and evaluate model
model, scaler, X_test_scaled, y_test, y_pred, cv_scores = train_evaluate_model(X, y)

# Print evaluation metrics and show visualization
evaluate_model(model, X_test_scaled, y_test, y_pred, cv_scores)

# Save model and scaler
joblib.dump(model, "traffic_model.pkl")
joblib.dump(scaler, "scaler.pkl")
print("✅ Model and scaler saved successfully!")

Model Performance Metrics
-----------------------------------
R² Score: 0.1140
Mean Absolute Error: 0.16
Root Mean Squared Error: 0.04
Cross-validation Score: 0.1273 (+/- 0.0274)
Model Accuracy: 11.40%


✅ Model and scaler saved successfully!


In [81]:
fig4 = px.histogram(df, x="Traffic Density", nbins=10, title="Distribution of Traffic Density")
fig4.show()

In [82]:
fig5 = px.box(df, x="Is Peak Hour", y="Traffic Density",
              title="Traffic Density During Peak vs Non-Peak Hours")
fig5.show()