In [1]:
# train_model.py
import pandas as pd
import numpy as np
import xgboost as xgb
import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [3]:
# --- Load dataset ---
df = pd.read_csv("traffic.csv")

# Convert DateTime
df['DateTime'] = pd.to_datetime(df['DateTime'], dayfirst=True)

# Feature engineering
df['hour'] = df['DateTime'].dt.hour
df['dayofweek'] = df['DateTime'].dt.dayofweek
df['month'] = df['DateTime'].dt.month
df['is_weekend'] = df['dayofweek'].isin([5,6]).astype(int)
df['is_peak'] = df['hour'].isin([7,8,9,17,18,19]).astype(int)

df = df.sort_values(["Junction","DateTime"])
df['lag_1'] = df.groupby("Junction")['Vehicles'].shift(1)
df['lag_2'] = df.groupby("Junction")['Vehicles'].shift(2)
df['lag_3'] = df.groupby("Junction")['Vehicles'].shift(3)

df = df.dropna()

features = ['hour','dayofweek','month','is_weekend','is_peak','lag_1','lag_2','lag_3']
target = 'Vehicles'

X = df[features]
y = df[target]

In [4]:

# --- Split ---
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

In [5]:
# --- Train ---
params = {
    "objective": "reg:squarederror",
    "eval_metric": "rmse",
    "max_depth": 6,
    "eta": 0.1,
    "subsample": 0.8,
    "colsample_bytree": 0.8
}

model = xgb.train(params, dtrain, num_boost_round=200)

In [6]:

# --- Evaluate ---
y_pred = model.predict(dtest)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print("RMSE:", rmse)
print("R2 Score:", r2)


RMSE: 4.815935964279024
R2 Score: 0.7665134072303772


In [9]:
# --- Save model and metadata ---
model.save_model("traffic_model.json")
joblib.dump(features, "features.pkl")
print("✅ Model and features saved!")

✅ Model and features saved!
