In [1]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 1. Load datasets

# Directory holding the two CSV exports. The default is the location used when
# this notebook was written; set the TRAFFIC_DATA_DIR environment variable to
# run the notebook on another machine without editing this cell.
DATA_DIR = Path(
    os.environ.get(
        "TRAFFIC_DATA_DIR",
        r"C:\Users\USER\PycharmProjects\component2_traffic\data",
    )
)

fixed = pd.read_csv(DATA_DIR / "component2_traffic_fixed.csv")
adaptive = pd.read_csv(DATA_DIR / "component2_traffic_adaptive.csv")

# Tag every row with the signal-control system it came from so the two
# sources remain distinguishable after concatenation.
fixed["system"] = "Fixed-Time"
adaptive["system"] = "Adaptive"

# Stack both sources into one frame; ignore_index=True gives a fresh
# 0..n-1 index instead of repeating each file's own row numbers.
df = pd.concat([fixed, adaptive], ignore_index=True)


# 2. Define features and target

# Column the model is asked to predict.
target = "waiting_time"

# Predictor columns fed to the regression.
# NOTE(review): the recorded test R² for this model is 0.997. Presumably
# `time_loss` and/or `duration` already contain the waiting time, which would
# make this target leakage rather than genuine predictive power. Confirm
# against the definition of these columns in the data source.
features = ["route_length", "depart_time", "duration", "time_loss"]

y = df.loc[:, target]
X = df.loc[:, features]


# 3. Train/Test split
#
# Split BEFORE scaling so that the scaler never sees the held-out rows.
# df.index is passed as a third array so the original row labels of each
# partition are available as train_idx / test_idx.
# The partition depends only on the number of rows and random_state, so it
# selects the same rows as splitting an already-scaled matrix would.
X_train_raw, X_test_raw, y_train, y_test, train_idx, test_idx = train_test_split(
    X, y, df.index, test_size=0.2, random_state=42
)


# 4. Preprocessing (scaling, fit on the training rows only)
#
# Fitting StandardScaler on the full dataset would let the test rows' mean
# and variance influence the preprocessing (data leakage). The scaler is
# fit on the training partition and then applied unchanged to the test one.
# For plain least-squares regression with an intercept the predictions are
# invariant to this rescaling, so the metrics should not move beyond
# floating-point noise. The ordering matters as soon as a regularised or
# distance-based model is swapped in.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix in the same (training-derived) scale, kept under its
# original name for any later cell that refers to it.
X_scaled = scaler.transform(X)


# 5. Train final Linear Regression

# fit() returns the estimator itself, so construction and training can be
# written as a single expression.
lr = LinearRegression().fit(X_train, y_train)


# 6. Predict waiting time

# Predictions on both partitions: the training ones are needed to compare
# in-sample against out-of-sample error in the evaluation step.
y_train_pred, y_test_pred = lr.predict(X_train), lr.predict(X_test)


In [2]:
# 7. Evaluate model

# Mean squared error per partition; RMSE is its square root and is expressed
# in the same units as the target.
train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)
train_rmse, test_rmse = np.sqrt(train_mse), np.sqrt(test_mse)

# Coefficient of determination per partition.
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)

# One print call, newline-separated, produces the same three output lines.
print(
    "Linear Regression Performance:",
    f"Train RMSE: {train_rmse:.3f}, R²: {train_r2:.3f}",
    f"Test  RMSE: {test_rmse:.3f}, R²: {test_r2:.3f}",
    sep="\n",
)


Linear Regression Performance:
Train RMSE: 19.498, R²: 0.998
Test  RMSE: 19.650, R²: 0.997
