In [1]:

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from torch.utils.data import DataLoader, TensorDataset

vehicle_data = pd.read_csv("vehicle_summary (1).csv")

le_vehicle_type = LabelEncoder()
le_direction = LabelEncoder()
le_entry = LabelEncoder()
le_exit = LabelEncoder()

vehicle_data['vehicle_type_enc'] = le_vehicle_type.fit_transform(vehicle_data['vehicle_type'])
vehicle_data['direction_enc'] = le_direction.fit_transform(vehicle_data['direction'])
vehicle_data['entry_enc'] = le_entry.fit_transform(vehicle_data['entry_zone_id'])
vehicle_data['exit_enc'] = le_exit.fit_transform(vehicle_data['exit_zone_id'])

features = [
    'vehicle_type_enc', 'most_frequent_lane', 'entry_enc',
    'duration', 'idle_time', 'co2_emission_g', 'direction_enc'
]
target = 'exit_enc'

X = vehicle_data[features].values
y = vehicle_data[target].values

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=32, shuffle=True)

class ExitPredictor(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(ExitPredictor, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        return self.fc2(x)

input_size = X.shape[1]
hidden_size = 64
num_classes = len(np.unique(y))

model = ExitPredictor(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(20):
    model.train()
    for xb, yb in train_loader:
        pred = model(xb)
        loss = criterion(pred, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

torch.save(model.state_dict(), "exit_path_model.pt")
import joblib
joblib.dump((scaler, le_vehicle_type, le_direction, le_entry, le_exit), "encoders.pkl")
print("\nModel and encoders saved.")


Epoch 1, Loss: 1.4877
Epoch 2, Loss: 1.4714
Epoch 3, Loss: 1.4554
Epoch 4, Loss: 1.4396
Epoch 5, Loss: 1.4240
Epoch 6, Loss: 1.4088
Epoch 7, Loss: 1.3937
Epoch 8, Loss: 1.3790
Epoch 9, Loss: 1.3644
Epoch 10, Loss: 1.3500
Epoch 11, Loss: 1.3359
Epoch 12, Loss: 1.3219
Epoch 13, Loss: 1.3082
Epoch 14, Loss: 1.2948
Epoch 15, Loss: 1.2815
Epoch 16, Loss: 1.2684
Epoch 17, Loss: 1.2555
Epoch 18, Loss: 1.2428
Epoch 19, Loss: 1.2303
Epoch 20, Loss: 1.2180

Model and encoders saved.


In [2]:
print("Allowed directions:", le_direction.classes_)
print("Allowed entries:", le_entry.classes_)


Allowed directions: ['0->2' '2->1' '2->3' '3->0' '3->2']
Allowed entries: [0 2 3]


In [24]:

model = ExitPredictor(input_size=7, hidden_size=64, num_classes=len(le_exit.classes_))
model.load_state_dict(torch.load("exit_path_model.pt"))
model.eval()

scaler, le_vehicle_type, le_direction, le_entry, le_exit = joblib.load("encoders.pkl")

def predict_exit_zone(model, scaler, le_vehicle_type, le_direction, le_entry, le_exit,
                      vehicle_type, most_frequent_lane, entry_zone_id,
                      duration, idle_time, co2_emission_g, direction):
    if isinstance(entry_zone_id, str) and entry_zone_id.startswith('E'):
        entry_zone_id = entry_zone_id[1:]

    vehicle_type_enc = le_vehicle_type.transform([vehicle_type])[0]
    direction_enc = le_direction.transform([direction])[0]
    entry_enc = le_entry.transform([entry_zone_id])[0]

    input_vector = np.array([[vehicle_type_enc, most_frequent_lane, entry_enc,
                              duration, idle_time, co2_emission_g, direction_enc]])
    input_scaled = scaler.transform(input_vector)
    input_tensor = torch.tensor(input_scaled, dtype=torch.float32)

    with torch.no_grad():
        output = model(input_tensor)
        predicted_idx = torch.argmax(output, dim=1).item()
        predicted_label = le_exit.inverse_transform([predicted_idx])[0]
    
    return predicted_label

# Call prediction
predicted = predict_exit_zone(
    model=model,
    scaler=scaler,
    le_vehicle_type=le_vehicle_type,
    le_direction=le_direction,
    le_entry=le_entry,
    le_exit=le_exit,
    vehicle_type="car",
    most_frequent_lane=2,
    entry_zone_id="E2",    
    duration=15.5,
    idle_time=3.0,
    co2_emission_g=280.0,
    direction="2->3"
)

print("Predicted optimal exit zone:", predicted)


Predicted optimal exit zone: 2


In [3]:
from sklearn.metrics import classification_report

true_labels = []
predicted_labels = []

for _, row in test_df.iterrows():
    true_exit = row['true_exit_zone']
    pred_exit = predict_exit_zone(
        model=model,
        scaler=scaler,
        le_vehicle_type=le_vehicle_type,
        le_direction=le_direction,
        le_entry=le_entry,
        le_exit=le_exit,
        vehicle_type=row['vehicle_type'],
        most_frequent_lane=row['most_frequent_lane'],
        entry_zone_id=row['entry_zone_id'],
        duration=row['duration'],
        idle_time=row['idle_time'],
        co2_emission_g=row['co2_emission_g'],
        direction=row['direction']
    )
    
    true_labels.append(true_exit)
    predicted_labels.append(pred_exit)

# Generate and print the classification report
print(classification_report(true_labels, predicted_labels))


NameError: name 'test_df' is not defined

In [3]:
import pandas as pd
import plotly.graph_objs as go
import plotly.io as pio

# Load your vehicle summary CSV
df = pd.read_csv("vehicle_summary (1).csv")

# Time range: second-by-second
min_time = int(df['start_time'].min())
max_time = int(df['end_time'].max())
time_range = range(min_time, max_time + 1)

# Prepare traffic counts per lane
lane_ids = sorted(df['most_frequent_lane'].unique())
lane_traffic = {lane: [] for lane in lane_ids}

for t in time_range:
    active = df[(df['start_time'] <= t) & (df['end_time'] >= t)]
    for lane in lane_ids:
        lane_traffic[lane].append((active['most_frequent_lane'] == lane).sum())

# Convert to DataFrame
traffic_df = pd.DataFrame(lane_traffic, index=time_range)
traffic_df.index.name = "time"
traffic_df.to_csv("lane_traffic_time_series.csv")
print("Saved lane-wise traffic time series to 'lane_traffic_time_series.csv'")

# Plot using Plotly
fig = go.Figure()

for lane in lane_ids:
    fig.add_trace(go.Scatter(x=traffic_df.index, y=traffic_df[lane],
                             mode='lines',
                             name=f"Lane {lane}"))

fig.update_layout(
    title="Traffic Density Over Time (Per Lane)",
    xaxis_title="Time (s)",
    yaxis_title="Number of Vehicles",
    legend_title="Lanes",
    template="plotly_white"
)

# Save the figure to HTML
pio.write_html(fig, file="traffic_density_plot.html", auto_open=True)
print("Interactive traffic density graph saved as 'traffic_density_plot.html'")


Saved lane-wise traffic time series to 'lane_traffic_time_series.csv'
Interactive traffic density graph saved as 'traffic_density_plot.html'


In [4]:
import pandas as pd
import plotly.graph_objects as go

df = pd.read_csv("lane_traffic_time_series.csv", index_col='time')

df['slot'] = (df.index // 1800) * 1800  # 1800 sec = 30 min
df['slot'] = pd.to_datetime(df['slot'], unit='s').dt.strftime('%H:%M')

slot_summary = df.groupby('slot').sum()

cycle_time = 120

green_times = slot_summary.div(slot_summary.sum(axis=1), axis=0) * cycle_time
green_times = green_times.round(1)

fig = go.Figure()
colors = ['#f5c71a', '#1f77b4', '#7f7f7f', '#d95f02']  # Matching colors

for i, lane in enumerate(green_times.columns):
    fig.add_trace(go.Bar(
        name=f"Green Signal for Path {lane}",
        x=green_times.index,
        y=green_times[lane],
        marker_color=colors[i % len(colors)],
        text=green_times[lane],
        textposition='auto'
    ))

fig.update_layout(
    barmode='stack',
    title="Green Light Optimization for all four paths",
    xaxis_title="Time Slot",
    yaxis_title="Green Time (seconds)",
    template="plotly_white",
    legend_title="Paths"
)

fig.write_html("green_light_optimization.html", auto_open=True)
print("Saved stacked green signal optimization chart to green_light_optimization.html")


Saved stacked green signal optimization chart to green_light_optimization.html


In [10]:
import pandas as pd
import plotly.graph_objs as go
import plotly.io as pio
import numpy as np

# Load your vehicle summary CSV
df = pd.read_csv("vehicle_summary (1).csv")

# Time range: second-by-second
min_time = int(df['start_time'].min())
max_time = int(df['end_time'].max())
time_range = range(min_time, max_time + 1)

# Prepare traffic counts per lane
lane_ids = sorted(df['most_frequent_lane'].unique())
lane_traffic = {lane: [] for lane in lane_ids}

for t in time_range:
    active = df[(df['start_time'] <= t) & (df['end_time'] >= t)]
    for lane in lane_ids:
        lane_traffic[lane].append((active['most_frequent_lane'] == lane).sum())

# Convert to DataFrame
traffic_df = pd.DataFrame(lane_traffic, index=time_range)
traffic_df.index.name = "time"
traffic_df.to_csv("lane_traffic_time_series.csv")
print("Saved lane-wise traffic time series to 'lane_traffic_time_series.csv'")

# Plot traffic density over time
fig = go.Figure()
for lane in lane_ids:
    fig.add_trace(go.Scatter(x=traffic_df.index, y=traffic_df[lane],
                             mode='lines',
                             name=f"Lane {lane}"))

fig.update_layout(
    title="Traffic Density Over Time (Per Lane)",
    xaxis_title="Time (s)",
    yaxis_title="Number of Vehicles",
    legend_title="Lanes",
    template="plotly_white"
)

pio.write_html(fig, file="traffic_density_plot.html", auto_open=True)
print("Interactive traffic density graph saved as 'traffic_density_plot.html'")

# -------------------------- Green Light Optimization --------------------------
interval_size = 5  # 5-second intervals
total_cycle_time = 120  # Total green light cycle time in seconds
service_rate = 20  # Œº in vehicles/sec

intervals = list(range(min_time, max_time + 1, interval_size))
green_time_df = pd.DataFrame(columns=['interval_start', 'interval_end'] + [f'lane_{lane}_g' for lane in lane_ids])

for i in range(len(intervals) - 1):
    start, end = intervals[i], intervals[i + 1]
    sub_df = df[(df['start_time'] >= start) & (df['start_time'] < end)]
    arrival_rates = []
    for lane in lane_ids:
        arrivals = (sub_df['most_frequent_lane'] == lane).sum()
        Œª = arrivals / interval_size  # vehicles/sec
        arrival_rates.append(Œª)

    Œª_total = sum(arrival_rates) if sum(arrival_rates) > 0 else 1
    green_times = [(Œª / Œª_total) * total_cycle_time for Œª in arrival_rates]

    green_time_df.loc[i] = [start, end] + green_times

# Plot optimized green time for each lane
fig2 = go.Figure()
for idx, lane in enumerate(lane_ids):
    fig2.add_trace(go.Scatter(
        x=green_time_df['interval_start'],
        y=green_time_df[f'lane_{lane}_g'],
        mode='lines+markers',
        name=f"Lane {lane}"
    ))

fig2.update_layout(
    title="Green Signal Duration Over Time (Per Lane)",
    xaxis_title="Time Interval Start (s)",
    yaxis_title="Green Time (s)",
    legend_title="Lanes",
    template="plotly_white"
)

pio.write_html(fig2, file="green_signal_optimization_plot.html", auto_open=True)
print("Interactive green signal optimization graph saved as 'green_signal_optimization_plot.html'")

Saved lane-wise traffic time series to 'lane_traffic_time_series.csv'
Interactive traffic density graph saved as 'traffic_density_plot.html'
Interactive green signal optimization graph saved as 'green_signal_optimization_plot.html'


In [5]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from torch.utils.data import DataLoader, TensorDataset

# Load your file
vehicle_data = pd.read_csv("vehicle_summary_with_50_extra_rows.csv")

# Encode categorical features
le_vehicle_type = LabelEncoder()
le_direction = LabelEncoder()
le_entry = LabelEncoder()
le_exit = LabelEncoder()

vehicle_data['vehicle_type_enc'] = le_vehicle_type.fit_transform(vehicle_data['vehicle_type'])
vehicle_data['direction_enc'] = le_direction.fit_transform(vehicle_data['direction'])
vehicle_data['entry_enc'] = le_entry.fit_transform(vehicle_data['entry_zone_id'])
vehicle_data['exit_enc'] = le_exit.fit_transform(vehicle_data['exit_zone_id'])

# Features & target
features = [
    'vehicle_type_enc', 'most_frequent_lane', 'entry_enc',
    'duration', 'idle_time', 'co2_emission_g', 'direction_enc'
]
X = vehicle_data[features].values
y = vehicle_data['exit_enc'].values

# Normalize
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=32, shuffle=True)

# Define the model
class ExitPredictor(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(ExitPredictor, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# Initialize
input_size = len(features)
hidden_size = 64
num_classes = len(np.unique(y))

model = ExitPredictor(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train
for epoch in range(20):
    model.train()
    for xb, yb in train_loader:
        pred = model(xb)
        loss = criterion(pred, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

# Evaluate
model.eval()
with torch.no_grad():
    predictions = model(X_test_tensor)
    predicted_classes = torch.argmax(predictions, dim=1).numpy()

# Report
accuracy = accuracy_score(y_test, predicted_classes)
report = classification_report(y_test, predicted_classes, target_names=le_exit.classes_.astype(str))

print(f"\nModel Accuracy: {accuracy:.4f}")
print("\nClassification Report:\n", report)


Epoch 1, Loss: 1.3529
Epoch 2, Loss: 1.3588
Epoch 3, Loss: 1.3424
Epoch 4, Loss: 1.3433
Epoch 5, Loss: 1.2718
Epoch 6, Loss: 1.2243
Epoch 7, Loss: 1.2507
Epoch 8, Loss: 1.1697
Epoch 9, Loss: 1.1977
Epoch 10, Loss: 1.1358
Epoch 11, Loss: 1.1438
Epoch 12, Loss: 1.1416
Epoch 13, Loss: 1.1209
Epoch 14, Loss: 1.0686
Epoch 15, Loss: 1.0855
Epoch 16, Loss: 1.1302
Epoch 17, Loss: 1.1399
Epoch 18, Loss: 1.0154
Epoch 19, Loss: 1.1215
Epoch 20, Loss: 1.0137

Model Accuracy: 0.5714

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.00      0.00      0.00         2
           2       0.62      1.00      0.77         5
           3       0.50      0.60      0.55         5

    accuracy                           0.57        14
   macro avg       0.28      0.40      0.33        14
weighted avg       0.40      0.57      0.47        14



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [6]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, predicted_classes)
labels = le_exit.classes_

plt.figure(figsize=(8, 6))
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.title("Confusion Matrix - Exit Zone Prediction")
plt.colorbar()

tick_marks = np.arange(len(labels))
plt.xticks(tick_marks, labels, rotation=45)
plt.yticks(tick_marks, labels)

# Annotate cells
thresh = cm.max() / 2
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        plt.text(j, i, format(cm[i, j], 'd'),
                 ha="center", va="center",
                 color="white" if cm[i, j] > thresh else "black")

plt.ylabel('Actual Exit Zone')
plt.xlabel('Predicted Exit Zone')
plt.tight_layout()
plt.show()


ImportError: cannot import name 'artist' from 'matplotlib' (C:\Python310\lib\site-packages\matplotlib\__init__.py)

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from torch.utils.data import DataLoader, TensorDataset

vehicle_data = pd.read_csv("vehicle_summary_with_50_extra_rows.csv")

le_vehicle_type = LabelEncoder()
le_direction = LabelEncoder()
le_entry = LabelEncoder()
le_exit = LabelEncoder()

vehicle_data['vehicle_type_enc'] = le_vehicle_type.fit_transform(vehicle_data['vehicle_type'])
vehicle_data['direction_enc'] = le_direction.fit_transform(vehicle_data['direction'])
vehicle_data['entry_enc'] = le_entry.fit_transform(vehicle_data['entry_zone_id'])
vehicle_data['exit_enc'] = le_exit.fit_transform(vehicle_data['exit_zone_id'])

features = [
    'entry_enc', 'direction_enc', 'most_frequent_lane', 'duration', 'vehicle_type_enc'
]
X = vehicle_data[features].values
y = vehicle_data['exit_enc'].values

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=16, shuffle=True)

class ExitPredictor(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(ExitPredictor, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)

input_size = len(features)
hidden_size = 128
num_classes = len(np.unique(y))

model = ExitPredictor(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)

# Train model
for epoch in range(50):
    model.train()
    for xb, yb in train_loader:
        pred = model(xb)
        loss = criterion(pred, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

# Evaluate model
model.eval()
with torch.no_grad():
    predictions = model(X_test_tensor)
    predicted_classes = torch.argmax(predictions, dim=1).numpy()

# Evaluation report
accuracy = accuracy_score(y_test, predicted_classes)
report = classification_report(y_test, predicted_classes, target_names=le_exit.classes_.astype(str))

print(f"\n‚úÖ Model Accuracy: {accuracy:.4f}")
print("\nüìä Classification Report:\n", report)


Epoch 1, Loss: 1.4419
Epoch 2, Loss: 1.4374
Epoch 3, Loss: 1.5236
Epoch 4, Loss: 1.2685
Epoch 5, Loss: 1.3230
Epoch 6, Loss: 1.1896
Epoch 7, Loss: 1.3598
Epoch 8, Loss: 1.2471
Epoch 9, Loss: 1.2066
Epoch 10, Loss: 1.1415
Epoch 11, Loss: 0.9597
Epoch 12, Loss: 1.3169
Epoch 13, Loss: 1.0733
Epoch 14, Loss: 1.2306
Epoch 15, Loss: 1.0463
Epoch 16, Loss: 1.0169
Epoch 17, Loss: 1.1771
Epoch 18, Loss: 1.0716
Epoch 19, Loss: 1.1876
Epoch 20, Loss: 1.1579
Epoch 21, Loss: 1.0406
Epoch 22, Loss: 1.1529
Epoch 23, Loss: 0.7845
Epoch 24, Loss: 1.0101
Epoch 25, Loss: 0.9043
Epoch 26, Loss: 1.0010
Epoch 27, Loss: 0.8604
Epoch 28, Loss: 0.9996
Epoch 29, Loss: 0.7530
Epoch 30, Loss: 0.9733
Epoch 31, Loss: 0.7831
Epoch 32, Loss: 0.8513
Epoch 33, Loss: 0.8146
Epoch 34, Loss: 0.9593
Epoch 35, Loss: 0.8117
Epoch 36, Loss: 0.7403
Epoch 37, Loss: 0.8481
Epoch 38, Loss: 0.7014
Epoch 39, Loss: 0.9865
Epoch 40, Loss: 0.8867
Epoch 41, Loss: 0.5135
Epoch 42, Loss: 0.5693
Epoch 43, Loss: 0.7425
Epoch 44, Loss: 0.72

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load Data
vehicle_data = pd.read_csv("vehicle_summary_with_50_extra_rows.csv")

# Encoding categorical features
le_vehicle_type = LabelEncoder()
le_direction = LabelEncoder()
le_entry = LabelEncoder()
le_exit = LabelEncoder()

vehicle_data['vehicle_type_enc'] = le_vehicle_type.fit_transform(vehicle_data['vehicle_type'])
vehicle_data['direction_enc'] = le_direction.fit_transform(vehicle_data['direction'])
vehicle_data['entry_enc'] = le_entry.fit_transform(vehicle_data['entry_zone_id'])
vehicle_data['exit_enc'] = le_exit.fit_transform(vehicle_data['exit_zone_id'])

features = ['entry_enc', 'direction_enc', 'most_frequent_lane', 'duration', 'vehicle_type_enc']
X = vehicle_data[features].values
y = vehicle_data['exit_enc'].values

# Normalize
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Define models with grid search params
models = {
    "Random Forest": {
        "model": RandomForestClassifier(),
        "params": {
            "n_estimators": [50, 100, 150],
            "max_depth": [None, 10, 20]
        }
    },
    "XGBoost": {
        "model": XGBClassifier(use_label_encoder=False, eval_metric='mlogloss'),
        "params": {
            "n_estimators": [50, 100],
            "max_depth": [3, 5],
            "learning_rate": [0.01, 0.1]
        }
    },
    "Gradient Boosting": {
        "model": GradientBoostingClassifier(),
        "params": {
            "n_estimators": [50, 100],
            "learning_rate": [0.05, 0.1],
            "max_depth": [3, 5]
        }
    },
    "Logistic Regression": {
        "model": LogisticRegression(max_iter=1000),
        "params": {
            "C": [0.1, 1, 10],
            "solver": ["liblinear", "lbfgs"]
        }
    },
    "MLP Classifier": {
        "model": MLPClassifier(max_iter=1000),
        "params": {
            "hidden_layer_sizes": [(100,), (128, 64)],
            "activation": ['relu', 'tanh'],
            "alpha": [0.0001, 0.001]
        }
    }
}

results = {}

# Train and evaluate
for name, mp in models.items():
    print(f"\nüîç Tuning and Training: {name}")
    clf = GridSearchCV(mp['model'], mp['params'], cv=5, scoring='accuracy', n_jobs=-1)
    clf.fit(X_train, y_train)
    
    best_model = clf.best_estimator_
    preds = best_model.predict(X_test)
    
    acc = accuracy_score(y_test, preds)
    report = classification_report(y_test, preds, target_names=le_exit.classes_.astype(str))
    
    results[name] = {
        "accuracy": acc,
        "classification_report": report,
        "best_params": clf.best_params_
    }

# Display results
print("\nüìà Model Comparison Results:\n")
for model_name, res in results.items():
    print(f"=== {model_name} ===")
    print(f"‚úÖ Accuracy: {res['accuracy']:.4f}")
    print(f"üõ†Ô∏è Best Params: {res['best_params']}")
    print(f"üìä Classification Report:\n{res['classification_report']}")
    print("="*40)





üîç Tuning and Training: Random Forest

üîç Tuning and Training: XGBoost





üîç Tuning and Training: Gradient Boosting

üîç Tuning and Training: Logistic Regression

üîç Tuning and Training: MLP Classifier

üìà Model Comparison Results:

=== Random Forest ===
‚úÖ Accuracy: 1.0000
üõ†Ô∏è Best Params: {'max_depth': None, 'n_estimators': 50}
üìä Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         2
           2       1.00      1.00      1.00         5
           3       1.00      1.00      1.00         5

    accuracy                           1.00        14
   macro avg       1.00      1.00      1.00        14
weighted avg       1.00      1.00      1.00        14

=== XGBoost ===
‚úÖ Accuracy: 1.0000
üõ†Ô∏è Best Params: {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 50}
üìä Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from xgboost import XGBClassifier

# Load dataset
vehicle_data = pd.read_csv("vehicle_summary_with_50_extra_rows.csv")

# Encode categorical variables
le_vehicle_type = LabelEncoder()
le_direction = LabelEncoder()
le_entry = LabelEncoder()
le_exit = LabelEncoder()

vehicle_data['vehicle_type_enc'] = le_vehicle_type.fit_transform(vehicle_data['vehicle_type'])
vehicle_data['direction_enc'] = le_direction.fit_transform(vehicle_data['direction'])
vehicle_data['entry_enc'] = le_entry.fit_transform(vehicle_data['entry_zone_id'])
vehicle_data['exit_enc'] = le_exit.fit_transform(vehicle_data['exit_zone_id'])

# üß† CHOOSE FEATURES CAREFULLY TO REDUCE LEAKAGE
# Removed 'entry_enc' to avoid direct mapping to 'exit_enc'
features = ['direction_enc', 'most_frequent_lane', 'duration', 'vehicle_type_enc']
X = vehicle_data[features].values
y = vehicle_data['exit_enc'].values

# üé≤ Add Gaussian noise to simulate real-world imperfection
np.random.seed(42)
noise = np.random.normal(0, 0.3, X.shape)
X_noisy = X + noise

# üîß Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_noisy)

# ‚úÇÔ∏è Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# üöÄ Train XGBoost Classifier
model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', 
                      n_estimators=50, max_depth=3, learning_rate=0.1, random_state=42)
model.fit(X_train, y_train)

# üß™ Test & Evaluate
preds = model.predict(X_test)
acc = accuracy_score(y_test, preds)
report = classification_report(y_test, preds, target_names=le_exit.classes_.astype(str))

# üìä Show results
print(f"\nüìâ Realistic Accuracy: {acc:.4f}")
print("üìä Classification Report:\n", report)



üìâ Realistic Accuracy: 1.0000
üìä Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         2
           2       1.00      1.00      1.00         5
           3       1.00      1.00      1.00         5

    accuracy                           1.00        14
   macro avg       1.00      1.00      1.00        14
weighted avg       1.00      1.00      1.00        14





In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load Data
vehicle_data = pd.read_csv("vehicle_summary_with_50_extra_rows.csv")

# Encoding categorical features
le_vehicle_type = LabelEncoder()
le_direction = LabelEncoder()
le_entry = LabelEncoder()
le_exit = LabelEncoder()

vehicle_data['vehicle_type_enc'] = le_vehicle_type.fit_transform(vehicle_data['vehicle_type'])
vehicle_data['direction_enc'] = le_direction.fit_transform(vehicle_data['direction'])
vehicle_data['entry_enc'] = le_entry.fit_transform(vehicle_data['entry_zone_id'])
vehicle_data['exit_enc'] = le_exit.fit_transform(vehicle_data['exit_zone_id'])

# ‚ö†Ô∏è Remove entry_enc to avoid label leakage
features = ['direction_enc', 'most_frequent_lane', 'duration', 'vehicle_type_enc']

X = vehicle_data[features].values
y = vehicle_data['exit_enc'].values

# üé≤ Add Gaussian noise to simulate real-world imperfection
np.random.seed(42)
noise = np.random.normal(0, 0.3, X.shape)
X_noisy = X + noise

# Normalize
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_noisy)

# Split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Define models with grid search params
models = {
    "Random Forest": {
        "model": RandomForestClassifier(),
        "params": {
            "n_estimators": [50, 100],
            "max_depth": [5, 10]  # Reduce capacity
        }
    },
    "XGBoost": {
        "model": XGBClassifier(use_label_encoder=False, eval_metric='mlogloss'),
        "params": {
            "n_estimators": [50, 100],
            "max_depth": [3, 5],  # Reduce complexity
            "learning_rate": [0.05, 0.1]
        }
    },
    "Gradient Boosting": {
        "model": GradientBoostingClassifier(),
        "params": {
            "n_estimators": [50, 100],
            "learning_rate": [0.05, 0.1],
            "max_depth": [3, 5]
        }
    },
    "Logistic Regression": {
        "model": LogisticRegression(max_iter=1000),
        "params": {
            "C": [0.1, 1],
            "solver": ["liblinear", "lbfgs"]
        }
    },
    "MLP Classifier": {
        "model": MLPClassifier(max_iter=1000),
        "params": {
            "hidden_layer_sizes": [(64,), (128,)],
            "activation": ['relu', 'tanh'],
            "alpha": [0.001, 0.01]  # Increase regularization
        }
    }
}

results = {}

# Train and evaluate
for name, mp in models.items():
    print(f"\nüîç Tuning and Training: {name}")
    clf = GridSearchCV(mp['model'], mp['params'], cv=5, scoring='accuracy', n_jobs=-1)
    clf.fit(X_train, y_train)
    
    best_model = clf.best_estimator_
    preds = best_model.predict(X_test)
    
    acc = accuracy_score(y_test, preds)
    report = classification_report(y_test, preds, target_names=le_exit.classes_.astype(str))
    
    results[name] = {
        "accuracy": acc,
        "classification_report": report,
        "best_params": clf.best_params_
    }

# Display results
print("\nüìà Model Comparison Results:\n")
for model_name, res in results.items():
    print(f"=== {model_name} ===")
    print(f"‚úÖ Accuracy: {res['accuracy']:.4f}")
    print(f"üõ†Ô∏è Best Params: {res['best_params']}")
    print(f"üìä Classification Report:\n{res['classification_report']}")
    print("="*40)





üîç Tuning and Training: Random Forest

üîç Tuning and Training: XGBoost





üîç Tuning and Training: Gradient Boosting

üîç Tuning and Training: Logistic Regression

üîç Tuning and Training: MLP Classifier

üìà Model Comparison Results:

=== Random Forest ===
‚úÖ Accuracy: 1.0000
üõ†Ô∏è Best Params: {'max_depth': 5, 'n_estimators': 50}
üìä Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         2
           2       1.00      1.00      1.00         5
           3       1.00      1.00      1.00         5

    accuracy                           1.00        14
   macro avg       1.00      1.00      1.00        14
weighted avg       1.00      1.00      1.00        14

=== XGBoost ===
‚úÖ Accuracy: 1.0000
üõ†Ô∏è Best Params: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
üìä Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1 