In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

In [3]:
# Load the traffic dataset from a CSV file located next to the notebook.
df = pd.read_csv(filepath_or_buffer='Banglore_traffic_Dataset.csv')

In [6]:
# Preview the loaded frame; the rendered table is truncated to its first and last rows.
df

Unnamed: 0,Date,Area Name,Road/Intersection Name,Traffic Volume,Average Speed,Travel Time Index,Congestion Level,Road Capacity Utilization,Incident Reports,Environmental Impact,Public Transport Usage,Traffic Signal Compliance,Parking Usage,Pedestrian and Cyclist Count,Weather Conditions,Roadwork and Construction Activity
0,2022-01-01,Indiranagar,100 Feet Road,50590,50.230299,1.500000,100.000000,100.000000,0,151.180,70.632330,84.044600,85.403629,111,Clear,No
1,2022-01-01,Indiranagar,CMH Road,30825,29.377125,1.500000,100.000000,100.000000,1,111.650,41.924899,91.407038,59.983689,100,Clear,No
2,2022-01-01,Whitefield,Marathahalli Bridge,7399,54.474398,1.039069,28.347994,36.396525,0,64.798,44.662384,61.375541,95.466020,189,Clear,No
3,2022-01-01,Koramangala,Sony World Junction,60874,43.817610,1.500000,100.000000,100.000000,1,171.748,32.773123,75.547092,63.567452,111,Clear,No
4,2022-01-01,Koramangala,Sarjapur Road,57292,41.116763,1.500000,100.000000,100.000000,3,164.584,35.092601,64.634762,93.155171,104,Clear,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8931,2024-08-09,Electronic City,Hosur Road,11387,23.440276,1.262384,35.871483,57.354487,1,72.774,21.523289,83.530352,97.898279,211,Fog,No
8932,2024-08-09,M.G. Road,Trinity Circle,36477,45.168429,1.500000,100.000000,100.000000,3,122.954,29.822312,60.738488,60.355967,95,Clear,No
8933,2024-08-09,M.G. Road,Anil Kumble Circle,42822,22.028609,1.500000,100.000000,100.000000,1,135.644,43.185905,85.321627,61.333731,110,Clear,No
8934,2024-08-09,Jayanagar,South End Circle,20540,52.254798,1.020520,72.639152,97.845527,2,91.080,44.416043,89.586947,79.197198,94,Clear,No


In [8]:
# Parse the ISO-formatted date strings once, then derive the weekday name
# (e.g. "Monday") from the parsed values.
parsed_dates = pd.to_datetime(df['Date'], format='%Y-%m-%d')
df['Date'] = parsed_dates
df['Day of Week'] = parsed_dates.dt.day_name()

In [10]:
# Build the binary target 'Traffic Status':
#   1 = congestion level strictly above the dataset median ("Traffic")
#   0 = congestion level at or below the median ("No Traffic")
congestion = df['Congestion Level']
median_threshold = congestion.median()
df['Traffic Status'] = congestion.gt(median_threshold).astype(int)


In [12]:
# Remove the raw date (already summarised as 'Day of Week') and the
# congestion column that the target was derived from.
drop_columns = ['Date', 'Congestion Level']
df = df.drop(drop_columns, axis='columns')

In [14]:
# Integer-encode the categorical features. One fitted encoder is kept per
# column so the same mapping can be reused later at prediction time.
categorical_cols = [
    'Area Name',
    'Road/Intersection Name',
    'Day of Week',
    'Weather Conditions',
    'Roadwork and Construction Activity',
]

# Fit every encoder on the original string values first ...
label_encoders = {col: LabelEncoder().fit(df[col]) for col in categorical_cols}

# ... then replace each column with its integer codes.
for col, encoder in label_encoders.items():
    df[col] = encoder.transform(df[col])

In [16]:
# Separate the target from the predictors, then hold out 20% of the rows
# for evaluation (fixed seed for a reproducible split).
# NOTE(review): X still contains congestion-related columns such as
# 'Travel Time Index' and 'Road Capacity Utilization' - check whether they
# leak the target before trusting the reported accuracy.
y = df['Traffic Status']
X = df.drop('Traffic Status', axis='columns')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [18]:
# Fit a 100-tree random forest on the training split (seeded for
# reproducibility). `fit` returns the estimator itself, so chaining is safe.
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)
model

In [20]:
# Score the fitted model on the held-out split: overall accuracy first,
# followed by the per-class precision / recall / F1 report.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")
print(classification_report(y_true=y_test, y_pred=y_pred))

Model Accuracy: 95.30%
              precision    recall  f1-score   support

           0       0.95      0.96      0.95       880
           1       0.96      0.95      0.95       908

    accuracy                           0.95      1788
   macro avg       0.95      0.95      0.95      1788
weighted avg       0.95      0.95      0.95      1788



In [22]:
# Persist the fitted model and the per-column label encoders; both files are
# needed together to encode inputs and predict later.
joblib.dump(value=model, filename="bengaluru_traffic_model.pkl")
joblib.dump(value=label_encoders, filename="label_encoders.pkl")


['label_encoders.pkl']

In [24]:
def predict_traffic(area_name, day_of_week, feature_values=None, model=None, label_encoders=None):
    """Predict "Traffic" / "No Traffic" for an area on a given day of the week.

    The model expects every feature it was trained on. Any feature that is
    not supplied is filled with 0, which for numeric columns means "zero"
    and for label-encoded columns means "the first encoded class". Pass
    realistic values through ``feature_values`` to avoid predicting on such
    placeholder inputs.

    Parameters
    ----------
    area_name : str
        Area name exactly as it appeared in the training data.
    day_of_week : str
        Day name exactly as it appeared in the training data (e.g. "Tuesday").
    feature_values : mapping, optional
        Extra ``{feature name: value}`` pairs for the remaining model
        features. Values for label-encoded columns are given as their
        original labels and are encoded here. ``area_name`` and
        ``day_of_week`` take precedence over entries for those two columns.
    model : fitted estimator, optional
        Object exposing ``feature_names_in_`` and ``predict``. Loaded from
        "bengaluru_traffic_model.pkl" when omitted.
    label_encoders : dict, optional
        Mapping of column name to fitted encoder (``classes_`` and
        ``transform``). Loaded from "label_encoders.pkl" when omitted.

    Returns
    -------
    str
        "Traffic" if the model predicts class 1, otherwise "No Traffic".

    Raises
    ------
    ValueError
        If the area, the day, or a categorical value in ``feature_values``
        is unknown to its encoder, or if ``feature_values`` names a feature
        the model was not trained on.
    """
    # NOTE(security): joblib.load unpickles arbitrary objects; only load
    # artifact files that come from a trusted source.
    if model is None:
        model = joblib.load("bengaluru_traffic_model.pkl")
    if label_encoders is None:
        label_encoders = joblib.load("label_encoders.pkl")

    # Column names (and order) the model was fitted with.
    feature_names = list(model.feature_names_in_)

    def _encode(column, value, error_message):
        # Translate a raw label into the integer code used during training.
        encoder = label_encoders[column]
        if value not in encoder.classes_:
            raise ValueError(error_message)
        return encoder.transform([value])[0]

    # Start from an all-zero row so every expected feature is present.
    row = dict.fromkeys(feature_names, 0)

    # Apply caller-supplied values first; the explicit arguments override below.
    for column, value in (feature_values or {}).items():
        if column not in row:
            raise ValueError(f"Error: Feature '{column}' was not used to train the model.")
        if column in label_encoders:
            value = _encode(
                column,
                value,
                f"Error: Value '{value}' not found in training data for '{column}'.",
            )
        row[column] = value

    row['Area Name'] = _encode(
        'Area Name',
        area_name,
        f"Error: Area '{area_name}' not found in training data. Please check the input area name.",
    )
    row['Day of Week'] = _encode('Day of Week', day_of_week, "Error: Invalid day of the week.")

    # Single-row frame with columns in the exact order the model expects.
    input_data = pd.DataFrame([row], columns=feature_names)

    prediction = model.predict(input_data)
    return "Traffic" if prediction[0] == 1 else "No Traffic"

In [26]:
# Example query: one area / weekday pair run through the saved model.
area_name, day_of_week = "Electronic City", "Tuesday"
result = predict_traffic(area_name=area_name, day_of_week=day_of_week)
print(f"Traffic prediction for {area_name} on {day_of_week}: {result}")

Traffic prediction for Electronic City on Tuesday: No Traffic
