In [10]:
import pandas as pd
df = pd.read_csv(r'notebook\traffic_dataset.csv')
df.head()


Unnamed: 0,road,frc,currentSpeed,freeFlowSpeed,currentTravelTime,freeFlowTravelTime,confidence,roadClosure,latitude,longitude,Day,Hour,minute,Delay ratio,Delay,Traffic level
0,8,0,39,39,918,918,1.0,0,19.25,73.05,27,18,30,0.0,0,Low
1,8,3,23,23,190,190,1.0,0,19.25,72.98,27,18,30,0.0,0,Low
2,0,3,6,10,6,3,1.0,0,19.2183,72.9781,27,18,30,1.0,3,High
3,0,3,19,19,148,148,1.0,0,19.076,72.8777,27,18,30,0.0,0,Low
4,1,3,21,21,228,228,1.0,0,19.196,72.96,27,18,30,0.0,0,Low


In [11]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df['Traffic level'] = le.fit_transform(df['Traffic level'])
df.head()

Unnamed: 0,road,frc,currentSpeed,freeFlowSpeed,currentTravelTime,freeFlowTravelTime,confidence,roadClosure,latitude,longitude,Day,Hour,minute,Delay ratio,Delay,Traffic level
0,8,0,39,39,918,918,1.0,0,19.25,73.05,27,18,30,0.0,0,1
1,8,3,23,23,190,190,1.0,0,19.25,72.98,27,18,30,0.0,0,1
2,0,3,6,10,6,3,1.0,0,19.2183,72.9781,27,18,30,1.0,3,0
3,0,3,19,19,148,148,1.0,0,19.076,72.8777,27,18,30,0.0,0,1
4,1,3,21,21,228,228,1.0,0,19.196,72.96,27,18,30,0.0,0,1


In [24]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import RandomForestClassifier


# Defining features and target
x = df[['road','frc','currentSpeed','freeFlowSpeed','currentTravelTime','freeFlowTravelTime','confidence',
        'roadClosure','latitude','longitude', 'Day', 'Hour', 'minute']]
y = df['Traffic level']

# Train test split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)

#Trainning model
model = RandomForestClassifier()
model.fit(x_train,y_train)

#prediction
y_pred = model.predict(x_test)

#evaluating with model performance
print('Accuracy: ',accuracy_score(y_test,y_pred))
print('Classification report',classification_report(y_test,y_pred))

Accuracy:  0.98
Classification report               precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       0.99      0.98      0.98       129
           2       0.95      0.98      0.97        57

    accuracy                           0.98       200
   macro avg       0.98      0.99      0.98       200
weighted avg       0.98      0.98      0.98       200



In [26]:
#hyperparameter tunning
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [5, 10, 15],           
    'min_samples_split': [2, 5, 10],    
    'min_samples_leaf': [1, 4, 8]       
}
grid = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5)
grid.fit(x_train, y_train)
y_pred = grid.predict(x_test)





print("Best Parameters:", grid.best_params_)

print("Test Accuracy:",accuracy_score(y_test,y_pred))
print('Classification report',classification_report(y_test,y_pred))


Best Parameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
Test Accuracy: 0.98
Classification report               precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       0.99      0.98      0.98       129
           2       0.95      0.98      0.97        57

    accuracy                           0.98       200
   macro avg       0.98      0.99      0.98       200
weighted avg       0.98      0.98      0.98       200

