In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

In [2]:
# Load dataset
df = pd.read_csv("Traffic.csv")

# view the head
df.head(10)

Unnamed: 0,Time,Date,Day of the week,CarCount,BikeCount,BusCount,TruckCount,Total,Traffic Situation
0,12:00:00 AM,10,Tuesday,31,0,4,4,39,low
1,12:15:00 AM,10,Tuesday,49,0,3,3,55,low
2,12:30:00 AM,10,Tuesday,46,0,3,6,55,low
3,12:45:00 AM,10,Tuesday,51,0,2,5,58,low
4,1:00:00 AM,10,Tuesday,57,6,15,16,94,normal
5,1:15:00 AM,10,Tuesday,44,0,5,4,53,low
6,1:30:00 AM,10,Tuesday,37,0,1,4,42,low
7,1:45:00 AM,10,Tuesday,42,4,4,5,55,low
8,2:00:00 AM,10,Tuesday,51,0,9,7,67,low
9,2:15:00 AM,10,Tuesday,34,0,4,7,45,low


In [3]:
# view the tail
df.tail(10)

Unnamed: 0,Time,Date,Day of the week,CarCount,BikeCount,BusCount,TruckCount,Total,Traffic Situation
2966,9:30:00 PM,9,Thursday,106,18,13,27,164,high
2967,9:45:00 PM,9,Thursday,107,14,10,18,149,normal
2968,10:00:00 PM,9,Thursday,17,4,1,20,42,normal
2969,10:15:00 PM,9,Thursday,11,4,1,30,46,normal
2970,10:30:00 PM,9,Thursday,11,4,0,23,38,normal
2971,10:45:00 PM,9,Thursday,16,3,1,36,56,normal
2972,11:00:00 PM,9,Thursday,11,0,1,30,42,normal
2973,11:15:00 PM,9,Thursday,15,4,1,25,45,normal
2974,11:30:00 PM,9,Thursday,16,5,0,27,48,normal
2975,11:45:00 PM,9,Thursday,14,3,1,15,33,normal


In [4]:
# view the shape of the data

print("The shape of the data is: ", df.shape)

The shape of the data is:  (2976, 9)


In [5]:
# view the info of the data
print("the info of the data is:",df.info)

the info of the data is: <bound method DataFrame.info of              Time  Date Day of the week  CarCount  BikeCount  BusCount  \
0     12:00:00 AM    10         Tuesday        31          0         4   
1     12:15:00 AM    10         Tuesday        49          0         3   
2     12:30:00 AM    10         Tuesday        46          0         3   
3     12:45:00 AM    10         Tuesday        51          0         2   
4      1:00:00 AM    10         Tuesday        57          6        15   
...           ...   ...             ...       ...        ...       ...   
2971  10:45:00 PM     9        Thursday        16          3         1   
2972  11:00:00 PM     9        Thursday        11          0         1   
2973  11:15:00 PM     9        Thursday        15          4         1   
2974  11:30:00 PM     9        Thursday        16          5         0   
2975  11:45:00 PM     9        Thursday        14          3         1   

      TruckCount  Total Traffic Situation  
0         

In [6]:
# view the data type of the data
df.dtypes

Time                 object
Date                  int64
Day of the week      object
CarCount              int64
BikeCount             int64
BusCount              int64
TruckCount            int64
Total                 int64
Traffic Situation    object
dtype: object

##### Data cleaning and preprocessing

In [7]:
# Convert Time to proper time format
df['Time'] = pd.to_datetime(df['Time'], format='%I:%M:%S %p').dt.time

# Create artificial sequential datetime index
start_date = pd.to_datetime("2024-01-01")

df['Datetime'] = pd.date_range(
    start=start_date,
    periods=len(df),
    freq='H'
)

df.drop(['Date', 'Time'], axis=1, inplace=True)
 
# Extract the hour, day, and the month from Datetime
df['year'] = df['Datetime'].dt.year
df['month'] = df['Datetime'].dt.month
df['day'] = df['Datetime'].dt.day
df['hour'] = df['Datetime'].dt.hour
df['dayofweek'] = df['Datetime'].dt.dayofweek

df.drop(columns=['Datetime'], inplace=True)
df.drop(columns=['Day of the week'], inplace=True)

  df['Datetime'] = pd.date_range(


In [8]:
df.dtypes

CarCount              int64
BikeCount             int64
BusCount              int64
TruckCount            int64
Total                 int64
Traffic Situation    object
year                  int32
month                 int32
day                   int32
hour                  int32
dayofweek             int32
dtype: object

#### Features Encoding

In [None]:
# Encode categorical feature
df['Traffic Situation'] = df['Traffic Situation'].astype('category').cat.codes
#df['Day of the week'] = df['Day of the week'].astype('category').cat.codes                      

X = df[['CarCount','BikeCount','BusCount','TruckCount','Total','year',"month",'day', 'hour', 'dayofweek']]
y = df['Traffic Situation']

In [11]:
#view the X
X

Unnamed: 0,CarCount,BikeCount,BusCount,TruckCount,Total,year,month,day,hour,dayofweek
0,31,0,4,4,39,2024,1,1,0,0
1,49,0,3,3,55,2024,1,1,1,0
2,46,0,3,6,55,2024,1,1,2,0
3,51,0,2,5,58,2024,1,1,3,0
4,57,6,15,16,94,2024,1,1,4,0
...,...,...,...,...,...,...,...,...,...,...
2971,16,3,1,36,56,2024,5,3,19,4
2972,11,0,1,30,42,2024,5,3,20,4
2973,15,4,1,25,45,2024,5,3,21,4
2974,16,5,0,27,48,2024,5,3,22,4


In [12]:
#  view the y
y

0       2
1       2
2       2
3       2
4       3
       ..
2971    3
2972    3
2973    3
2974    3
2975    3
Name: Traffic Situation, Length: 2976, dtype: int8

In [13]:
# view  shape of X, and y
shape_of_X = X
shape_of_y = y

print("Shape of the X is now:", shape_of_X.shape)
print("Shape of the y is now:", shape_of_y.shape)


Shape of the X is now: (2976, 10)
Shape of the y is now: (2976,)


In [14]:
# reassign X and y
X = shape_of_X
y = shape_of_y

In [15]:
# splitting the data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# RandomForestClassifier ---> ML Algorithm
model = RandomForestClassifier(n_estimators=200)
model.fit(X_train, y_train)

# validation 
pred = model.predict(X_test)


# evaluation
acc = accuracy_score(y_test, pred)


# print the model accuracy
print("Model Accuracy:", acc)


# save the model 
joblib.dump(model, "traffic_model.pkl")

Model Accuracy: 0.9899328859060402


['traffic_model.pkl']