## Introduction
- Analyzing traffic data

### Resources
- [Traffic Dataset](https://www.kaggle.com/datasets/hasibullahaman/traffic-prediction-dataset)

### Credits
- [Hasibullah Aman ](https://www.kaggle.com/hasibullahaman)


In [96]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [97]:
# reading csv file, creating a dataframe named `df`
df = pd.read_csv('./dataset/Traffic.csv')

# About data
df.head()

Unnamed: 0,Time,Date,Day of the week,CarCount,BikeCount,BusCount,TruckCount,Total,Traffic Situation
0,12:00:00 AM,10,Tuesday,31,0,4,4,39,low
1,12:15:00 AM,10,Tuesday,49,0,3,3,55,low
2,12:30:00 AM,10,Tuesday,46,0,3,6,55,low
3,12:45:00 AM,10,Tuesday,51,0,2,5,58,low
4,1:00:00 AM,10,Tuesday,57,6,15,16,94,normal


In [98]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2976 entries, 0 to 2975
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Time               2976 non-null   object
 1   Date               2976 non-null   int64 
 2   Day of the week    2976 non-null   object
 3   CarCount           2976 non-null   int64 
 4   BikeCount          2976 non-null   int64 
 5   BusCount           2976 non-null   int64 
 6   TruckCount         2976 non-null   int64 
 7   Total              2976 non-null   int64 
 8   Traffic Situation  2976 non-null   object
dtypes: int64(6), object(3)
memory usage: 209.4+ KB


In [99]:
df.describe()

Unnamed: 0,Date,CarCount,BikeCount,BusCount,TruckCount,Total
count,2976.0,2976.0,2976.0,2976.0,2976.0,2976.0
mean,16.0,68.696573,14.917339,15.27957,15.324933,114.218414
std,8.945775,45.850693,12.847518,14.341986,10.603833,60.190627
min,1.0,6.0,0.0,0.0,0.0,21.0
25%,8.0,19.0,5.0,1.0,6.0,55.0
50%,16.0,64.0,12.0,12.0,14.0,109.0
75%,24.0,107.0,22.0,25.0,23.0,164.0
max,31.0,180.0,70.0,50.0,40.0,279.0


In [100]:
# Checking for null values
df.isnull().sum()

Time                 0
Date                 0
Day of the week      0
CarCount             0
BikeCount            0
BusCount             0
TruckCount           0
Total                0
Traffic Situation    0
dtype: int64

# Time Series Analysis

In [101]:
df['Time'] = pd.to_datetime(df['Time'])

In [102]:
df.groupby(df['Time'].dt.date)['Traffic Situation'].value_counts()

Time        Traffic Situation
2023-12-11  normal               1669
            heavy                 682
            high                  321
            low                   304
Name: Traffic Situation, dtype: int64

In [103]:
# Day-Wise Traffic situation
df.groupby(df['Day of the week'])['Traffic Situation'].value_counts()

Day of the week  Traffic Situation
Friday           normal               170
                 low                  118
                 heavy                 96
Monday           normal               214
                 heavy                 86
                 high                  48
                 low                   36
Saturday         normal               234
                 heavy                 88
                 high                  39
                 low                   23
Sunday           normal               213
                 heavy                 81
                 high                  62
                 low                   28
Thursday         normal               282
                 heavy                108
                 high                  58
                 low                   32
Tuesday          normal               276
                 heavy                112
                 high                  55
                 low                   37

In [104]:
#Converting target variable 'Traffic Situation' into Numeric form
df['Traffic Situation'] = df['Traffic Situation'].replace({'low':0, 'normal':1, 
                                                              'heavy':2, 'high':3})                                                         

In [105]:
#Dealing with 'Day of the week' feature
df['Day of the week'].value_counts()

Tuesday      480
Wednesday    480
Thursday     480
Friday       384
Saturday     384
Sunday       384
Monday       384
Name: Day of the week, dtype: int64

In [106]:
#Converting Days of weak into Numeric form
df['Day of the week'] = df['Day of the week'].replace({'Monday':1,'Tuesday':2,
                                                          'Wednesday':3,'Thursday':4,
                                                          'Friday':5,'Saturday':6,
                                                          'Sunday':7})

In [107]:
df.head()

Unnamed: 0,Time,Date,Day of the week,CarCount,BikeCount,BusCount,TruckCount,Total,Traffic Situation
0,2023-12-11 00:00:00,10,2,31,0,4,4,39,0
1,2023-12-11 00:15:00,10,2,49,0,3,3,55,0
2,2023-12-11 00:30:00,10,2,46,0,3,6,55,0
3,2023-12-11 00:45:00,10,2,51,0,2,5,58,0
4,2023-12-11 01:00:00,10,2,57,6,15,16,94,1


In [108]:
#Dayswise number of vehicles and Traffic Situation
grouped_df = df.groupby(['Day of the week']).sum()
grouped_df

  grouped_df = df.groupby(['Day of the week']).sum()


Unnamed: 0_level_0,Date,CarCount,BikeCount,BusCount,TruckCount,Total,Traffic Situation
Day of the week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,7200,25893,5064,6206,6213,43376,530
2,8544,33426,6627,7816,7446,55315,665
3,6048,33424,6704,7943,7783,55854,679
4,6528,32704,6633,8027,7626,54990,672
5,6048,26478,8452,3154,3896,41980,362
6,6528,26446,5624,6151,6265,44486,527
7,6720,26070,5290,6175,6378,43913,561


In [109]:
# Changing number into alphabets
limit=50
df3=df.copy()
# Use numpy's where function to replace values based on conditions
df3['CarCount'] = np.where(df3['CarCount'] > limit, 'many', 'few')
df3['BikeCount'] = np.where(df3['BikeCount'] > limit, 'many', 'few')
df3['BusCount'] = np.where(df3['BusCount'] > limit, 'many', 'few')
df3['TruckCount'] = np.where(df3['TruckCount'] > limit, 'many', 'few')
# Print the updated DataFrame
df3.head()

Unnamed: 0,Time,Date,Day of the week,CarCount,BikeCount,BusCount,TruckCount,Total,Traffic Situation
0,2023-12-11 00:00:00,10,2,few,few,few,few,39,0
1,2023-12-11 00:15:00,10,2,few,few,few,few,55,0
2,2023-12-11 00:30:00,10,2,few,few,few,few,55,0
3,2023-12-11 00:45:00,10,2,many,few,few,few,58,0
4,2023-12-11 01:00:00,10,2,many,few,few,few,94,1
