# Metro Interstate Traffic Volume Prediction
### Author : Farbodkhm

## Importing libraries

In [164]:
import numpy as np
import pandas as pd
from prettytable import PrettyTable
import warnings
warnings.filterwarnings("ignore")
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingRegressor
from sklearn.metrics import r2_score, explained_variance_score, max_error, mean_squared_error, mean_absolute_error

## Importing the dataset

In [165]:
data = pd.read_csv('Metro_Interstate_Traffic_Volume.csv')
# pandas >= 2.0 treats the literal string "None" as a missing value when reading CSVs,
# which would turn the "no holiday" rows into NaN. Restore the label so the holiday
# column keeps its 'None' category on every pandas version (no-op when nothing is NaN).
data['holiday'] = data['holiday'].fillna('None')

In [166]:
# Preview the first rows to check that the CSV was parsed into the expected columns.
data.head()

Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,weather_description,date_time,traffic_volume
0,,288.28,0.0,0.0,40,Clouds,scattered clouds,2012-10-02 09:00:00,5545
1,,289.36,0.0,0.0,75,Clouds,broken clouds,2012-10-02 10:00:00,4516
2,,289.58,0.0,0.0,90,Clouds,overcast clouds,2012-10-02 11:00:00,4767
3,,290.13,0.0,0.0,90,Clouds,overcast clouds,2012-10-02 12:00:00,5026
4,,291.14,0.0,0.0,75,Clouds,broken clouds,2012-10-02 13:00:00,4918


In [167]:
# Preview the last rows to see where the recorded time range ends.
data.tail()

Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,weather_description,date_time,traffic_volume
48199,,283.45,0.0,0.0,75,Clouds,broken clouds,2018-09-30 19:00:00,3543
48200,,282.76,0.0,0.0,90,Clouds,overcast clouds,2018-09-30 20:00:00,2781
48201,,282.73,0.0,0.0,90,Thunderstorm,proximity thunderstorm,2018-09-30 21:00:00,2159
48202,,282.09,0.0,0.0,90,Clouds,overcast clouds,2018-09-30 22:00:00,1450
48203,,282.12,0.0,0.0,90,Clouds,overcast clouds,2018-09-30 23:00:00,954


In [168]:
# Column dtypes and non-null counts of the raw frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48204 entries, 0 to 48203
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   holiday              48204 non-null  object 
 1   temp                 48204 non-null  float64
 2   rain_1h              48204 non-null  float64
 3   snow_1h              48204 non-null  float64
 4   clouds_all           48204 non-null  int64  
 5   weather_main         48204 non-null  object 
 6   weather_description  48204 non-null  object 
 7   date_time            48204 non-null  object 
 8   traffic_volume       48204 non-null  int64  
dtypes: float64(3), int64(2), object(4)
memory usage: 3.3+ MB


In [169]:
# Summary statistics of traffic_volume, the column later used as the prediction target.
data['traffic_volume'].describe()

count    48204.000000
mean      3259.818355
std       1986.860670
min          0.000000
25%       1193.000000
50%       3380.000000
75%       4933.000000
max       7280.000000
Name: traffic_volume, dtype: float64

## Transforming Categorical Data to Numerical

#### We use LabelEncoder to assign an integer code to each category

In [170]:
# A single encoder instance is re-fit for every categorical column below, so
# le.classes_ only ever holds the mapping of the most recently fitted column.
le = LabelEncoder()

In [171]:
# Fit the shared encoder on the holiday column and display the code -> label mapping it learned.
Holiday_labels = le.fit_transform(data['holiday'])
dict(enumerate(le.classes_))

{0: 'Christmas Day',
 1: 'Columbus Day',
 2: 'Independence Day',
 3: 'Labor Day',
 4: 'Martin Luther King Jr Day',
 5: 'Memorial Day',
 6: 'New Years Day',
 7: 'None',
 8: 'State Fair',
 9: 'Thanksgiving Day',
 10: 'Veterans Day',
 11: 'Washingtons Birthday'}

In [172]:
# Re-fit the shared encoder on weather_main and display the code -> label mapping it learned.
Weather_main = le.fit_transform(data['weather_main'])
dict(enumerate(le.classes_))

{0: 'Clear',
 1: 'Clouds',
 2: 'Drizzle',
 3: 'Fog',
 4: 'Haze',
 5: 'Mist',
 6: 'Rain',
 7: 'Smoke',
 8: 'Snow',
 9: 'Squall',
 10: 'Thunderstorm'}

In [173]:
# Re-fit the shared encoder on weather_description and display the code -> label mapping it learned.
Weather_description = le.fit_transform(data['weather_description'])
dict(enumerate(le.classes_))

{0: 'SQUALLS',
 1: 'Sky is Clear',
 2: 'broken clouds',
 3: 'drizzle',
 4: 'few clouds',
 5: 'fog',
 6: 'freezing rain',
 7: 'haze',
 8: 'heavy intensity drizzle',
 9: 'heavy intensity rain',
 10: 'heavy snow',
 11: 'light intensity drizzle',
 12: 'light intensity shower rain',
 13: 'light rain',
 14: 'light rain and snow',
 15: 'light shower snow',
 16: 'light snow',
 17: 'mist',
 18: 'moderate rain',
 19: 'overcast clouds',
 20: 'proximity shower rain',
 21: 'proximity thunderstorm',
 22: 'proximity thunderstorm with drizzle',
 23: 'proximity thunderstorm with rain',
 24: 'scattered clouds',
 25: 'shower drizzle',
 26: 'shower snow',
 27: 'sky is clear',
 28: 'sleet',
 29: 'smoke',
 30: 'snow',
 31: 'thunderstorm',
 32: 'thunderstorm with drizzle',
 33: 'thunderstorm with heavy rain',
 34: 'thunderstorm with light drizzle',
 35: 'thunderstorm with light rain',
 36: 'thunderstorm with rain',
 37: 'very heavy rain'}

In [174]:
# Overwrite each categorical column with its integer codes; the encoder is re-fit
# per column, in the same order as the mappings displayed above.
for column_name in ('holiday', 'weather_main', 'weather_description'):
    data[column_name] = le.fit_transform(data[column_name])
data.head()

Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,weather_description,date_time,traffic_volume
0,7,288.28,0.0,0.0,40,1,24,2012-10-02 09:00:00,5545
1,7,289.36,0.0,0.0,75,1,2,2012-10-02 10:00:00,4516
2,7,289.58,0.0,0.0,90,1,19,2012-10-02 11:00:00,4767
3,7,290.13,0.0,0.0,90,1,19,2012-10-02 12:00:00,5026
4,7,291.14,0.0,0.0,75,1,2,2012-10-02 13:00:00,4918


In [175]:
# Re-check the dtypes now that the three categorical columns have been label-encoded.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48204 entries, 0 to 48203
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   holiday              48204 non-null  int32  
 1   temp                 48204 non-null  float64
 2   rain_1h              48204 non-null  float64
 3   snow_1h              48204 non-null  float64
 4   clouds_all           48204 non-null  int64  
 5   weather_main         48204 non-null  int32  
 6   weather_description  48204 non-null  int32  
 7   date_time            48204 non-null  object 
 8   traffic_volume       48204 non-null  int64  
dtypes: float64(3), int32(3), int64(2), object(1)
memory usage: 2.8+ MB


#### To make the timestamp usable as model input, we split it into four integer features: year, month, day and hour.

In [176]:
# Parse the timestamp strings into datetime values so their components can be extracted.
data['date_time'] =  pd.to_datetime(data['date_time'])

In [177]:
# Break the parsed timestamp into its calendar components via the .dt accessor.
data['year'] = data['date_time'].dt.year
data['month'] = data['date_time'].dt.month
data['day'] = data['date_time'].dt.day
data['hour'] = data['date_time'].dt.hour

In [178]:
# The raw timestamp is redundant once year/month/day/hour exist.
data.pop('date_time')
# Re-assigning the popped target appends it as the last column, so the features and
# target can be sliced by position without hard-coding the number of columns.
data['traffic_volume'] = data.pop('traffic_volume')
data.head()

Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,weather_description,year,month,day,hour,traffic_volume
0,7,288.28,0.0,0.0,40,1,24,2012,10,2,9,5545
1,7,289.36,0.0,0.0,75,1,2,2012,10,2,10,4516
2,7,289.58,0.0,0.0,90,1,19,2012,10,2,11,4767
3,7,290.13,0.0,0.0,90,1,19,2012,10,2,12,5026
4,7,291.14,0.0,0.0,75,1,2,2012,10,2,13,4918


## Split Train & Test Data

In [179]:
# Feature matrix: every column except the last one (the target).
x = data.iloc[:, :-1].to_numpy()

In [180]:
# Target as an (n, 1) column vector, the 2-D shape the scaler applied to it later expects.
y = data.iloc[:, -1].to_numpy().reshape(-1, 1)

In [181]:
# Hold out 20% of the rows for testing; random_state=0 makes the split reproducible.
# NOTE(review): train_test_split shuffles rows by default, so the test set is a random
# sample of hours rather than the most recent period — confirm this is intended for
# time-ordered data.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

## Feature Scaling


In [182]:
# Learn the per-feature mean/std from the training rows only, then apply that same
# transform to both splits.
sc_x = StandardScaler().fit(x_train)
x_train = sc_x.transform(x_train)
x_test = sc_x.transform(x_test)

In [183]:
# Standardize the target with statistics learned from the training targets only;
# sc_y is kept so predictions can be mapped back to the original scale later.
sc_y = StandardScaler().fit(y_train)
y_train = sc_y.transform(y_train)
y_test = sc_y.transform(y_test)

## Training the Model

In [184]:
# Bagging Regressor
# Ensemble of 50 estimators; random_state=0 fixes the bootstrap sampling.
baggingRegressor = BaggingRegressor(n_estimators=50, random_state=0)
# y_train is an (n, 1) column vector (the shape StandardScaler needs). The regressor
# expects a 1-D target and otherwise emits a DataConversionWarning, so flatten it
# explicitly instead of relying on the implicit conversion.
baggingRegressor.fit(x_train, y_train.ravel())
# Back to a column vector so the predictions line up with y_test and sc_y.
y_pred = baggingRegressor.predict(x_test).reshape(-1, 1)

## Evaluation

In [185]:
def Evaluation(y_test, y_pred):
    """Print a two-column table of regression metrics for a set of predictions.

    Parameters
    ----------
    y_test : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_test``.

    Returns
    -------
    None
        The table is written to stdout.
    """
    metric_functions = (
        ('Explained Variance Score', explained_variance_score),
        ('R^2', r2_score),
        ('Max Error', max_error),
        ('Mean Absolute Error', mean_absolute_error),
        ('Mean Squared Error', mean_squared_error),
    )
    report = PrettyTable(['Metric', 'Value'])
    for label, metric in metric_functions:
        # '{:.10}' renders each score with up to 10 significant digits.
        report.add_row([label, '{:.10}'.format(metric(y_test, y_pred))])
    print(report)

# y_test and y_pred are both still on the standardized scale produced by sc_y, so the
# error metrics printed here are in standardized units, not raw traffic_volume counts.
Evaluation(y_test, y_pred)

+--------------------------+--------------+
|          Metric          |    Value     |
+--------------------------+--------------+
| Explained Variance Score | 0.8331692994 |
|           R^2            | 0.8331315392 |
|        Max Error         | 2.383399178  |
|   Mean Absolute Error    | 0.2605022681 |
|    Mean Squared Error    | 0.1649302947 |
+--------------------------+--------------+


In [186]:
# Compact array display: two decimals, no scientific notation, long arrays abbreviated.
np.set_printoptions(precision=2, threshold=10, suppress=True)
# Re-predict and map the predictions back to the original traffic_volume scale.
y_pred = sc_y.inverse_transform(baggingRegressor.predict(x_test).reshape(-1, 1))
# Side-by-side comparison: column 0 is the prediction, column 1 the actual value.
result = np.concatenate(
    [y_pred.reshape(-1, 1), sc_y.inverse_transform(y_test.reshape(-1, 1))],
    axis=1,
)
print(result)

[[5304.04 5875.  ]
 [2575.6  2868.  ]
 [ 510.58  557.  ]
 ...
 [3052.1  3136.  ]
 [1885.26 2624.  ]
 [ 571.58  624.  ]]


#### Save the results if you wish!

In [187]:
# Lift the summarization threshold so str(result) contains every row instead of "...".
# Note: this changes NumPy's global print options for the rest of the session.
np.set_printoptions(threshold=np.inf)
# The context manager closes the file even if the write raises.
with open('BaggingRegressorResults.txt', 'w') as f:
    f.write(str(result))