# RNN to Predict Traffic Flow

## Importing Libraries

In [1]:
# Numerical and tabular data handling
import numpy as np
import pandas as pd

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns

# Modelling: feature scaling and the Keras building blocks for the network
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.models import Sequential

# Switch matplotlib over to seaborn's default theme for every later figure.
sns.set()

## Data Pre-Processing

### Importing Data

In [2]:
# Load the traffic dataset from CSV (path is relative to this notebook).
dataset = pd.read_csv('../../Datasets/traffic_data.csv')

# Rich preview of the frame (pandas truncates the middle rows on its own).
display(dataset)

# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in display() would render a stray "None" under the summary.
dataset.info()

Unnamed: 0,traffic_volume,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,weather_description,date_time
0,5545,,288.28,0.0,0.0,40,Clouds,scattered clouds,2/10/2012 9:00
1,4516,,289.36,0.0,0.0,75,Clouds,broken clouds,2/10/2012 10:00
2,4767,,289.58,0.0,0.0,90,Clouds,overcast clouds,2/10/2012 11:00
3,5026,,290.13,0.0,0.0,90,Clouds,overcast clouds,2/10/2012 12:00
4,4918,,291.14,0.0,0.0,75,Clouds,broken clouds,2/10/2012 13:00
...,...,...,...,...,...,...,...,...,...
48199,3543,,283.45,0.0,0.0,75,Clouds,broken clouds,30-09-2018 19:00
48200,2781,,282.76,0.0,0.0,90,Clouds,overcast clouds,30-09-2018 20:00
48201,2159,,282.73,0.0,0.0,90,Thunderstorm,proximity thunderstorm,30-09-2018 21:00
48202,1450,,282.09,0.0,0.0,90,Clouds,overcast clouds,30-09-2018 22:00


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48204 entries, 0 to 48203
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   traffic_volume       48204 non-null  int64  
 1   holiday              61 non-null     object 
 2   temp                 48204 non-null  float64
 3   rain_1h              48204 non-null  float64
 4   snow_1h              48204 non-null  float64
 5   clouds_all           48204 non-null  int64  
 6   weather_main         48204 non-null  object 
 7   weather_description  48204 non-null  object 
 8   date_time            48204 non-null  object 
dtypes: float64(3), int64(2), object(4)
memory usage: 3.3+ MB


None

### Converting to DateTime format

In [3]:
# The raw column mixes '/' and '-' as date separators (e.g. '2/10/2012 9:00'
# and '30-09-2018 19:00'), so the separators are unified first and the result
# is then parsed as day-first timestamps.
dataset['date_time'] = (
    dataset['date_time']
    .astype(str)
    .str.replace('/', '-', regex=False)
    .pipe(pd.to_datetime, dayfirst=True)
)

dataset

Unnamed: 0,traffic_volume,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,weather_description,date_time
0,5545,,288.28,0.0,0.0,40,Clouds,scattered clouds,2012-10-02 09:00:00
1,4516,,289.36,0.0,0.0,75,Clouds,broken clouds,2012-10-02 10:00:00
2,4767,,289.58,0.0,0.0,90,Clouds,overcast clouds,2012-10-02 11:00:00
3,5026,,290.13,0.0,0.0,90,Clouds,overcast clouds,2012-10-02 12:00:00
4,4918,,291.14,0.0,0.0,75,Clouds,broken clouds,2012-10-02 13:00:00
...,...,...,...,...,...,...,...,...,...
48199,3543,,283.45,0.0,0.0,75,Clouds,broken clouds,2018-09-30 19:00:00
48200,2781,,282.76,0.0,0.0,90,Clouds,overcast clouds,2018-09-30 20:00:00
48201,2159,,282.73,0.0,0.0,90,Thunderstorm,proximity thunderstorm,2018-09-30 21:00:00
48202,1450,,282.09,0.0,0.0,90,Clouds,overcast clouds,2018-09-30 22:00:00


### Breaking down DateTime into Year, Month, Day of Week, Hour

In [4]:
# Derive one integer column per calendar component of the timestamp.
# Note that 'day' holds the day of the week (Monday = 0 ... Sunday = 6),
# not the day of the month.
for _column, _component in (
    ('year', 'year'),
    ('month', 'month'),
    ('day', 'dayofweek'),
    ('hour', 'hour'),
):
    dataset[_column] = getattr(dataset['date_time'].dt, _component)

### Dropping Unnecessary Columns

In [5]:
# The timestamp has already been split into separate calendar columns, and
# 'weather_description' is removed alongside it; both are dropped in place.
dataset.drop(
    labels=['date_time', 'weather_description'],
    axis='columns',
    inplace=True,
)

dataset

Unnamed: 0,traffic_volume,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,year,month,day,hour
0,5545,,288.28,0.0,0.0,40,Clouds,2012,10,1,9
1,4516,,289.36,0.0,0.0,75,Clouds,2012,10,1,10
2,4767,,289.58,0.0,0.0,90,Clouds,2012,10,1,11
3,5026,,290.13,0.0,0.0,90,Clouds,2012,10,1,12
4,4918,,291.14,0.0,0.0,75,Clouds,2012,10,1,13
...,...,...,...,...,...,...,...,...,...,...,...
48199,3543,,283.45,0.0,0.0,75,Clouds,2018,9,6,19
48200,2781,,282.76,0.0,0.0,90,Clouds,2018,9,6,20
48201,2159,,282.73,0.0,0.0,90,Thunderstorm,2018,9,6,21
48202,1450,,282.09,0.0,0.0,90,Clouds,2018,9,6,22


### Changing NA values in Holiday Column to "Non-Holiday Day"

In [6]:
# Rows with no holiday recorded are missing in the raw data; give them an
# explicit category label so the column no longer contains NA values.
dataset.loc[dataset['holiday'].isna(), 'holiday'] = 'Non-Holiday Day'

# Show the distinct categories of both categorical columns.
display(
    dataset['holiday'].unique(),
    dataset['weather_main'].unique(),
)

array(['Non-Holiday Day', 'Columbus Day', 'Veterans Day',
       'Thanksgiving Day', 'Christmas Day', 'New Years Day',
       'Washingtons Birthday', 'Memorial Day', 'Independence Day',
       'State Fair', 'Labor Day', 'Martin Luther King Jr Day'],
      dtype=object)

array(['Clouds', 'Clear', 'Rain', 'Drizzle', 'Mist', 'Haze', 'Fog',
       'Thunderstorm', 'Snow', 'Squall', 'Smoke'], dtype=object)

### Adding Cyclical Features for Hour, Day of the Week, Month

In [7]:
# Map each periodic calendar field onto the unit circle (sin/cos pair) so that
# values at the two ends of a cycle, such as hour 23 and hour 0, end up close
# together instead of far apart.
# Each entry is (source column, prefix of the new columns, length of the cycle).
for _source, _prefix, _period in (
    ('hour', 'hour', 24),
    ('day', 'weekday', 7),
    ('month', 'month', 12),
):
    _angle = 2 * np.pi * dataset[_source] / _period
    dataset[f'{_prefix}_sin'] = np.sin(_angle)
    dataset[f'{_prefix}_cos'] = np.cos(_angle)

# The integer calendar columns are no longer needed once encoded; 'year' is
# removed together with them.
dataset.drop(
    labels=['hour', 'day', 'month', 'year'],
    axis='columns',
    inplace=True,
)

dataset

Unnamed: 0,traffic_volume,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,hour_sin,hour_cos,weekday_sin,weekday_cos,month_sin,month_cos
0,5545,Non-Holiday Day,288.28,0.0,0.0,40,Clouds,7.071068e-01,-0.707107,0.781831,0.62349,-0.866025,5.000000e-01
1,4516,Non-Holiday Day,289.36,0.0,0.0,75,Clouds,5.000000e-01,-0.866025,0.781831,0.62349,-0.866025,5.000000e-01
2,4767,Non-Holiday Day,289.58,0.0,0.0,90,Clouds,2.588190e-01,-0.965926,0.781831,0.62349,-0.866025,5.000000e-01
3,5026,Non-Holiday Day,290.13,0.0,0.0,90,Clouds,1.224647e-16,-1.000000,0.781831,0.62349,-0.866025,5.000000e-01
4,4918,Non-Holiday Day,291.14,0.0,0.0,75,Clouds,-2.588190e-01,-0.965926,0.781831,0.62349,-0.866025,5.000000e-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...
48199,3543,Non-Holiday Day,283.45,0.0,0.0,75,Clouds,-9.659258e-01,0.258819,-0.781831,0.62349,-1.000000,-1.836970e-16
48200,2781,Non-Holiday Day,282.76,0.0,0.0,90,Clouds,-8.660254e-01,0.500000,-0.781831,0.62349,-1.000000,-1.836970e-16
48201,2159,Non-Holiday Day,282.73,0.0,0.0,90,Thunderstorm,-7.071068e-01,0.707107,-0.781831,0.62349,-1.000000,-1.836970e-16
48202,1450,Non-Holiday Day,282.09,0.0,0.0,90,Clouds,-5.000000e-01,0.866025,-0.781831,0.62349,-1.000000,-1.836970e-16
