In [1]:
#Import necessary libraries
import pandas as pd
import numpy as np  
from datetime import datetime, timedelta
pd.options.mode.chained_assignment = None
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [2]:
# Load every raw CSV input into its own DataFrame.
# Weather tables: one file per measured variable.
(
    all_temperatures,
    all_humidity,
    all_pressure,
    all_weather_desc,
    all_wind_speed,
) = (
    pd.read_csv(weather_csv)
    for weather_csv in (
        './traffic_datasets/temperature.csv',
        './traffic_datasets/humidity.csv',
        './traffic_datasets/pressure.csv',
        './traffic_datasets/weather_description.csv',
        './traffic_datasets/wind_speed.csv',
    )
)

# Traffic tables: the historical speed readings and the per-segment metadata.
historical_traffic_data = pd.read_csv('./traffic_datasets/chicago_historical_traffic_data.csv')
segment_info = pd.read_csv('./traffic_datasets/chicago_traffic_segments.csv')

In [3]:
# Reduce each weather table to its timestamp plus the Chicago column, and name
# the Chicago column after the variable it holds.
# The temperature table's timestamp is renamed to "dt1" so that it stays
# uniquely named once the five tables are placed side by side.
chicago_temperature = all_temperatures[["datetime", "Chicago"]].rename(
    columns={"datetime": "dt1", "Chicago": "temperature"}
)
chicago_humidity = all_humidity[["datetime", "Chicago"]].rename(
    columns={"Chicago": "humidity"}
)
chicago_pressure = all_pressure[["datetime", "Chicago"]].rename(
    columns={"Chicago": "pressure"}
)
chicago_weather_desc = all_weather_desc[["datetime", "Chicago"]].rename(
    columns={"Chicago": "weather_desc"}
)
chicago_wind_speed = all_wind_speed[["datetime", "Chicago"]].rename(
    columns={"Chicago": "wind_speed"}
)

In [4]:
# Build one weather table: place the five Chicago series side by side, keep a
# single timestamp column, derive calendar fields, and one-hot encode the
# weather description.
chicago_frames = [
    chicago_temperature,
    chicago_humidity,
    chicago_pressure,
    chicago_weather_desc,
    chicago_wind_speed,
]
concat_raw = pd.concat(chicago_frames, axis=1)

# 'dt1' is the temperature table's timestamp; the other tables' own timestamp
# columns are simply left out of the selection.
kept_columns = ['dt1', 'temperature', 'humidity', 'pressure', 'weather_desc', 'wind_speed']
weather_information = concat_raw[kept_columns].rename(columns={"dt1": "datetime"})

# Parse the timestamp once and derive the calendar fields from it.
observed_at = pd.to_datetime(weather_information["datetime"])
weather_information = weather_information.assign(
    datetime=observed_at,
    Year=observed_at.dt.year,
    Month=observed_at.dt.month,
    Day=observed_at.dt.day,
    Hour=observed_at.dt.hour,
)

# Expands weather_desc into one indicator column per distinct description.
weather_information = pd.get_dummies(weather_information)

In [5]:
# Preview the first rows of the combined, one-hot-encoded weather table.
weather_information.head()

Unnamed: 0,datetime,temperature,humidity,pressure,wind_speed,Year,Month,Day,Hour,weather_desc_broken clouds,...,weather_desc_smoke,weather_desc_snow,weather_desc_squalls,weather_desc_thunderstorm,weather_desc_thunderstorm with drizzle,weather_desc_thunderstorm with heavy rain,weather_desc_thunderstorm with light drizzle,weather_desc_thunderstorm with light rain,weather_desc_thunderstorm with rain,weather_desc_very heavy rain
0,2012-10-01 12:00:00,,,,,2012,10,1,12,0,...,0,0,0,0,0,0,0,0,0,0
1,2012-10-01 13:00:00,284.01,71.0,1014.0,0.0,2012,10,1,13,0,...,0,0,0,0,0,0,0,0,0,0
2,2012-10-01 14:00:00,284.054691,70.0,1014.0,0.0,2012,10,1,14,0,...,0,0,0,0,0,0,0,0,0,0
3,2012-10-01 15:00:00,284.177412,70.0,1014.0,0.0,2012,10,1,15,0,...,0,0,0,0,0,0,0,0,0,0
4,2012-10-01 16:00:00,284.300133,70.0,1014.0,0.0,2012,10,1,16,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# Keep only the traffic readings whose SPEED is strictly greater than zero.
has_positive_speed = historical_traffic_data["SPEED"].gt(0)
traffic_data = historical_traffic_data[has_positive_speed]

In [7]:
# Parse the traffic timestamps and derive the calendar fields from them.
# (Disabled) optional restriction to a single year:
#traffic_data = traffic_data.loc[traffic_data["Year"] == 2013]

# errors='coerce' turns unparseable TIME values into NaT instead of raising.
traffic_data["TIME"] = pd.to_datetime(traffic_data["TIME"], errors='coerce')

reading_time = traffic_data["TIME"].dt
traffic_data['Year'] = reading_time.year
traffic_data['Month'] = reading_time.month
traffic_data['Day'] = reading_time.day
traffic_data['Hour'] = reading_time.hour

In [None]:
# Combine traffic readings, weather, and segment metadata into one modelling table.

# Attach the weather row that shares the same calendar hour as each reading.
hour_keys = ['Year', 'Month', 'Day', 'Hour']
traffic_weather_merged = traffic_data.merge(weather_information, how='left', on=hour_keys)

# Attach the static attributes of each road segment.
traffic_weather2 = traffic_weather_merged.merge(segment_info, how='left', on='SEGMENTID')

# Columns carried forward, in their final order.
segment_and_time_columns = ['SEGMENTID', 'Year', 'Month', 'Day', 'Hour', 'LENGTH']
weather_measure_columns = ['temperature', 'humidity', 'pressure', 'wind_speed']
weather_flag_columns = [
    'weather_desc_broken clouds',
    'weather_desc_drizzle',
    'weather_desc_dust',
    'weather_desc_few clouds',
    'weather_desc_fog',
    'weather_desc_freezing rain',
    'weather_desc_haze',
    'weather_desc_heavy intensity drizzle',
    'weather_desc_heavy intensity rain',
    'weather_desc_heavy snow',
    'weather_desc_light intensity drizzle',
    'weather_desc_light rain',
    'weather_desc_light rain and snow',
    'weather_desc_light snow',
    'weather_desc_mist',
    'weather_desc_moderate rain',
    'weather_desc_overcast clouds',
    'weather_desc_proximity shower rain',
    'weather_desc_proximity thunderstorm',
    'weather_desc_proximity thunderstorm with drizzle',
    'weather_desc_proximity thunderstorm with rain',
    'weather_desc_scattered clouds',
    'weather_desc_sky is clear',
    'weather_desc_smoke',
    'weather_desc_snow',
    'weather_desc_squalls',
    'weather_desc_thunderstorm',
    'weather_desc_thunderstorm with drizzle',
    'weather_desc_thunderstorm with heavy rain',
    'weather_desc_thunderstorm with light drizzle',
    'weather_desc_thunderstorm with light rain',
    'weather_desc_thunderstorm with rain',
    'weather_desc_very heavy rain',
]
direction_and_target_columns = ['DIRECTION', 'SPEED']

selected_columns = (
    segment_and_time_columns
    + weather_measure_columns
    + weather_flag_columns
    + direction_and_target_columns
)
traffic_weather3 = traffic_weather2[selected_columns]

# One-hot encode the remaining non-numeric columns (the feature list used
# later expects DIRECTION_* indicator columns).
df = pd.get_dummies(traffic_weather3)

In [None]:
# Discard every row that has at least one missing value.
df = df.dropna(axis='index')

In [None]:
# Prepare the inputs for PCA: choose the feature columns, pull out the target,
# and standardize the features.
segment_and_time_features = ['SEGMENTID', 'Year', 'Month', 'Day', 'Hour', 'LENGTH']
weather_measure_features = ['temperature', 'humidity', 'pressure', 'wind_speed']
weather_flag_features = [
    'weather_desc_broken clouds',
    'weather_desc_drizzle',
    'weather_desc_dust',
    'weather_desc_few clouds',
    'weather_desc_fog',
    'weather_desc_freezing rain',
    'weather_desc_haze',
    'weather_desc_heavy intensity drizzle',
    'weather_desc_heavy intensity rain',
    'weather_desc_heavy snow',
    'weather_desc_light intensity drizzle',
    'weather_desc_light rain',
    'weather_desc_light rain and snow',
    'weather_desc_light snow',
    'weather_desc_mist',
    'weather_desc_moderate rain',
    'weather_desc_overcast clouds',
    'weather_desc_proximity shower rain',
    'weather_desc_proximity thunderstorm',
    'weather_desc_proximity thunderstorm with drizzle',
    'weather_desc_proximity thunderstorm with rain',
    'weather_desc_scattered clouds',
    'weather_desc_sky is clear',
    'weather_desc_smoke',
    'weather_desc_snow',
    'weather_desc_squalls',
    'weather_desc_thunderstorm',
    'weather_desc_thunderstorm with drizzle',
    'weather_desc_thunderstorm with heavy rain',
    'weather_desc_thunderstorm with light drizzle',
    'weather_desc_thunderstorm with light rain',
    'weather_desc_thunderstorm with rain',
    'weather_desc_very heavy rain',
]
direction_features = [
    'DIRECTION_EB',
    'DIRECTION_NB',
    'DIRECTION_NW',
    'DIRECTION_SB',
    'DIRECTION_SE',
    'DIRECTION_WB',
]
features = (
    segment_and_time_features
    + weather_measure_features
    + weather_flag_features
    + direction_features
)

# Target: SPEED, kept as a single-column 2-D array.
y = df[['SPEED']].to_numpy()

# Features: standardized column by column.
# NOTE(review): the scaler is fitted on all rows, before any train/test split
# is made - confirm this is intended.
X = StandardScaler().fit_transform(df[features].to_numpy())

In [None]:
# Project the standardized feature matrix onto its first two principal components.
# random_state pins the outcome whenever scikit-learn selects a randomized SVD
# solver, so the components are identical on every Restart & Run All.
pca = PCA(n_components=2, random_state=42)
principalComponents = pca.fit_transform(X)

# One row per row of X; the index is a fresh 0..n-1 range.
principalDf = pd.DataFrame(
    data=principalComponents,
    columns=['principal component 1', 'principal component 2'],
)
print(principalDf)

In [None]:

# Pair each row's principal components with its SPEED and derive class labels.

# Upper bounds (inclusive) of the "slow" and "medium" speed classes.
SLOW_MAX_SPEED = 9
MEDIUM_MAX_SPEED = 20

# principalDf carries a fresh 0..n-1 index, whereas df still carries the row
# labels left over after rows were dropped. pd.concat(axis=1) aligns on index
# labels, so without resetting the index the components would be matched with
# the SPEED of a different row (and unmatched rows would be dropped below).
speed_target = df[['SPEED']].reset_index(drop=True)
assert len(speed_target) == len(principalDf), (
    "principalDf and df must describe the same rows; re-run the PCA cells"
)

finalDf = pd.concat([principalDf, speed_target], axis=1)
finalDf = finalDf.dropna()
print(finalDf)

X = finalDf[['principal component 1', 'principal component 2']]

# Class encoding: -1 = slow (<= 9), 0 = medium (> 9 and <= 20), 1 = fast (> 20).
speed = finalDf['SPEED']
y = np.where(
    speed <= SLOW_MAX_SPEED,
    -1,
    np.where(speed <= MEDIUM_MAX_SPEED, 0, 1),
)
print(X)

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

# Speed classes encoded in y:
#   slow   : 0-9
#   medium : 10-20
#   fast   : 21 and above

# Hold out 33% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Size of the held-out set (labels, then features).
for held_out in (y_test, X_test):
    print(len(held_out))

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn import metrics


In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# Train a shallow random forest on the training split and predict the held-out rows.
model = RandomForestClassifier(random_state=0, max_depth=2).fit(X_train, y_train)
y_hat = model.predict(X_test)

# Report held-out accuracy.
acc = accuracy_score(y_test, y_hat)
print('Accuracy: %.3f' % acc)

# Report the confusion matrix of true versus predicted classes.
cm = confusion_matrix(y_test, y_hat)
print('Confusion matrix\n\n', cm)

In [None]:
# Imported here so this cell does not depend on imports made by another cell.
from sklearn.metrics import accuracy_score, confusion_matrix

# Train a decision tree on the training split and predict the held-out rows.
# random_state is fixed because scikit-learn permutes the candidate features at
# every split; without a seed, re-running the cell can grow a different tree
# and report different scores.
model = DecisionTreeClassifier(random_state=0)
model.fit(X_train, y_train)
y_hat = model.predict(X_test)

# Report held-out accuracy.
acc = accuracy_score(y_test, y_hat)
print('Accuracy: %.3f' % acc)

# Report the confusion matrix of true versus predicted classes.
cm = confusion_matrix(y_test, y_hat)
print('Confusion matrix\n\n', cm)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.figure(figsize=(12,8))
from sklearn import tree
tree.plot_tree(model.fit(X_train, y_train))

In [None]:
from sklearn.metrics import confusion_matrix

# Recompute and print the confusion matrix for the current predictions (y_hat)
# against the held-out labels.
cm = confusion_matrix(y_true=y_test, y_pred=y_hat)
print('Confusion matrix\n\n', cm)