In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import datetime
from datetime import timedelta 
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Read the filtered crash records into a DataFrame.
df = pd.read_csv(filepath_or_buffer='data/df_filtered.csv')

## Check Data Type

In [3]:
# Show the dtype pandas inferred for every column of the crash table.
df.dtypes

Unnamed: 0      int64
CRASH_ID       object
DATE           object
DAY            object
TIME           object
KILLED          int64
INJURED         int64
PED_KILL        int64
PED_INJ         int64
SEVERITY       object
ALCOHOL        object
HAZMAT         object
CRASH_TYPE    float64
VEH_COUNT       int64
LOCATION       object
LOC_DIR        object
ROUTE           int64
MP            float64
RD_SYS          int64
RD_SURF       float64
SURF_COND     float64
LIGHT         float64
WEATHER       float64
RD_DIV        float64
RD_WRKZN      float64
CELL_USE       object
TIME_ROUND     object
TMC            object
timestamp      object
dtype: object

## Check Missingness

In [4]:
# Print each column that has missing values, followed by its missing count.
na_per_column = df.isna().sum()
for col_name in na_per_column.index[na_per_column > 0]:
    print('{:<20s}{:>4d}'.format(col_name, na_per_column[col_name]))

CRASH_TYPE             1
LOC_DIR              404
RD_SURF                4
SURF_COND              9
LIGHT                  6
WEATHER                7
RD_DIV                22
RD_WRKZN              80


No supporting information was available to explain or fill in these missing values, so they will be removed.

## Merge INRIX TMC and Crash df

Note that INRIX TMC (Traffic Message Channel) segments are used as the base road links.
The INRIX TMC links were retrieved from [RITIS](https://ritis.org/login?r=Lw==).

In [5]:
# Load the INRIX TMC table (file name indicates upstream/downstream links with distances)
df_tmc = pd.read_csv(filepath_or_buffer='data/TMC_up_and_down_with_distance.csv')

In [6]:
# Load the NJ 3 INRIX TMC links where crashes occurred in 2014
n2014 = pd.read_csv(filepath_or_buffer='data/n2014.csv')

In [7]:
# Left-join the TMC table onto the crash records (crash 'TMC' == table 'tmc'),
# keeping every crash row.
n2014 = n2014.merge(df_tmc, how='left', left_on='TMC', right_on='tmc')

In [8]:
# Preview the first rows of the crash table after joining the TMC table.
n2014.head()

Unnamed: 0.1,Unnamed: 0,timestamp,TMC,tmc,tmc_dn1,tmc_dn2,tmc_dn3,tmc_up1,tmc_up2,tmc_up3,tmc_miles,tmc_dn1.1,tmc_dn2.1,tmc_dn3.1,tmc_up1.1,tmc_up2.1,tmc_up3.1
0,0,2014-12-28 09:49:00,120+07444,120+07444,120P07444,120+07445,120+07446,120P07443,120+07443,120P07442,0.670043,0.121049,0.92112,0.005611,0.100841,0.297769,0.090041
1,1,2014-10-03 22:34:00,120N07447,120N07447,120-07446,120-07445,120-07444,120-07447,120N07448,120-07448,0.147614,0.067757,0.013286,0.83298,0.394906,0.332592,0.401996
2,2,2014-09-22 08:52:00,120+07449,120+07449,120-06051,120N06052,120-06051,120P07448,120+07448,120P07447,0.57029,0.097246,0.383109,0.414796,0.139591,0.434495,0.136118
3,3,2014-03-03 13:11:00,120-04740,120-04740,120N04740,120-04739,120N04739,120N07442,120-07442,120N07443,0.52233,0.434241,0.086623,0.158097,0.074605,0.346611,0.068105
4,4,2014-11-13 12:17:00,120+07442,120+07442,120P07442,120+07443,120P07443,120P04740,120+04740,120P04739,0.540096,0.090041,0.297769,0.100841,0.151087,0.335239,0.26714


In [9]:
# Remove unnecessary columns: the saved index, the duplicated join key and the
# third upstream/downstream link with its '.1' companion column.
unused_cols = ['Unnamed: 0', 'tmc', 'tmc_dn3', 'tmc_up3', 'tmc_dn3.1', 'tmc_up3.1']
n2014 = n2014.drop(columns=unused_cols)

In [10]:
# Give the '.1' duplicates of the neighbour-link columns explicit '*_miles' names.
miles_names = {
    'tmc_dn1.1': 'tmc_dn1_miles',
    'tmc_dn2.1': 'tmc_dn2_miles',
    'tmc_up1.1': 'tmc_up1_miles',
    'tmc_up2.1': 'tmc_up2_miles',
}
n2014 = n2014.rename(columns=miles_names)

It was assumed that when a crash caused a lane closure, there must have been a significant speed difference between the upstream and downstream links. To this end, a table including two consecutive upstream links and two consecutive downstream links was prepared. 

In [11]:
# Preview the table after dropping and renaming columns.
n2014.head()

Unnamed: 0,timestamp,TMC,tmc_dn1,tmc_dn2,tmc_up1,tmc_up2,tmc_miles,tmc_dn1_miles,tmc_dn2_miles,tmc_up1_miles,tmc_up2_miles
0,2014-12-28 09:49:00,120+07444,120P07444,120+07445,120P07443,120+07443,0.670043,0.121049,0.92112,0.100841,0.297769
1,2014-10-03 22:34:00,120N07447,120-07446,120-07445,120-07447,120N07448,0.147614,0.067757,0.013286,0.394906,0.332592
2,2014-09-22 08:52:00,120+07449,120-06051,120N06052,120P07448,120+07448,0.57029,0.097246,0.383109,0.139591,0.434495
3,2014-03-03 13:11:00,120-04740,120N04740,120-04739,120N07442,120-07442,0.52233,0.434241,0.086623,0.074605,0.346611
4,2014-11-13 12:17:00,120+07442,120P07442,120+07443,120P04740,120+04740,0.540096,0.090041,0.297769,0.151087,0.335239


In [12]:
# Check column dtypes; 'timestamp' is still text here and is concatenated as a string below.
n2014.dtypes

timestamp         object
TMC               object
tmc_dn1           object
tmc_dn2           object
tmc_up1           object
tmc_up2           object
tmc_miles        float64
tmc_dn1_miles    float64
tmc_dn2_miles    float64
tmc_up1_miles    float64
tmc_up2_miles    float64
dtype: object

In [13]:
# Build "<link id> <timestamp>" lookup keys used to join per-minute speeds.
# Each key must start with that link's own id (crash link, downstream 1-2,
# upstream 1-2); building every key from 'TMC' would make the neighbour-link
# lookups return the crash-link speed instead of their own.
for link_col in ['TMC', 'tmc_dn1', 'tmc_dn2', 'tmc_up1', 'tmc_up2']:
    n2014[link_col + '_'] = n2014[link_col] + " " + n2014['timestamp']

In [14]:
n2014['timestamp'] = pd.to_datetime(n2014['timestamp'])

# Crash time truncated to the whole minute (seconds and smaller units dropped).
# Vectorised floor replaces fifteen row-wise apply() calls and yields NaT for a
# missing timestamp instead of raising.
crash_minute = n2014['timestamp'].dt.floor('min')

# pre_1min ... pre_15min: the minute 1..15 minutes before the crash minute.
for minutes_before in range(1, 16):
    n2014['pre_{}min'.format(minutes_before)] = crash_minute - timedelta(minutes=minutes_before)


In [15]:
# Preview the table with the pre_1min ... pre_15min timestamp columns added.
n2014.head()

Unnamed: 0,timestamp,TMC,tmc_dn1,tmc_dn2,tmc_up1,tmc_up2,tmc_miles,tmc_dn1_miles,tmc_dn2_miles,tmc_up1_miles,...,pre_6min,pre_7min,pre_8min,pre_9min,pre_10min,pre_11min,pre_12min,pre_13min,pre_14min,pre_15min
0,2014-12-28 09:49:00,120+07444,120P07444,120+07445,120P07443,120+07443,0.670043,0.121049,0.92112,0.100841,...,2014-12-28 09:43:00,2014-12-28 09:42:00,2014-12-28 09:41:00,2014-12-28 09:40:00,2014-12-28 09:39:00,2014-12-28 09:38:00,2014-12-28 09:37:00,2014-12-28 09:36:00,2014-12-28 09:35:00,2014-12-28 09:34:00
1,2014-10-03 22:34:00,120N07447,120-07446,120-07445,120-07447,120N07448,0.147614,0.067757,0.013286,0.394906,...,2014-10-03 22:28:00,2014-10-03 22:27:00,2014-10-03 22:26:00,2014-10-03 22:25:00,2014-10-03 22:24:00,2014-10-03 22:23:00,2014-10-03 22:22:00,2014-10-03 22:21:00,2014-10-03 22:20:00,2014-10-03 22:19:00
2,2014-09-22 08:52:00,120+07449,120-06051,120N06052,120P07448,120+07448,0.57029,0.097246,0.383109,0.139591,...,2014-09-22 08:46:00,2014-09-22 08:45:00,2014-09-22 08:44:00,2014-09-22 08:43:00,2014-09-22 08:42:00,2014-09-22 08:41:00,2014-09-22 08:40:00,2014-09-22 08:39:00,2014-09-22 08:38:00,2014-09-22 08:37:00
3,2014-03-03 13:11:00,120-04740,120N04740,120-04739,120N07442,120-07442,0.52233,0.434241,0.086623,0.074605,...,2014-03-03 13:05:00,2014-03-03 13:04:00,2014-03-03 13:03:00,2014-03-03 13:02:00,2014-03-03 13:01:00,2014-03-03 13:00:00,2014-03-03 12:59:00,2014-03-03 12:58:00,2014-03-03 12:57:00,2014-03-03 12:56:00
4,2014-11-13 12:17:00,120+07442,120P07442,120+07443,120P04740,120+04740,0.540096,0.090041,0.297769,0.151087,...,2014-11-13 12:11:00,2014-11-13 12:10:00,2014-11-13 12:09:00,2014-11-13 12:08:00,2014-11-13 12:07:00,2014-11-13 12:06:00,2014-11-13 12:05:00,2014-11-13 12:04:00,2014-11-13 12:03:00,2014-11-13 12:02:00


## Load Inrix Speed Data

Traffic speeds (e.g., INRIX) observed on both the upstream and downstream links were integrated. In addition, speeds before the crash events were also integrated: link speeds for each of the 15 minutes preceding the crash occurrence were joined. 

In [16]:
# Load the 2014 INRIX records for NJ 3.
inrix2014 = pd.read_csv(filepath_or_buffer='data/NJ3_INRIX_2014_re.csv')

In [17]:
# Preview the raw INRIX records.
inrix2014.head()

Unnamed: 0,tmc_code,measurement_tstamp,speed,average_speed,reference_speed,travel_time_seconds,confidence_score,cvalue
0,120+04729,2014-01-01 00:00:00,46.0,41.0,41.0,7.47,30.0,100.0
1,120+04729,2014-01-01 00:01:00,46.0,41.0,41.0,7.47,30.0,100.0
2,120+04729,2014-01-01 00:02:00,46.0,41.0,41.0,7.47,30.0,100.0
3,120+04729,2014-01-01 00:03:00,46.0,41.0,41.0,7.47,30.0,100.0
4,120+04729,2014-01-01 00:04:00,43.0,41.0,41.0,8.0,30.0,84.0


In [18]:
# Discard the INRIX fields that are not used in the speed join.
unused_inrix_cols = ['average_speed', 'reference_speed', 'travel_time_seconds', 'confidence_score', 'cvalue']
inrix2014 = inrix2014.drop(columns=unused_inrix_cols)

In [19]:
# Lookup key "<tmc_code> <measurement_tstamp>", one per link and observation time.
link_ids = inrix2014['tmc_code']
obs_times = inrix2014['measurement_tstamp']
inrix2014['tmc_code_'] = link_ids + " " + obs_times

In [20]:
# Keep only the lookup key and the speed. Take an explicit copy so later
# in-place operations act on an independent frame rather than on a slice of the
# original (which can trigger pandas' SettingWithCopyWarning).
inrix2014 = inrix2014[['tmc_code_', 'speed']].copy()

In [21]:
# Preview the reduced INRIX lookup table (key and speed only).
inrix2014.head()

Unnamed: 0,tmc_code_,speed
0,120+04729 2014-01-01 00:00:00,46.0
1,120+04729 2014-01-01 00:01:00,46.0
2,120+04729 2014-01-01 00:02:00,46.0
3,120+04729 2014-01-01 00:03:00,46.0
4,120+04729 2014-01-01 00:04:00,43.0


In [22]:
# Keep the first row per lookup key so each key maps to a single speed.
# Reassigning (rather than inplace=True) avoids mutating a frame that was
# produced by column slicing, which can trigger SettingWithCopyWarning.
inrix2014 = inrix2014.drop_duplicates(subset=['tmc_code_'])

In [23]:
# Keep the first row for every crash-link/timestamp key.
n2014 = n2014.drop_duplicates(subset=['TMC_'])

In [24]:
# Row/column count after de-duplicating on the crash-link key.
n2014.shape

(9996, 31)

In [25]:
# Store 'timestamp' as text again
timestamp_text = n2014['timestamp'].astype(str)
n2014 = n2014.assign(timestamp=timestamp_text)

## Join INRIX Speed Data in TMC Links

The same speed-to-TMC join is repeated for each year between 2013 and 2018.

In [26]:
# Speed in the crash link at the crash time.
# The lookup's key column is renamed to the left-hand key so the merge adds
# only the speed column; merging with left_on/right_on would append another
# 'tmc_code_' column on every join, and those copies eventually collide.
n2014 = n2014.merge(
    inrix2014[['tmc_code_', 'speed']].rename(columns={'tmc_code_': 'TMC_', 'speed': 'spd'}),
    how='left', on='TMC_')

In [27]:
# Speed in the first downstream link at the crash time.
# The lookup's key column is renamed to the left-hand key so the merge adds
# only the speed column; merging with left_on/right_on would append another
# 'tmc_code_' column on every join, and those copies eventually collide.
n2014 = n2014.merge(
    inrix2014[['tmc_code_', 'speed']].rename(columns={'tmc_code_': 'tmc_dn1_', 'speed': 'dn1_spd'}),
    how='left', on='tmc_dn1_')

In [28]:
# Speed in the second downstream link at the crash time.
# The lookup's key column is renamed to the left-hand key so the merge adds
# only the speed column; merging with left_on/right_on would append another
# 'tmc_code_' column on every join, and those copies eventually collide.
n2014 = n2014.merge(
    inrix2014[['tmc_code_', 'speed']].rename(columns={'tmc_code_': 'tmc_dn2_', 'speed': 'dn2_spd'}),
    how='left', on='tmc_dn2_')

In [29]:
# Speed in the first upstream link at the crash time.
# The lookup's key column is renamed to the left-hand key so the merge adds
# only the speed column; merging with left_on/right_on would append another
# 'tmc_code_' column on every join, and those copies eventually collide.
n2014 = n2014.merge(
    inrix2014[['tmc_code_', 'speed']].rename(columns={'tmc_code_': 'tmc_up1_', 'speed': 'up1_spd'}),
    how='left', on='tmc_up1_')

In [30]:
# Speed in the second upstream link at the crash time.
# The lookup's key column is renamed to the left-hand key so the merge adds
# only the speed column; merging with left_on/right_on would append another
# 'tmc_code_' column on every join, and those copies eventually collide.
n2014 = n2014.merge(
    inrix2014[['tmc_code_', 'speed']].rename(columns={'tmc_code_': 'tmc_up2_', 'speed': 'up2_spd'}),
    how='left', on='tmc_up2_')

### Pre-Speeds at each TMC link

#### Pre_1minute

In [31]:
# Format the "1 minute before the crash" time as text so it can be concatenated into lookup keys.
n2014['pre_1min'] = n2014['pre_1min'].astype(str)

# Build one "<link id> <time>" key per link. Each key must start with that
# link's own id (crash link, downstream 1-2, upstream 1-2); reusing the crash
# link's 'TMC' id for every key would make all five speed lookups identical.
for link_col in ['TMC', 'tmc_dn1', 'tmc_dn2', 'tmc_up1', 'tmc_up2']:
    n2014[link_col + '_1'] = n2014[link_col] + " " + n2014['pre_1min']

In [32]:
# Attach the speed recorded 1 minute before the crash on the crash link and on
# its two downstream / two upstream neighbours.
# - The first upstream speed is looked up with the 'tmc_up1_1' key (not the
#   second upstream key).
# - The lookup's key column is renamed to the left-hand key so each merge adds
#   only the speed column (no extra 'tmc_code_' copies that later collide).
for key_col, spd_col in [('TMC_1', 'pre1_spd'),
                         ('tmc_dn1_1', 'pre1_dn1_spd'),
                         ('tmc_dn2_1', 'pre1_dn2_spd'),
                         ('tmc_up1_1', 'pre1_up1_spd'),
                         ('tmc_up2_1', 'pre1_up2_spd')]:
    speed_lookup = inrix2014[['tmc_code_', 'speed']].rename(
        columns={'tmc_code_': key_col, 'speed': spd_col})
    n2014 = n2014.merge(speed_lookup, how='left', on=key_col)

#### Pre_2minute

In [33]:
# Format the "2 minutes before the crash" time as text so it can be concatenated into lookup keys.
n2014['pre_2min'] = n2014['pre_2min'].astype(str)

# Build one "<link id> <time>" key per link. Each key must start with that
# link's own id (crash link, downstream 1-2, upstream 1-2); reusing the crash
# link's 'TMC' id for every key would make all five speed lookups identical.
for link_col in ['TMC', 'tmc_dn1', 'tmc_dn2', 'tmc_up1', 'tmc_up2']:
    n2014[link_col + '_2'] = n2014[link_col] + " " + n2014['pre_2min']

In [34]:
# Attach the speed recorded 2 minutes before the crash on the crash link and on
# its two downstream / two upstream neighbours.
# - Speed columns carry the 'pre2' prefix of this offset, so they do not
#   duplicate the names used for another offset.
# - The first upstream speed is looked up with the 'tmc_up1_2' key (not the
#   second upstream key).
# - The lookup's key column is renamed to the left-hand key so each merge adds
#   only the speed column (no extra 'tmc_code_' copies that later collide).
for key_col, spd_col in [('TMC_2', 'pre2_spd'),
                         ('tmc_dn1_2', 'pre2_dn1_spd'),
                         ('tmc_dn2_2', 'pre2_dn2_spd'),
                         ('tmc_up1_2', 'pre2_up1_spd'),
                         ('tmc_up2_2', 'pre2_up2_spd')]:
    speed_lookup = inrix2014[['tmc_code_', 'speed']].rename(
        columns={'tmc_code_': key_col, 'speed': spd_col})
    n2014 = n2014.merge(speed_lookup, how='left', on=key_col)

#### Pre_3minute

In [35]:
# Format the "3 minutes before the crash" time as text so it can be concatenated into lookup keys.
n2014['pre_3min'] = n2014['pre_3min'].astype(str)

# Build one "<link id> <time>" key per link. Each key must start with that
# link's own id (crash link, downstream 1-2, upstream 1-2); reusing the crash
# link's 'TMC' id for every key would make all five speed lookups identical.
for link_col in ['TMC', 'tmc_dn1', 'tmc_dn2', 'tmc_up1', 'tmc_up2']:
    n2014[link_col + '_3'] = n2014[link_col] + " " + n2014['pre_3min']

In [36]:
# Attach the speed recorded 3 minutes before the crash on the crash link and on
# its two downstream / two upstream neighbours.
# - Speed columns carry the 'pre3' prefix of this offset, so they do not
#   duplicate the names used for another offset.
# - The first upstream speed is looked up with the 'tmc_up1_3' key (not the
#   second upstream key).
# - The lookup's key column is renamed to the left-hand key so each merge adds
#   only the speed column (no extra 'tmc_code_' copies that later collide).
for key_col, spd_col in [('TMC_3', 'pre3_spd'),
                         ('tmc_dn1_3', 'pre3_dn1_spd'),
                         ('tmc_dn2_3', 'pre3_dn2_spd'),
                         ('tmc_up1_3', 'pre3_up1_spd'),
                         ('tmc_up2_3', 'pre3_up2_spd')]:
    speed_lookup = inrix2014[['tmc_code_', 'speed']].rename(
        columns={'tmc_code_': key_col, 'speed': spd_col})
    n2014 = n2014.merge(speed_lookup, how='left', on=key_col)

#### Pre_4minute

In [37]:
# Format the "4 minutes before the crash" time as text so it can be concatenated into lookup keys.
n2014['pre_4min'] = n2014['pre_4min'].astype(str)

# Build one "<link id> <time>" key per link. Each key must start with that
# link's own id (crash link, downstream 1-2, upstream 1-2); reusing the crash
# link's 'TMC' id for every key would make all five speed lookups identical.
for link_col in ['TMC', 'tmc_dn1', 'tmc_dn2', 'tmc_up1', 'tmc_up2']:
    n2014[link_col + '_4'] = n2014[link_col] + " " + n2014['pre_4min']

In [38]:
# Attach the speed recorded 4 minutes before the crash on the crash link and on
# its two downstream / two upstream neighbours.
# - Speed columns carry the 'pre4' prefix of this offset, so they do not
#   duplicate the names used for another offset.
# - The first upstream speed is looked up with the 'tmc_up1_4' key (not the
#   second upstream key).
# - The lookup's key column is renamed to the left-hand key so each merge adds
#   only the speed column (no extra 'tmc_code_' copies that later collide).
for key_col, spd_col in [('TMC_4', 'pre4_spd'),
                         ('tmc_dn1_4', 'pre4_dn1_spd'),
                         ('tmc_dn2_4', 'pre4_dn2_spd'),
                         ('tmc_up1_4', 'pre4_up1_spd'),
                         ('tmc_up2_4', 'pre4_up2_spd')]:
    speed_lookup = inrix2014[['tmc_code_', 'speed']].rename(
        columns={'tmc_code_': key_col, 'speed': spd_col})
    n2014 = n2014.merge(speed_lookup, how='left', on=key_col)

#### Pre_5minute

In [39]:
# Format the "5 minutes before the crash" time as text so it can be concatenated into lookup keys.
n2014['pre_5min'] = n2014['pre_5min'].astype(str)

# Build one "<link id> <time>" key per link. Each key must start with that
# link's own id (crash link, downstream 1-2, upstream 1-2); reusing the crash
# link's 'TMC' id for every key would make all five speed lookups identical.
for link_col in ['TMC', 'tmc_dn1', 'tmc_dn2', 'tmc_up1', 'tmc_up2']:
    n2014[link_col + '_5'] = n2014[link_col] + " " + n2014['pre_5min']

In [40]:
# Attach the speed recorded 5 minutes before the crash on the crash link and on
# its two downstream / two upstream neighbours.
# - Speed columns carry the 'pre5' prefix of this offset, so they do not
#   duplicate the names used for another offset.
# - The first upstream speed is looked up with the 'tmc_up1_5' key (not the
#   second upstream key).
# - The lookup's key column is renamed to the left-hand key so each merge adds
#   only the speed column (no extra 'tmc_code_' copies that later collide).
for key_col, spd_col in [('TMC_5', 'pre5_spd'),
                         ('tmc_dn1_5', 'pre5_dn1_spd'),
                         ('tmc_dn2_5', 'pre5_dn2_spd'),
                         ('tmc_up1_5', 'pre5_up1_spd'),
                         ('tmc_up2_5', 'pre5_up2_spd')]:
    speed_lookup = inrix2014[['tmc_code_', 'speed']].rename(
        columns={'tmc_code_': key_col, 'speed': spd_col})
    n2014 = n2014.merge(speed_lookup, how='left', on=key_col)

#### Pre_6minute

In [41]:
# Format the "6 minutes before the crash" time as text so it can be concatenated into lookup keys.
n2014['pre_6min'] = n2014['pre_6min'].astype(str)

# Build one "<link id> <time>" key per link. Each key must start with that
# link's own id (crash link, downstream 1-2, upstream 1-2); reusing the crash
# link's 'TMC' id for every key would make all five speed lookups identical.
for link_col in ['TMC', 'tmc_dn1', 'tmc_dn2', 'tmc_up1', 'tmc_up2']:
    n2014[link_col + '_6'] = n2014[link_col] + " " + n2014['pre_6min']

In [42]:
# Attach the speed recorded 6 minutes before the crash on the crash link and on
# its two downstream / two upstream neighbours.
# - Speed columns carry the 'pre6' prefix of this offset, so they do not
#   duplicate the names used for another offset.
# - The first upstream speed is looked up with the 'tmc_up1_6' key (not the
#   second upstream key).
# - The lookup's key column is renamed to the left-hand key so each merge adds
#   only the speed column (no extra 'tmc_code_' copies that later collide).
for key_col, spd_col in [('TMC_6', 'pre6_spd'),
                         ('tmc_dn1_6', 'pre6_dn1_spd'),
                         ('tmc_dn2_6', 'pre6_dn2_spd'),
                         ('tmc_up1_6', 'pre6_up1_spd'),
                         ('tmc_up2_6', 'pre6_up2_spd')]:
    speed_lookup = inrix2014[['tmc_code_', 'speed']].rename(
        columns={'tmc_code_': key_col, 'speed': spd_col})
    n2014 = n2014.merge(speed_lookup, how='left', on=key_col)

#### Pre_7minute

In [43]:
# Format the "7 minutes before the crash" time as text so it can be concatenated into lookup keys.
n2014['pre_7min'] = n2014['pre_7min'].astype(str)

# Build one "<link id> <time>" key per link. Each key must start with that
# link's own id (crash link, downstream 1-2, upstream 1-2); reusing the crash
# link's 'TMC' id for every key would make all five speed lookups identical.
for link_col in ['TMC', 'tmc_dn1', 'tmc_dn2', 'tmc_up1', 'tmc_up2']:
    n2014[link_col + '_7'] = n2014[link_col] + " " + n2014['pre_7min']

In [44]:
# Attach the speed recorded 7 minutes before the crash on the crash link and on
# its two downstream / two upstream neighbours.
# - Speed columns carry the 'pre7' prefix of this offset, so they do not
#   duplicate the names used for another offset.
# - The first upstream speed is looked up with the 'tmc_up1_7' key (not the
#   second upstream key).
# - The lookup's key column is renamed to the left-hand key so each merge adds
#   only the speed column (no extra 'tmc_code_' copies that later collide).
for key_col, spd_col in [('TMC_7', 'pre7_spd'),
                         ('tmc_dn1_7', 'pre7_dn1_spd'),
                         ('tmc_dn2_7', 'pre7_dn2_spd'),
                         ('tmc_up1_7', 'pre7_up1_spd'),
                         ('tmc_up2_7', 'pre7_up2_spd')]:
    speed_lookup = inrix2014[['tmc_code_', 'speed']].rename(
        columns={'tmc_code_': key_col, 'speed': spd_col})
    n2014 = n2014.merge(speed_lookup, how='left', on=key_col)

#### Pre_8minute

In [45]:
# Format the "8 minutes before the crash" time as text so it can be concatenated into lookup keys.
n2014['pre_8min'] = n2014['pre_8min'].astype(str)

# Build one "<link id> <time>" key per link. Each key must start with that
# link's own id (crash link, downstream 1-2, upstream 1-2); reusing the crash
# link's 'TMC' id for every key would make all five speed lookups identical.
for link_col in ['TMC', 'tmc_dn1', 'tmc_dn2', 'tmc_up1', 'tmc_up2']:
    n2014[link_col + '_8'] = n2014[link_col] + " " + n2014['pre_8min']

In [46]:
# Attach the speed recorded 8 minutes before the crash on the crash link and on
# its two downstream / two upstream neighbours.
# - Speed columns carry the 'pre8' prefix of this offset, so they do not
#   duplicate the names used for another offset.
# - The first upstream speed is looked up with the 'tmc_up1_8' key (not the
#   second upstream key).
# - The lookup's key column is renamed to the left-hand key so each merge adds
#   only the speed column (no extra 'tmc_code_' copies that later collide).
for key_col, spd_col in [('TMC_8', 'pre8_spd'),
                         ('tmc_dn1_8', 'pre8_dn1_spd'),
                         ('tmc_dn2_8', 'pre8_dn2_spd'),
                         ('tmc_up1_8', 'pre8_up1_spd'),
                         ('tmc_up2_8', 'pre8_up2_spd')]:
    speed_lookup = inrix2014[['tmc_code_', 'speed']].rename(
        columns={'tmc_code_': key_col, 'speed': spd_col})
    n2014 = n2014.merge(speed_lookup, how='left', on=key_col)

#### Pre_9minute

In [47]:
# Format the "9 minutes before the crash" time as text so it can be concatenated into lookup keys.
n2014['pre_9min'] = n2014['pre_9min'].astype(str)

# Build one "<link id> <time>" key per link. Each key must start with that
# link's own id (crash link, downstream 1-2, upstream 1-2); reusing the crash
# link's 'TMC' id for every key would make all five speed lookups identical.
for link_col in ['TMC', 'tmc_dn1', 'tmc_dn2', 'tmc_up1', 'tmc_up2']:
    n2014[link_col + '_9'] = n2014[link_col] + " " + n2014['pre_9min']

In [48]:
# Attach the speed recorded 9 minutes before the crash on the crash link and on
# its two downstream / two upstream neighbours.
# - Speed columns carry the 'pre9' prefix of this offset, so they do not
#   duplicate the names used for another offset.
# - The first upstream speed is looked up with the 'tmc_up1_9' key (not the
#   second upstream key).
# - The lookup's key column is renamed to the left-hand key so each merge adds
#   only the speed column (no extra 'tmc_code_' copies that later collide).
for key_col, spd_col in [('TMC_9', 'pre9_spd'),
                         ('tmc_dn1_9', 'pre9_dn1_spd'),
                         ('tmc_dn2_9', 'pre9_dn2_spd'),
                         ('tmc_up1_9', 'pre9_up1_spd'),
                         ('tmc_up2_9', 'pre9_up2_spd')]:
    speed_lookup = inrix2014[['tmc_code_', 'speed']].rename(
        columns={'tmc_code_': key_col, 'speed': spd_col})
    n2014 = n2014.merge(speed_lookup, how='left', on=key_col)

#### Pre_10minute

In [49]:
# Format the "10 minutes before the crash" time as text so it can be concatenated into lookup keys.
n2014['pre_10min'] = n2014['pre_10min'].astype(str)

# Build one "<link id> <time>" key per link. Each key must start with that
# link's own id (crash link, downstream 1-2, upstream 1-2); reusing the crash
# link's 'TMC' id for every key would make all five speed lookups identical.
for link_col in ['TMC', 'tmc_dn1', 'tmc_dn2', 'tmc_up1', 'tmc_up2']:
    n2014[link_col + '_10'] = n2014[link_col] + " " + n2014['pre_10min']

In [50]:
# Attach the speed recorded 10 minutes before the crash on the crash link and
# on its two downstream / two upstream neighbours.
# - Speed columns carry the 'pre10' prefix of this offset, so they do not
#   duplicate the names used for another offset.
# - The first upstream speed is looked up with the 'tmc_up1_10' key (not the
#   second upstream key).
# - The lookup's key column is renamed to the left-hand key so each merge adds
#   only the speed column (no extra 'tmc_code_' copies that later collide).
for key_col, spd_col in [('TMC_10', 'pre10_spd'),
                         ('tmc_dn1_10', 'pre10_dn1_spd'),
                         ('tmc_dn2_10', 'pre10_dn2_spd'),
                         ('tmc_up1_10', 'pre10_up1_spd'),
                         ('tmc_up2_10', 'pre10_up2_spd')]:
    speed_lookup = inrix2014[['tmc_code_', 'speed']].rename(
        columns={'tmc_code_': key_col, 'speed': spd_col})
    n2014 = n2014.merge(speed_lookup, how='left', on=key_col)

#### Pre_11minute

In [51]:
# Format the "11 minutes before the crash" time as text so it can be concatenated into lookup keys.
n2014['pre_11min'] = n2014['pre_11min'].astype(str)

# Build one "<link id> <time>" key per link. Each key must start with that
# link's own id (crash link, downstream 1-2, upstream 1-2); reusing the crash
# link's 'TMC' id for every key would make all five speed lookups identical.
for link_col in ['TMC', 'tmc_dn1', 'tmc_dn2', 'tmc_up1', 'tmc_up2']:
    n2014[link_col + '_11'] = n2014[link_col] + " " + n2014['pre_11min']

In [52]:
# Attach the speed recorded 11 minutes before the crash on the crash link and
# on its two downstream / two upstream neighbours.
# - Speed columns carry the 'pre11' prefix of this offset, so they do not
#   duplicate the names used for another offset.
# - The first upstream speed is looked up with the 'tmc_up1_11' key (not the
#   second upstream key).
# - The lookup's key column is renamed to the left-hand key so each merge adds
#   only the speed column (no extra 'tmc_code_' copies that later collide).
for key_col, spd_col in [('TMC_11', 'pre11_spd'),
                         ('tmc_dn1_11', 'pre11_dn1_spd'),
                         ('tmc_dn2_11', 'pre11_dn2_spd'),
                         ('tmc_up1_11', 'pre11_up1_spd'),
                         ('tmc_up2_11', 'pre11_up2_spd')]:
    speed_lookup = inrix2014[['tmc_code_', 'speed']].rename(
        columns={'tmc_code_': key_col, 'speed': spd_col})
    n2014 = n2014.merge(speed_lookup, how='left', on=key_col)

#### Pre_12minute

In [53]:
# Format the "12 minutes before the crash" time as text so it can be concatenated into lookup keys.
n2014['pre_12min'] = n2014['pre_12min'].astype(str)

# Build one "<link id> <time>" key per link. Each key must start with that
# link's own id (crash link, downstream 1-2, upstream 1-2); reusing the crash
# link's 'TMC' id for every key would make all five speed lookups identical.
for link_col in ['TMC', 'tmc_dn1', 'tmc_dn2', 'tmc_up1', 'tmc_up2']:
    n2014[link_col + '_12'] = n2014[link_col] + " " + n2014['pre_12min']

In [54]:
# Attach the speed recorded 12 minutes before the crash on the crash link and
# on its two downstream / two upstream neighbours.
# - Speed columns carry the 'pre12' prefix of this offset, so they do not
#   duplicate the names used for another offset.
# - The first upstream speed is looked up with the 'tmc_up1_12' key (not the
#   second upstream key).
# - The lookup's key column is renamed to the left-hand key so each merge adds
#   only the speed column (no extra 'tmc_code_' copies that later collide).
for key_col, spd_col in [('TMC_12', 'pre12_spd'),
                         ('tmc_dn1_12', 'pre12_dn1_spd'),
                         ('tmc_dn2_12', 'pre12_dn2_spd'),
                         ('tmc_up1_12', 'pre12_up1_spd'),
                         ('tmc_up2_12', 'pre12_up2_spd')]:
    speed_lookup = inrix2014[['tmc_code_', 'speed']].rename(
        columns={'tmc_code_': key_col, 'speed': spd_col})
    n2014 = n2014.merge(speed_lookup, how='left', on=key_col)

#### Pre_13minute

In [55]:
# Format the "13 minutes before the crash" time as text so it can be concatenated into lookup keys.
n2014['pre_13min'] = n2014['pre_13min'].astype(str)

# Build one "<link id> <time>" key per link. Each key must start with that
# link's own id (crash link, downstream 1-2, upstream 1-2); reusing the crash
# link's 'TMC' id for every key would make all five speed lookups identical.
for link_col in ['TMC', 'tmc_dn1', 'tmc_dn2', 'tmc_up1', 'tmc_up2']:
    n2014[link_col + '_13'] = n2014[link_col] + " " + n2014['pre_13min']

In [56]:
# Attach the speed recorded 13 minutes before the crash on the crash link and
# on its two downstream / two upstream neighbours.
# - Speed columns carry the 'pre13' prefix of this offset, so they do not
#   duplicate the names used for another offset.
# - The first upstream speed is looked up with the 'tmc_up1_13' key (not the
#   second upstream key).
# - The lookup's key column is renamed to the left-hand key so each merge adds
#   only the speed column (no extra 'tmc_code_' copies that later collide).
for key_col, spd_col in [('TMC_13', 'pre13_spd'),
                         ('tmc_dn1_13', 'pre13_dn1_spd'),
                         ('tmc_dn2_13', 'pre13_dn2_spd'),
                         ('tmc_up1_13', 'pre13_up1_spd'),
                         ('tmc_up2_13', 'pre13_up2_spd')]:
    speed_lookup = inrix2014[['tmc_code_', 'speed']].rename(
        columns={'tmc_code_': key_col, 'speed': spd_col})
    n2014 = n2014.merge(speed_lookup, how='left', on=key_col)

#### Pre_14minute

In [57]:
# Format the "14 minutes before the crash" time as text so it can be concatenated into lookup keys.
n2014['pre_14min'] = n2014['pre_14min'].astype(str)

# Build one "<link id> <time>" key per link. Each key must start with that
# link's own id (crash link, downstream 1-2, upstream 1-2); reusing the crash
# link's 'TMC' id for every key would make all five speed lookups identical.
for link_col in ['TMC', 'tmc_dn1', 'tmc_dn2', 'tmc_up1', 'tmc_up2']:
    n2014[link_col + '_14'] = n2014[link_col] + " " + n2014['pre_14min']

In [58]:
# Attach the speed recorded 14 minutes before the crash on the crash link and
# on its two downstream / two upstream neighbours.
# - Speed columns carry the 'pre14' prefix of this offset, so they do not
#   duplicate the names used for another offset.
# - The first upstream speed is looked up with the 'tmc_up1_14' key (not the
#   second upstream key).
# - The lookup's key column is renamed to the left-hand key so each merge adds
#   only the speed column (no extra 'tmc_code_' copies that later collide).
for key_col, spd_col in [('TMC_14', 'pre14_spd'),
                         ('tmc_dn1_14', 'pre14_dn1_spd'),
                         ('tmc_dn2_14', 'pre14_dn2_spd'),
                         ('tmc_up1_14', 'pre14_up1_spd'),
                         ('tmc_up2_14', 'pre14_up2_spd')]:
    speed_lookup = inrix2014[['tmc_code_', 'speed']].rename(
        columns={'tmc_code_': key_col, 'speed': spd_col})
    n2014 = n2014.merge(speed_lookup, how='left', on=key_col)

#### Pre_15minute

In [59]:
# Format the "15 minutes before the crash" time as text so it can be concatenated into lookup keys.
n2014['pre_15min'] = n2014['pre_15min'].astype(str)

# Build one "<link id> <time>" key per link. Each key must start with that
# link's own id (crash link, downstream 1-2, upstream 1-2); reusing the crash
# link's 'TMC' id for every key would make all five speed lookups identical.
for link_col in ['TMC', 'tmc_dn1', 'tmc_dn2', 'tmc_up1', 'tmc_up2']:
    n2014[link_col + '_15'] = n2014[link_col] + " " + n2014['pre_15min']

In [60]:
# Attach the speed recorded 15 minutes before the crash on the crash link and
# on its two downstream / two upstream neighbours.
# - Speed columns carry the 'pre15' prefix of this offset, so they do not
#   duplicate the names used for another offset.
# - The first upstream speed is looked up with the 'tmc_up1_15' key (not the
#   second upstream key).
# - The lookup's key column is renamed to the left-hand key so each merge adds
#   only the speed column (no extra 'tmc_code_' copies that later collide).
for key_col, spd_col in [('TMC_15', 'pre15_spd'),
                         ('tmc_dn1_15', 'pre15_dn1_spd'),
                         ('tmc_dn2_15', 'pre15_dn2_spd'),
                         ('tmc_up1_15', 'pre15_up1_spd'),
                         ('tmc_up2_15', 'pre15_up2_spd')]:
    speed_lookup = inrix2014[['tmc_code_', 'speed']].rename(
        columns={'tmc_code_': key_col, 'speed': spd_col})
    n2014 = n2014.merge(speed_lookup, how='left', on=key_col)

In [61]:
# Write the joined table to disk (the row index is written as the first column).
n2014.to_csv(path_or_buf='n2014_.csv')