In [1]:
import pandas as pd
import numpy as np
# Widen pandas' console display limits so large frames print without truncation.
for _option_name, _option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(_option_name, _option_value)
import datetime as dt
import warnings
# No category or module filter is given, so this hides every warning raised
# after this cell runs (including pandas deprecation warnings).
warnings.filterwarnings("ignore")

#### 2019 + 2020

In [2]:
# Load one raw crash extract per year; both files are read with default parser settings.
data19, data20 = (
    pd.read_csv(csv_path)
    for csv_path in ("Ohio_Crash_Data_2019.csv", "Ohio_Crash_Data_2020.csv")
)

In [3]:
# Stack the 2019 and 2020 extracts row-wise. DataFrame.append was deprecated in
# pandas 1.4 and removed in 2.0; pd.concat is the supported equivalent.
# sort=False keeps the column order of the inputs; each frame's original row
# labels are kept, exactly as append did.
data = pd.concat([data19, data20], sort=False)

In [4]:
# A crash is flagged fatal (1) when at least one person was killed OR the
# reported severity is 'Fatal'; otherwise 0.
data['Crash_Fatal_Fl'] = np.where((data['TotalKilled'] > 0) | (data['CrashSeverity'] == 'Fatal'), 1, 0)

# Keep only the columns needed downstream. .copy() makes dataSub an independent
# frame, so the assignment below writes to dataSub itself rather than to a
# slice of `data` (the chained-assignment / SettingWithCopy hazard).
dataSub = data[['DocumentNumber', 'Crash_Fatal_Fl', 'CrashDateTime', 'TotalKilled']].copy()

# Parse the timestamp so the .dt accessor can be used later.
dataSub['CrashDateTime'] = pd.to_datetime(dataSub['CrashDateTime'])

In [5]:
# Preview the first rows of the trimmed crash table.
dataSub.head()

Unnamed: 0,DocumentNumber,Crash_Fatal_Fl,CrashDateTime,TotalKilled
0,20192000310,1,2019-09-20 20:37:00,0
1,20192001035,1,2019-09-20 14:23:00,0
2,20192001100,0,2019-08-07 11:05:00,0
3,20192002300,1,2019-01-09 18:03:00,0
4,20192003383,0,2019-01-03 14:06:00,0


In [6]:
# Work on an independent (deep) copy so dataSub keeps its CrashDateTime column.
dataSubAgg = dataSub.copy(deep=True)

In [7]:
# Derive string-typed calendar keys from the crash timestamp, then discard the
# timestamp itself: 'YYYY-MM', 'YYYY' and zero-padded 'MM'.
dataSubAgg = (
    dataSubAgg
    .assign(
        yearMonth=lambda frame: frame['CrashDateTime'].dt.strftime('%Y-%m'),
        Year=lambda frame: frame['CrashDateTime'].dt.strftime('%Y'),
        Month=lambda frame: frame['CrashDateTime'].dt.strftime('%m'),
    )
    .drop(columns=['CrashDateTime'])
)

In [8]:
# Give the Ohio-specific columns the generic names used by the aggregations below.
dataSubAgg = dataSubAgg.rename(
    columns={
        "DocumentNumber": "Crash_ID",
        "TotalKilled": "Death_Cnt",
    }
)

In [9]:
# Preview the renamed table with the derived yearMonth / Year / Month keys.
dataSubAgg.head()

Unnamed: 0,Crash_ID,Crash_Fatal_Fl,Death_Cnt,yearMonth,Year,Month
0,20192000310,1,0,2019-09,2019,9
1,20192001035,1,0,2019-09,2019,9
2,20192001100,0,0,2019-08,2019,8
3,20192002300,1,0,2019-01,2019,1
4,20192003383,0,0,2019-01,2019,1


## YearMonth

In [10]:
# One row per (Year, Month): number of crashes, number of fatal crashes,
# number of deaths, and the 'YYYY-MM' label of that month.
dataSubYearMonth = (
    dataSubAgg
    .groupby(['Year', 'Month'])
    .agg({
        'Crash_ID': 'count',
        'Crash_Fatal_Fl': 'sum',
        'Death_Cnt': 'sum',
        'yearMonth': 'max',
    })
    .reset_index()
)

In [11]:
# Row-over-row (i.e. month-over-month, given the Year/Month ordering) relative
# change of each count, plus constant geography labels.
dataSubYearMonth = dataSubYearMonth.assign(
    crashPctChange=dataSubYearMonth['Crash_ID'].pct_change(),
    deathCntPctChange=dataSubYearMonth['Death_Cnt'].pct_change(),
    fatalPctChange=dataSubYearMonth['Crash_Fatal_Fl'].pct_change(),
    State='Ohio',
    Country='United States',
)

In [12]:
# Per-crash rates: deaths per crash and fatal crashes per crash.
dataSubYearMonth = dataSubYearMonth.assign(
    deathRate=lambda frame: frame['Death_Cnt'].div(frame['Crash_ID']),
    fatRate=lambda frame: frame['Crash_Fatal_Fl'].div(frame['Crash_ID']),
)

In [13]:
# Row-over-row relative change of the two per-crash rates.
dataSubYearMonth = dataSubYearMonth.assign(
    deathRatePctChange=dataSubYearMonth['deathRate'].pct_change(),
    fatRatePctChange=dataSubYearMonth['fatRate'].pct_change(),
)

In [14]:
# Write the Year/Month table to disk. The path is passed straight to to_csv so
# pandas opens the file itself: a handle opened with a bare open(..., 'w') lacks
# newline='' (doubled line endings on Windows) and uses the platform encoding.
dataSubYearMonth.to_csv("OH_dataSubYearMonth.csv", header=True, index=False)

In [15]:
# Read the file back to sanity-check what was just written.
datacheck = pd.read_csv('OH_dataSubYearMonth.csv')

In [16]:
# (rows, columns) of the re-read file.
datacheck.shape

(16, 15)

## MonthYear

In [17]:
# Same aggregation as the Year/Month table, but keyed (Month, Year) so the rows
# for one calendar month in different years sit next to each other.
dataSubMonthYear = (
    dataSubAgg
    .groupby(['Month', 'Year'])
    .agg({
        'Crash_ID': 'count',
        'Crash_Fatal_Fl': 'sum',
        'Death_Cnt': 'sum',
        'yearMonth': 'max',
    })
    .reset_index()
)

In [18]:
# Relative change of each count versus the FIRST row of the same calendar month
# (the earliest year, because the frame is ordered by Month then Year).
# transform() returns a result aligned to the frame's own index. The previous
# groupby(...).apply(...) prepends the group key to the index on pandas >= 2.0,
# which makes the column assignment fail.
dataSubMonthYear['crashPctChange'] = dataSubMonthYear.groupby('Month')['Crash_ID'].transform(lambda x: x.div(x.iloc[0]).subtract(1))
dataSubMonthYear['deathCntPctChange'] = dataSubMonthYear.groupby('Month')['Death_Cnt'].transform(lambda x: x.div(x.iloc[0]).subtract(1))
dataSubMonthYear['fatalPctChange'] = dataSubMonthYear.groupby('Month')['Crash_Fatal_Fl'].transform(lambda x: x.div(x.iloc[0]).subtract(1))
# Constant geography labels.
dataSubMonthYear['State'] = 'Ohio'
dataSubMonthYear['Country'] = 'United States'

In [19]:
# Per-crash rates: deaths per crash and fatal crashes per crash.
dataSubMonthYear = dataSubMonthYear.assign(
    deathRate=lambda frame: frame['Death_Cnt'].div(frame['Crash_ID']),
    fatRate=lambda frame: frame['Crash_Fatal_Fl'].div(frame['Crash_ID']),
)

In [20]:
# Relative change of each rate versus the first row of the same calendar month.
# transform() keeps the result aligned to the frame's index; groupby().apply()
# prepends the group key on pandas >= 2.0 and breaks the column assignment.
dataSubMonthYear['deathRatePctChange'] = dataSubMonthYear.groupby('Month')['deathRate'].transform(lambda x: x.div(x.iloc[0]).subtract(1))
dataSubMonthYear['fatRatePctChange'] = dataSubMonthYear.groupby('Month')['fatRate'].transform(lambda x: x.div(x.iloc[0]).subtract(1))

In [21]:
# Write the Month/Year table to disk. The path is passed straight to to_csv so
# pandas opens the file itself: a handle opened with a bare open(..., 'w') lacks
# newline='' (doubled line endings on Windows) and uses the platform encoding.
dataSubMonthYear.to_csv("OH_dataSubMonthYear.csv", header=True, index=False)

In [22]:
# Read the file back and show its (rows, columns) as a sanity check.
datacheck = pd.read_csv('OH_dataSubMonthYear.csv')
datacheck.shape

(16, 15)

#### December-baseline comparison
In this section we compare every month we have in 2020 to the most recent "normal" month, which is December 2019.

In [23]:
# Keep December 2019 and every later month. 'YYYY-MM' strings sort
# chronologically, so a plain string comparison is sufficient. A single .loc
# selection plus .copy() yields an independent frame, so the column assignments
# below do not write into a slice of dataSubYearMonth.
compare = dataSubYearMonth.loc[dataSubYearMonth['yearMonth'] >= '2019-12', ['yearMonth', 'Crash_ID', 'Crash_Fatal_Fl']].copy()

# Relative change versus the first retained row, which is the baseline month
# (December 2019 when that month is present, given the Year/Month row order).
compare['crashPctChangeCompareDec'] = compare['Crash_ID'] / compare['Crash_ID'].iloc[0] - 1
compare['fatalPctChangeCompareDec'] = compare['Crash_Fatal_Fl'] / compare['Crash_Fatal_Fl'].iloc[0] - 1

# Constant geography labels.
compare['State'] = 'Ohio'
compare['Country'] = 'United States'

In [24]:
# Display the baseline-comparison table.
compare

Unnamed: 0,yearMonth,Crash_ID,Crash_Fatal_Fl,crashPctChangeCompareDec,fatalPctChangeCompareDec,State,Country
11,2019-12,26001,72,0.0,0.0,Ohio,United States
12,2020-01,22558,74,-0.132418,0.027778,Ohio,United States
13,2020-02,23017,64,-0.114765,-0.111111,Ohio,United States
14,2020-03,14835,76,-0.429445,0.055556,Ohio,United States
15,2020-04,8886,40,-0.658244,-0.444444,Ohio,United States


In [25]:
# Write the baseline-comparison table to disk. The path is passed straight to
# to_csv so pandas opens the file itself: a handle opened with a bare
# open(..., 'w') lacks newline='' (doubled line endings on Windows) and uses
# the platform encoding.
compare.to_csv("OH_PctChangeCompareDec.csv", header=True, index=False)