In [2]:
# Standard library
import datetime as dt
import warnings

# Third-party
import numpy as np
import pandas as pd

# Raise pandas' display limits so wide/long frames are shown without truncation.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# NOTE(review): this silences every warning category for the whole session,
# including pandas deprecation and SettingWithCopy warnings.
warnings.filterwarnings("ignore")

#### 2019 + 2020

In [3]:
# Load the two yearly crash extracts; each file becomes its own raw frame.
data19, data20 = (
    pd.read_csv("Texas_Crash_Data_2019.csv"),
    pd.read_csv("Texas_Crash_Data_2020.csv"),
)

In [4]:
# Stack the 2019 and 2020 extracts into one frame.
# pd.concat replaces DataFrame.append, which is deprecated and was removed in
# pandas 2.0. sort=False keeps the original column order, and the original row
# labels are kept as well (no ignore_index), exactly as append did.
data = pd.concat([data19, data20], sort=False)

In [21]:
# Sanity check: (rows, columns) of the combined 2019 + 2020 frame.
data.shape

(845855, 171)

In [22]:
# Recode the fatal-crash flag on the full frame as an integer indicator
# (1 = fatal, 0 = otherwise). Both the raw 'Y' and an already recoded 1 are
# accepted so the cell is idempotent: re-running it no longer turns every
# flag into 0.
data['Crash_Fatal_Fl'] = np.where(data['Crash_Fatal_Fl'].isin(['Y', 1]), 1, 0)

# Take the subset AFTER the recode so dataSub carries the 0/1 indicator rather
# than the raw 'Y'/'N' strings (the monthly aggregations sum this column).
# .copy() makes dataSub an independent frame, so the assignment below does not
# write into a slice of `data`.
dataSub = data[['Crash_ID', 'Crash_Fatal_Fl', 'Crash_Date', 'Death_Cnt']].copy()

# Parse the crash date so the .dt accessors used later are available.
dataSub['Crash_Date'] = pd.to_datetime(dataSub['Crash_Date'])

In [23]:
# Preview the first rows of the four-column subset.
dataSub.head()

Unnamed: 0,Crash_ID,Crash_Fatal_Fl,Crash_Date,Death_Cnt
0,15581806,0,2019-02-10,0
1,15585669,0,2019-02-10,0
2,15585883,0,2019-02-10,0
3,16585839,0,2019-01-26,0
4,16822437,0,2018-12-31,0


In [24]:
# Work on an independent (deep) copy so the aggregation prep leaves dataSub untouched.
dataSubAgg = dataSub.copy(deep=True)

In [25]:
# Derive string date keys from Crash_Date, then drop the raw date column.
# All three are strftime strings: 'YYYY-MM', 'YYYY' and zero-padded 'MM'.
dataSubAgg = (
    dataSubAgg
    .assign(
        yearMonth=lambda frame: frame['Crash_Date'].dt.strftime('%Y-%m'),
        Year=lambda frame: frame['Crash_Date'].dt.strftime('%Y'),
        Month=lambda frame: frame['Crash_Date'].dt.strftime('%m'),
    )
    .drop(columns=['Crash_Date'])
)

In [26]:
# Map alternative source column names onto the names used in this notebook.
# NOTE(review): neither "DocumentNumber" nor "TotalKilled" is among the columns
# built above, so with rename's default handling of missing keys this looks
# like a no-op here -- confirm whether it is still needed.
dataSubAgg = dataSubAgg.rename(
    columns={"DocumentNumber": "Crash_ID", "TotalKilled": "Death_Cnt"}
)

In [27]:
# Preview the frame after the date keys were added and Crash_Date was dropped.
dataSubAgg.head()

Unnamed: 0,Crash_ID,Crash_Fatal_Fl,Death_Cnt,yearMonth,Year,Month
0,15581806,0,0,2019-02,2019,2
1,15585669,0,0,2019-02,2019,2
2,15585883,0,0,2019-02,2019,2
3,16585839,0,0,2019-01,2019,1
4,16822437,0,0,2018-12,2018,12


In [46]:
# Drop crashes dated 2018 (the preview above shows a 2018-12-31 record).
# Year is a strftime string, hence the comparison against '2018' as text.
dataSubAgg = dataSubAgg.loc[dataSubAgg['Year'].ne('2018')]

## YearMonth: change from one month to the next

In [47]:
# One row per (Year, Month), ordered chronologically by the group keys:
#   Crash_ID       -> number of crash records
#   Crash_Fatal_Fl -> sum of the fatal-crash indicator
#   Death_Cnt      -> sum of deaths
#   yearMonth      -> the group's 'YYYY-MM' label (max of the group's values)
dataSubYearMonth = (
    dataSubAgg
    .groupby(['Year', 'Month'], as_index=False)
    .agg({
        'Crash_ID': 'count',
        'Crash_Fatal_Fl': 'sum',
        'Death_Cnt': 'sum',
        'yearMonth': 'max',
    })
)

In [48]:
# Relative change of each count versus the previous row (the previous month,
# given the Year/Month ordering); the first row has no predecessor and is NaN.
# State and Country are constant labels attached to every row.
dataSubYearMonth = dataSubYearMonth.assign(
    crashPctChange=lambda frame: frame['Crash_ID'].pct_change(),
    deathCntPctChange=lambda frame: frame['Death_Cnt'].pct_change(),
    fatalPctChange=lambda frame: frame['Crash_Fatal_Fl'].pct_change(),
    State='Texas',
    Country='United States',
)

In [49]:
# Per-crash rates for each month: deaths per crash and fatal crashes per crash.
dataSubYearMonth['deathRate'] = dataSubYearMonth['Death_Cnt'].div(dataSubYearMonth['Crash_ID'])
dataSubYearMonth['fatRate'] = dataSubYearMonth['Crash_Fatal_Fl'].div(dataSubYearMonth['Crash_ID'])

In [50]:
# Relative change of each rate versus the previous row (NaN on the first row).
dataSubYearMonth = dataSubYearMonth.assign(
    deathRatePctChange=lambda frame: frame['deathRate'].pct_change(),
    fatRatePctChange=lambda frame: frame['fatRate'].pct_change(),
)

In [51]:
# Export the per-month table to CSV in the working directory.
# newline='' turns off Python's newline translation on the handle, so the
# line endings produced by to_csv are written as-is.
with open("TX_dataSubYearMonth.csv",'w',newline='') as outputfile:
    # header=True writes the column names; index=False leaves out the row index.
    dataSubYearMonth.to_csv(outputfile, header=True, index = False) 

In [52]:
# Read the exported file back in to inspect what was actually written.
datacheck = pd.read_csv('TX_dataSubYearMonth.csv')

In [53]:
# Display the re-loaded per-month table.
datacheck

Unnamed: 0,Year,Month,Crash_ID,Crash_Fatal_Fl,Death_Cnt,yearMonth,crashPctChange,deathCntPctChange,fatalPctChange,State,Country,deathRate,fatRate,deathRatePctChange,fatRatePctChange
0,2019,1,48832,248,279,2019-01,,,,Texas,United States,0.005713,0.005079,,
1,2019,2,48660,232,244,2019-02,-0.003522,-0.125448,-0.064516,Texas,United States,0.005014,0.004768,-0.122357,-0.061209
2,2019,3,55180,264,305,2019-03,0.133991,0.25,0.137931,Texas,United States,0.005527,0.004784,0.102302,0.003475
3,2019,4,56117,288,316,2019-04,0.016981,0.036066,0.090909,Texas,United States,0.005631,0.005132,0.018766,0.072694
4,2019,5,55967,277,298,2019-05,-0.002673,-0.056962,-0.038194,Texas,United States,0.005325,0.004949,-0.054435,-0.035617
5,2019,6,51697,277,304,2019-06,-0.076295,0.020134,0.0,Texas,United States,0.00588,0.005358,0.104394,0.082597
6,2019,7,50274,290,318,2019-07,-0.027526,0.046053,0.046931,Texas,United States,0.006325,0.005768,0.075661,0.076565
7,2019,8,55201,279,310,2019-08,0.098003,-0.025157,-0.037931,Texas,United States,0.005616,0.005054,-0.112167,-0.123801
8,2019,9,54035,289,311,2019-09,-0.021123,0.003226,0.035842,Texas,United States,0.005756,0.005348,0.024874,0.058194
9,2019,10,59370,294,316,2019-10,0.098732,0.016077,0.017301,Texas,United States,0.005323,0.004952,-0.075228,-0.074114


In [54]:
# (rows, columns) of the table read back from TX_dataSubYearMonth.csv.
datacheck.shape

(17, 15)

## MonthYear: same calendar month compared across years

In [55]:
# Same aggregation as the Year/Month table, but keyed (Month, Year) so the
# rows for one calendar month sit next to each other, ordered by year.
dataSubMonthYear = (
    dataSubAgg
    .groupby(['Month', 'Year'], as_index=False)
    .agg({
        'Crash_ID': 'count',
        'Crash_Fatal_Fl': 'sum',
        'Death_Cnt': 'sum',
        'yearMonth': 'max',
    })
)

In [56]:
# Change of each count relative to the FIRST row of the same calendar month
# (rows within a month are ordered by Year, so that is the earliest year
# present). The first year of every month is therefore 0.
#
# transform() is used instead of apply(): it always returns a result aligned
# to the frame's own index. apply() with a same-length result yields a
# (Month, row) MultiIndex on pandas >= 2.0, which cannot be assigned back as
# a column.
dataSubMonthYear['crashPctChange'] = dataSubMonthYear.groupby('Month')['Crash_ID'].transform(lambda x: x.div(x.iloc[0]).subtract(1))
dataSubMonthYear['deathCntPctChange'] = dataSubMonthYear.groupby('Month')['Death_Cnt'].transform(lambda x: x.div(x.iloc[0]).subtract(1))
dataSubMonthYear['fatalPctChange'] = dataSubMonthYear.groupby('Month')['Crash_Fatal_Fl'].transform(lambda x: x.div(x.iloc[0]).subtract(1))

# Constant labels attached to every row.
dataSubMonthYear['State']='Texas'
dataSubMonthYear['Country']='United States'

In [57]:
# Per-crash rates for each (Month, Year) row: deaths per crash and fatal
# crashes per crash.
dataSubMonthYear['deathRate'] = dataSubMonthYear['Death_Cnt'].div(dataSubMonthYear['Crash_ID'])
dataSubMonthYear['fatRate'] = dataSubMonthYear['Crash_Fatal_Fl'].div(dataSubMonthYear['Crash_ID'])

In [58]:
# Change of each rate relative to the first row of the same calendar month.
# transform() keeps the result aligned to the frame's index; apply() returns a
# (Month, row) MultiIndex on pandas >= 2.0 and the column assignment would fail.
dataSubMonthYear['deathRatePctChange'] = dataSubMonthYear.groupby('Month')['deathRate'].transform(lambda x: x.div(x.iloc[0]).subtract(1))
dataSubMonthYear['fatRatePctChange'] = dataSubMonthYear.groupby('Month')['fatRate'].transform(lambda x: x.div(x.iloc[0]).subtract(1))

In [59]:
# Export the (Month, Year) table to CSV in the working directory.
# newline='' turns off Python's newline translation on the handle, so the
# line endings produced by to_csv are written as-is.
with open("TX_dataSubMonthYear.csv",'w',newline='') as outputfile:
    # header=True writes the column names; index=False leaves out the row index.
    dataSubMonthYear.to_csv(outputfile, header=True, index = False) 

In [60]:
# Read the exported file back in and show its (rows, columns) as a quick check.
datacheck = pd.read_csv('TX_dataSubMonthYear.csv')
datacheck.shape

(17, 15)

#### The December-tied data
In this section we compare every month we have in 2020 with the most recent "normal" month, December 2019.

In [61]:
# Baseline month that every later month is compared against.
BASELINE_MONTH = '2019-12'

# Keep the baseline month and everything after it. yearMonth is a zero-padded
# 'YYYY-MM' string, so string comparison orders the months chronologically.
# A single .loc selection plus .copy() gives an independent frame, so the
# column assignments below do not write into a slice of dataSubYearMonth.
compare = dataSubYearMonth.loc[
    dataSubYearMonth['yearMonth'] >= BASELINE_MONTH,
    ['yearMonth', 'Crash_ID', 'Crash_Fatal_Fl'],
].copy()

# Look the baseline row up by its label rather than by position, so a missing
# December fails loudly instead of silently using another month as baseline.
baseline_rows = compare.loc[compare['yearMonth'] == BASELINE_MONTH]
if baseline_rows.empty:
    raise ValueError(
        f"No {BASELINE_MONTH} row in dataSubYearMonth; cannot compute change versus December."
    )
baseline = baseline_rows.iloc[0]

# Relative change of each month versus the baseline (the baseline row itself is 0).
compare['crashPctChangeCompareDec'] = compare['Crash_ID'] / baseline['Crash_ID'] - 1
compare['fatalPctChangeCompareDec'] = compare['Crash_Fatal_Fl'] / baseline['Crash_Fatal_Fl'] - 1

# Constant labels attached to every row.
compare['State']='Texas'
compare['Country']='United States'

In [62]:
# Display the December-baseline comparison table.
compare

Unnamed: 0,yearMonth,Crash_ID,Crash_Fatal_Fl,crashPctChangeCompareDec,fatalPctChangeCompareDec,State,Country
11,2019-12,55701,314,0.0,0.0,Texas,United States
12,2020-01,52261,283,-0.061758,-0.098726,Texas,United States
13,2020-02,51951,284,-0.067324,-0.095541,Texas,United States
14,2020-03,43765,240,-0.214287,-0.235669,Texas,United States
15,2020-04,28227,217,-0.493241,-0.308917,Texas,United States
16,2020-05,21527,115,-0.613526,-0.633758,Texas,United States


In [63]:
# Export the December-baseline comparison to CSV in the working directory.
# newline='' turns off Python's newline translation on the handle, so the
# line endings produced by to_csv are written as-is.
with open("TX_PctChangeCompareDec.csv",'w',newline='') as outputfile:
    # header=True writes the column names; index=False leaves out the row index.
    compare.to_csv(outputfile, header=True, index = False) 