###### Imports and Settings

In [1]:
import pandas as pd
import numpy as np
import requests
from functools import reduce
import matplotlib.pyplot as plt
import pickle
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 150)
import sys
sys.path.append("..") # Adds higher directory to python modules path
import geodict
import geodict
namestocommon = geodict.namestocommon
geotogeoid = geodict.geotogeoid
tofullcensus = geodict.tofullcensus
GNRC = geodict.GNRC
KY = geodict.KY
censusplaces = geodict.censusplaces
import sqlite3 as sq
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

In [7]:
#functions
def percent(x, y):
    return (x/y)*100
def percentchange(x, y):
    try:
        return ((x - y)*100/y)
    except ZeroDivisionError:
        return 0
def realchange(x, y):
    return x-y
#calculate real and percent change between all columns for all possible time frames
def calculate_changes(df, columns, time_frames, years):
    for column in columns:
        for time_frame in time_frames:
            start_year, end_year = time_frame.split('-')
            df[f'{column} % Change', 'None', f'{time_frame}'] = percentchange(df[(column, int(end_year), 'None')], df[(column, int(start_year), 'None')])
            df[f'{column} Change', 'None', f'{time_frame}'] = (df[(column, int(end_year), 'None')] - df[(column, int(start_year), 'None')])

    return df
#generate all possible time frames from a list of years
def generate_time_frames(years):
    time_frames = []
    for i in range(len(years)-1):
        for j in range(i+1, len(years)):
            time_frames.append(f"{years[i]}-{years[j]}")
    return time_frames

# This notebook outlines the download and formatting process for the vehicular crash data coming from other research staff. There is minimal formatting this document is more of a placeholder in case there ever is formatting needed.

In [8]:
#import and examine data
data = pd.read_csv('../../Data Downloads/Crashes.csv')
data.head(3)

Unnamed: 0,NAME,Year,Crashes Total,Crashes:Fatal,Crashes:Serious Injury
0,Portland,2011,254,3,12
1,Sumner,2011,3787,21,128
2,Wilson,2011,3209,21,109


In [9]:
#calculate the percentage of all crashes that are fatal or result in serious injury
data['Crashes%:Fatal'] = percent(data['Crashes:Fatal'], data['Crashes Total'])
data['Crashes%:Serious Injury'] = percent(data['Crashes:Serious Injury'], data['Crashes Total'])

In [10]:
#set index to name and transpose to rename geographies using custom module then transpose back and reset index without dropping
data = data.set_index('NAME').transpose()
data = data.rename(columns = tofullcensus)
data = data.transpose().reset_index(drop = False)

In [11]:
data.head()

Unnamed: 0,NAME,Year,Crashes Total,Crashes:Fatal,Crashes:Serious Injury,Crashes%:Fatal,Crashes%:Serious Injury
0,"Portland city, Tennessee",2011.0,254.0,3.0,12.0,1.181102,4.724409
1,Sumner,2011.0,3787.0,21.0,128.0,0.554529,3.379984
2,Wilson,2011.0,3209.0,21.0,109.0,0.654409,3.396697
3,Rutherford,2011.0,8626.0,21.0,281.0,0.24345,3.257593
4,MPO Region,2011.0,59880.0,167.0,1100.0,0.278891,1.837007


In [12]:
#make sure year is formatted as an integer
data['Year'] = data['Year'].astype(int)
#create a list of years from the dataframe to pass through our "generate time frames" function to create a list of all possible time frames - need this here for later
years = list(data['Year'].unique().astype(int))
time_frames = generate_time_frames(years)

In [13]:
#create a multilevel column header with year and placeholder for time frames
#pivot the table and create a multiindex of year and column header
cols = list(data.columns)
cols.remove('NAME')
cols.remove('Year')
df_pivot = data.pivot_table(index = 'NAME', columns = ['Year'], values = cols)
df_pivot.head(2)

Unnamed: 0_level_0,Crashes Total,Crashes Total,Crashes Total,Crashes Total,Crashes Total,Crashes Total,Crashes Total,Crashes Total,Crashes Total,Crashes Total,Crashes Total,Crashes%:Fatal,Crashes%:Fatal,Crashes%:Fatal,Crashes%:Fatal,Crashes%:Fatal,Crashes%:Fatal,Crashes%:Fatal,Crashes%:Fatal,Crashes%:Fatal,Crashes%:Fatal,Crashes%:Fatal,Crashes%:Serious Injury,Crashes%:Serious Injury,Crashes%:Serious Injury,Crashes%:Serious Injury,Crashes%:Serious Injury,Crashes%:Serious Injury,Crashes%:Serious Injury,Crashes%:Serious Injury,Crashes%:Serious Injury,Crashes%:Serious Injury,Crashes%:Serious Injury,Crashes:Fatal,Crashes:Fatal,Crashes:Fatal,Crashes:Fatal,Crashes:Fatal,Crashes:Fatal,Crashes:Fatal,Crashes:Fatal,Crashes:Fatal,Crashes:Fatal,Crashes:Fatal,Crashes:Serious Injury,Crashes:Serious Injury,Crashes:Serious Injury,Crashes:Serious Injury,Crashes:Serious Injury,Crashes:Serious Injury,Crashes:Serious Injury,Crashes:Serious Injury,Crashes:Serious Injury,Crashes:Serious Injury,Crashes:Serious Injury
Year,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
NAME,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2
MPO Region,59880.0,62634.0,65248.0,69795.0,69003.0,72987.0,72798.0,71880.0,73355.0,56517.0,51459.0,0.278891,0.258645,0.274338,0.257898,0.226077,0.282242,0.271985,0.264329,0.317633,0.406957,0.460561,1.837007,1.890347,1.863659,2.166344,2.386853,2.156548,2.089343,1.455203,1.310067,1.641984,2.17066,167.0,162.0,179.0,180.0,156.0,206.0,198.0,190.0,233.0,230.0,237.0,1100.0,1184.0,1216.0,1512.0,1647.0,1574.0,1521.0,1046.0,961.0,928.0,1117.0
"Portland city, Tennessee",254.0,277.0,262.0,256.0,270.0,304.0,236.0,219.0,222.0,215.0,275.0,1.181102,0.0,0.381679,0.78125,0.37037,0.328947,0.0,0.0,0.45045,0.465116,1.818182,4.724409,3.971119,4.198473,3.90625,2.962963,3.289474,2.118644,3.196347,4.504505,2.790698,2.909091,3.0,0.0,1.0,2.0,1.0,1.0,0.0,0.0,1.0,1.0,5.0,12.0,11.0,11.0,10.0,8.0,10.0,5.0,7.0,10.0,6.0,8.0


In [14]:
#add a level to the multiindex to accomodate the time period metrics
df_pivot.columns = pd.MultiIndex.from_tuples([(col[0], col[1], 'None') for col in df_pivot.columns])
df_pivot.head(3)

Unnamed: 0_level_0,Crashes Total,Crashes Total,Crashes Total,Crashes Total,Crashes Total,Crashes Total,Crashes Total,Crashes Total,Crashes Total,Crashes Total,Crashes Total,Crashes%:Fatal,Crashes%:Fatal,Crashes%:Fatal,Crashes%:Fatal,Crashes%:Fatal,Crashes%:Fatal,Crashes%:Fatal,Crashes%:Fatal,Crashes%:Fatal,Crashes%:Fatal,Crashes%:Fatal,Crashes%:Serious Injury,Crashes%:Serious Injury,Crashes%:Serious Injury,Crashes%:Serious Injury,Crashes%:Serious Injury,Crashes%:Serious Injury,Crashes%:Serious Injury,Crashes%:Serious Injury,Crashes%:Serious Injury,Crashes%:Serious Injury,Crashes%:Serious Injury,Crashes:Fatal,Crashes:Fatal,Crashes:Fatal,Crashes:Fatal,Crashes:Fatal,Crashes:Fatal,Crashes:Fatal,Crashes:Fatal,Crashes:Fatal,Crashes:Fatal,Crashes:Fatal,Crashes:Serious Injury,Crashes:Serious Injury,Crashes:Serious Injury,Crashes:Serious Injury,Crashes:Serious Injury,Crashes:Serious Injury,Crashes:Serious Injury,Crashes:Serious Injury,Crashes:Serious Injury,Crashes:Serious Injury,Crashes:Serious Injury
Unnamed: 0_level_1,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
Unnamed: 0_level_2,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None
NAME,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3,Unnamed: 24_level_3,Unnamed: 25_level_3,Unnamed: 26_level_3,Unnamed: 27_level_3,Unnamed: 28_level_3,Unnamed: 29_level_3,Unnamed: 30_level_3,Unnamed: 31_level_3,Unnamed: 32_level_3,Unnamed: 33_level_3,Unnamed: 34_level_3,Unnamed: 35_level_3,Unnamed: 36_level_3,Unnamed: 37_level_3,Unnamed: 38_level_3,Unnamed: 39_level_3,Unnamed: 40_level_3,Unnamed: 41_level_3,Unnamed: 42_level_3,Unnamed: 43_level_3,Unnamed: 44_level_3,Unnamed: 45_level_3,Unnamed: 46_level_3,Unnamed: 47_level_3,Unnamed: 48_level_3,Unnamed: 49_level_3,Unnamed: 50_level_3,Unnamed: 51_level_3,Unnamed: 52_level_3,Unnamed: 53_level_3,Unnamed: 54_level_3,Unnamed: 55_level_3
MPO Region,59880.0,62634.0,65248.0,69795.0,69003.0,72987.0,72798.0,71880.0,73355.0,56517.0,51459.0,0.278891,0.258645,0.274338,0.257898,0.226077,0.282242,0.271985,0.264329,0.317633,0.406957,0.460561,1.837007,1.890347,1.863659,2.166344,2.386853,2.156548,2.089343,1.455203,1.310067,1.641984,2.17066,167.0,162.0,179.0,180.0,156.0,206.0,198.0,190.0,233.0,230.0,237.0,1100.0,1184.0,1216.0,1512.0,1647.0,1574.0,1521.0,1046.0,961.0,928.0,1117.0
"Portland city, Tennessee",254.0,277.0,262.0,256.0,270.0,304.0,236.0,219.0,222.0,215.0,275.0,1.181102,0.0,0.381679,0.78125,0.37037,0.328947,0.0,0.0,0.45045,0.465116,1.818182,4.724409,3.971119,4.198473,3.90625,2.962963,3.289474,2.118644,3.196347,4.504505,2.790698,2.909091,3.0,0.0,1.0,2.0,1.0,1.0,0.0,0.0,1.0,1.0,5.0,12.0,11.0,11.0,10.0,8.0,10.0,5.0,7.0,10.0,6.0,8.0
Rutherford,8626.0,9523.0,9509.0,10036.0,11298.0,11912.0,12305.0,12179.0,12211.0,10024.0,10020.0,0.24345,0.304526,0.283942,0.348745,0.221278,0.360981,0.308818,0.303802,0.253869,0.438947,0.279441,3.257593,2.614722,2.849932,3.726584,2.345548,2.770316,2.559935,1.716069,1.580542,1.556265,1.986028,21.0,29.0,27.0,35.0,25.0,43.0,38.0,37.0,31.0,44.0,28.0,281.0,249.0,271.0,374.0,265.0,330.0,315.0,209.0,193.0,156.0,199.0


In [15]:
#get a list of the varaibles to loop through by indexing into the first level only of the column headers
first_level = df_pivot.columns.get_level_values(0).unique().tolist()
#remove percentages - don't want change metrics on them
first_level = [item for item in first_level if '%' not in item]

In [16]:
#pass the dataframe, the list of variables, time frames, and years through the "calculate change" function
data = calculate_changes(df_pivot, first_level, time_frames = time_frames, years = years)

In [17]:
#reformat and rename columns
data = data.stack([1, 1])
data = data.reset_index(drop = False)
data = data.rename(columns = {'level_1':'Year', 'level_2':'Time Frame'})

In [18]:
#map to geoid and set source name
data['GEO_ID'] = data['NAME'].map(geotogeoid)
data['Source'] = 'Internal Crash Data'

In [19]:
#examine
data.head()

Unnamed: 0,NAME,Year,Time Frame,Crashes Total,Crashes Total % Change,Crashes Total Change,Crashes%:Fatal,Crashes%:Serious Injury,Crashes:Fatal,Crashes:Fatal % Change,Crashes:Fatal Change,Crashes:Serious Injury,Crashes:Serious Injury % Change,Crashes:Serious Injury Change,GEO_ID,Source
0,MPO Region,2011,,59880.0,,,0.278891,1.837007,167.0,,,1100.0,,,,Internal Crash Data
1,MPO Region,2012,,62634.0,,,0.258645,1.890347,162.0,,,1184.0,,,,Internal Crash Data
2,MPO Region,2013,,65248.0,,,0.274338,1.863659,179.0,,,1216.0,,,,Internal Crash Data
3,MPO Region,2014,,69795.0,,,0.257898,2.166344,180.0,,,1512.0,,,,Internal Crash Data
4,MPO Region,2015,,69003.0,,,0.226077,2.386853,156.0,,,1647.0,,,,Internal Crash Data


In [21]:
data['NAME'].unique()

array(['MPO Region', 'Portland city, Tennessee', 'Rutherford', 'Sumner',
       'Wilson'], dtype=object)

In [20]:
#export to the SQLite database as only the JobsEQ annual data
conn = sq.connect('../../Outputs/Dem_Transpo_Housing_Collection.db')
data.to_sql('Crashes_Annual', conn, if_exists = 'replace', index = False)

330