In [8]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from pprint import pprint
import requests
import time
from scipy.stats import linregress
import os
import csv

# Import API key
#from api_keys import api_key_file

# Output File (CSV)
output_data_file = "output_data/duistats.csv"

# File to Load (**Remember to Change These file names to DUI saved file names)
dui_2013_data = "nrs_2013_combined.csv"
dui_2007_data = "nrs_2007_final.csv"

col2007_list = ["STATE", "DATE","NRS_ZIPCODE","session","NRS_RACE","NRS_DSEX","NRS_AGE","NRS_SCHOOL","NRS_EMPLOY","NRS_VTYPE",
            "NRS_FROM","NRS_HEADED", "NRS_BETMI", "NRS_TODRK","resulbac2"]

col2013_list = ["State","MyZipCodeIs","Time_Stamp","data_session_original","race_n", "DriverAgeYears","EducationLevel","AreYouEmployed",
                "VehicleType", "FromWhere", "ToWhere","MilesTraveling", "AlcoholToday", "bac_from_blood"]

# Read DUI 2007 and 2013 Data Files and store into Pandas DataFrames(can't start with a number)
dui2013_data = pd.read_csv(dui_2013_data,low_memory=False, usecols=col2013_list)
dui2007_data = pd.read_csv(dui_2007_data,low_memory=False, usecols=col2007_list)



In [12]:
Student_2007 = []

dui2007_data["Student_2007"] = dui2007_data.apply(lambda row: "Student" if row.NRS_EMPLOY == "Student" else "No", axis = 1) 


In [13]:
dui2013_data

Unnamed: 0,data_session_original,State,Time_Stamp,race_n,VehicleType,FromWhere,ToWhere,MilesTraveling,AlcoholToday,DriverAgeYears,MyZipCodeIs,EducationLevel,AreYouEmployed,bac_from_blood
0,3,CA,12/07/2013 01:36:27,8,Car,Other,Other,0-5,No,43.0,93003.0,High school graduate,Employed Full-time,#NULL!
1,3,CA,12/07/2013 01:56:35,1,Car,Other,Own home,More than 20,No,21.0,93033.0,Some college - no degree,Employed Full-time,#NULL!
2,3,CA,12/07/2013 02:17:15,8,Car,Other,Own home,0-5,Yes,23.0,93035.0,Some college - no degree,Other __________________________,#NULL!
3,3,CA,12/07/2013 02:34:15,8,Car,Someone else's home,Own home,0-5,No,19.0,93030.0,Some college - no degree,Employed Part-time,0
4,3,CA,12/07/2013 02:41:16,#NULL!,SUV/ Crossover,,,,,,,,,#NULL!
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11317,4,UT,02/08/2014 23:46:43,1,Car,Work,Own home,11-20,,56.0,84010.0,Associate's degree,Employed Full-time,0
11318,5,UT,02/09/2014 01:29:47,1,SUV/ Crossover,Someone else's home,Own home,11-20,,18.0,84054.0,High school graduate,Employed Part-time,0
11319,5,UT,02/09/2014 01:43:25,1,Car,Someone else's home,Own home,11-20,,22.0,84037.0,Some college - no degree,Employed Part-time,0
11320,5,UT,02/09/2014 02:03:32,1,Car,Store or gas station,Other,6-10,No,20.0,84606.0,Some college - no degree,Employed Part-time,0


In [14]:
#renames for merge
dui2007_data.rename(columns = {"STATE": "State", "NRS_ZIPCODE": "Zipcode", "DATE":"Date","session": "Day or Night?","NRS_RACE":"Race", 
                               "NRS_DSEX":"Gender", "NRS_AGE":"Age Range","NRS_SCHOOL": "Education Level","NRS_EMPLOY": "Employed", 
                               "NRS_VTYPE": "Vehicle Type", "NRS_FROM": "From Where", "NRS_HEADED": "To Where","NRS_BETMI": "Trip distance", 
                               "NRS_TODRK": "Alcohol Today", "resulbac2": "Blood Alcohol Content"}, inplace = True)



In [15]:
dui2007_data.add_suffix("_2007")

Unnamed: 0,Day or Night?_2007,State_2007,Date_2007,From Where_2007,To Where_2007,Trip distance_2007,Alcohol Today_2007,Age Range_2007,Zipcode_2007,Education Level_2007,Employed_2007,Race_2007,Vehicle Type_2007,Gender_2007,Blood Alcohol Content_2007,Student_2007_2007
0,3,TN,11/3/2007,,,,,#NULL!,,,,,,,0,No
1,1,IL,,,,,,#NULL!,,,,,,,#NULL!,No
2,1,IN,,,,,,#NULL!,,,,,,,0,No
3,1,MI,10/12/2007,,,,,#NULL!,,,,,,,0,No
4,5,IA,,,,,,#NULL!,,,,,,,#NULL!,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11115,2,IN,,Sport or rec facility / park,Home (own home),0 - 5,No,48,46373,Some college,Employed/self-employed,White,Car,1.0,0,No
11116,3,PA,,Restaurant / eating place,Home (own home),6 - 10,Yes,39,19038,College graduate,Employed/self-employed,White,Car,2.0,#NULL!,No
11117,5,FL,,Other,Home (own home),11 - 20,No,19,33065,Some college,Student,White,Car,2.0,#NULL!,Student
11118,1,PA,,Restaurant / eating place,Home (own home),16 - 20,,#NULL!,19312,College graduate,Retired,White,Car,2.0,#NULL!,No


In [16]:
dui2007_clean = dui2007_data.dropna()
dui2007_clean

Unnamed: 0,Day or Night?,State,Date,From Where,To Where,Trip distance,Alcohol Today,Age Range,Zipcode,Education Level,Employed,Race,Vehicle Type,Gender,Blood Alcohol Content,Student_2007
1968,4,IL,7/22/2007,Work,Home (own home),0 - 5,No,20,60154,Some college,Employed/self-employed,Hispanic,Car,1.0,0,No
1990,3,CA,7/28/2022,Other,Home (own home),6 - 10,Yes,23,90732,High school graduate,Employed/self-employed,White,Car,2.0,0.043,No
1992,1,CA,7/27/2007,Home (own home),Restaurant / eating place,0 - 5,No,31,90503,Some college,Employed/self-employed,White,Minivan,2.0,0,No
1993,3,WI,8/4/2007,Work,Someone else's home,6 - 10,No,46,53018,Some college,Employed/self-employed,White,Car,2.0,0,No
1994,2,IL,7/21/2007,Work,Someone else's home,0 - 5,No,21,60613,College graduate,Employed/self-employed,Asian,Car,2.0,0,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11091,4,IL,7/22/2022,Other,Someone else's home,6 - 10,Yes,46,60077,High school graduate,Employed/self-employed,White,SUV,2.0,0,No
11095,5,NE,8/5/2007,Someone else's home,Someone else's home,0 - 5,Yes,25,68154,College graduate,Employed/self-employed,White,SUV,2.0,0,No
11097,2,AL,7/21/2007,Home (own home),Restaurant / eating place,More than 20,No,21,35150,High school graduate,Employed/self-employed,Black or African American,SUV,1.0,0,No
11101,2,CA,7/28/2007,Someone else's home,Home (own home),More than 20,No,28,90810,Some college,Employed/self-employed,Black or African American,Car,2.0,0.033,No


In [17]:
dui2013_data.rename(columns = {"MyZipCodeIs": "Zipcode", "Time_Stamp":"Date","data_session_original": "Day or Night?","race_n":"Race", 
                               "DriverAgeYears":"Age Range","EducationLevel": "Education Level","AreYouEmployed": "Employed", 
                               "VehicleType": "Vehicle Type", "FromWhere": "From Where", "ToWhere": "To Where","MilesTraveling": "Trip distance", 
                               "AlcoholToday": "Alcohol Today", "bac_from_blood": "Blood Alcohol Content"}, inplace = True)


In [18]:
dui2013_data.add_suffix("_2013")

Unnamed: 0,Day or Night?_2013,State_2013,Date_2013,Race_2013,Vehicle Type_2013,From Where_2013,To Where_2013,Trip distance_2013,Alcohol Today_2013,Age Range_2013,Zipcode_2013,Education Level_2013,Employed_2013,Blood Alcohol Content_2013
0,3,CA,12/07/2013 01:36:27,8,Car,Other,Other,0-5,No,43.0,93003.0,High school graduate,Employed Full-time,#NULL!
1,3,CA,12/07/2013 01:56:35,1,Car,Other,Own home,More than 20,No,21.0,93033.0,Some college - no degree,Employed Full-time,#NULL!
2,3,CA,12/07/2013 02:17:15,8,Car,Other,Own home,0-5,Yes,23.0,93035.0,Some college - no degree,Other __________________________,#NULL!
3,3,CA,12/07/2013 02:34:15,8,Car,Someone else's home,Own home,0-5,No,19.0,93030.0,Some college - no degree,Employed Part-time,0
4,3,CA,12/07/2013 02:41:16,#NULL!,SUV/ Crossover,,,,,,,,,#NULL!
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11317,4,UT,02/08/2014 23:46:43,1,Car,Work,Own home,11-20,,56.0,84010.0,Associate's degree,Employed Full-time,0
11318,5,UT,02/09/2014 01:29:47,1,SUV/ Crossover,Someone else's home,Own home,11-20,,18.0,84054.0,High school graduate,Employed Part-time,0
11319,5,UT,02/09/2014 01:43:25,1,Car,Someone else's home,Own home,11-20,,22.0,84037.0,Some college - no degree,Employed Part-time,0
11320,5,UT,02/09/2014 02:03:32,1,Car,Store or gas station,Other,6-10,No,20.0,84606.0,Some college - no degree,Employed Part-time,0


In [19]:
dui2013_drop = dui2013_data.dropna()

dui2013_clean = dui2013_drop[dui2013_drop["Blood Alcohol Content"] != "#NULL!"]

dui2013_clean
                                

Unnamed: 0,Day or Night?,State,Date,Race,Vehicle Type,From Where,To Where,Trip distance,Alcohol Today,Age Range,Zipcode,Education Level,Employed,Blood Alcohol Content
3,3,CA,12/07/2013 02:34:15,8,Car,Someone else's home,Own home,0-5,No,19.0,93030.0,Some college - no degree,Employed Part-time,0
5,3,CA,12/07/2013 13:28:52,1,Car,Other,Other,More than 20,No,23.0,93033.0,Some college - no degree,Employed Full-time,0
9,1,FL,06/28/2013 10:54:21,1,SUV/ Crossover,Own home,Other,6-10,No,46.0,33125.0,9th - 11th grade,Employed Full-time,0
15,4,FL,06/29/2013 23:13:17,1,Car,Own home,Own home,11-20,Yes,25.0,33146.0,Professional degree,Employed Full-time,0
22,5,FL,06/30/2013 02:30:54,6,Car,Someone else's home,Own home,0-5,No,23.0,33137.0,Associate's degree,Employed Full-time,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11287,4,OK,09/07/2013 23:42:59,1,Pickup,Someone else's home,Own home,6-10,No,27.0,73159.0,High school graduate,Employed Full-time,0
11292,5,OK,09/08/2013 03:08:10,1,Pickup,Restaurant/eating place,Own home,0-5,Did not answer,22.0,73728.0,Professional degree,Other __________________________,0
11299,1,UT,02/07/2014 16:37:07,1,Car,Store or gas station,Own home,0-5,No,22.0,84010.0,High school graduate,Unemployed,0
11314,4,UT,02/08/2014 23:01:07,5,Car,Restaurant/eating place,Own home,0-5,No,40.0,84010.0,High school graduate,Employed Full-time,0


In [58]:
# Combine the data into a single dataset.  
dui_data_complete = pd.merge(dui2013_clean,dui2007_clean, how="left", on="State")

In [59]:
dui_data_complete.head()

Unnamed: 0,Day or Night?_x,State,Date_x,Race_x,Vehicle Type_x,From Where_x,To Where_x,Trip distance_x,Alcohol Today_x,Age Range_x,...,Trip distance_y,Alcohol Today_y,Age Range_y,Zipcode_y,Education Level_y,Employed_y,Race_y,Vehicle Type_y,Gender,Blood Alcohol Content_y
0,3,CA,12/07/2013 02:34:15,8,Car,Someone else's home,Own home,0-5,No,19.0,...,6 - 10,Yes,23,90732,High school graduate,Employed/self-employed,White,Car,2.0,0.043
1,3,CA,12/07/2013 02:34:15,8,Car,Someone else's home,Own home,0-5,No,19.0,...,0 - 5,No,31,90503,Some college,Employed/self-employed,White,Minivan,2.0,0.0
2,3,CA,12/07/2013 02:34:15,8,Car,Someone else's home,Own home,0-5,No,19.0,...,0 - 5,Yes,21,90503,Some college,Employed/self-employed,Hispanic,SUV,2.0,0.038
3,3,CA,12/07/2013 02:34:15,8,Car,Someone else's home,Own home,0-5,No,19.0,...,6 - 10,No,46,90701,High school graduate,Employed/self-employed,Hispanic,Pickup,1.0,0.0
4,3,CA,12/07/2013 02:34:15,8,Car,Someone else's home,Own home,0-5,No,19.0,...,0 - 5,Yes,30,90505,Some college,Employed/self-employed,White,Car,2.0,0.0


In [60]:
# Create an overview table grouped by State
per2007_state = dui2007_clean.set_index("State").groupby(["State"])
per2007_state.head()

Unnamed: 0_level_0,Day or Night?,Date,From Where,To Where,Trip distance,Alcohol Today,Age Range,Zipcode,Education Level,Employed,Race,Vehicle Type,Gender,Blood Alcohol Content
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
IL,4,7/22/2007,Work,Home (own home),0 - 5,No,20,60154,Some college,Employed/self-employed,Hispanic,Car,1.0,0
CA,3,7/28/2022,Other,Home (own home),6 - 10,Yes,23,90732,High school graduate,Employed/self-employed,White,Car,2.0,0.043
CA,1,7/27/2007,Home (own home),Restaurant / eating place,0 - 5,No,31,90503,Some college,Employed/self-employed,White,Minivan,2.0,0
WI,3,8/4/2007,Work,Someone else's home,6 - 10,No,46,53018,Some college,Employed/self-employed,White,Car,2.0,0
IL,2,7/21/2007,Work,Someone else's home,0 - 5,No,21,60613,College graduate,Employed/self-employed,Asian,Car,2.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NM,3,9/15/2007,Someone else's home,Home (own home),6 - 10,No,54,87105,Not a high school graduate,Employed/self-employed,Hispanic,Pickup,2.0,0
TX,3,10/13/2007,Someone else's home,Someone else's home,0 - 5,Yes,23,78221,Some college,Other,Hispanic,Car,1.0,0.065
TX,3,9/22/2007,Someone else's home,Home (own home),0 - 5,No,20,77099,Some college,Employed/self-employed,Black or African American,Car,1.0,0
NJ,3,11/3/2007,Other,Other,More than 20,Yes,27,7276,Some college,Employed/self-employed,Black or African American,Car,2.0,0


In [61]:
per2013_state = dui2013_clean.set_index("State").groupby(["State"])
per2013_state.head()

Unnamed: 0_level_0,Day or Night?,Date,Race,Vehicle Type,From Where,To Where,Trip distance,Alcohol Today,Age Range,Zipcode,Education Level,Employed,Blood Alcohol Content
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
CA,3,12/07/2013 02:34:15,8,Car,Someone else's home,Own home,0-5,No,19.0,93030.0,Some college - no degree,Employed Part-time,0
CA,3,12/07/2013 13:28:52,1,Car,Other,Other,More than 20,No,23.0,93033.0,Some college - no degree,Employed Full-time,0
FL,1,06/28/2013 10:54:21,1,SUV/ Crossover,Own home,Other,6-10,No,46.0,33125.0,9th - 11th grade,Employed Full-time,0
FL,4,06/29/2013 23:13:17,1,Car,Own home,Own home,11-20,Yes,25.0,33146.0,Professional degree,Employed Full-time,0
FL,5,06/30/2013 02:30:54,6,Car,Someone else's home,Own home,0-5,No,23.0,33137.0,Associate's degree,Employed Full-time,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
IA,1,10/18/2013 14:30:24,1,SUV/ Crossover,Own home,Someone else's home,More than 20,No,59.0,56340.0,High school graduate,Employed Full-time,0
IA,1,10/18/2013 15:47:22,1,Car,Own home,Other,More than 20,No,56.0,54801.0,Associate's degree,Employed Full-time,0
IA,2,10/18/2013 22:31:54,1,SUV/ Crossover,Own home,Someone else's home,More than 20,No,49.0,54868.0,Some college - no degree,Employed Full-time,0
IA,2,10/18/2013 22:48:32,1,SUV/ Crossover,Own home,Someone else's home,More than 20,No,27.0,62363.0,Some college - no degree,Employed Full-time,0


In [None]:
#Declare columns/variables to keep and create a summary dataframe
#links to data dictionaries for dui studies (help decided relevant columns)
#https://www.nhtsa.gov/sites/nhtsa.dot.gov/files/documents/2007_nrsdatadictionary.pdf
#https://www.nhtsa.gov/sites/nhtsa.dot.gov/files/documents/nrs_2013datadictionary.pdf

timeday
STATE
NRS_AREA
DATE

dui_summary = pd.DataFrame({})

dui_summary.head()

In [None]:
#census data link to retrieve population information per state
#https://www.census.gov/quickfacts/fact/table/US/PST040219