In [63]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from pprint import pprint
import requests
import time
from scipy.stats import linregress
import os
import csv

#Declare columns/variables to keep and create a summary dataframe
#links to data dictionaries for dui studies (help decided relevant columns)
#https://www.nhtsa.gov/sites/nhtsa.dot.gov/files/documents/2007_nrsdatadictionary.pdf
#https://www.nhtsa.gov/sites/nhtsa.dot.gov/files/documents/nrs_2013datadictionary.pdf

#census data link to retrieve population information per state
#https://www.census.gov/quickfacts/fact/table/US/PST040219

# Output File (CSV), if needed
output_2007data_file = "output_data_file/dui2007stats.csv"
output_2013data_file = "output_data_file/dui2013stats.csv"

# File to Load (**Remember to Change These file names to DUI saved file names and the path where they are finalized)
dui_2013_data = "nrs_2013_combined.csv"
dui_2007_data = "nrs_2007_final.csv"

col2007_list = ["STATE", "DATE","NRS_ZIPCODE","session","NRS_RACE","NRS_DSEX","NRS_AGE","NRS_SCHOOL","NRS_EMPLOY","NRS_VTYPE",
            "NRS_FROM","NRS_HEADED", "NRS_BETMI", "NRS_TODRK","resulbac2"]

col2013_list = ["State","MyZipCodeIs","Time_Stamp","data_session_original","race_n", "DriverAgeYears","EducationLevel","AreYouEmployed",
                "VehicleType", "FromWhere", "ToWhere","MilesTraveling", "AlcoholToday", "bac_from_blood"]

# Read DUI 2007 and 2013 Data Files and store into Pandas DataFrames(can't start with a number)
dui2013_data = pd.read_csv(dui_2013_data,low_memory=False, usecols=col2013_list)
dui2007_data = pd.read_csv(dui_2007_data,low_memory=False, usecols=col2007_list)

In [64]:
#Break out 2007 NRS_EMPLOY column to match 2013 data that is 2 columns of same info

Student_2007 = []

dui2007_data["Student_2007"] = dui2007_data.apply(lambda row: "Student" if row.NRS_EMPLOY == "Student" else "No", axis = 1)

In [65]:
#Rename column headers for uniformity

dui2007_data.rename(columns = {"STATE": "State_2007", "NRS_ZIPCODE": "Zipcode_2007", "DATE":"Date_2007","session": "Day or Night?_2007","NRS_RACE":"Race_2007", 
                               "NRS_DSEX":"Gender_2007", "NRS_AGE":"Age Range_2007","NRS_SCHOOL": "Education Level_2007","NRS_EMPLOY": "Employed_2007", 
                               "NRS_VTYPE": "Vehicle Type_2007", "NRS_FROM": "From Where_2007", "NRS_HEADED": "To Where_2007","NRS_BETMI": "Trip distance_2007", 
                               "NRS_TODRK": "Alcohol Today_2007", "resulbac2": "Blood Alcohol Content_2007"}, inplace = True)

In [66]:
#Clean up null values

dui2007_clean = dui2007_data.dropna()

In [67]:
dui2007_clean

Unnamed: 0,Day or Night?_2007,State_2007,Date_2007,From Where_2007,To Where_2007,Trip distance_2007,Alcohol Today_2007,Age Range_2007,Zipcode_2007,Education Level_2007,Employed_2007,Race_2007,Vehicle Type_2007,Gender_2007,Blood Alcohol Content_2007,Student_2007
1968,4,IL,7/22/2007,Work,Home (own home),0 - 5,No,20,60154,Some college,Employed/self-employed,Hispanic,Car,1.0,0,No
1990,3,CA,7/28/2022,Other,Home (own home),10-Jun,Yes,23,90732,High school graduate,Employed/self-employed,White,Car,2.0,0.043,No
1992,1,CA,7/27/2007,Home (own home),Restaurant / eating place,0 - 5,No,31,90503,Some college,Employed/self-employed,White,Minivan,2.0,0,No
1993,3,WI,8/4/2007,Work,Someone else's home,10-Jun,No,46,53018,Some college,Employed/self-employed,White,Car,2.0,0,No
1994,2,IL,7/21/2007,Work,Someone else's home,0 - 5,No,21,60613,College graduate,Employed/self-employed,Asian,Car,2.0,0,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11091,4,IL,7/22/2022,Other,Someone else's home,10-Jun,Yes,46,60077,High school graduate,Employed/self-employed,White,SUV,2.0,0,No
11095,5,NE,8/5/2007,Someone else's home,Someone else's home,0 - 5,Yes,25,68154,College graduate,Employed/self-employed,White,SUV,2.0,0,No
11097,2,AL,7/21/2007,Home (own home),Restaurant / eating place,More than 20,No,21,35150,High school graduate,Employed/self-employed,Black or African American,SUV,1.0,0,No
11101,2,CA,7/28/2007,Someone else's home,Home (own home),More than 20,No,28,90810,Some college,Employed/self-employed,Black or African American,Car,2.0,0.033,No


In [68]:
#Rename column headers for uniformity

dui2013_data.rename(columns = {"MyZipCodeIs": "Zipcode_2013", "Time_Stamp":"Date_2013","data_session_original": "Day or Night?_2013","race_n":"Race_2013", 
                               "DriverAgeYears":"Age Range_2013","EducationLevel": "Education Level_2013","AreYouEmployed": "Employed_2013", 
                               "VehicleType": "Vehicle Type_2013", "FromWhere": "From Where_2013", "ToWhere": "To Where_2013","MilesTraveling": "Trip distance_2013", 
                               "AlcoholToday": "Alcohol Today_2013", "bac_from_blood": "Blood Alcohol Content_2013"}, inplace = True)

In [69]:
#Clean up null values

dui2013_drop = dui2013_data.dropna()

dui2013_clean = dui2013_drop[dui2013_drop["Blood Alcohol Content"] != "#NULL!"]                              

KeyError: 'Blood Alcohol Content'

In [70]:
dui2013_clean

Unnamed: 0,Day or Night?,State,Date,Race,Vehicle Type,From Where,To Where,Trip distance,Alcohol Today,Age Range,Zipcode,Education Level,Employed,Blood Alcohol Content
3,3,CA,12/7/2013 2:34,8,Car,Someone else's home,Own home,0-5,No,19.0,93030.0,Some college - no degree,Employed Part-time,0
5,3,CA,12/7/2013 13:28,1,Car,Other,Other,More than 20,No,23.0,93033.0,Some college - no degree,Employed Full-time,0
9,1,FL,6/28/2013 10:54,1,SUV/ Crossover,Own home,Other,10-Jun,No,46.0,33125.0,9th - 11th grade,Employed Full-time,0
15,4,FL,6/29/2013 23:13,1,Car,Own home,Own home,20-Nov,Yes,25.0,33146.0,Professional degree,Employed Full-time,0
22,5,FL,6/30/2013 2:30,6,Car,Someone else's home,Own home,0-5,No,23.0,33137.0,Associate's degree,Employed Full-time,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11287,4,OK,9/7/2013 23:42,1,Pickup,Someone else's home,Own home,10-Jun,No,27.0,73159.0,High school graduate,Employed Full-time,0
11292,5,OK,9/8/2013 3:08,1,Pickup,Restaurant/eating place,Own home,0-5,Did not answer,22.0,73728.0,Professional degree,Other __________________________,0
11299,1,UT,2/7/2014 16:37,1,Car,Store or gas station,Own home,0-5,No,22.0,84010.0,High school graduate,Unemployed,0
11314,4,UT,2/8/2014 23:01,5,Car,Restaurant/eating place,Own home,0-5,No,40.0,84010.0,High school graduate,Employed Full-time,0


In [71]:
dui2007_clean.to_csv(output_2007data_file, index=False)
dui2013_clean.to_csv(output_2013data_file, index=False)

In [23]:
# Combine the data into a single dataset

dui_data_complete = pd.merge(dui2013_clean,dui2007_clean, how="left", on="State")

In [10]:
dui_data_complete.head()

Unnamed: 0,Day or Night?_x,State,Date_x,Race_x,Vehicle Type_x,From Where_x,To Where_x,Trip distance_x,Alcohol Today_x,Age Range_x,...,Alcohol Today_y,Age Range_y,Zipcode_y,Education Level_y,Employed_y,Race_y,Vehicle Type_y,Gender,Blood Alcohol Content_y,Student_2007
0,3,CA,12/7/2013 2:34,8,Car,Someone else's home,Own home,0-5,No,19.0,...,Yes,23,90732,High school graduate,Employed/self-employed,White,Car,2.0,0.043,No
1,3,CA,12/7/2013 2:34,8,Car,Someone else's home,Own home,0-5,No,19.0,...,No,31,90503,Some college,Employed/self-employed,White,Minivan,2.0,0.0,No
2,3,CA,12/7/2013 2:34,8,Car,Someone else's home,Own home,0-5,No,19.0,...,Yes,21,90503,Some college,Employed/self-employed,Hispanic,SUV,2.0,0.038,No
3,3,CA,12/7/2013 2:34,8,Car,Someone else's home,Own home,0-5,No,19.0,...,No,46,90701,High school graduate,Employed/self-employed,Hispanic,Pickup,1.0,0.0,No
4,3,CA,12/7/2013 2:34,8,Car,Someone else's home,Own home,0-5,No,19.0,...,Yes,30,90505,Some college,Employed/self-employed,White,Car,2.0,0.0,No


In [11]:
# Create an overview table grouped by State

per2007_state = dui2007_clean.set_index("State").groupby(["State"])
per2007_state.head()

Unnamed: 0_level_0,Day or Night?,Date,From Where,To Where,Trip distance,Alcohol Today,Age Range,Zipcode,Education Level,Employed,Race,Vehicle Type,Gender,Blood Alcohol Content,Student_2007
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
IL,4,7/22/2007,Work,Home (own home),0 - 5,No,20,60154,Some college,Employed/self-employed,Hispanic,Car,1.0,0,No
CA,3,7/28/2022,Other,Home (own home),10-Jun,Yes,23,90732,High school graduate,Employed/self-employed,White,Car,2.0,0.043,No
CA,1,7/27/2007,Home (own home),Restaurant / eating place,0 - 5,No,31,90503,Some college,Employed/self-employed,White,Minivan,2.0,0,No
WI,3,8/4/2007,Work,Someone else's home,10-Jun,No,46,53018,Some college,Employed/self-employed,White,Car,2.0,0,No
IL,2,7/21/2007,Work,Someone else's home,0 - 5,No,21,60613,College graduate,Employed/self-employed,Asian,Car,2.0,0,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NM,3,9/15/2007,Someone else's home,Home (own home),10-Jun,No,54,87105,Not a high school graduate,Employed/self-employed,Hispanic,Pickup,2.0,0,No
TX,3,10/13/2007,Someone else's home,Someone else's home,0 - 5,Yes,23,78221,Some college,Other,Hispanic,Car,1.0,0.065,No
TX,3,9/22/2007,Someone else's home,Home (own home),0 - 5,No,20,77099,Some college,Employed/self-employed,Black or African American,Car,1.0,0,No
NJ,3,11/3/2007,Other,Other,More than 20,Yes,27,7276,Some college,Employed/self-employed,Black or African American,Car,2.0,0,No


In [12]:
# Create an overview table grouped by State

per2013_state = dui2013_clean.set_index("State").groupby(["State"])
per2013_state.head()

Unnamed: 0_level_0,Day or Night?,Date,Race,Vehicle Type,From Where,To Where,Trip distance,Alcohol Today,Age Range,Zipcode,Education Level,Employed,Blood Alcohol Content
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
CA,3,12/7/2013 2:34,8,Car,Someone else's home,Own home,0-5,No,19.0,93030.0,Some college - no degree,Employed Part-time,0
CA,3,12/7/2013 13:28,1,Car,Other,Other,More than 20,No,23.0,93033.0,Some college - no degree,Employed Full-time,0
FL,1,6/28/2013 10:54,1,SUV/ Crossover,Own home,Other,10-Jun,No,46.0,33125.0,9th - 11th grade,Employed Full-time,0
FL,4,6/29/2013 23:13,1,Car,Own home,Own home,20-Nov,Yes,25.0,33146.0,Professional degree,Employed Full-time,0
FL,5,6/30/2013 2:30,6,Car,Someone else's home,Own home,0-5,No,23.0,33137.0,Associate's degree,Employed Full-time,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
IA,1,10/18/2013 14:30,1,SUV/ Crossover,Own home,Someone else's home,More than 20,No,59.0,56340.0,High school graduate,Employed Full-time,0
IA,1,10/18/2013 15:47,1,Car,Own home,Other,More than 20,No,56.0,54801.0,Associate's degree,Employed Full-time,0
IA,2,10/18/2013 22:31,1,SUV/ Crossover,Own home,Someone else's home,More than 20,No,49.0,54868.0,Some college - no degree,Employed Full-time,0
IA,2,10/18/2013 22:48,1,SUV/ Crossover,Own home,Someone else's home,More than 20,No,27.0,62363.0,Some college - no degree,Employed Full-time,0
