In [1]:
import os
import pandas as pd
import numpy as np
import operator

In [2]:
# Load the six source CSV files (distraction and accident tables, one pair
# per year) into pandas DataFrames. The first row of each file is the header.

# file locations, relative to the notebook's working directory
dist_2016_path = '2016 - DISTRACT.csv'
dist_2017_path = '2017 - DISTRACT.csv'
dist_2018_path = '2018 - DISTRACT.csv'

acc_2016_path = '2016 - ACCIDENT.csv'
acc_2017_path = '2017 - ACCIDENT.csv'
acc_2018_path = '2018 - ACCIDENT.csv'

# distraction tables
raw2016, raw2017, raw2018 = (
    pd.read_csv(csv_path, header=0)
    for csv_path in (dist_2016_path, dist_2017_path, dist_2018_path)
)

# accident tables
raw2016_acc, raw2017_acc, raw2018_acc = (
    pd.read_csv(csv_path, header=0)
    for csv_path in (acc_2016_path, acc_2017_path, acc_2018_path)
)


In [3]:
# Tag every row with the year of the file it was read from, so the origin
# stays identifiable once the yearly tables are stacked.
# The label is stored as a string, not an integer.

for year_label, dist_frame, acc_frame in (
    ('2016', raw2016, raw2016_acc),
    ('2017', raw2017, raw2017_acc),
    ('2018', raw2018, raw2018_acc),
):
    dist_frame['Year'] = year_label
    acc_frame['Year'] = year_label

In [4]:
# Report the (rows, columns) shape of every yearly table.

print("distracted tables:")
for yearly_frame in (raw2016, raw2017, raw2018):
    print(yearly_frame.shape)

print("accident tables:")
for yearly_frame in (raw2016_acc, raw2017_acc, raw2018_acc):
    print(yearly_frame.shape)

distracted tables:
(82178, 12)
(97657, 12)
(86131, 12)
accident tables:
(46511, 52)
(54969, 52)
(48443, 52)


In [5]:
# Stack the three yearly distracted tables into a single frame.
#
# DataFrame.append is deprecated and was removed in pandas 2.0, so the tables
# are combined with pd.concat instead:
#   - ignore_index=True renumbers the rows 0..n-1 (replaces reset_index(drop=True))
#   - sort=True orders the columns alphabetically, which is the order the old
#     append call produced, and avoids the "sort" FutureWarning it emitted.

dis_raw = pd.concat([raw2016, raw2017, raw2018], ignore_index=True, sort=True)
print(dis_raw.shape)
dis_raw.head(5)

(265966, 12)


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Unnamed: 0,CASENUM,MDRDSTRD,PJ,PSU,PSUSTRAT,PSU_VAR,REGION,STRATUM,URBANICITY,VEH_NO,WEIGHT,Year
0,201600014311,92,388,44,25,44,4,9,2,1,219.795475,2016
1,201600014311,0,388,44,25,44,4,9,2,2,219.795475,2016
2,201600014315,0,388,44,25,44,4,8,2,1,186.634758,2016
3,201600014315,0,388,44,25,44,4,8,2,2,186.634758,2016
4,201600014316,0,388,44,25,44,4,5,2,1,32.22255,2016


## Distracted Table Column Descriptions
- MDRDSTRD: Driver Distracted By (identifies the attribute(s) which best describe this driver’s attention to driving prior to the driver’s realization of an impending critical event or just prior to impact if realization of an impending critical event does not occur.)
- PJ: Police Jurisdiction (identifies the number of the police jurisdiction from which the police crash report was originally sampled.)
- PSU: Primary Sampling Unit (the general geographic location from which the police report was sampled: either a large central city, a county surrounding a city, or a group of counties. Values: 10-83)
- PSUSTRAT: Primary Sampling Unit Stratum (PSUs are grouped into strata to reflect the first stage of the sample selection. This data element is used by statistical software packages that use complex sample design for calculating variances, such as SUDAAN and SAS V9.)
- PSU_VAR: Primary Sampling Unit for Variance Estimation (provides the PSU identifier to be used for variance estimation. values: 10-206)
- STRATUM: Stratum (the category in which the police report was originally listed in the PARSE Program)
- VEH_NO: Vehicle Number (the consecutive number assigned to each vehicle in the case. This data element appears on each vehicle-level data file and is used in conjunction with the CASENUM data element to merge information from vehicle-level data files.)
- WEIGHT: Case Weight (This data element is used to produce national estimates from the data)

In [6]:
# list the column labels of the combined distracted table
dis_raw.columns

Index(['CASENUM', 'MDRDSTRD', 'PJ', 'PSU', 'PSUSTRAT', 'PSU_VAR', 'REGION',
       'STRATUM', 'URBANICITY', 'VEH_NO', 'WEIGHT', 'Year'],
      dtype='object')

In [7]:
# Keep only the columns used in the analysis: the case key, the distraction
# code, the two location attributes, the vehicle number and the year.

distracted_keep = ['CASENUM', 'MDRDSTRD', 'REGION', 'URBANICITY', 'VEH_NO', 'Year']
dis_cut = dis_raw.loc[:, distracted_keep]
dis_cut.head(5)

Unnamed: 0,CASENUM,MDRDSTRD,REGION,URBANICITY,VEH_NO,Year
0,201600014311,92,4,2,1,2016
1,201600014311,0,4,2,2,2016
2,201600014315,0,4,2,1,2016
3,201600014315,0,4,2,2,2016
4,201600014316,0,4,2,1,2016


In [8]:
# Recode MDRDSTRD into a priority rank (higher = more specific) so that the
# later groupby(...).max() keeps the most detailed distraction for each case.
# Raw codes that describe the same distraction share one rank. The text given
# next to each entry is the label the decode cell further down assigns to it.
# Also decode the other integer label columns (URBANICITY, REGION).
distraction_rank = {
    0: 0,     # not distracted
    1: 4,     # looked but did not see
    3: 5,     # by other occupants
    4: 6,     # by a moving object in vehicle
    5: 7,     # cell phone
    6: 7,     # cell phone
    7: 8,     # adjusting audio or climate controls
    9: 8,     # adjusting audio or climate controls
    10: 9,    # reaching
    12: 10,   # distracted from outside
    13: 11,   # eating or drinking
    14: 12,   # smoking related
    # Fix: this cell-phone code used to be ranked 6, the rank already used by
    # raw code 4 (moving object in vehicle), which made the two categories
    # indistinguishable. It now shares rank 7 with the other cell-phone codes.
    15: 7,    # cell phone
    16: 3,    # no driver
    17: 2,    # distracted / unknown reason
    18: 2,    # distracted / unknown reason
    19: 2,    # distracted / unknown reason
    92: 2,    # distracted / unknown reason
    93: 2,    # distracted / unknown reason
    96: 1,    # not reported
    97: 13,   # day dreaming
    98: 2,    # distracted / unknown reason
    99: 0,    # treated as not distracted
}
dis_cut['MDRDSTRD'] = dis_cut['MDRDSTRD'].replace(distraction_rank)
dis_cut['URBANICITY'] = dis_cut['URBANICITY'].replace({1: "Urban", 2: "Rural"})
dis_cut['REGION'] = dis_cut['REGION'].replace({1: "northeast", 2: "midwest", 3: "South", 4: "West"})
print(dis_cut.shape)
dis_cut.head(5)

(265966, 6)


Unnamed: 0,CASENUM,MDRDSTRD,REGION,URBANICITY,VEH_NO,Year
0,201600014311,2,West,Rural,1,2016
1,201600014311,0,West,Rural,2,2016
2,201600014315,0,West,Rural,1,2016
3,201600014315,0,West,Rural,2,2016
4,201600014316,0,West,Rural,1,2016


In [9]:
# Collapse the per-vehicle rows to one row per case. Every remaining column
# is reduced with max(), so MDRDSTRD ends up holding the highest distraction
# rank recorded for the case. CASENUM becomes the index of the result.

cases_grouped = dis_cut.groupby(by=['CASENUM'])
dis_cut = cases_grouped.max()
print(dis_cut.shape)
dis_cut.head(5)

(149923, 5)


Unnamed: 0_level_0,MDRDSTRD,REGION,URBANICITY,VEH_NO,Year
CASENUM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
201600014311,2,West,Rural,2,2016
201600014315,0,West,Rural,2,2016
201600014316,0,West,Rural,1,2016
201600014335,0,West,Rural,2,2016
201600014586,0,West,Rural,1,2016


In [10]:
# Decode the integer distraction ranks into descriptive labels.
#
# Fix: the original dict literal repeated several keys. Python keeps only the
# LAST value for a repeated key, so a late `6:'Cell Phone'` entry silently
# overwrote `6:'By a Moving Object In Vehicle'` and that category never showed
# up in the output. Each rank now appears exactly once.
# NOTE(review): rank 6 is the rank the recode cell gives to raw code 4 (moving
# object in vehicle). Any cell-phone code that the recode cell also sends to
# rank 6 should be sent to rank 7 there instead.

distraction_labels = {
    0: 'Not Distracted',
    1: 'Not Reported',
    2: 'Distracted/Unknown Reason',
    3: 'No Driver',
    4: 'Looked But Did Not See',
    5: 'By Other Occupants',
    6: 'By a Moving Object In Vehicle',
    7: 'Cell Phone',
    8: 'Adjusting Audio or Climate Controls',
    9: 'Reaching ',  # trailing space kept: downstream files already use this exact label
    10: 'Distracted from outside',
    11: 'Eating or Drinking',
    12: 'Smoking Related',
    13: 'Day Dreaming',
}
dis_cut['MDRDSTRD'] = dis_cut['MDRDSTRD'].replace(distraction_labels)
print(dis_cut.shape)
dis_cut.head(5)

(149923, 5)


Unnamed: 0_level_0,MDRDSTRD,REGION,URBANICITY,VEH_NO,Year
CASENUM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
201600014311,Distracted/Unknown Reason,West,Rural,2,2016
201600014315,Not Distracted,West,Rural,2,2016
201600014316,Not Distracted,West,Rural,1,2016
201600014335,Not Distracted,West,Rural,2,2016
201600014586,Not Distracted,West,Rural,1,2016


In [11]:
# Write the grouped, decoded distracted table to Results/distracted_cut_grouped.csv.
# CASENUM is the index of dis_cut, so it is written as the first column.

results_dir = 'Results'
# to_csv does not create missing folders; make sure the output directory exists
# so the cell also works on a fresh checkout.
os.makedirs(results_dir, exist_ok=True)

dis_cut_path = os.path.join(results_dir, 'distracted_cut_grouped.csv')
dis_cut.to_csv(dis_cut_path)

In [12]:
# Stack the three yearly accident tables into a single frame.
#
# DataFrame.append is deprecated and was removed in pandas 2.0, so the tables
# are combined with pd.concat instead:
#   - ignore_index=True renumbers the rows 0..n-1 (replaces reset_index(drop=True))
#   - sort=True keeps the columns in alphabetical order, matching the column
#     listing this notebook shows for acc_raw.

acc_raw = pd.concat([raw2016_acc, raw2017_acc, raw2018_acc], ignore_index=True, sort=True)
print(acc_raw.shape)
acc_raw.head(5)

(149923, 52)


Unnamed: 0,ALCHL_IM,ALCOHOL,CASENUM,CF1,CF2,CF3,DAY_WEEK,EVENT1_IM,HARM_EV,HOUR,...,VE_TOTAL,WEATHER,WEATHER1,WEATHER2,WEATHR_IM,WEIGHT,WKDY_IM,WRK_ZONE,YEAR,Year
0,2,2,201600014311,0,0,0,2,12,12,12,...,2,1,1,0,1,219.795475,2,0,2016,2016
1,2,2,201600014315,0,0,0,2,12,12,19,...,2,1,1,0,1,186.634758,2,0,2016,2016
2,2,2,201600014316,0,0,0,3,43,43,6,...,1,1,1,0,1,32.22255,3,0,2016,2016
3,2,9,201600014335,0,0,0,6,12,12,20,...,2,3,3,0,3,317.681532,6,0,2016,2016
4,2,2,201600014586,0,0,0,2,8,8,8,...,1,1,1,0,1,37.251626,2,0,2016,2016


In [13]:
# list the column labels of the combined accident table
acc_raw.columns

Index(['ALCHL_IM', 'ALCOHOL', 'CASENUM', 'CF1', 'CF2', 'CF3', 'DAY_WEEK',
       'EVENT1_IM', 'HARM_EV', 'HOUR', 'HOUR_IM', 'INT_HWY', 'LGTCON_IM',
       'LGT_COND', 'MANCOL_IM', 'MAN_COLL', 'MAXSEV_IM', 'MAX_SEV', 'MINUTE',
       'MINUTE_IM', 'MONTH', 'NO_INJ_IM', 'NUM_INJ', 'PEDS', 'PERMVIT',
       'PERNOTMVIT', 'PJ', 'PSU', 'PSUSTRAT', 'PSU_VAR', 'PVH_INVL', 'REGION',
       'RELJCT1', 'RELJCT1_IM', 'RELJCT2', 'RELJCT2_IM', 'REL_ROAD', 'SCH_BUS',
       'STRATUM', 'TYP_INT', 'URBANICITY', 'VE_FORMS', 'VE_TOTAL', 'WEATHER',
       'WEATHER1', 'WEATHER2', 'WEATHR_IM', 'WEIGHT', 'WKDY_IM', 'WRK_ZONE',
       'YEAR', 'Year'],
      dtype='object')

## Accident Table Column Descriptions
- VE_TOTAL: Number of total motor vehicles involved in the crash
- VE_FORMS: Number of motor vehicles in transport involved in the crash
- PVH_INVL: Number of Parked/Working Vehicles involved in crash
- PERMVIT: Number of persons in motor vehicles in transport (driver, passenger, etc)
- HARM_EV: First harmful event, describes the first injury- or damage-producing event of the crash
- MAN_COLL: Manner of collision, describes the orientation of 2 motor vehicles in-transport when they're involved in the “First Harmful Event” of crash. 
- TYP_INT: Type of Intersection, identifies & allows separation of various intersection types
- REL_ROAD: Relation to Trafficway, identifies the location of the crash as it relates to its position within or outside trafficway based on 1st harmful event
- LGT_COND: Light condition,  records the type/level of light that existed at the time of the crash 
- WEATHER: Atmospheric Conditions, records the prevailing atmospheric conditions that existed at the time of the crash
- SCH_BUS: School Bus Related, identifies if a school bus/motor vehicle functioning as a school bus is related to the crash
- INT_HWY: Interstate Highway, identifies whether the crash occurred on an interstate highway
- MAX_SEV: Maximum Injury Severity in Crash, records the single most severe injury of all persons involved in the crash 
- NUM_INJ: Number Injured in Crash, records number of persons injured in crash 
- ALCOHOL: Alcohol Involved in Crash, records alcohol use for those involved in the crash

In [19]:
# Keep only the accident columns used in the analysis (case key, crash
# circumstances, counts of vehicles/persons, and the year).

accident_keep = [
    'CASENUM', 'ALCOHOL', 'DAY_WEEK', 'HARM_EV', 'HOUR', 'INT_HWY',
    'LGT_COND', 'MAN_COLL', 'MAX_SEV', 'MONTH', 'NUM_INJ', 'PERMVIT',
    'PVH_INVL', 'REL_ROAD', 'SCH_BUS', 'TYP_INT', 'VE_FORMS', 'VE_TOTAL',
    'WEATHER', 'Year',
]
acc_cut = acc_raw.loc[:, accident_keep]
print(acc_cut.shape)
acc_cut.head(5)

(149923, 20)


Unnamed: 0,CASENUM,ALCOHOL,DAY_WEEK,HARM_EV,HOUR,INT_HWY,LGT_COND,MAN_COLL,MAX_SEV,MONTH,NUM_INJ,PERMVIT,PVH_INVL,REL_ROAD,SCH_BUS,TYP_INT,VE_FORMS,VE_TOTAL,WEATHER,Year
0,201600014311,2,2,12,12,0,1,7,0,1,0,2,0,1,0,1,2,2,1,2016
1,201600014315,2,2,12,19,0,3,2,1,1,4,4,0,1,0,98,2,2,1,2016
2,201600014316,2,3,43,6,0,3,0,3,1,1,1,0,4,0,1,1,1,1,2016
3,201600014335,9,6,12,20,0,3,2,0,1,0,2,0,1,0,98,2,2,3,2016
4,201600014586,2,2,8,8,0,1,0,2,1,1,1,0,1,0,1,1,1,1,2016


In [20]:
# Decode the integer labels of the selected accident columns.
# Each entry maps a column name to its {code: label} table; codes that are
# not listed in a table are left unchanged by replace().

accident_code_labels = {
    'ALCOHOL': {1: "Alcohol Involved", 2: "No Alcohol Involved",
                8: "No Applicable Person", 9: "Unknown"},
    'DAY_WEEK': {1: "Sunday", 2: "Monday", 3: "Tuesday", 4: "Wednesday",
                 5: "Thursday", 6: "Friday", 7: "Saturday", 9: "Unknown"},
    'HOUR': {0: "12AM", 1: "1AM", 2: "2AM", 3: "3AM", 4: "4AM", 5: "5AM",
             6: "6AM", 7: "7AM", 8: "8AM", 9: "9AM", 10: "10AM", 11: "11AM",
             12: "12PM", 13: "1PM", 14: "2PM", 15: "3PM", 16: "4PM", 17: "5PM",
             18: "6PM", 19: "7PM", 20: "8PM", 21: "9PM", 22: "10PM", 23: "11PM"},
    'INT_HWY': {0: "No", 1: "Yes", 9: "Unknown"},
    'LGT_COND': {1: "daylight", 2: "dark not lighted", 3: "dark lighted",
                 4: "dawn", 5: "dusk", 6: "dark unknown lighting", 7: "other",
                 8: "not reported", 9: "unknown"},
    'MAN_COLL': {0: "Not Collision", 1: "Front-to-Rear", 2: "Front-to-front",
                 6: "Angle", 7: "Sideswipe, same direction",
                 8: "Sideswipe, opposite direction", 9: "Rear-to-side",
                 10: "Rear-to-rear", 11: "Other",
                 98: "Not Reported/Unknown", 99: "Not Reported/Unknown"},
    'MAX_SEV': {0: "No Apparent Injury", 1: "Possible Injury",
                2: "Suspected Minor Injury", 3: "Suspected Serious Injury",
                4: "Fatal", 5: "Injured, Severity Unknown",
                6: "Died Prior to Crash", 8: "No Person Involved in Crash",
                9: "Unknown/Not Reported"},
    # only the special codes are decoded; real injury counts stay numeric
    'NUM_INJ': {0: "No Person Inj/Prop Damage Only",
                98: "No Person Involved in Crash",
                99: "All Persons in Crash Unknown if Inj"},
    'REL_ROAD': {1: "on roadway", 2: "on shoulder", 3: "on median",
                 4: "on roadside", 5: "outside traffiway", 6: "off roadway",
                 7: "parking lane/zone", 8: "gore", 10: "separator",
                 11: "continuous left turn lane",
                 98: "Not Reported/Unknown", 99: "Not Reported/Unknown"},
    'SCH_BUS': {0: "No", 1: "Yes"},
    'TYP_INT': {1: "Not an Intersection", 2: "Four-way int.", 3: "T-int.",
                4: "Y-int.", 5: "Traffic circle", 6: "Roundabout",
                7: "Five-point or more", 10: "L-int.",
                98: "Not Reported/Unknown", 99: "Not Reported/Unknown"},
    'WEATHER': {0: "No Add. Condition", 1: "Clear", 2: "Rain",
                3: "Sleet or Hail", 4: "Snow", 5: "Fog,Smog,Smoke",
                6: "Severe Crosswinds", 7: "Blowing Sand, Soil, Dirt",
                8: "Other", 10: "Cloudy", 11: "Blowing Snow",
                12: "Freezing Rain or Drizzle", 98: "Not Reported",
                99: "Unknown"},
}

for column_name, code_to_label in accident_code_labels.items():
    acc_cut[column_name] = acc_cut[column_name].replace(code_to_label)


In [21]:
# Decode HARM_EV (first harmful event) by collapsing its many integer codes
# into four broad categories. The codes are listed per category and then
# expanded into the flat {code: label} dict that replace() expects.
# NOTE(review): code 99 is grouped with the fixed-object collisions; confirm
# against the data manual that this is intended.

harm_event_groups = (
    ("Non-collision harmful events",
     (1, 2, 3, 4, 5, 6, 7, 16, 44, 51, 72)),
    ("Collision w Motor Vehicle in Transport",
     (12, 54, 55)),
    ("Collision w Object not fixed",
     (8, 9, 10, 11, 14, 15, 18, 45, 49, 73, 74, 91)),
    ("Collision w Fixed Object",
     (17, 19, 20, 21, 23, 24, 25, 26, 30, 31, 32, 33, 34, 35, 38, 39, 40,
      41, 42, 43, 46, 48, 50, 52, 53, 57, 58, 59, 93, 99)),
)

harm_event_labels = {
    code: group_label
    for group_label, group_codes in harm_event_groups
    for code in group_codes
}

acc_cut['HARM_EV'] = acc_cut['HARM_EV'].replace(harm_event_labels)

In [24]:
# preview the first rows of the decoded accident table
acc_cut.head(5)

Unnamed: 0,CASENUM,ALCOHOL,DAY_WEEK,HARM_EV,HOUR,INT_HWY,LGT_COND,MAN_COLL,MAX_SEV,MONTH,NUM_INJ,PERMVIT,PVH_INVL,REL_ROAD,SCH_BUS,TYP_INT,VE_FORMS,VE_TOTAL,WEATHER,Year
0,201600014311,No Alcohol Involved,Monday,Collision w Motor Vehicle in Transport,12PM,No,daylight,"Sideswipe, same direction",No Apparent Injury,1,No Person Inj/Prop Damage Only,2,0,on roadway,No,Not an Intersection,2,2,Clear,2016
1,201600014315,No Alcohol Involved,Monday,Collision w Motor Vehicle in Transport,7PM,No,dark lighted,Front-to-front,Possible Injury,1,4,4,0,on roadway,No,Not Reported/Unknown,2,2,Clear,2016
2,201600014316,No Alcohol Involved,Tuesday,Collision w Fixed Object,6AM,No,dark lighted,Not Collision,Suspected Serious Injury,1,1,1,0,on roadside,No,Not an Intersection,1,1,Clear,2016
3,201600014335,Unknown,Friday,Collision w Motor Vehicle in Transport,8PM,No,dark lighted,Front-to-front,No Apparent Injury,1,No Person Inj/Prop Damage Only,2,0,on roadway,No,Not Reported/Unknown,2,2,Sleet or Hail,2016
4,201600014586,No Alcohol Involved,Monday,Collision w Object not fixed,8AM,No,daylight,Not Collision,Suspected Minor Injury,1,1,1,0,on roadway,No,Not an Intersection,1,1,Clear,2016


In [25]:
# compare the shapes of the two tables before merging them
for cut_table in (dis_cut, acc_cut):
    print(cut_table.shape)

(149923, 5)
(149923, 20)


In [26]:
# Left-join the accident attributes onto the per-case distracted table,
# matching on CASENUM. Both inputs carry a 'Year' column, so the result
# holds them as Year_x (distracted) and Year_y (accident).
dis_acc = dis_cut.merge(acc_cut, how="left", on="CASENUM")
dis_acc.shape

(149923, 25)

In [28]:
# preview the first ten rows of the merged distracted + accident table
dis_acc.head(10)

Unnamed: 0,CASENUM,MDRDSTRD,REGION,URBANICITY,VEH_NO,Year_x,ALCOHOL,DAY_WEEK,HARM_EV,HOUR,...,NUM_INJ,PERMVIT,PVH_INVL,REL_ROAD,SCH_BUS,TYP_INT,VE_FORMS,VE_TOTAL,WEATHER,Year_y
0,201600014311,Distracted/Unknown Reason,West,Rural,2,2016,No Alcohol Involved,Monday,Collision w Motor Vehicle in Transport,12PM,...,No Person Inj/Prop Damage Only,2,0,on roadway,No,Not an Intersection,2,2,Clear,2016
1,201600014315,Not Distracted,West,Rural,2,2016,No Alcohol Involved,Monday,Collision w Motor Vehicle in Transport,7PM,...,4,4,0,on roadway,No,Not Reported/Unknown,2,2,Clear,2016
2,201600014316,Not Distracted,West,Rural,1,2016,No Alcohol Involved,Tuesday,Collision w Fixed Object,6AM,...,1,1,0,on roadside,No,Not an Intersection,1,1,Clear,2016
3,201600014335,Not Distracted,West,Rural,2,2016,Unknown,Friday,Collision w Motor Vehicle in Transport,8PM,...,No Person Inj/Prop Damage Only,2,0,on roadway,No,Not Reported/Unknown,2,2,Sleet or Hail,2016
4,201600014586,Not Distracted,West,Rural,1,2016,No Alcohol Involved,Monday,Collision w Object not fixed,8AM,...,1,1,0,on roadway,No,Not an Intersection,1,1,Clear,2016
5,201600014593,Not Distracted,West,Rural,2,2016,No Alcohol Involved,Wednesday,Collision w Motor Vehicle in Transport,3PM,...,No Person Inj/Prop Damage Only,5,0,on roadway,No,Not an Intersection,2,2,Clear,2016
6,201600014603,Not Distracted,West,Rural,4,2016,No Alcohol Involved,Friday,Collision w Motor Vehicle in Transport,1PM,...,1,4,0,on roadway,No,Not an Intersection,4,4,Clear,2016
7,201600014610,Not Distracted,West,Rural,2,2016,No Alcohol Involved,Monday,Collision w Motor Vehicle in Transport,5PM,...,1,2,0,on roadway,No,Not an Intersection,2,2,Clear,2016
8,201600014622,Not Distracted,West,Rural,2,2016,No Alcohol Involved,Thursday,Collision w Motor Vehicle in Transport,6PM,...,No Person Inj/Prop Damage Only,3,0,on roadway,No,Four-way int.,2,2,Clear,2016
9,201600014624,Not Distracted,West,Rural,1,2016,Alcohol Involved,Friday,Non-collision harmful events,12AM,...,1,1,0,on roadway,No,T-int.,1,1,Clear,2016


In [29]:
# Write the merged distracted + accident table to
# Results/accident_distracted_clean.csv (the row index is written as well).

merged_out_dir = 'Results'
# to_csv does not create missing folders; make sure the output directory exists
# so the cell also works on a fresh checkout.
os.makedirs(merged_out_dir, exist_ok=True)

dis_acc_path = os.path.join(merged_out_dir, 'accident_distracted_clean.csv')
dis_acc.to_csv(dis_acc_path)