In [1]:
# Import Modules
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

#modeling tools
import statsmodels.api as sm

import lightgbm as lgb

from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, confusion_matrix, auc, roc_auc_score, roc_curve

%matplotlib inline
plt.style.use('dark_background')

%load_ext autoreload
%autoreload 2

pd.set_option('display.min_row', 25)
pd.set_option('display.max_column', 200)
pd.set_option('display.max_colwidth', 300)

In [2]:
# Load the processed collision table (SDOT records merged with WSDOT, per the
# file name) into a DataFrame.
# low_memory=False makes pandas parse the file in a single pass instead of in
# chunks, which avoids mixed-dtype inference on columns of this wide file.
all_sdot = pd.read_csv(
    filepath_or_buffer='../data/processed/all_sdot_merged_with_wsdot_080820.csv',
    low_memory=False,
)

In [3]:
# Preview the first five rows to sanity-check the load and inspect the columns.
all_sdot.head(n=5)

Unnamed: 0,longitude,latitude,OBJECTID,INCKEY,COLDETKEY,REPORTNO,STATUS,ADDRTYPE,INTKEY,LOCATION,EXCEPTRSNCODE,EXCEPTRSNDESC,SEVERITYCODE,SEVERITYDESC,COLLISIONTYPE,PERSONCOUNT,PEDCOUNT,PEDCYLCOUNT,VEHCOUNT,INJURIES,SERIOUSINJURIES,FATALITIES,INCDATE,INCDTTM,JUNCTIONTYPE,SDOT_COLCODE,SDOT_COLDESC,INATTENTIONIND,UNDERINFL,WEATHER_x,ROADCOND,LIGHTCOND,SDOTCOLNUM,ST_COLCODE,ST_COLDESC,SEGLANEKEY,CROSSWALKKEY,HITPARKEDCAR,JURISDICTION,COUNTY,CITY,INDEXED PRIMARY TRAFFICWAY,PRIMARY TRAFFICWAY,BLOCK NUMBER,MILEPOST,INTERSECTING TRAFFICWAY,DIST FROM REF POINT,MI or FT,COMP DIR FROM REF POINT,REFERENCE POINT NAME,DATE,YEAR,QTR #,MONTH,FULL TIME,MOST SEVERE SOBRIETY TYPE,MOST SEVERE INJURY TYPE,TOTAL CRASHES,FATAL CRASHES,SERIOUS INJURY CRASHES,EVIDENT INJURY CRASHES,POSSIBLE INJURY CRASHES,PDO - NO INJURY CRASHES,TOTAL FATALITIES,TOTAL SERIOUS INJURIES,TOTAL EVIDENT INJURIES,TOTAL POSSIBLE INJURIES,TOTAL VEHICLES,TOTAL PEDESTRIANS INVOLVED,TOTAL BICYCLISTS INVOLVED,FIRST COLLISION TYPE / OBJECT STRUCK,SECOND COLLISION TYPE / OBJECT STRUCK,JUNCTION RELATIONSHIP,WEATHER_y,ROAD SURFACE CONDITIONS,LIGHTING CONDITIONS,ARM,History/Suspense Ind,"FIRST IMPACT LOCATION (City St, County Rd & Misc 2010 forward, State Routes indefinite)",SECOND IMPACT LOCATION (2010 forward for All Jurisdictions),"SR ONLY, VEH 1 MILEPOST DIRECTION","SR ONLY, VEH 1 MOVEMENT","SR ONLY, VEH 2 MILEPOST DIRECTION","SR ONLY, VEH 2 MOVEMENT",VEH 1 TYPE,VEH 1 MAKE,VEH 1 MODEL,VEH 1 STYLE,VEH 1 ACTION,VEH 1 COMPASS DIRECTION FROM,VEH 1 COMPASS DIRECTION TO,VEH 1 USAGE,VEH 1 TRAFFIC CONTROL,VEH 1 POSTED SPEED,VEH 1 CONDITION 1,VEH 2 TYPE,VEH 2 MAKE,VEH 2 MODEL,VEH 2 STYLE,VEH 2 ACTION,VEH 2 COMPASS DIRECTION FROM,VEH 2 COMPASS DIRECTION TO,VEH 2 USAGE,VEH 2 TRAFFIC CONTROL,VEH 2 POSTED SPEED,VEH 2 CONDITION 1,VEH 3 TYPE,VEH 3 MAKE,VEH 3 STYLE,VEH 3 MODEL,VEH 3 ACTION,VEH 1 MV DRIVER AGE,VEH 1 MV DRIVER GENDER,VEH 1 MV DRIVER INJURY TYPE,VEH 1 MV DRIVER CONTRIBUTING CIRCUMSTANCE 1,VEH 1 MV DRIVER CONTRIBUTING 
CIRCUMSTANCE 2,VEH 1 MV DRIVER RESTRAINT,VEH 1 MV DRIVER EJECTION,VEH 1 MV DRIVER MISC ACTION 1,VEH 1 MV DRIVER SEQUENCE 1,VEH 1 MV DRIVER SEQUENCE 2,VEH 2 MV DRIVER AGE,VEH 2 MV DRIVER GENDER,VEH 2 MV DRIVER INJURY TYPE,VEH 2 MV DRIVER CONTRIBUTING CIRCUMSTANCE 1,VEH 2 MV DRIVER RESTRAINT,VEH 2 MV DRIVER EJECTION,VEH 2 MV DRIVER MISC ACTION 1,VEH 2 MV DRIVER SEQUENCE 1,VEH 3 MV DRIVER SEQUENCE 1,HIT & RUN,NON-REPORTABLE,WA STATE PLANE SOUTH - X,WA STATE PLANE SOUTH - Y,TARGET ZERO Reportable Indicator,TZ Work Zone Related Collision Indicator,TZ Intersection Related Collision Indicator,TZ Wrong Way Vehicle Indicator,TZ Wrong Way Vehicle Count,TZ Alcohol Impaired Involved Person Indicator,TZ Alcohol Impaired Involved Person Count,TZ Drug Impaired Involved Person Indicator,TZ Drug Impaired Involved Person Count,TZ Impaired Involved Person Indicator,TZ Impaired Involved Person Count,TZ Drinking Involved Person Indicator,TZ Drinking Involved Person Count,TZ Speeding Driver Indicator,TZ Speeding Driver Count,TZ Distracted Driver Indicator,TZ Distracted Driver Count,TZ Unrestrained Occupant Indicator,TZ Unrestrained Occupant Count,TZ Unlicensed Driver Indicator,TZ Unlicensed Driver Count,TZ Drowsy Driver Indicator,TZ Drowsy Driver Count,TZ Run Off The Road Indicator,TZ Non Junction Opposite Direction Crash Indicator,TZ Lane Departure Indicator,TZ MV Driver 16 To 25 Years Involved Person Indicator,TZ MV Driver 16 To 25 Years Involved Person Count,TZ MV Driver 65 Plus Years Involved Person Indicator,TZ MV Driver 65 Plus Years Involved Person Count,TZ MV Driver 70 Plus Years Involved Person Indicator,TZ MV Driver 70 Plus Years Involved Person Count,TZ Wildlife Involved Indicator,TZ Motorcycle Collision Indicator,TZ Total Motorcycles Involved Count,TZ School Bus Involved Indicator,TZ School Bus Involved Count,TZ Heavy Vehicle Crash Indicator,TZ Heavy Vehicle Crash Count,TZ Vehicle Train Crash Indicator,TZ Catostrophic Event Indicator,TZ Fatal Crash Indicator,TZ Fatality 
Count,TZ Suspected Serious Injury Crash Indicator,TZ Suspected Serious Injury Count,TZ Pedestrian Involved Indicator,TZ Pedacyclist Involved Indicator
0,-122.309876,47.639559,1,74300,74300,2827840,Matched,Block,,E LYNN ST BETWEEN BOYER AVE E AND 18TH AVE E,,,1,Property Damage Only Collision,Parked Car,2,0,0,2,0,0,0,2007/09/16 00:00:00+00,2007-09-16 18:53:00,Mid-Block (but intersection related),13.0,"MOTOR VEHICLE STRUCK MOTOR VEHICLE, LEFT SIDE AT ANGLE",Y,0,Clear,Dry,Daylight,7259018.0,32,One parked--one moving,0,0,N,City Street,King,Seattle,E LYNN ST,E LYNN ST,1627.0,,,30.0,F,W,18 AV E,9/16/2007,2007,Q3,Sep,6:53 PM,Unknown,No Apparent Injury,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,One parked--one moving,,Not at Intersection and Not Related,Clear or Partly Cloudy,Dry,Daylight,,No,,,,,,,Truck & Trailer,INTERNATIONAL,4000,CONVENTIONAL CAB,Making Left Turn,West,North,,No Traffic Control,30.0,No Defects,"Pickup,Panel Truck or Vanette under 10,000 lb",FORD,EXPLORER,WAGON 4 DOOR,"Legally Parked, Unoccupied",,,"Vanette Under 10,000 lb",,,,,,,,,,,,Inattention,,,,Hit and run,Collision Involving Parked Vehicle,,,,,,,,,Collision Involving Motor Vehicle in Transport,,Yes,Reportable,,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,-122.342572,47.607795,2,56900,56900,2625188,Matched,Block,,ALASKAN WAY BETWEEN ALASKAN E RDWY WAY AND PIKE ST,,,1,Property Damage Only Collision,Parked Car,2,0,0,2,0,0,0,2006/11/24 00:00:00+00,2006-11-24 14:45:00,Mid-Block (not related to intersection),13.0,"MOTOR VEHICLE STRUCK MOTOR VEHICLE, LEFT SIDE AT ANGLE",,0,Clear,Dry,Daylight,6328024.0,32,One parked--one moving,0,0,N,City Street,King,Seattle,ALASKAN WY,ALASKAN WAY,,,,,,W,PIKE PLACE,11/24/2006,2006,Q4,Nov,2:45 PM,Unknown,No Apparent Injury,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,One parked--one moving,,Not at Intersection and Not Related,Clear or Partly Cloudy,Dry,Daylight,,No,,,,,,,"Pickup,Panel Truck or Vanette under 10,000 lb",DODGE,GRAND CARAVAN,SPORT VAN,Going Straight Ahead,South,North,"Vanette Under 10,000 lb",No Traffic Control,,No Defects,"Pickup,Panel Truck or Vanette under 10,000 lb",HONDA,CR-V,WAGON 4 DOOR,"Legally Parked, Unoccupied",,,"Vanette Under 10,000 lb",,,,,,,,,,,,Other Contributing Circ Not Listed,,,,Hit and run,Collision Involving Parked Vehicle,,,,,,,,,Collision Involving Motor Vehicle in Transport,,Yes,Reportable,,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-122.319539,47.619917,3,61600,61600,2617309,Matched,Intersection,29053.0,10TH AVE E AND E JOHN ST,,,1,Property Damage Only Collision,Angles,2,0,0,2,0,0,0,2006/12/27 00:00:00+00,2006-12-27 12:28:00,At Intersection (intersection related),11.0,"MOTOR VEHICLE STRUCK MOTOR VEHICLE, FRONT END AT ANGLE",Y,0,Overcast,Dry,Daylight,6361017.0,10,Entering at angle,0,0,N,City Street,King,Seattle,E JOHN ST,E JOHN ST,,,10 AVE E,,,,,12/27/2006,2006,Q4,Dec,12:28 PM,Had NOT Been Drinking,No Apparent Injury,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,Entering at angle,,At Intersection and Related,Overcast,Dry,Daylight,,No,,,,,,,"Pickup,Panel Truck or Vanette under 10,000 lb",TOYOTA,4RUNNER,WAGON 4 DOOR,Backing,East,Vehicle Backing,"Vanette Under 10,000 lb",Unknown,30.0,No Defects,Passenger Car,AUDI,A4,Sedan 4 Dr,Making Right Turn,North,West,,Stop Sign,30.0,No Defects,,,,,,51.0,Male,No Apparent Injury,Inattention,Improper Backing,Lap & Shoulder Used,Unknown if Ejected,,Collision Involving Motor Vehicle in Transport,,41.0,Female,No Apparent Injury,,Lap & Shoulder Used,Unknown if Ejected,,Collision Involving Motor Vehicle in Transport,,No,Reportable,,,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,-122.344997,47.725036,4,325640,327140,E955226,Matched,Block,,AURORA AVE N BETWEEN N 130TH ST AND N 135TH ST,,,2,Injury Collision,Angles,2,0,0,2,1,0,0,2019/08/19 00:00:00+00,2019-08-19 16:35:00,Driveway Junction,11.0,"MOTOR VEHICLE STRUCK MOTOR VEHICLE, FRONT END AT ANGLE",,N,Clear,Dry,Daylight,,10,Entering at angle,0,0,N,State Route,King,Seattle,99,99,,40.08,,,,,,8/19/2019,2019,Q3,Aug,4:35 PM,Had NOT Been Drinking,Possible Injury,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,Entering at angle,,At Driveway,Clear or Partly Cloudy,Dry,Daylight,33.66,No,Lane 3 Increasing Milepost,,Entering major roadway from the right,Turning Left,Increasing milepost of major roadway,Moving Straight,Passenger Car,TOYOTA,CAMRY,Sedan,Making Left Turn,East,South,,No Traffic Control,,No Defects,Motorcycle,YAMAHA,FZ6,Motorcycle (MT),Going Straight Ahead,South,North,,No Traffic Control,,No Defects,,,,,,24.0,Male,No Apparent Injury,Did Not Grant RW to Vehicle,,Lap & Shoulder Used,Not Ejected,,Collision Involving Motor Vehicle in Transport,,24.0,Male,Possible Injury,,No Restraints Used,Totally Ejected,,Collision Involving Motor Vehicle in Transport,,No,Reportable,1186303.48,878026.49,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-122.325059,47.598352,5,325615,327115,E978670,Matched,Intersection,30515.0,MAYNARD AVE S AND S KING ST,,,2,Injury Collision,Pedestrian,3,2,0,1,1,0,0,2019/10/28 00:00:00+00,2019-10-28 18:55:00,At Intersection (intersection related),24.0,MOTOR VEHCILE STRUCK PEDESTRIAN,Y,N,Clear,Dry,Daylight,,0,Vehicle going straight hits pedestrian,0,0,N,City Street,King,Seattle,S KING ST,S KING ST,0.0,,MAYNARD AVE S,,,,,10/28/2019,2019,Q4,Oct,6:55 PM,Had NOT Been Drinking,Suspected Minor Injury,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,2.0,0.0,Vehicle going straight hits pedestrian,Vehicle going straight hits pedestrian,At Intersection and Related,Clear or Partly Cloudy,Dry,Daylight,,No,Lane of Primary Trafficway,Lane of Primary Trafficway,,,,,Passenger Car,TOYOTA,PRIUS,Hatchback,Going Straight Ahead,West,East,,Stop Sign,,No Defects,,,,,,,,,,,,,,,,,62.0,Male,No Apparent Injury,Inattention,,Lap & Shoulder Used,Not Ejected,,Collision Involving Pedestrian,Collision Involving Pedestrian,,,,,,,,,,No,Reportable,1190125.83,831250.22,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [17]:
# Report the size of the loaded table as (rows, columns).
row_count, column_count = all_sdot.shape
(row_count, column_count)

(220808, 181)

7403