# Standing General Order on Crash Reporting

##### The following data was taken from the "Standing General Order on Crash Reporting" that was published on 08-15-2024

In [1]:
from sqlalchemy import create_engine, text, inspect, func
import sqlite3

# ORM imports
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy import Column, Integer, String, Float, Boolean
from sqlalchemy.orm import Session
from sqlalchemy.ext.declarative import declarative_base

# API
import requests
import json

# Data Science and Visualization
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy import stats
import datetime as dt

In [2]:
# CSV import for the crashes table.
# The path is relative, so the notebook must be run from the folder that
# contains the Datasets/ directory.
filepath = "Datasets/SGO-2021-01_Incident_Reports_ADS.csv"
df = pd.read_csv(filepath)
df.head()

Unnamed: 0,Report ID,Report Version,Reporting Entity,Report Type,Report Month,Report Year,Report Submission Date,VIN,VIN - Unknown,Serial Number,...,Investigating Officer Name,Inv. Officer Name - Unknown,Investigating Officer Phone,Inv. Officer Phone - Unknown,Investigating Officer Email,Inv. Officer Email - Unknown,Within ODD?,Within ODD? - CBI,Narrative,Narrative - CBI?
0,30270-7157,3,Waymo LLC,Update,,,AUG-2024,SADHW2S13M1,,,...,,,,,,,Yes,,Waymo is submitting a second update to the rep...,
1,30270-8406,1,Waymo LLC,Monthly,7.0,2024.0,AUG-2024,SADHW2S1XR1,,,...,,,,,,,Yes,,"On July [XXX], 2024 at 4:58 AM PT a Waymo Auto...",
2,30270-8405,1,Waymo LLC,Monthly,7.0,2024.0,AUG-2024,SADHW2S17R1,,,...,,,,,,,Yes,,"On July [XXX], 2024 at 11:29 AM MT a Waymo Aut...",
3,30270-8404,1,Waymo LLC,Monthly,7.0,2024.0,AUG-2024,SADHW2S1XR1,,,...,,,,,,,Yes,,"On July [XXX], 2024 at 4:16 PM PT a Waymo Auto...",
4,30270-8403,1,Waymo LLC,Monthly,7.0,2024.0,AUG-2024,SADHW2S19R1,,,...,,,,,,,Yes,,"On July [XXX], 2024 at 11:06 AM CST a Waymo Au...",


In [3]:
# List the raw column labels (137 in this extract) to decide which fields to keep.
df.columns

Index(['Report ID', 'Report Version', 'Reporting Entity', 'Report Type',
       'Report Month', 'Report Year', 'Report Submission Date', 'VIN',
       'VIN - Unknown', 'Serial Number',
       ...
       'Investigating Officer Name', 'Inv. Officer Name - Unknown',
       'Investigating Officer Phone', 'Inv. Officer Phone - Unknown',
       'Investigating Officer Email', 'Inv. Officer Email - Unknown',
       'Within ODD?', 'Within ODD? - CBI', 'Narrative', 'Narrative - CBI?'],
      dtype='object', length=137)

In [4]:
# Columns removed from the raw report before analysis: report administration,
# vehicle identifiers, ADAS/ADS version fields, investigator contact details,
# data-availability/source flags, contact-area flags and the "- Unknown"
# companion columns. Each label appears exactly once (117 labels); with the
# 137 raw columns this leaves the 20 columns used in the rest of the notebook.
# DataFrame.drop raises KeyError if any label is missing from the CSV, which
# is the desired loud failure if the file schema changes.
cols_to_drop = [
    'Report ID', 'Report Version', 'Report Month', 'Report Year', 'Reporting Entity', 'Report Submission Date',
    'VIN', 'Same Vehicle ID', 'Mileage', 'VIN - Unknown', 'Serial Number',
    'Model - Unknown', 'Model Year - Unknown',
    'Report Type', 'Mileage - Unknown', 'Driver / Operator Type',
    'ADAS/ADS System Version', 'ADAS/ADS System Version - Unk', 'ADAS/ADS System Version CBI',
    'ADAS/ADS Hardware Version', 'ADAS/ADS Hardware Version - Unk',
    'Investigating Officer Name', 'Inv. Officer Name - Unknown',
    'Investigating Officer Phone', 'Inv. Officer Phone - Unknown',
    'Investigating Officer Email', 'Inv. Officer Email - Unknown',
    'Within ODD?', 'Within ODD? - CBI', 'Narrative', 'Narrative - CBI?',
    'ADAS/ADS Hardware Version CBI',
    'ADAS/ADS Software Version', 'ADAS/ADS Software Version - Unk', 'ADAS/ADS Software Version CBI',
    'Other Reporting Entities?', 'Other Reporting Entities? - Unk', 'Other Reporting Entities? - NA',
    'Federal Regulatory Exemption?',
    'Data Availability - Telematics', 'Data Availability - Complaints',
    'Data Availability - Video', 'Data Availability - Other',
    'Data Availability - No Data', 'Data Availability - Unknown',
    'Law Enforcement Investigating?', 'Investigating Agency', 'Investigating Agency - Unknown',
    'Rep Ent Or Mfr Investigating?', 'Other Federal Reg. Exemption',
    'Federal Reg. Exemption - Unk', 'Federal Reg. Exemption - No',
    'State or Local Permit?', 'State or Local Permit', 'ADS Equipped?',
    'Automation System Engaged?', 'Source - Complaint/Claim',
    'Source - Telematics', 'Source - Law Enforcement',
    'Source - Field Report', 'Source - Testing', 'Source - Media',
    'Source - Other', 'Source - Other Text', 'Incident Date - Unknown', 'Notice Received Date',
    'Same Incident ID', 'Address', 'Address - Unknown', 'Property Damage?',
    'CP Pre-Crash Movement', 'CP Any Air Bags Deployed?', 'CP Was Vehicle Towed?',
    'CP Contact Area - Rear Left', 'CP Contact Area - Left', 'CP Contact Area - Front Left',
    'CP Contact Area - Rear', 'CP Contact Area - Top', 'CP Contact Area - Front',
    'CP Contact Area - Rear Right', 'CP Contact Area - Right', 'CP Contact Area - Front Right',
    'CP Contact Area - Bottom', 'CP Contact Area - Unknown',
    'SV Pre-Crash Movement', 'SV Any Air Bags Deployed?', 'SV Was Vehicle Towed?',
    'SV Precrash Speed (MPH)', 'SV Pre-crash Speed - Unknown',
    'SV Contact Area - Rear Left', 'SV Contact Area - Left', 'SV Contact Area - Front Left',
    'SV Contact Area - Rear', 'SV Contact Area - Top', 'SV Contact Area - Front',
    'SV Contact Area - Rear Right', 'SV Contact Area - Right', 'SV Contact Area - Front Right',
    'SV Contact Area - Bottom', 'SV Contact Area - Unknown',
    'Data Availability - EDR', 'Data Availability - Police Rpt',
    'Operating Entity', 'Operating Entity - Unknown', 'Crash With',
    'Weather - Unknown', 'Incident Time - Unknown',
    'Latitude', 'Latitude - Unknown', 'Longitude', 'Longitude - Unknown',
    'City - Unknown', 'Zip Code', 'Zip Code - Unknown',
    'Posted Speed Limit - Unknown', 'Weather - Other', 'Weather - Other Text',
]
# `columns=` states the axis explicitly; `df` itself is left untouched.
df2 = df.drop(columns=cols_to_drop)
df2.head()

Unnamed: 0,Make,Model,Model Year,Incident Date,Incident Time (24:00),City,State,Roadway Type,Roadway Surface,Roadway Description,Posted Speed Limit (MPH),Lighting,Weather - Clear,Weather - Snow,Weather - Cloudy,Weather - Fog/Smoke,Weather - Rain,Weather - Severe Wind,Highest Injury Severity Alleged,SV Were All Passengers Belted?
0,Jaguar,I-Pace,2021.0,JAN-2024,12:04,Los Angeles,CA,Street,Dry,No Unusual Conditions,25.0,Daylight,Y,,,,,,Unknown,"No, see Narrative"
1,Jaguar,I-Pace,2024.0,JUL-2024,04:58,San Francisco,CA,Street,Dry,No Unusual Conditions,25.0,Dark - Lighted,Y,,,,,,No Injuries Reported,No Passengers in Vehicle
2,Jaguar,I-Pace,2024.0,JUL-2024,11:29,Phoenix,AZ,Parking Lot,Dry,No Unusual Conditions,5.0,Daylight,Y,,,,,,No Injuries Reported,Yes
3,Jaguar,I-Pace,2024.0,JUL-2024,18:16,San Francisco,CA,Street,Dry,No Unusual Conditions,25.0,Daylight,Y,,,,,,No Injuries Reported,No Passengers in Vehicle
4,Jaguar,I-Pace,2024.0,JUL-2024,11:06,Austin,TX,Street,Dry,No Unusual Conditions,40.0,Daylight,,,Y,,,,No Injuries Reported,Yes


In [5]:
# Refreshed look at columns: confirm which 20 fields remain after the drop.
df2.columns

Index(['Make', 'Model', 'Model Year', 'Incident Date', 'Incident Time (24:00)',
       'City', 'State', 'Roadway Type', 'Roadway Surface',
       'Roadway Description', 'Posted Speed Limit (MPH)', 'Lighting',
       'Weather - Clear', 'Weather - Snow', 'Weather - Cloudy',
       'Weather - Fog/Smoke', 'Weather - Rain', 'Weather - Severe Wind',
       'Highest Injury Severity Alleged', 'SV Were All Passengers Belted?'],
      dtype='object')

In [6]:
# Check dtypes and non-null counts before any type conversion
# ('Model Year' is float64 with missing values at this point).
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1362 entries, 0 to 1361
Data columns (total 20 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Make                             1359 non-null   object 
 1   Model                            1358 non-null   object 
 2   Model Year                       1348 non-null   float64
 3   Incident Date                    1358 non-null   object 
 4   Incident Time (24:00)            1352 non-null   object 
 5   City                             1356 non-null   object 
 6   State                            1359 non-null   object 
 7   Roadway Type                     1359 non-null   object 
 8   Roadway Surface                  1359 non-null   object 
 9   Roadway Description              1359 non-null   object 
 10  Posted Speed Limit (MPH)         1331 non-null   float64
 11  Lighting                         1359 non-null   object 
 12  Weather - Clear     

In [7]:
# Normalise the three columns whose raw representation is awkward to read:
#   * 'Incident Date'         'JAN-2024' -> parsed -> 'January 2024' (string)
#   * 'Incident Time (24:00)' 'HH:MM'    -> parsed -> 'HH:MM' (string; the
#     round trip only validates the format)
#   * 'Model Year'            missing -> 9999 placeholder, then integer
# The parse-then-strftime formatting step was a code recommendation via Xpert.
# NOTE(review): both date columns end up as strings again, so they sort
# alphabetically rather than chronologically, and 9999 is a sentinel, not a
# real model year.
df2 = df2.assign(**{
    'Incident Date': pd.to_datetime(df2['Incident Date'], format='%b-%Y').dt.strftime('%B %Y'),
    'Incident Time (24:00)': pd.to_datetime(df2['Incident Time (24:00)'], format='%H:%M').dt.strftime('%H:%M'),
    'Model Year': df2['Model Year'].fillna(9999).astype(int),
})


In [8]:
# Re-check dtypes after the conversions: 'Model Year' is now an integer column
# with no nulls, while the date and time columns are still object (string) dtype.
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1362 entries, 0 to 1361
Data columns (total 20 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Make                             1359 non-null   object 
 1   Model                            1358 non-null   object 
 2   Model Year                       1362 non-null   int32  
 3   Incident Date                    1358 non-null   object 
 4   Incident Time (24:00)            1352 non-null   object 
 5   City                             1356 non-null   object 
 6   State                            1359 non-null   object 
 7   Roadway Type                     1359 non-null   object 
 8   Roadway Surface                  1359 non-null   object 
 9   Roadway Description              1359 non-null   object 
 10  Posted Speed Limit (MPH)         1331 non-null   float64
 11  Lighting                         1359 non-null   object 
 12  Weather - Clear     

In [9]:
# Columns to profile, in reading order (when/what/where, road conditions,
# weather flags, outcome).
cols = [
    'Incident Date', 'Incident Time (24:00)',
    'Make', 'Model', 'Model Year',
    'City', 'State',
    'Roadway Type', 'Roadway Surface', 'Roadway Description',
    'Posted Speed Limit (MPH)', 'Lighting',
    'Weather - Clear', 'Weather - Snow', 'Weather - Cloudy',
    'Weather - Fog/Smoke', 'Weather - Rain', 'Weather - Severe Wind',
    'Highest Injury Severity Alleged',
    'SV Were All Passengers Belted?',
]

# For each column print its name, the number of distinct values and the
# frequency table, followed by a blank separator line.
for col in cols:
    print(col, df2[col].nunique(), df2[col].value_counts(), "", sep="\n")

Incident Date
38
Incident Date
August 2023       100
September 2023     57
October 2023       56
March 2023         55
May 2024           55
July 2023          55
June 2024          50
August 2022        48
July 2024          48
June 2023          46
May 2023           43
April 2023         42
December 2022      41
March 2022         37
April 2022         37
June 2022          36
November 2021      34
July 2022          34
February 2023      33
February 2024      33
March 2024         33
May 2022           33
October 2021       32
September 2022     30
August 2021        29
January 2024       27
January 2022       26
April 2024         25
September 2021     25
July 2021          24
December 2023      24
February 2022      21
December 2021      19
November 2023      18
November 2022      16
October 2022       15
January 2023       12
August 2024         9
Name: count, dtype: int64

Incident Time (24:00)
549
Incident Time (24:00)
03:15    11
21:29    10
22:22    10
00:39    10
23:30     

In [10]:
# Incidents per month. The labels are 'Month YYYY' strings and value_counts
# orders by frequency, so this table is not chronological.
df2['Incident Date'].value_counts()

Incident Date
August 2023       100
September 2023     57
October 2023       56
March 2023         55
May 2024           55
July 2023          55
June 2024          50
August 2022        48
July 2024          48
June 2023          46
May 2023           43
April 2023         42
December 2022      41
March 2022         37
April 2022         37
June 2022          36
November 2021      34
July 2022          34
February 2023      33
February 2024      33
March 2024         33
May 2022           33
October 2021       32
September 2022     30
August 2021        29
January 2024       27
January 2022       26
April 2024         25
September 2021     25
July 2021          24
December 2023      24
February 2022      21
December 2021      19
November 2023      18
November 2022      16
October 2022       15
January 2023       12
August 2024         9
Name: count, dtype: int64

In [11]:
# Incidents per reported time of day ('HH:MM' strings; 549 distinct values).
df2['Incident Time (24:00)'].value_counts()

Incident Time (24:00)
03:15    11
21:29    10
22:22    10
00:39    10
23:30     9
         ..
20:26     1
12:03     1
11:20     1
07:38     1
09:50     1
Name: count, Length: 549, dtype: int64

In [12]:
# Incidents per vehicle make.
# NOTE(review): the same make appears under several spellings in the counts
# (e.g. 'FREIGHTLINER' / 'Freightliner', 'NAVYA' / 'Navya', 'Crui9se', 'LExus');
# nothing in the visible cells normalises them.
df2['Make'].value_counts()

Make
Jaguar                     613
Cruise                     380
Toyota                     105
Ford                        50
Peterbilt                   37
Chrysler                    27
Hyundai                     26
Lexus                       17
Chevrolet                   11
Mercedes-Benz               10
Kenworth Motor Truck Co      8
Nissan                       8
Navya                        8
EZ10                         6
FREIGHTLINER                 5
Lincoln                      5
Volkswagen                   5
Lucid                        5
Local Motors                 4
International                3
Freightliner                 3
KIA                          2
EQS                          2
Jeep                         2
Polestar                     2
Ligier                       2
Kenworth                     2
Ligier Group                 1
INTERNATIONAL                1
NAVYA                        1
Crui9se                      1
LExus                        1
NIS

In [13]:
# Incidents per vehicle model (67 distinct values, dominated by 'I-Pace' and 'AV').
df2['Model'].value_counts()

Model
I-Pace          595
AV              378
Highlander       80
Escape           34
Pacifica         27
               ... 
Leaf SV Plus      1
111324            1
S 580 4Matic      1
ARMA Autonom      1
Olli              1
Name: count, Length: 67, dtype: int64

In [14]:
# Incidents per model year. 9999 is the placeholder written by the earlier
# fillna(9999) for rows with no model year, not a real year.
df2['Model Year'].value_counts()

Model Year
2021    568
2023    271
2020    135
2022    123
2024     97
2016     52
2017     48
2019     46
9999     14
2018      8
Name: count, dtype: int64

In [15]:
# Incidents per city (88 distinct values; San Francisco accounts for most rows).
df2['City'].value_counts()

City
San Francisco       743
Phoenix             101
Tempe                80
Austin               75
Los Angeles          36
                   ... 
Foster City           1
Port Saint Lucie      1
Shenandoah            1
Rice                  1
Sunol                 1
Name: count, Length: 88, dtype: int64

In [16]:
# Incidents per state (two-letter codes; the counts include 'DC').
df2['State'].value_counts()

State
CA     852
AZ     240
TX     130
FL      42
NV      35
MI      11
DC       8
NM       6
MN       6
CO       6
RI       5
IN       4
PA       3
OH       3
WA       3
WY       3
GA       1
VA       1
Name: count, dtype: int64

In [17]:
# Incidents per roadway type; 'Unknown' is an explicit category in the data.
df2['Roadway Type'].value_counts()

Roadway Type
Intersection         592
Street               576
Highway / Freeway    107
Parking Lot           73
Traffic Circle         6
Unknown                5
Name: count, dtype: int64

In [18]:
# Incidents per roadway surface condition; 'Unknown' is an explicit category.
df2['Roadway Surface'].value_counts()

Roadway Surface
Dry                   1284
Wet                     64
Unknown                  9
Snow / Slush / Ice       2
Name: count, dtype: int64

In [19]:
# Incidents per roadway description. 'Other, see Narrative' points at the
# free-text Narrative column, which was dropped earlier in this notebook.
df2['Roadway Description'].value_counts()

Roadway Description
No Unusual Conditions          1285
Other, see Narrative             37
Work Zone                        19
Traffic Incident                  9
Unknown                           8
Missing / Degraded Markings       1
Name: count, dtype: int64

In [20]:
# Incidents per posted speed limit (MPH, stored as float).
# NOTE(review): the counts include unusual limits such as 0.0, 1.0 and 9.0;
# confirm whether these are data-entry artefacts.
df2['Posted Speed Limit (MPH)'].value_counts()

Posted Speed Limit (MPH)
25.0    752
30.0    147
35.0    110
45.0     58
15.0     54
65.0     53
20.0     45
40.0     40
75.0     30
5.0      13
10.0      5
55.0      5
0.0       4
70.0      4
60.0      4
50.0      3
9.0       3
1.0       1
Name: count, dtype: int64

In [21]:
# Incidents per lighting condition; 'Unknown' is an explicit category.
df2['Lighting'].value_counts()

Lighting
Daylight                   791
Dark - Lighted             488
Dawn / Dusk                 46
Dark - Not Lighted          26
Unknown                      5
Dark - Unknown Lighting      3
Name: count, dtype: int64

In [22]:
# Raw clear-weather flag: 'Y' when reported, a blank value otherwise.
df2['Weather - Clear'].value_counts()

Weather - Clear
Y    1121
      241
Name: count, dtype: int64

In [23]:
# Raw snow flag: 'Y' when reported, a blank value otherwise (a single 'Y' row).
df2['Weather - Snow'].value_counts()

Weather - Snow
     1361
Y       1
Name: count, dtype: int64

In [24]:
# Raw cloudy flag: 'Y' when reported, a blank value otherwise.
df2['Weather - Cloudy'].value_counts()

Weather - Cloudy
     1189
Y     173
Name: count, dtype: int64

In [25]:
# Raw fog/smoke flag: every row is blank in this extract, so the column
# carries no information.
df2['Weather - Fog/Smoke'].value_counts()

Weather - Fog/Smoke
     1362
Name: count, dtype: int64

In [26]:
# Raw rain flag: 'Y' when reported, a blank value otherwise.
df2['Weather - Rain'].value_counts()

Weather - Rain
     1317
Y      45
Name: count, dtype: int64

In [27]:
# Raw severe-wind flag: every row is blank in this extract, so the column
# carries no information.
df2['Weather - Severe Wind'].value_counts()

Weather - Severe Wind
     1362
Name: count, dtype: int64

In [28]:
# Disabled: 'Weather - Other' is listed in cols_to_drop, so it no longer
# exists in df2 and this lookup would raise KeyError.
#df2['Weather - Other'].value_counts()

In [29]:
# Disabled: 'Weather - Other Text' is listed in cols_to_drop, so it no longer
# exists in df2 and this lookup would raise KeyError.
#df2['Weather - Other Text'].value_counts()

In [30]:
# Distribution of the highest alleged injury severity; 'Unknown' is an
# explicit category, separate from missing values.
df2['Highest Injury Severity Alleged'].value_counts()

Highest Injury Severity Alleged
No Injuries Reported    1092
Minor                    133
Unknown                   74
Moderate                  37
Serious                   21
Fatality                   2
Name: count, dtype: int64

In [31]:
# Distribution of the seat-belt answer for the subject vehicle. Note the
# distinct 'No Passengers in Vehicle' and 'Unknown' categories.
df2['SV Were All Passengers Belted?'].value_counts()

SV Were All Passengers Belted?
Yes                         876
No Passengers in Vehicle    442
No, see Narrative            30
Unknown                      11
Name: count, dtype: int64

In [32]:
# Snapshot of dtypes and null counts before the weather columns are renamed and encoded.
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1362 entries, 0 to 1361
Data columns (total 20 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Make                             1359 non-null   object 
 1   Model                            1358 non-null   object 
 2   Model Year                       1362 non-null   int32  
 3   Incident Date                    1358 non-null   object 
 4   Incident Time (24:00)            1352 non-null   object 
 5   City                             1356 non-null   object 
 6   State                            1359 non-null   object 
 7   Roadway Type                     1359 non-null   object 
 8   Roadway Surface                  1359 non-null   object 
 9   Roadway Description              1359 non-null   object 
 10  Posted Speed Limit (MPH)         1331 non-null   float64
 11  Lighting                         1359 non-null   object 
 12  Weather - Clear     

In [33]:
# Shorten the weather flag labels by removing the literal 'Weather - ' prefix
# (e.g. 'Weather - Clear' -> 'Clear'); labels without it are left as they are.
df2 = df2.rename(columns=lambda label: label.replace('Weather - ', ''))

In [34]:
# Confirm the weather columns now carry the short labels ('Clear', 'Snow', ...).
df2.columns

Index(['Make', 'Model', 'Model Year', 'Incident Date', 'Incident Time (24:00)',
       'City', 'State', 'Roadway Type', 'Roadway Surface',
       'Roadway Description', 'Posted Speed Limit (MPH)', 'Lighting', 'Clear',
       'Snow', 'Cloudy', 'Fog/Smoke', 'Rain', 'Severe Wind',
       'Highest Injury Severity Alleged', 'SV Were All Passengers Belted?'],
      dtype='object')

In [35]:
# Encode the six weather flags as 0/1 integers: 1 where the report has 'Y',
# 0 otherwise (the raw columns contain only 'Y' and a blank value).
#
# The encoding is restricted to the weather columns on purpose: a frame-wide
# replace({'Y': 1, ' ': 0}) would also rewrite any cell in Make, Model, City,
# etc. that happens to equal 'Y' or a single space, and it relies on implicit
# object->int downcasting that newer pandas deprecates.
# Accepting 1 as well as 'Y' keeps the cell safe to re-run on already-encoded data.
weather_cols = ['Clear', 'Snow', 'Cloudy', 'Fog/Smoke', 'Rain', 'Severe Wind']
df2 = df2.assign(**{
    col: df2[col].isin(['Y', 1]).astype('int64')
    for col in weather_cols
})

In [36]:
# Preview the frame after encoding: the weather columns should now show 0/1.
df2.head()

Unnamed: 0,Make,Model,Model Year,Incident Date,Incident Time (24:00),City,State,Roadway Type,Roadway Surface,Roadway Description,Posted Speed Limit (MPH),Lighting,Clear,Snow,Cloudy,Fog/Smoke,Rain,Severe Wind,Highest Injury Severity Alleged,SV Were All Passengers Belted?
0,Jaguar,I-Pace,2021,January 2024,12:04,Los Angeles,CA,Street,Dry,No Unusual Conditions,25.0,Daylight,1,0,0,0,0,0,Unknown,"No, see Narrative"
1,Jaguar,I-Pace,2024,July 2024,04:58,San Francisco,CA,Street,Dry,No Unusual Conditions,25.0,Dark - Lighted,1,0,0,0,0,0,No Injuries Reported,No Passengers in Vehicle
2,Jaguar,I-Pace,2024,July 2024,11:29,Phoenix,AZ,Parking Lot,Dry,No Unusual Conditions,5.0,Daylight,1,0,0,0,0,0,No Injuries Reported,Yes
3,Jaguar,I-Pace,2024,July 2024,18:16,San Francisco,CA,Street,Dry,No Unusual Conditions,25.0,Daylight,1,0,0,0,0,0,No Injuries Reported,No Passengers in Vehicle
4,Jaguar,I-Pace,2024,July 2024,11:06,Austin,TX,Street,Dry,No Unusual Conditions,40.0,Daylight,0,0,1,0,0,0,No Injuries Reported,Yes


In [37]:
# Sanity check: the 1/0 counts should match the earlier 'Y'/blank counts (1121 / 241).
df2['Clear'].value_counts()

Clear
1    1121
0     241
Name: count, dtype: int64

In [38]:
# Sanity check: the 1/0 counts should match the earlier 'Y'/blank counts (1 / 1361).
df2['Snow'].value_counts()

Snow
0    1361
1       1
Name: count, dtype: int64

In [39]:
# Sanity check: the 1/0 counts should match the earlier 'Y'/blank counts (173 / 1189).
df2['Cloudy'].value_counts()

Cloudy
0    1189
1     173
Name: count, dtype: int64

In [40]:
# Sanity check: all 1362 rows are 0, so this column is constant in this extract.
df2['Fog/Smoke'].value_counts()

Fog/Smoke
0    1362
Name: count, dtype: int64

In [41]:
# Sanity check: the 1/0 counts should match the earlier 'Y'/blank counts (45 / 1317).
df2['Rain'].value_counts()

Rain
0    1317
1      45
Name: count, dtype: int64

In [42]:
# Sanity check: all 1362 rows are 0, so this column is constant in this extract.
df2['Severe Wind'].value_counts()

Severe Wind
0    1362
Name: count, dtype: int64

In [43]:
# Give the severity column a short label, then confirm the category counts
# are unchanged by the rename.
df2 = df2.rename({'Highest Injury Severity Alleged': 'Injuries'}, axis='columns')
df2['Injuries'].value_counts()

Injuries
No Injuries Reported    1092
Minor                    133
Unknown                   74
Moderate                  37
Serious                   21
Fatality                   2
Name: count, dtype: int64

In [44]:
# Give the seat-belt column a short label, then confirm the category counts
# are unchanged by the rename.
df2 = df2.rename({'SV Were All Passengers Belted?': 'Passengers Belted'}, axis='columns')
df2['Passengers Belted'].value_counts()

Passengers Belted
Yes                         876
No Passengers in Vehicle    442
No, see Narrative            30
Unknown                      11
Name: count, dtype: int64

In [45]:
# Work on an independent copy for the ML encoding so df2 keeps the readable
# category labels.
df_ml = df2.copy()

In [46]:
# Binary injury target: 0 = 'No Injuries Reported', 1 = any other category.
# Missing values are treated as 'No Injuries Reported'.
#
# The labels are read from df2 (which still holds the text categories) so the
# cell can be re-run safely, and Series.map is used instead of
# Series.replace to avoid the implicit object->int downcasting that pandas
# 2.2 deprecates. A category missing from the mapping becomes NaN and makes
# astype(int) raise instead of passing through silently.
#
# NOTE(review): 'Unknown' is coded as an injury (1) while a missing value is
# coded as no injury (0); confirm that this asymmetry is intended.
df_ml['Injuries'] = (
    df2['Injuries']
    .fillna('No Injuries Reported')
    .map({
        'No Injuries Reported': 0,
        'Minor': 1,
        'Unknown': 1,
        'Moderate': 1,
        'Serious': 1,
        'Fatality': 1,
    })
    .astype(int)
)

In [47]:
# Check the encoded target: 1095 zeros = 1092 'No Injuries Reported' + 3 rows
# that were missing; 267 ones = all remaining categories, including 'Unknown'.
df_ml['Injuries'].value_counts()

Injuries
0    1095
1     267
Name: count, dtype: int64

In [48]:
# Binary seat-belt feature: 1 = 'Yes' or 'Unknown', 0 = 'No, see Narrative' or
# 'No Passengers in Vehicle'. Missing values are treated as 'Yes'.
#
# The labels are read from df2 (which still holds the text categories) so the
# cell can be re-run safely, and Series.map is used instead of
# Series.replace to avoid the implicit object->int downcasting that pandas
# 2.2 deprecates. A category missing from the mapping becomes NaN and makes
# astype(int) raise instead of passing through silently.
#
# NOTE(review): 'No Passengers in Vehicle' shares code 0 with "not belted",
# and both 'Unknown' and missing values are counted as belted; confirm that
# this is the intended encoding.
df_ml['Passengers Belted'] = (
    df2['Passengers Belted']
    .fillna('Yes')
    .map({
        'Yes': 1,
        'No Passengers in Vehicle': 0,
        'No, see Narrative': 0,
        'Unknown': 1,
    })
    .astype(int)
)

In [49]:
# Check the encoded feature: 890 ones = 876 'Yes' + 11 'Unknown' + 3 missing;
# 472 zeros = 442 'No Passengers in Vehicle' + 30 'No, see Narrative'.
df_ml['Passengers Belted'].value_counts()

Passengers Belted
1    890
0    472
Name: count, dtype: int64

In [50]:
# Preview the ML frame: 'Injuries' and 'Passengers Belted' should now be 0/1.
df_ml.head()

Unnamed: 0,Make,Model,Model Year,Incident Date,Incident Time (24:00),City,State,Roadway Type,Roadway Surface,Roadway Description,Posted Speed Limit (MPH),Lighting,Clear,Snow,Cloudy,Fog/Smoke,Rain,Severe Wind,Injuries,Passengers Belted
0,Jaguar,I-Pace,2021,January 2024,12:04,Los Angeles,CA,Street,Dry,No Unusual Conditions,25.0,Daylight,1,0,0,0,0,0,1,0
1,Jaguar,I-Pace,2024,July 2024,04:58,San Francisco,CA,Street,Dry,No Unusual Conditions,25.0,Dark - Lighted,1,0,0,0,0,0,0,0
2,Jaguar,I-Pace,2024,July 2024,11:29,Phoenix,AZ,Parking Lot,Dry,No Unusual Conditions,5.0,Daylight,1,0,0,0,0,0,0,1
3,Jaguar,I-Pace,2024,July 2024,18:16,San Francisco,CA,Street,Dry,No Unusual Conditions,25.0,Daylight,1,0,0,0,0,0,0,0
4,Jaguar,I-Pace,2024,July 2024,11:06,Austin,TX,Street,Dry,No Unusual Conditions,40.0,Daylight,0,0,1,0,0,0,0,1


In [51]:
# Final column list that will be written to the CSV.
df_ml.columns

Index(['Make', 'Model', 'Model Year', 'Incident Date', 'Incident Time (24:00)',
       'City', 'State', 'Roadway Type', 'Roadway Surface',
       'Roadway Description', 'Posted Speed Limit (MPH)', 'Lighting', 'Clear',
       'Snow', 'Cloudy', 'Fog/Smoke', 'Rain', 'Severe Wind', 'Injuries',
       'Passengers Belted'],
      dtype='object')

In [52]:
# Final dtype and null check before export. Several text columns and
# 'Posted Speed Limit (MPH)' still contain missing values at this point.
df_ml.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1362 entries, 0 to 1361
Data columns (total 20 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Make                      1359 non-null   object 
 1   Model                     1358 non-null   object 
 2   Model Year                1362 non-null   int32  
 3   Incident Date             1358 non-null   object 
 4   Incident Time (24:00)     1352 non-null   object 
 5   City                      1356 non-null   object 
 6   State                     1359 non-null   object 
 7   Roadway Type              1359 non-null   object 
 8   Roadway Surface           1359 non-null   object 
 9   Roadway Description       1359 non-null   object 
 10  Posted Speed Limit (MPH)  1331 non-null   float64
 11  Lighting                  1359 non-null   object 
 12  Clear                     1362 non-null   int64  
 13  Snow                      1362 non-null   int64  
 14  Cloudy  

In [53]:
# Write the ML-ready table to the current working directory, without the
# row index. An existing file of the same name is overwritten.
df_ml.to_csv('crash_records_ml.csv', index = False)