In [1]:
import pandas as pd
# `plt` is the conventional alias for the pyplot interface. The bare
# `matplotlib` package does not provide plot()/show()/subplots(), so aliasing
# the package itself as `plt` breaks the usual plt.* plotting calls.
import matplotlib.pyplot as plt
import gmaps
import sys
# Put the Data/ folder on the import path so that Data/config.py can be imported
sys.path.insert(0, 'Data')
# NOTE(review): gkey is presumably the Google API key used with gmaps -- confirm
from config import gkey

# Load the raw accident records from the CSV into a DataFrame
accident_data = pd.read_csv("Data/accident_data.csv")

In [2]:
# Data cleanup

# Filters out records from 2016, 2017, & 2019 -- too much data in dataset to handle with laptops.
# The year is read from the first four characters of Start_Time, so the filter
# is a single pass and digits elsewhere in the timestamp can never match.
excluded_years = ["2016", "2017", "2019"]
start_year = accident_data["Start_Time"].str[:4]

# .copy() makes df an independent frame, so the column assignment below does
# not write into a slice of accident_data (avoids SettingWithCopyWarning).
df = accident_data[~start_year.isin(excluded_years)].copy()

# Splits the Start_Time column into separate Date and Time columns.
# n=1 splits on the first run of whitespace only, so a row can never produce
# more than the two pieces being assigned.
df[["Date", "Time"]] = df["Start_Time"].str.split(n=1, expand=True)

# Renames columns to be more readable
df = df.rename(columns={"Start_Lat":"Lat", "Start_Lng":"Lng", "Weather_Condition":"Weather"})

# Filters and rearranges dataset to display most useful columns
df = df[["Date", "Time", "Lat", "Lng", "City", "State",
         "County", "Weather", "Temperature(F)", "Severity"]]

# Columns that contain NaN values can be listed with:
# df.columns[df.isna().any()].tolist()
# Output: ['City', 'Weather', 'Temperature(F)']

df.head(10)

Unnamed: 0,Date,Time,Lat,Lng,City,State,County,Weather,Temperature(F),Severity
626043,2018-12-31,23:54:51,40.740047,-73.818512,Flushing,NY,Queens,Heavy Rain,46.9,3
626045,2018-12-31,22:40:11,40.83437,-73.864113,Bronx,NY,Bronx,Rain,46.4,3
626355,2018-12-31,12:35:54,38.441975,-88.953049,Dix,IL,Jefferson,Light Rain,57.0,3
626407,2018-12-31,22:32:27,30.241417,-97.726158,Austin,TX,Travis,Partly Cloudy,39.9,2
626462,2018-12-31,23:49:55,32.77306,-96.744247,Dallas,TX,Dallas,Clear,43.0,2
626463,2018-12-31,23:49:24,32.81842,-96.802391,Dallas,TX,Dallas,Clear,43.0,2
626563,2018-12-31,18:46:51,39.902153,-104.988914,Denver,CO,Adams,Snow,8.6,3
626574,2018-12-31,22:26:47,32.192139,-110.857933,Tucson,AZ,Pima,Light Rain,37.4,2
626599,2018-12-31,22:40:29,35.008537,-105.664352,Moriarty,NM,Torrance,Light Snow,27.9,3
626602,2018-12-31,22:37:23,35.423836,-108.312744,Continental Divide,NM,McKinley,Light Snow,25.0,3


In [3]:
# Columns needed for the weather analysis
weather_columns = ["Date", "Time", "Weather", "Severity"]

# Take just those columns and renumber the rows from 0; reset_index() keeps
# the previous row labels in a new "index" column.
weather_df = df[weather_columns].reset_index()

weather_df

Unnamed: 0,index,Date,Time,Weather,Severity
0,626043,2018-12-31,23:54:51,Heavy Rain,3
1,626045,2018-12-31,22:40:11,Rain,3
2,626355,2018-12-31,12:35:54,Light Rain,3
3,626407,2018-12-31,22:32:27,Partly Cloudy,2
4,626462,2018-12-31,23:49:55,Clear,2
...,...,...,...,...,...
892620,2181999,2018-01-30,20:01:02,Clear,2
892621,2182000,2018-01-30,20:40:30,Clear,2
892622,2182001,2018-01-30,20:40:30,Clear,2
892623,2182002,2018-01-30,21:30:42,Partly Cloudy,2


In [39]:
# Collapses the many fine-grained weather descriptions in the "Weather" column
# into a handful of broad categories to lessen graph density.
# Descriptions that are not listed here (e.g. "Clear", "Overcast") are left unchanged.

# Broad category -> the original descriptions that are folded into it
weather_groups = {
    "Cloudy": [
        "Mostly Cloudy",
        "Scattered Clouds",
        "Partly Cloudy",
    ],
    "Rain": [
        "Light Rain",
        "Heavy Rain",
        "Light Drizzle",
        "Light Thunderstorms and Rain",
        "Heavy Thunderstorms and Rain",
        "Heavy Rain Showers",
        "Drizzle",
        "Storm",
        "Light Freezing Rain",
        "Heavy Drizzle",
        "Thunderstorms and Rain",
        "Light Freezing Drizzle",
        "Light Rain Showers",
        "Light Thunderstorm",
        "Heavy Freezing Rain",
        "Heavy Freezing Drizzle",
        "Squalls",
        "Rain Showers",
        "Thunderstorm",
    ],
    "Fog": [
        "Patches of Fog",
        "Shallow Fog",
        "Mist",
        "Light Freezing Fog",
        "Haze",
        "Light Haze",
    ],
    "Snow": [
        "Light Snow",
        "Blowing Snow",
        "Light Snow Showers",
        "Heavy Snow",
        "Light Snow Grains",
        "Light Thunderstorms and Snow",
        "Heavy Blowing Snow",
        "Light Blowing Snow",
        "Thunderstorms and Snow",
        "Snow Showers",
    ],
    "Misc/Other": [
        "Hail",
        "Light Ice Pellets",
        "Small Hail",
        "Light Hail",
        "Heavy Thunderstorms with Small Hail",
        "Ice Pellets",
        "Smoke",
        "Heavy Smoke",
        "Volcanic Ash",
        "Widespread Dust",
        "Sand",
        "Funnel Cloud",
    ],
}

# Invert the grouping into the {original description: category} lookup
# that Series.replace() expects
weather_lookup = {
    description: category
    for category, descriptions in weather_groups.items()
    for description in descriptions
}

weather_df["Weather"] = weather_df["Weather"].replace(weather_lookup)

# Number of accidents per consolidated weather category
weather_df["Weather"].value_counts()

Clear         308073
Cloudy        287066
Overcast      156658
Rain           72261
Fog            20380
Snow           17283
Misc/Other      1732
Name: Weather, dtype: int64