In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import requests
import gmaps
import os
import json
import sys
sys.path.insert(0, 'Data')
from config import g_key

# Read the Part I results from the CSV export into a DataFrame.
accident_data = pd.read_csv(
    filepath_or_buffer="Data/accident_data.csv",
    encoding="utf-8",
)
# Preview the first five rows to confirm the load.
accident_data.head(5)

Unnamed: 0,ID,Source,TMC,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,...,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight
0,A-1,MapQuest,201.0,3,2016-02-08 05:46:00,2016-02-08 11:00:00,39.865147,-84.058723,,,...,False,False,False,False,False,False,Night,Night,Night,Night
1,A-2,MapQuest,201.0,2,2016-02-08 06:07:59,2016-02-08 06:37:59,39.928059,-82.831184,,,...,False,False,False,False,False,False,Night,Night,Night,Day
2,A-3,MapQuest,201.0,2,2016-02-08 06:49:27,2016-02-08 07:19:27,39.063148,-84.032608,,,...,False,False,False,False,True,False,Night,Night,Day,Day
3,A-4,MapQuest,201.0,3,2016-02-08 07:23:34,2016-02-08 07:53:34,39.747753,-84.205582,,,...,False,False,False,False,False,False,Night,Day,Day,Day
4,A-5,MapQuest,201.0,2,2016-02-08 07:39:07,2016-02-08 08:09:07,39.627781,-84.188354,,,...,False,False,False,False,True,False,Day,Day,Day,Day


In [4]:
# Show the column labels of the loaded accident DataFrame.
accident_data.columns

Index(['ID', 'Source', 'TMC', 'Severity', 'Start_Time', 'End_Time',
       'Start_Lat', 'Start_Lng', 'End_Lat', 'End_Lng', 'Distance(mi)',
       'Description', 'Number', 'Street', 'Side', 'City', 'County', 'State',
       'Zipcode', 'Country', 'Timezone', 'Airport_Code', 'Weather_Timestamp',
       'Temperature(F)', 'Wind_Chill(F)', 'Humidity(%)', 'Pressure(in)',
       'Visibility(mi)', 'Wind_Direction', 'Wind_Speed(mph)',
       'Precipitation(in)', 'Weather_Condition', 'Amenity', 'Bump', 'Crossing',
       'Give_Way', 'Junction', 'No_Exit', 'Railway', 'Roundabout', 'Station',
       'Stop', 'Traffic_Calming', 'Traffic_Signal', 'Turning_Loop',
       'Sunrise_Sunset', 'Civil_Twilight', 'Nautical_Twilight',
       'Astronomical_Twilight'],
      dtype='object')

In [7]:
# Column labels to remove from the working frame.
columns_to_drop = [
    'Source', 'TMC', 'End_Time', 'End_Lat', 'End_Lng', 'Distance(mi)',
    'Country', 'Number', 'Street', 'Side', 'Timezone', 'Wind_Chill(F)',
    'Humidity(%)', 'Pressure(in)', 'Wind_Direction', 'Amenity', 'Bump',
    'Crossing', 'Give_Way', 'Junction', 'No_Exit', 'Railway', 'Roundabout',
    'Station', 'Stop', 'Traffic_Calming', 'Traffic_Signal', 'Turning_Loop',
    'Sunrise_Sunset', 'Civil_Twilight', 'Nautical_Twilight',
    'Astronomical_Twilight',
]

# drop() returns a new frame, so accident_data itself is left unchanged.
accident_data_revised = accident_data.drop(columns=columns_to_drop)

# Show the column labels that remain.
accident_data_revised.columns

Index(['ID', 'Severity', 'Start_Time', 'Start_Lat', 'Start_Lng', 'Description',
       'City', 'County', 'State', 'Zipcode', 'Airport_Code',
       'Weather_Timestamp', 'Temperature(F)', 'Visibility(mi)',
       'Wind_Speed(mph)', 'Precipitation(in)', 'Weather_Condition'],
      dtype='object')

In [9]:
# Data cleanup

# Remove rows whose Start_Time year is 2016, 2017, or 2019 -- the full dataset
# is too large to handle with laptops. The year is taken from the first four
# characters of the timestamp and checked in a single pass, so digits elsewhere
# in the string can never cause a row to be dropped.
excluded_years = ["2016", "2017", "2019"]
start_year = accident_data_revised["Start_Time"].str[:4]
# .copy() yields an independent frame, so the column assignment below does not
# write into a slice of the unfiltered frame.
accident_data_revised = accident_data_revised[~start_year.isin(excluded_years)].copy()

# Split Start_Time ("YYYY-MM-DD HH:MM:SS") at the first run of whitespace into
# separate Date and Time columns; n=1 guarantees at most two pieces.
accident_data_revised[["Date", "Time"]] = (
    accident_data_revised["Start_Time"].str.split(n=1, expand=True)
)

# Rename columns to be more readable.
accident_data_revised = accident_data_revised.rename(
    columns={"Start_Lat": "Lat", "Start_Lng": "Lng", "Weather_Condition": "Weather"}
)

# Collect the names of the columns that contain at least one NaN value and
# print them, so the list is visible even though it is not the cell's final
# expression.
columns_with_nan = accident_data_revised.columns[accident_data_revised.isna().any()].tolist()
print("Columns containing NaN:", columns_with_nan)

# Preview the first ten cleaned rows.
accident_data_revised.head(10)

Unnamed: 0,ID,Severity,Start_Time,Lat,Lng,Description,City,County,State,Zipcode,Airport_Code,Weather_Timestamp,Temperature(F),Visibility(mi),Wind_Speed(mph),Precipitation(in),Weather,Date,Time
626043,A-626044,3,2018-12-31 23:54:51,40.740047,-73.818512,Right & center lane blocked due to accident on...,Flushing,Queens,NY,11367,KLGA,2018-12-31 23:51:00,46.9,5.0,6.9,0.16,Heavy Rain,2018-12-31,23:54:51
626045,A-626046,3,2018-12-31 22:40:11,40.83437,-73.864113,Two lanes blocked due to accident on I-95 Cros...,Bronx,Bronx,NY,10460,KLGA,2018-12-31 22:46:00,46.4,5.0,10.4,0.15,Rain,2018-12-31,22:40:11
626355,A-626356,3,2018-12-31 12:35:54,38.441975,-88.953049,Right lane blocked due to accident on I-57 Nor...,Dix,Jefferson,IL,62830,KMVN,2018-12-31 12:56:00,57.0,10.0,6.9,0.04,Light Rain,2018-12-31,12:35:54
626407,A-626408,2,2018-12-31 22:32:27,30.241417,-97.726158,Accident on Elmont Dr at Tinnin Ford Rd.,Austin,Travis,TX,78741-3038,KAUS,2018-12-31 22:53:00,39.9,9.0,,,Partly Cloudy,2018-12-31,22:32:27
626462,A-626463,2,2018-12-31 23:49:55,32.77306,-96.744247,Accident on Spring Ave at Gay St.,Dallas,Dallas,TX,75210-1653,KDAL,2018-12-31 23:53:00,43.0,10.0,5.8,,Clear,2018-12-31,23:49:55
626463,A-626464,2,2018-12-31 23:49:24,32.81842,-96.802391,Accident on Newton Ct at TX-289 Oak Lawn Ave.,Dallas,Dallas,TX,75219,KDAL,2018-12-31 23:53:00,43.0,10.0,5.8,,Clear,2018-12-31,23:49:24
626563,A-626564,3,2018-12-31 18:46:51,39.902153,-104.988914,Right lane blocked due to accident on I-25 Sou...,Denver,Adams,CO,80233,KBJC,2018-12-31 18:58:00,8.6,1.5,8.1,,Snow,2018-12-31,18:46:51
626574,A-626575,2,2018-12-31 22:26:47,32.192139,-110.857933,Accident on Golf Links Rd at Wilmot Rd.,Tucson,Pima,AZ,85730,KDMA,2018-12-31 22:48:00,37.4,10.0,10.4,,Light Rain,2018-12-31,22:26:47
626599,A-626600,3,2018-12-31 22:40:29,35.008537,-105.664352,Queueing traffic due to jackknifed truck on I-...,Moriarty,Torrance,NM,87035,K0E0,2018-12-31 22:35:00,27.9,4.0,26.5,,Light Snow,2018-12-31,22:40:29
626602,A-626603,3,2018-12-31 22:37:23,35.423836,-108.312744,Slow traffic due to accident on I-40 both ways...,Continental Divide,McKinley,NM,87312,KGUP,2018-12-31 22:32:00,25.0,0.8,,0.02,Light Snow,2018-12-31,22:37:23


In [None]:

# Humidity heatmap
# NOTE(review): `weather_data` is not defined in any cell visible above this
# one; it has to be loaded before this cell can run -- confirm its source.

# Configure gmaps with the API key imported from config.
gmaps.configure(api_key=g_key)

# Drop any rows with null values in the coordinates or the humidity reading.
# Locations and weights are both taken from this one filtered frame so they
# stay the same length and row-aligned.
humidity_points = weather_data[["Latitude", "Longitude", "Humidity (%)"]].dropna()

# Use Latitude/Longitude as the locations and humidity as the weight.
locations = humidity_points[["Latitude", "Longitude"]]
humidity = humidity_points["Humidity (%)"].astype(float)
# The largest observed humidity is used as the heatmap's maximum intensity.
maxhumidity = humidity.max()

fig = gmaps.figure()

# Add the heatmap layer to the map.
heatmap_layer = gmaps.heatmap_layer(locations, weights=humidity,
                                    dissipating=False, max_intensity=maxhumidity,
                                    point_radius=2)
fig.add_layer(heatmap_layer)
fig

In [None]:
# Build a new DataFrame for NC hotspots.
# Keep only the cities whose weather satisfies every condition below.

weather_columns = [
    "City", "Country", "Latitude", "Longitude",
    "Temperature (F)", "Wind Speed (mph)", "Cloudiness (%)",
]
ideal_weather_df = pd.DataFrame(weather_data, columns=weather_columns)

# Temperature above 70 and at most 80. The upper bound is read from
# weather_data and the lower bound from ideal_weather_df.
temperature = (
    weather_data["Temperature (F)"].le(80)
    & ideal_weather_df["Temperature (F)"].gt(70)
)
# Wind speed strictly below 10 mph.
wind_speed = weather_data["Wind Speed (mph)"].lt(10)
# Cloudiness of exactly 0 %.
cloudiness = weather_data["Cloudiness (%)"].eq(0)

# Retain the rows where all three masks are True, then display the result.
ideal_weather_df = ideal_weather_df.loc[temperature & wind_speed & cloudiness]
ideal_weather_df