In [None]:
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import MinMaxScaler
import folium
from sklearn.pipeline import Pipeline
import numpy as np

In this notebook, we'll be working with the Nashville traffic accidents data. For simplicity, we'll only look at crashes that happened in February of 2020.

In [None]:
# Read the raw accident records and parse the timestamp column into datetimes.
crashes = pd.read_csv('data/Traffic_Accidents.csv')
crashes = crashes.assign(**{
    'Date and Time': pd.to_datetime(crashes['Date and Time'], format = '%m/%d/%Y %I:%M:%S %p')
})

# Keep only February 2020, in chronological order, and drop rows that lack
# either coordinate.
timestamps = crashes['Date and Time']
in_feb_2020 = timestamps.dt.year.eq(2020) & timestamps.dt.month.eq(2)

crashes = (
    crashes.loc[in_feb_2020]
    .sort_values('Date and Time')
    .dropna(subset = ['Latitude', 'Longitude'])
)

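Latitude and longitude are angles measured in degrees, so ordinary Euclidean distance between two coordinate pairs is not a physical distance. The haversine function below instead returns the great-circle distance between two points, in miles.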

In [None]:
def haversine(x, y):
    """Great-circle distance in miles between two (longitude, latitude) points.

    Parameters
    ----------
    x, y : sequence of two numbers
        Each point as ``[longitude, latitude]`` in decimal degrees
        (longitude first, latitude second).

    Returns
    -------
    float
        Distance between the two points in miles, using a radius of 3959 miles.
    """
    lon1, lat1, lon2, lat2 = map(np.radians, [x[0], x[1], y[0], y[1]])

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make arcsin return NaN; clamp it.
    a = np.clip(a, 0.0, 1.0)

    c = 2 * np.arcsin(np.sqrt(a))
    mi = 3959 * c
    return mi

**Goal:** Identify traffic hotspots. We want to find groups containing a large number of crashes that happened within a small distance of each other. Try to identify 2 or 3 such areas.

Hint: You can use the haversine function above as the distance metric in DBSCAN. Because it returns miles, the neighborhood radius `eps` is then also measured in miles.

In [None]:
# Column order matters: haversine reads x[0] as longitude and x[1] as latitude.
variables = ['Longitude', 'Latitude']

X = crashes[variables]

# Starting values only -- tune `eps` and `min_samples` until the label counts
# in the next cell show 2 or 3 clusters.
dbscan = DBSCAN(
    eps = 0.1,           # neighborhood radius, in miles (the unit haversine returns)
    min_samples = 20,    # crashes required within `eps` for a point to anchor a cluster
    metric = haversine,
)

dbscan.fit(X)

In [None]:
# Number of crashes assigned to each DBSCAN label (scikit-learn labels noise points -1).
label_counts = pd.Series(dbscan.labels_).value_counts()
label_counts

After finding good values for these parameters, you can use the following code to map out your findings.

In [None]:
# Attach each crash's DBSCAN label so a single cluster can be selected by number.
crashes = crashes.assign(cluster = dbscan.labels_)

# Label of the cluster to draw; change this to inspect a different cluster.
cluster_num = 0

crashes_sub = crashes.loc[crashes['cluster'] == cluster_num, ['Latitude', 'Longitude', 'cluster']]

# The format for a point in folium is: [lat, lng]

nash_map = folium.Map(location = [36.1612, -86.7775], zoom_start = 12)

# Walk the two coordinate columns directly: only lat/lng are needed for a
# marker, and this avoids iterrows building (and upcasting) a Series per row.
for lat, lng in zip(crashes_sub['Latitude'], crashes_sub['Longitude']):
    folium.Marker(location = [lat, lng]).add_to(nash_map)

nash_map