In [19]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import folium
from geopy.distance import great_circle
from sklearn.cluster import DBSCAN as dbscan
import math
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
        
# Read only the columns this analysis uses from the accident records.
wanted_columns = ['Latitude', 'Longitude', 'Number_of_Vehicles',
                  'Time', 'Local_Authority_(Highway)', 'Year']
df = pd.read_csv('accidents 2014.csv', usecols=wanted_columns)

# Keep the 2014 records only.
df = df[df['Year'] == 2014]

# Pick out the two local authorities of interest (City and Westminster)
# by their highway-authority codes.
authority = df['Local_Authority_(Highway)']
City = df[authority == 'E09000001']
Westminster = df[authority == 'E09000033']

# Stack the City rows first, then the Westminster rows; later cells rely on
# this row order (e.g. cluster label numbering follows it).
df = pd.concat([City, Westminster], axis=0)

# Replace the 'HH:MM' clock time with its hour-of-day component. The minutes
# are dropped, not rounded. This column is used later in the notebook.
df['Time'] = pd.to_datetime(df['Time'], format='%H:%M').dt.hour

In [20]:
def greatcircle(x,y):
    """Return the great-circle distance, in metres, between two points.

    Each argument is an indexable pair whose element 0 is passed to geopy's
    ``great_circle`` as the latitude and element 1 as the longitude.
    Any elements beyond the first two are ignored.
    """
    start = (x[0], x[1])
    end = (y[0], y[1])
    return great_circle(start, end).meters

In [35]:
# --- Density-based clustering of accident locations -------------------------
eps = 75          # neighbourhood radius in metres
min_samples = 8   # minimum number of points required to form a dense region

# Mean Earth radius in metres (6371.009 km), i.e. the same sphere that geopy's
# great_circle assumes, so distances stay consistent with `greatcircle`.
EARTH_RADIUS_M = 6371009.0

# NOTE: this is an alias, not a copy -- the 'Cluster' column assigned below
# therefore also appears on `df`.
df_dbc = df

loc = df_dbc[['Latitude','Longitude']]

# Use scikit-learn's built-in haversine metric instead of a Python callable:
# a callable metric is invoked once per pair of points from the interpreter,
# which is very slow. The haversine metric expects [latitude, longitude] in
# radians and returns distances in radians on the unit sphere, so both the
# coordinates and eps are converted accordingly.
loc_radians = np.radians(loc.to_numpy())
dbc = dbscan(eps = eps / EARTH_RADIUS_M,
             min_samples = min_samples,
             metric = 'haversine',
             algorithm = 'ball_tree').fit(loc_radians)

# One label per row of `loc`; -1 marks noise (points not assigned to a cluster).
labels = dbc.labels_
unique_labels = np.unique(dbc.labels_)

print(unique_labels)

df_dbc['Cluster'] = labels

[-1  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22
 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
 47 48 49 50]


In [36]:
# --- Map of the clustered accidents ------------------------------------------
# Centre the map on the mean coordinate of all selected accidents.
location = df_dbc['Latitude'].mean(), df_dbc['Longitude'].mean()

m = folium.Map(location=location,zoom_start=13)

folium.TileLayer('cartodbpositron').add_to(m)

# Palette is shorter than the number of clusters, so colours are reused
# cyclically (cluster k gets colour k mod len(palette)).
clust_colours = ['#B061FF','#61ffff','#808080','#050505','#707B7C','#F4D03F','#fdbf6f','#1F618D','#cab2d6','#6a3d9a','#ffff99','#b15928']

# Noise points (label -1) are not drawn; filter them out once up front
# instead of testing every row inside the loop.
clustered = df_dbc[df_dbc['Cluster'] != -1]

for lat, lon, cluster in zip(clustered['Latitude'], clustered['Longitude'], clustered['Cluster']):
    col = clust_colours[cluster % len(clust_colours)]
    # `fill` is a boolean switch; the fill colour itself goes in `fill_color`.
    folium.CircleMarker([lat, lon], radius = 10, color = col,
                        fill = True, fill_color = col).add_to(m)

m