In [1]:
import pandas as pd
from sklearn.cluster import DBSCAN
from pydeck.data_utils import assign_random_colors

# July 2014 Uber pickup records, read directly from the FiveThirtyEight GitHub repository
FOIL_DATA = 'https://raw.githubusercontent.com/fivethirtyeight/uber-tlc-foil-response/master/uber-trip-data/uber-raw-data-jul14.csv'

# Download and parse the CSV into a DataFrame (requires network access)
df = pd.read_csv(filepath_or_buffer=FOIL_DATA)

# Preview the first five rows as the cell output
df.head(5)

Unnamed: 0,Date/Time,Lat,Lon,Base
0,7/1/2014 0:03:00,40.7586,-73.9706,B02512
1,7/1/2014 0:05:00,40.7605,-73.9994,B02512
2,7/1/2014 0:06:00,40.732,-73.9999,B02512
3,7/1/2014 0:09:00,40.7635,-73.9793,B02512
4,7/1/2014 0:20:00,40.7204,-74.0047,B02512


In [2]:
# Parse the raw 'Date/Time' strings (e.g. '7/1/2014 0:03:00') into timestamps.
# pd.Timestamp.strptime is not implemented in pandas and raises NotImplementedError,
# so use the vectorized pd.to_datetime with the same explicit format instead.
df['ts'] = pd.to_datetime(df['Date/Time'], format='%m/%d/%Y %H:%M:%S')

In [9]:
# Store each row's coordinates as a [Lon, Lat] list in a single 'position' column.
# The two columns are converted in bulk rather than with a row-wise apply, which
# would invoke a Python function once per row of the frame.
df['position'] = pd.Series(df[['Lon', 'Lat']].values.tolist(), index=df.index)

In [10]:
import pydeck

# Fill color shared by every point; note the list has four components
# (presumably R, G, B and alpha -- confirm against the layer's color format)
ORANGE_RGB = [255, 140, 0, 50]

# Scatterplot layer over the full DataFrame: each row is drawn at the
# coordinates held in its 'position' column, using the constant fill above
scatterplot = pydeck.Layer(
    'ScatterplotLayer',
    data=df,
    radius=5,
    get_fill_color=ORANGE_RGB,
    get_position='position',
)

In [11]:
# Derive the initial view from the data itself: frame the central 50% of the points
viewport = pydeck.data_utils.autocompute_viewport(
    df['position'],
    view_proportion=0.5,
)

In [12]:
# Assemble the deck from the scatterplot layer, opening at the computed viewport
r = pydeck.Deck(
    scatterplot,
    initial_view_state=viewport,
)

In [13]:
# Display the deck in the notebook; the cell output is a DeckGLWidget
r.show()

DeckGLWidget(json_input='{"initialViewState": {"bearing": 0, "latitude": 40.73914102805288, "longitude": -73.9…

In [14]:
# Runs a DBSCAN clustering algorithm on the geospatial data.
# Each sample is the [Lon, Lat] pair stored in the 'position' column; points that
# end up in no cluster are labelled -1.
db = DBSCAN(eps=0.00001, min_samples=60).fit(df['position'].tolist())
df['labels'] = db.labels_

# Mean coordinate of every real cluster (noise label -1 excluded).
# The aggregation is restricted to the numeric Lat/Lon columns: df also holds
# string, datetime and list columns, and averaging those raises a TypeError on
# pandas >= 2.0 (older versions silently dropped them).
# NOTE: `centroids` is not used by the layer below, which plots `df` itself.
centroids = (
    df.loc[df['labels'] != -1]
    .groupby('labels')[['Lat', 'Lon']]
    .mean()
    .reset_index()
)

# Gives each cluster label a random color; every point takes the color of its label.
# The lookup is keyed by the label converted to a string.
colors_lookup = assign_random_colors(df['labels'])
colors_lookup["-1"] = [0, 0, 0, 0]  # make -1 (no group) transparent
df['color'] = df['labels'].apply(lambda g: colors_lookup[str(g)])

# Build a layer that draws every point in its cluster color, then swap it in
# as the first layer of the existing deck and refresh the rendered widget
centroids_plot = pydeck.Layer(
    'ScatterplotLayer',
    data=df,
    radius=30,
    stroked=False,
    filled=True,
    get_fill_color='color',
    get_position='position')
r.layers[0] = centroids_plot
r.update()