In [1]:
# Base dependencies:
import pandas as pd
import numpy as np
from collections import defaultdict

In [2]:
# Load the per-season station and trip-history CSVs into {year: DataFrame}
# dictionaries, then stack each dictionary into one hierarchical DataFrame
# whose outer index level is the year.

# Example of data location
# https://raw.githubusercontent.com/SethDKelly/NiceRideMN/master/Nice_Ride_data/2010/NiceRide_station_2010.csv

BASE_URL = "https://raw.githubusercontent.com/SethDKelly/NiceRideMN/master/Nice_Ride_data/"
YEARS = range(2010, 2018)  # seasons 2010 through 2017 inclusive

# Plain dicts: the original defaultdict() had no default factory, so it
# behaved exactly like a dict while suggesting otherwise.
station_dict = {}
ridership_dict = {}

for year in YEARS:
    station_dict[year] = pd.read_csv(
        "{base}{year}/NiceRide_station_{year}.csv".format(base=BASE_URL, year=year))

    ridership_dict[year] = pd.read_csv(
        "{base}{year}/NiceRide_trip_history_{year}.csv".format(base=BASE_URL, year=year))

# Concatenating a mapping uses its keys (the years) as the outer index level
NR_station = pd.concat(station_dict)
NR_ridership = pd.concat(ridership_dict)

In [3]:
# Preview the first five station rows of the stacked (year, row) frame
NR_station.head(n=5)

Unnamed: 0,Unnamed: 1,Terminal,Station,Latitude,Longitude,Ndocks
2010,0,30000,100 Main Street SE,44.984892,-93.256551,23
2010,1,30001,25th Street & 33rd. Ave. S.,44.95734,-93.22374,15
2010,2,30002,Augsburg College,44.96622,-93.2384,21
2010,3,30003,Plymouth Ave N. & N. Oliver Ave,44.991412,-93.306569,23
2010,4,30004,11th Street & Hennepin,44.97534,-93.27869,23


In [4]:
# for model building
from sklearn.cluster import KMeans

# for optimizations
from sklearn.decomposition import PCA

In [7]:
# Assign every station a cluster label, one season at a time: the season's
# precomputed distance matrix is reduced to two principal components and the
# resulting points are grouped with k-means.
DISTANCE_URL = ("https://raw.githubusercontent.com/SethDKelly/NiceRideMN/master/Nice_Ride_data/"
                "{year}/distance_matrix_{year}.csv")
N_CLUSTERS = 6
SEED = 42  # fixed seed so that re-running the notebook reproduces the same labels

NR_station['Cluster'] = 0  # placeholder column, overwritten season by season below

for year in range(2010, 2018):

    # Read in the distance matrix that has been already computed
    distance_matrix = pd.read_csv(DISTANCE_URL.format(year=year), index_col=0)

    # Reduce the distance matrix to two primary x-y components
    components = PCA(n_components=2, random_state=SEED).fit_transform(distance_matrix)
    labels = KMeans(n_clusters=N_CLUSTERS, n_init=200, random_state=SEED).fit(components).labels_

    # Labels are written back by position, so the matrix must have exactly one
    # row per station of that season.
    # NOTE(review): this also assumes the matrix rows are in the same order as
    # the station rows - confirm against how the matrices were generated.
    n_stations = len(NR_station.loc[year])
    if len(labels) != n_stations:
        raise ValueError(
            "distance matrix for {} has {} rows but {} stations are loaded".format(
                year, len(labels), n_stations))

    NR_station.loc[year, 'Cluster'] = labels

In [8]:
# Show a bounded preview of the clustered stations instead of the whole frame
NR_station.head(10)

Unnamed: 0,Unnamed: 1,Terminal,Station,Latitude,Longitude,Ndocks,Cluster
2010,0,30000,100 Main Street SE,44.984892,-93.256551,23,5
2010,1,30001,25th Street & 33rd. Ave. S.,44.957340,-93.223740,15,1
2010,2,30002,Augsburg College,44.966220,-93.238400,21,1
2010,3,30003,Plymouth Ave N. & N. Oliver Ave,44.991412,-93.306569,23,2
2010,4,30004,11th Street & Hennepin,44.975340,-93.278690,23,4
2010,5,30005,Hennepin & 5th Street NE,44.988890,-93.253920,15,5
2010,6,30006,YWCA Downtown,44.972217,-93.276350,19,4
2010,7,30007,9th Street & 4th Ave.,44.972650,-93.268720,15,0
2010,8,30008,Midtown Exchange,44.949130,-93.261240,19,2
2010,9,30009,4th Street & 13th Ave. SE,44.981020,-93.237460,15,1


In [9]:
# Perform necessary imports for Bokeh plotting
from bokeh.io import output_file, show
from bokeh.plotting import figure

# Import the models modules
from bokeh.models import ColumnDataSource, HoverTool, Slider

# Import the layout modules
from bokeh.layouts import widgetbox, row

# To build graph as an application
from bokeh.io import curdoc

# For cluster coloring
from bokeh.models import ColorMapper
from bokeh.palettes import Spectral6

In [15]:
# Sanity check: list the distinct cluster labels assigned to the 2017 stations
sorted(NR_station.loc[2017, :].Cluster.unique())

[0, 1, 2, 3, 4, 5]

In [20]:
# Scatter one season's stations by longitude/latitude, coloured by cluster,
# and write the interactive figure to niceride.html.
PLOT_YEAR = 2017
AXIS_MARGIN = .01  # padding added around the outermost stations, in degrees

stations = NR_station.loc[PLOT_YEAR]

# Make the ColumnDataSource: source
source = ColumnDataSource(data={
    'x'        : stations.Longitude,
    'y'        : stations.Latitude,
    'terminal' : stations.Terminal,
    'station'  : stations.Station,
    'cluster'  : stations.Cluster,
    # One Spectral6 colour per cluster label; labels index the palette directly,
    # so they must lie in 0..5.
    'color'    : [Spectral6[int(label)] for label in stations.Cluster],
    })


# Setting boundaries for x-y axis ranges. These span the stations of every
# season, not only PLOT_YEAR, so the axes do not depend on the year shown.
xmin, xmax = NR_station.Longitude.min() - AXIS_MARGIN, NR_station.Longitude.max() + AXIS_MARGIN
ymin, ymax = NR_station.Latitude.min() - AXIS_MARGIN, NR_station.Latitude.max() + AXIS_MARGIN


# Create the figure: plot
plot = figure(plot_height=600, plot_width=900,
              x_range = (xmin, xmax), y_range = (ymin, ymax),
              title='Nice Ride stations by cluster, {}'.format(PLOT_YEAR),
              x_axis_label='Longitude', y_axis_label='Latitude')

# Creating hovertool tip
hover = HoverTool(tooltips=[('Terminal', '@terminal'),
                           ('Name', '@station'),
                           ('Location', '($x, $y)'),
                           ('Cluster', '@cluster')])

# adding hovertool to plot
plot.add_tools(hover)


# Draw the stations, colouring each circle by its cluster.
# NOTE(review): newer Bokeh releases replace `legend=` with `legend_field=` /
# `legend_group=` - confirm against the installed version before upgrading.
plot.circle(x='x', y='y', source=source,
            color='color', fill_alpha=0.8, legend='cluster'
           )

# Set the legend location only now: the legend is created by the glyph call
# above, so setting it earlier would act on a plot with no legend at all.
plot.legend.location = 'top_right'

# Output the file and show the figure
output_file('niceride.html')
show(plot)

