In [1]:
# Note: the folium map only renders when this is run inside a Jupyter notebook.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import folium       # python3 -m pip install folium


def map_scatter_plot(data_file):
    """Plot every row of a CSV file as a small blue circle on a folium map.

    Parameters
    ----------
    data_file : str or file-like
        Anything ``pandas.read_csv`` accepts. The first column is used as the
        index; the file must provide ``lat`` and ``lon`` columns.

    Returns
    -------
    folium.Map
        The map with one ``folium.Circle`` added per row.
    """
    # The center is a fixed coordinate, not recomputed from `data_file`.
    # NOTE(review): presumably the mean lat/lon of the original dataset -- confirm.
    center = [49.121383503296705, -122.67246901153845]

    points = pd.read_csv(data_file, index_col=0)
    scatter = folium.Map(location=center, zoom_start=10)

    # one circle per record
    for _, point in points.iterrows():
        circle = folium.Circle(
            radius=10,
            location=[point['lat'], point['lon']],
            color='blue',
        )
        circle.add_to(scatter)

    return scatter

In [2]:
# Build the unclustered scatter map; the bare `data_map` expression on the
# last line is what makes the notebook display it.
data_map = map_scatter_plot("data/transportation.csv")
data_map

In [3]:
from data_cleaning import remove_outlier
from sklearn.cluster import KMeans

In [4]:
def initial_map(data_file, num_cluster, random_state=0):
    """Load the data, cluster it with KMeans and create an empty base map.

    Parameters
    ----------
    data_file : str or file-like
        Anything ``pandas.read_csv`` accepts. The first column is used as the
        index; ``lat`` and ``lon`` columns are required.
    num_cluster : int
        Number of KMeans clusters to fit.
    random_state : int or None, optional
        Seed forwarded to ``KMeans``. Defaults to 0 so that cluster label ids
        (and therefore the colors derived from them) are the same on every
        run; pass ``None`` to get the previous unseeded behaviour.

    Returns
    -------
    list
        ``[data_map, data, y]``: the empty ``folium.Map``, the DataFrame
        returned by ``remove_outlier``, and the array of cluster labels.
        ``y`` is positional: ``y[k]`` belongs to the k-th *row* of ``data``,
        not to the row whose index label is ``k``.
    """
    # read data file
    data = pd.read_csv(data_file, index_col=0)

    # NOTE(review): remove_outlier comes from data_cleaning; presumably it
    # drops outlying rows, which may leave gaps in the index -- confirm.
    data = remove_outlier(data)

    # (n_rows, 2) feature matrix of [lat, lon]
    X = np.stack([data['lat'], data['lon']], axis=1)
    model = KMeans(n_clusters=num_cluster, random_state=random_state)
    y = model.fit_predict(X)

    # The center is a fixed coordinate, not recomputed from the data.
    data_map = folium.Map(location=[49.121383503296705, -122.67246901153845], zoom_start=10)

    return [data_map, data, y]

In [5]:
# Palette used by cluster_map: cluster label (0-14) -> circle color name.
# Only 15 labels have an entry.
color_map = dict(enumerate([
    'red', 'blue', 'yellow', 'purple', 'black', 'pink', 'orange', 'green', 'grey',
    'cyan', 'white', 'brown', 'olive', 'magenta', 'maroon',
]))

In [6]:
def cluster_map(data_file, num_cluster):
    """Draw every data point as a circle colored by its KMeans cluster.

    Parameters
    ----------
    data_file : str or file-like
        Forwarded unchanged to ``initial_map``.
    num_cluster : int
        Number of clusters, forwarded unchanged to ``initial_map``.

    Returns
    -------
    folium.Map
        The map produced by ``initial_map`` with one ``folium.Circle`` per
        data row, colored through ``color_map``.
    """
    data_map, data, y = initial_map(data_file, num_cluster)
    palette_size = len(color_map)

    # Pair rows with labels by POSITION. `y` is a positional array, whereas
    # the DataFrame index holds labels taken from the CSV's first column
    # (and possibly thinned by outlier removal), so `y[index_label]` can
    # raise IndexError or silently pick another row's cluster.
    for lat, lon, label in zip(data['lat'], data['lon'], y):
        folium.Circle(
            radius=10,
            location=[lat, lon],
            # Cycle through the palette so more clusters than colors reuse
            # colors instead of raising KeyError (color_map keys are 0..n-1).
            color=color_map[int(label) % palette_size],
        ).add_to(data_map)

    return data_map

In [7]:
# Build the clustered map with 11 clusters. This rebinds `data_map`, so the
# unclustered map from the earlier cell is no longer reachable by that name.
data_map = cluster_map('data/transportation.csv', 11)
data_map

In [8]:
from cluster import cluster_mean

In [9]:
def add_cluster_center(data_file, num_cluster, data_map):
    """Add one blue marker per cluster center to an existing map.

    Parameters
    ----------
    data_file : str
        Forwarded unchanged to ``cluster_mean``.
    num_cluster : int
        Forwarded to ``cluster_mean``; also the number of centers that are
        read (positions ``0 .. num_cluster - 1``) from its result.
    data_map : folium.Map
        Map that receives the markers; it is modified in place.

    Returns
    -------
    list
        ``[data_map, lat_lon_mean_pair]``: the same map object and whatever
        ``cluster_mean`` returned.
    """
    # NOTE(review): cluster_mean lives in the cluster module; presumably it
    # returns an indexable sequence of (lat, lon) means -- confirm.
    centers = cluster_mean(data_file, num_cluster)

    for cluster_id in range(num_cluster):
        pin = folium.Marker(
            location=list(centers[cluster_id]),
            popup=str(centers[cluster_id]),   # popup shows the raw coordinate pair
            icon=folium.Icon(color='blue'),
        )
        pin.add_to(data_map)

    return [data_map, centers]

In [10]:
# Add the cluster-center markers to the clustered map built above (the map is
# modified in place) and keep the centers for the next cell.
# NOTE(review): cluster_mean is computed separately from the clustering done in
# cluster_map; whether markers line up with the colored clusters depends on
# cluster_mean -- confirm.
data_map, lat_lon_mean_pair = add_cluster_center('data/transportation.csv', 11, data_map)
data_map

In [11]:
# Print the cluster centers in ascending order of their second element
# (presumably longitude, going by the lat_lon naming).
print(sorted(lat_lon_mean_pair, key=lambda x: x[1]))

[(49.27091210169494, -123.13897853898307), (49.18125266153846, -123.12323573846153), (49.27542107127658, -123.11200180319149), (49.27115377105264, -123.07571150526319), (49.230208600000005, -123.06494229166667), (49.245139304000006, -122.99644034799998), (49.198399676923074, -122.88977942307693), (49.259409158333334, -122.80102973333334), (49.105273825, -122.7920249875), (49.13833570714286, -122.66761861428574), (49.21367978888888, -122.62506896666667)]
