In [1]:
# Note: the folium maps below are only displayed when this is run inside a Jupyter notebook.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import folium       # python3 -m pip install folium


def map_scatter_plot(data_file, center=(49.121383503296705, -122.67246901153845), zoom_start=10):
    """Plot every row of a CSV file as a small yellow circle on a folium map.

    Parameters
    ----------
    data_file : str or path-like
        CSV file to read. Its first column is used as the index, and it must
        contain ``lat`` and ``lon`` columns.
    center : sequence of two floats, optional
        ``(lat, lon)`` the map is initially centred on. The default is the
        fixed point this function has always used; it is NOT recomputed from
        the data in ``data_file``.
    zoom_start : int, optional
        Initial zoom level of the map (default 10).

    Returns
    -------
    folium.Map
        The map with one ``folium.Circle`` (radius 10, yellow) per data row.
    """
    data = pd.read_csv(data_file, index_col=0)

    # folium receives the centre as a list, as in the original call.
    data_map = folium.Map(location=list(center), zoom_start=zoom_start)

    # Walk the two coordinate columns in lockstep; only lat/lon are needed,
    # so there is no reason to materialise a Series per row with iterrows().
    for lat, lon in zip(data['lat'], data['lon']):
        folium.Circle(
            radius=10,
            location=[lat, lon],
            color='yellow',
        ).add_to(data_map)

    return data_map

In [2]:
# Draw every point from data/shopping_banking.csv on a map and display it.
# NOTE: `data_map` is a notebook-level name that later cells rebind.
data_map = map_scatter_plot("data/shopping_banking.csv")
data_map

In [3]:
from data_cleaning import remove_outlier
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import AffinityPropagation
from sklearn.cluster import DBSCAN
from sklearn.cluster import MeanShift, estimate_bandwidth

In [4]:
def initial_map(data_file, num_cluster, random_state=None):
    """Load a CSV of points, cluster them with KMeans and create an empty map.

    Parameters
    ----------
    data_file : str or path-like
        CSV file to read. Its first column is used as the index, and it must
        contain ``lat`` and ``lon`` columns.
    num_cluster : int
        Number of KMeans clusters.
    random_state : int or None, optional
        Seed forwarded to ``KMeans``. The default ``None`` keeps the original
        unseeded behaviour, where cluster labels (and therefore the colours
        derived from them) can differ from run to run. Pass an integer for
        reproducible labels.

    Returns
    -------
    list
        ``[data_map, data, y]`` where ``data_map`` is a new ``folium.Map``
        with nothing drawn on it, ``data`` is the frame returned by
        ``remove_outlier`` and ``y`` holds one cluster label per row of
        ``data``, in row order.
    """
    data = pd.read_csv(data_file, index_col=0)

    # remove_outlier comes from data_cleaning; its filtering criteria are not
    # visible here. NOTE(review): if it drops rows, data.index may no longer
    # be 0..n-1, so `y` must be matched to rows by position, not by label.
    data = remove_outlier(data)

    # One (lat, lon) row per data point, in the frame's row order.
    X = np.column_stack((data['lat'], data['lon']))
    model = KMeans(n_clusters=num_cluster, random_state=random_state)
    y = model.fit_predict(X)

    # Fixed map centre and zoom; not derived from the data.
    data_map = folium.Map(location=[49.121383503296705, -122.67246901153845], zoom_start=10)

    return [data_map, data, y]

In [5]:
# Colour used for each cluster label (0-13) when points are drawn on the map.
color_map = dict(enumerate([
    'red', 'blue', 'yellow', 'purple', 'black', 'pink', 'orange',
    'green', 'grey', 'cyan', 'white', 'brown', 'olive', 'magenta',
]))

In [6]:
def cluster_map(data_file, num_cluster):
    """Draw the points of a CSV file on a map, coloured by KMeans cluster.

    Parameters
    ----------
    data_file : str or path-like
        CSV file handed to ``initial_map``.
    num_cluster : int
        Number of clusters handed to ``initial_map``.

    Returns
    -------
    folium.Map
        The map from ``initial_map`` with one ``folium.Circle`` (radius 10)
        per data row, coloured through ``color_map`` by the row's cluster
        label. When there are more cluster labels than entries in
        ``color_map`` the colours repeat instead of raising ``KeyError``.
    """
    data_map, data, labels = initial_map(data_file, num_cluster)

    palette_size = len(color_map)

    # Pair each row with its label BY POSITION. The label array is positional,
    # whereas the frame's index holds whatever the CSV's first column (and any
    # outlier filtering) left behind, so indexing labels with the index value
    # can pick the wrong label or run past the end of the array.
    for lat, lon, label in zip(data['lat'], data['lon'], labels):
        folium.Circle(
            radius=10,
            location=[lat, lon],
            # color_map is keyed 0..len-1; wrap so extra clusters reuse colours.
            color=color_map[int(label) % palette_size],
        ).add_to(data_map)

    return data_map

In [7]:
# Cluster the points of data/food.csv into 14 groups (one per colour in
# color_map) and display them. This rebinds the notebook-level `data_map`.
data_map = cluster_map('data/food.csv', 14)
data_map

In [8]:
from cluster import cluster_mean

In [9]:
def add_cluster_center(data_file, num_cluster, data_map):
    """Add one blue marker per cluster centre to an existing map.

    The centres come from ``cluster_mean(data_file, num_cluster)`` and the
    first ``num_cluster`` of them are used. Each marker's popup shows the
    string form of its centre. ``data_map`` is modified in place.

    Returns ``[data_map, centres]``: the same map object together with
    whatever ``cluster_mean`` returned.
    """
    centres = cluster_mean(data_file, num_cluster)

    for position in range(num_cluster):
        centre = centres[position]
        marker = folium.Marker(
            location=list(centre),
            popup=str(centre),
            icon=folium.Icon(color='blue'),
        )
        marker.add_to(data_map)

    return [data_map, centres]

In [10]:
# Overlay one marker per cluster centre on the existing `data_map`.
# add_cluster_center adds the markers to the map object it is given and
# returns that same object, so re-running this cell adds the markers again.
data_map, lat_lon_mean_pair = add_cluster_center('data/food.csv', 14, data_map)
data_map

In [11]:
# Print the cluster-centre pairs returned by add_cluster_center above.
print(lat_lon_mean_pair)

[(49.17455710948509, -123.13631768401073), (49.031977672897185, -122.79527506261684), (49.05671173710405, -122.30996404796375), (49.267392640145985, -123.07711180602173), (49.27287841181102, -122.79403062598426), (49.226614716842114, -122.91078672894731), (49.208452029885045, -122.61788959999998), (49.121383503296705, -122.67246901153845), (49.280122759207, -123.12465214387653), (49.171731204347836, -122.84109296376816), (49.060514907407416, -123.08047512777782), (49.25891726142132, -123.19262496243651), (49.073248204166674, -122.49008110416666), (49.24960178422713, -123.00023782744486)]
