In [1]:
# note: The map only shows in jupyter notebook
# The code is adapted from Zuo Yifan
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import folium       # python3 -m pip install folium
from cluster import cluster_mean

def map_scatter_plot(data_file, center=None, zoom_start=10):
    """Plot every row of a CSV file as a small blue circle on a folium map.

    Parameters
    ----------
    data_file : str or path-like
        CSV file, read with ``pd.read_csv(data_file, index_col=0)``.  It must
        provide ``lat`` and ``lon`` columns.
    center : sequence of two floats, optional
        ``[lat, lon]`` the map is initially centered on.  When omitted, the
        fixed point this notebook has always used is taken.
    zoom_start : int, optional
        Initial zoom level handed to ``folium.Map`` (default 10).

    Returns
    -------
    folium.Map
        The map with one ``folium.Circle`` (radius 10, blue) per data row.
    """
    if center is None:
        # Fixed default view.  It is a constant, NOT recomputed from
        # ``data_file``, so every map opens on the same area regardless of
        # which file is plotted.
        center = [49.121383503296705, -122.67246901153845]

    # read file
    data = pd.read_csv(data_file, index_col=0)

    data_map = folium.Map(location=list(center), zoom_start=zoom_start)

    # Only the coordinates are needed, so walk the two columns together
    # instead of building a full row Series per point with iterrows().
    for lat, lon in zip(data['lat'], data['lon']):
        folium.Circle(
            radius=10,
            location=[lat, lon],
            color='blue',
        ).add_to(data_map)

    return data_map

In [2]:
# Plot the points stored in data/chain.csv.  The bare name on the last line
# makes the notebook render the map as this cell's output.
chain_map = map_scatter_plot("data/chain.csv")
chain_map

In [None]:
# TODO: unfinished cell -- the assignment below had no right-hand side, which
# is a SyntaxError and stops "Restart Kernel & Run All".  Supply the data file
# for the combined map and uncomment:
# all_resturant_map = map_scatter_plot(<path to the combined CSV>)

In [3]:
# Plot the points stored in data/non_chain.csv.  The bare name on the last
# line makes the notebook render the map as this cell's output.
non_chain_map = map_scatter_plot("data/non_chain.csv")
non_chain_map

In [4]:

from sklearn.cluster import KMeans

In [5]:
def initial_map(data_file, num_cluster, random_state=None):
    """Cluster the points of a CSV file with k-means and create an empty base map.

    Parameters
    ----------
    data_file : str or path-like
        CSV file, read with ``pd.read_csv(data_file, index_col=0)``.  It must
        provide ``lat`` and ``lon`` columns.
    num_cluster : int
        Number of clusters requested from ``KMeans``.
    random_state : int, RandomState instance or None, optional
        Forwarded to ``KMeans``.  The default ``None`` keeps the unseeded
        behaviour; pass an int to get the same clustering on every run.

    Returns
    -------
    list
        ``[data_map, data, y, cluster_data]`` where

        * ``data_map`` is a ``folium.Map`` with no points added yet,
        * ``data`` is the DataFrame as read from ``data_file``,
        * ``y`` holds one cluster label per row of ``data``, in row order,
        * ``cluster_data`` is a DataFrame with columns ``var1`` (lat),
          ``var2`` (lon) and ``cluster``, sorted by ``cluster``.
    """
    # read data file
    data = pd.read_csv(data_file, index_col=0)

    # One (lat, lon) row per data point.
    X = np.stack([data['lat'], data['lon']], axis=1)
    model = KMeans(n_clusters=num_cluster, random_state=random_state)
    # fit_predict both fits the model and returns the labels, so no separate
    # fit() call is needed; a second fit would only repeat the clustering.
    y = model.fit_predict(X)

    cluster_data = pd.DataFrame(X, columns=["var1", "var2"])
    cluster_data["cluster"] = y
    cluster_data = cluster_data.sort_values("cluster")

    # Fixed center shared by the maps in this notebook.
    data_map = folium.Map(location=[49.121383503296705, -122.67246901153845], zoom_start=10)

    return [data_map, data, y, cluster_data]

In [6]:
# Lookup table from cluster label (0-14) to the colour used when drawing that
# cluster's points; the position of a colour in the list is its label.
color_map = dict(enumerate([
    'red', 'blue', 'yellow', 'purple', 'black',
    'pink', 'orange', 'green', 'grey', 'cyan',
    'white', 'brown', 'olive', 'magenta', 'maroon',
]))

In [7]:
def cluster_map(data_file, num_cluster):
    """Draw the points of a CSV file on a map, coloured by k-means cluster.

    Parameters
    ----------
    data_file : str or path-like
        CSV file with ``lat`` and ``lon`` columns, passed to ``initial_map``.
    num_cluster : int
        Number of clusters, passed to ``initial_map``.

    Returns
    -------
    tuple
        ``(data_map, cluster_data)``: the folium map with one circle per data
        row, and the cluster table produced by ``initial_map``.
    """
    data_map, data, y, cluster_data = initial_map(data_file, num_cluster)

    palette_size = len(color_map)
    # ``y`` is ordered by row position, so pair it with the rows by position.
    # Looking it up with the DataFrame's index label (taken from the CSV's
    # first column) is only correct when that index is exactly 0..n-1.
    for lat, lon, label in zip(data['lat'], data['lon'], y):
        folium.Circle(
            radius=10,
            location=[lat, lon],
            # Wrap around the palette so that asking for more clusters than
            # there are colours reuses colours instead of raising KeyError.
            color=color_map[int(label) % palette_size],
        ).add_to(data_map)

    return data_map, cluster_data

In [8]:
# Cluster each data set into 8 groups and colour its points by cluster.
# Each file is clustered exactly once: a repeated call would only redo the
# k-means fit and throw the first map and table away.
chain_data_map, chain_cluster_data = cluster_map('data/chain.csv', 8)
non_chain_data_map, nonchain_cluster_data = cluster_map('data/non_chain.csv', 8)

In [9]:
# Write the cluster tables returned by cluster_map (columns var1, var2,
# cluster) to CSV files; the paths are relative to the working directory.
chain_cluster_data.to_csv('cl_data.csv')
nonchain_cluster_data.to_csv('ncl_data.csv')

In [10]:
def add_cluster_center(data_file, num_cluster, data_map, marker_color):
    """Drop one marker per cluster centre onto an existing map.

    The centres come from ``cluster_mean(data_file, num_cluster)`` (imported
    from the local ``cluster`` module).  Entries ``0 .. num_cluster - 1`` are
    each drawn as a ``folium.Marker`` whose popup is the centre's string form
    and whose icon uses ``marker_color``.

    Returns
    -------
    list
        ``[data_map, lat_lon_mean_pair]``: the very map object that was passed
        in (markers are added to it in place) and the centres exactly as
        ``cluster_mean`` returned them.
    """
    centers = cluster_mean(data_file, num_cluster)

    for idx in range(num_cluster):
        center = centers[idx]
        marker = folium.Marker(
            location=list(center),
            popup=str(center),
            icon=folium.Icon(color=marker_color),
        )
        marker.add_to(data_map)

    return [data_map, centers]

In [11]:
# Overlay both sets of cluster centres on a single map.  add_cluster_center
# returns the map object it was given, so `data_map` is `chain_data_map`, and
# `non_chain` is that same map again -- now carrying blue markers (chain
# centres) and orange markers (non-chain centres).
# NOTE(review): `lat_lon_mean_pair` is reassigned by the second call, so after
# this cell it holds only the centres computed from data/non_chain.csv.
data_map, lat_lon_mean_pair = add_cluster_center('data/chain.csv', 8, chain_data_map,'blue')
non_chain, lat_lon_mean_pair = add_cluster_center('data/non_chain.csv', 8, data_map,'orange')

In [12]:

# Render the map.  Despite its name, `non_chain` is the chain cluster map with
# both the chain and the non-chain centre markers added to it.
non_chain

In [13]:
# Print the centres from the last add_cluster_center call (data/non_chain.csv)
# ordered by their second component, i.e. by ascending longitude in the
# (lat, lon) pairs.
print(sorted(lat_lon_mean_pair, key=lambda x: x[1]))

[(49.166558134812284, -123.12974774505106), (49.27706458452654, -123.12953859226342), (49.2537348407143, -123.03722671547618), (49.20332119897961, -122.91811396326528), (49.228819914024385, -122.80929047195119), (49.0609830317647, -122.79492782352942), (49.14802447565792, -122.64594162039478), (49.06340949043478, -122.32523004956519)]
