In [3]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
import folium
from folium.plugins import FastMarkerCluster

In [5]:
datum = "EPSG:4326"
long = -79.2241
lat = 43.4305
earth_circumference = 40074 #km
lat_radians = lat * (np.pi/180) #conversion of degrees to radians
long_km = 360 / (earth_circumference * np.cos(lat_radians)) 
lat_km = 1/110.574 
n=10
shift = 2 
portion = 1


In [3]:
#Constant function of '1s' at every spatial location. Homogeneous.
def ones_2d(size=n):
    return np.ones((n,n))

# Random normally distributed values, can set the mean and standard deviation. Can be scaled with the parameter A.
def normal_dist_2d(mean=0,sd=1,size=n,A=1):
    return A*np.random.normal(mean,sd, size =(n,n))

# https://en.wikipedia.org/wiki/Gaussian_function#Two-dimensional_Gaussian_function  
def gaus2d(x=0, y=0, mx=long, my=lat, sx=0.009, sy=0.009,shift_x=0.01,shift_y=0.00):
    # Different amplitude parameters, wikipedia uses A=1, however the chosen amplitude is usually selected
      # for gaussian normalization (integral = 1)
    A = 1. / (2 * np.pi*sx * sy)
    
    return A * np.exp(-((x - mx - shift_x)**2. / (2. * sx**2.) + (y - my - shift_y)**2. / (2. * sy**2.)))


# Canadian Cities
https://simplemaps.com/data/canada-cities

Data updated as of August 26, 2020

Only includes cities with >1000 individuals.

In [4]:
canadian_cities = pd.read_csv(r"C:\Users\jreye\Desktop\NS_DATA\simplemaps_canadacities_basic\canadacities.csv")
len(canadian_cities)

1738

In [5]:
geometry = [Point(xy) for xy in zip(canadian_cities["lng"],canadian_cities["lat"])]
gdf = gpd.GeoDataFrame(canadian_cities,crs=datum,geometry=geometry)

In [6]:
gdf.province_id.unique()

array(['ON', 'QC', 'BC', 'AB', 'MB', 'NS', 'SK', 'NL', 'NB', 'PE', 'YT',
       'NT', 'NU'], dtype=object)

In [7]:
d = {}
for city in gdf.province_id.unique():
    d["city_{}".format(city)] = gdf.loc[gdf["province_id"]==city]

In [28]:
provinces = list(d.keys())
provinces

['city_ON',
 'city_QC',
 'city_BC',
 'city_AB',
 'city_MB',
 'city_NS',
 'city_SK',
 'city_NL',
 'city_NB',
 'city_PE',
 'city_YT',
 'city_NT',
 'city_NU']

In [9]:
d.get("city_ON")

Unnamed: 0,city,city_ascii,province_id,province_name,lat,lng,population,density,timezone,ranking,postal,id,geometry
0,Toronto,Toronto,ON,Ontario,43.7417,-79.3733,5429524.0,4334.4,America/Toronto,1,M5T M5V M5P M5S M5R M5E M5G M5A M5C M5B M5M M5...,1124279679,POINT (-79.37330 43.74170)
5,Ottawa,Ottawa,ON,Ontario,45.4247,-75.6950,989567.0,334.0,America/Montreal,1,K4P K4M K4A K4B K4C K7S K1S K1R K1P K1W K1V K1...,1124399363,POINT (-75.69500 45.42470)
6,Mississauga,Mississauga,ON,Ontario,43.6000,-79.6500,721599.0,2467.6,America/Toronto,2,L4W L4V L4T L4Z L4Y L4X L5R L5V L5W L5A L5B L5...,1124112672,POINT (-79.65000 43.60000)
9,Hamilton,Hamilton,ON,Ontario,43.2567,-79.8692,693645.0,480.6,America/Toronto,2,L0R L0P L8W L8V L8T L8S L8R L8P L8G L8E L8N L8...,1124567288,POINT (-79.86920 43.25670)
10,Brampton,Brampton,ON,Ontario,43.6833,-79.7667,593638.0,2228.7,America/Toronto,2,L7A L6T L6W L6V L6P L6S L6R L6Y L6X L6Z,1124625989,POINT (-79.76670 43.68330)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1699,Laird,Laird,ON,Ontario,46.3833,-84.0667,1047.0,10.2,America/Detroit,4,P0S,1124000662,POINT (-84.06670 46.38330)
1715,Plantagenet,Plantagenet,ON,Ontario,45.5321,-74.9956,1027.0,260.5,America/Toronto,3,K0B,1124496473,POINT (-74.99560 45.53210)
1727,Papineau-Cameron,Papineau-Cameron,ON,Ontario,46.3000,-78.7333,1016.0,1.8,America/Toronto,4,P0H,1124000867,POINT (-78.73330 46.30000)
1728,Assiginack,Assiginack,ON,Ontario,45.7333,-81.8000,1013.0,4.5,America/Toronto,3,P0P,1124000091,POINT (-81.80000 45.73330)


In [10]:
#mean for ontario
mean_lat_on = np.mean(d["city_ON"]["lat"])
mean_lng_on = np.mean(d["city_ON"]["lng"])

#mean for canada
gdf_mean_lat = np.mean(gdf.lat)
gdf_mean_lng = np.mean(gdf.lng)

In [11]:
my_map = folium.Map(location=[gdf_mean_lat,gdf_mean_lng], zoom_start=5)
#folium.GeoJson(data = gdf).add_to(my_map)
my_map.add_child(FastMarkerCluster(gdf[["lat","lng"]].values.tolist()))
my_map

In [13]:
my_map = folium.Map(location=[mean_lat_on,mean_lng_on], zoom_start=5)

folium.GeoJson(data = d["city_ON"],popup=folium.GeoJsonPopup(fields=["city",
                                                                     "province_id",
                                                                     "lat",
                                                                     "lng",
                                                                     "population",
                                                                     "density"])).add_to(my_map)

folium.GeoJson(data = d["city_NS"],popup=folium.GeoJsonPopup(fields=["city",
                                                                     "province_id",
                                                                     "lat",
                                                                     "lng",
                                                                     "population",
                                                                     "density"])).add_to(my_map)

my_map


# Clustering using DBSCAN

https://en.wikipedia.org/wiki/DBSCAN

https://en.wikipedia.org/wiki/Cluster_analysis#Density-based_clustering

https://scikit-learn.org/stable/modules/generated/sklearn.cluster.DBSCAN.html

https://github.com/gboeing/2014-summer-travels/blob/master/clustering-scikitlearn.ipynb

https://geoffboeing.com/2014/08/clustering-to-reduce-spatial-data-set-size/

https://stackoverflow.com/questions/34579213/dbscan-for-clustering-of-geographic-location-data

https://scikit-learn.org/stable/auto_examples/calibration/plot_calibration.html#sphx-glr-auto-examples-calibration-plot-calibration-py

https://www.scikit-yb.org/en/latest/api/cluster/elbow.html#:~:text=The%20elbow%20method%20runs%20k,point%20to%20its%20assigned%20center.

In [3]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
import folium
from folium.plugins import FastMarkerCluster
from sklearn.cluster import DBSCAN
from geopy.distance import great_circle
from shapely.geometry import MultiPoint
datum = "EPSG:4326"

In [362]:
data_path = r"C:\Users\jua12849\Documents\GitHub\GeospatialDataAnalysis\canadacities.csv"


canadian_cities = pd.read_csv(data_path)

#create geodataframe containing data with all canadian cities and a point geometry column
geometry = [Point(xy) for xy in zip(canadian_cities["lng"],canadian_cities["lat"])]
gdf = gpd.GeoDataFrame(canadian_cities,crs=datum,geometry=geometry)

In [363]:
#gdf

In [364]:
#create dictionary with complete data for each province
d = {}
for city in gdf.province_id.unique():
    d["city_{}".format(city)] = gdf.loc[gdf["province_id"]==city]

#obtain province names as well as list of dictionary keys.
provinces = list(d.keys())

#obtain lat/long data for each province and the entire country as a numpy array.
d_lat_lon_numpy = {}
for province in provinces:
    d_lat_lon_numpy["{}".format(province)] = [d.get(province)[["lat","lng"]].to_numpy()]

d_lat_lon_numpy["Canada"] = [gdf[["lat","lng"]].to_numpy()]


# DBSCAN

In [365]:
kms_per_radian = 6371.0088
#epsilon = 50 / kms_per_radian
epsilon = 0.01
min_samples = 5

#perform DBSCAN algorithm to each province separately as well as the entire country
for province in list(d_lat_lon_numpy.keys()):
    #Create DBSCAN object and apply to each latitude/longitude pair
    d_lat_lon_numpy["{}".format(province)].append(
        {"dbs_{}".format(province):DBSCAN(eps=epsilon, min_samples=min_samples,algorithm = 'ball_tree',metric='haversine').fit(np.radians(d_lat_lon_numpy.get(province)[0]))})
    #Retrieve labels obtained from algorithm
    d_lat_lon_numpy["{}".format(province)].append(
        {"{}_cluster_label".format(province):d_lat_lon_numpy.get(province)[1]["dbs_{}".format(province)].labels_})
    #Obtain number of clusters
    d_lat_lon_numpy["{}".format(province)].append(
        {"{}_num_clusters".format(province):len(set(d_lat_lon_numpy.get(province)[2]["{}_cluster_label".format(province)]))})
    #
    d_lat_lon_numpy["{}".format(province)].append(
        {"{}_clusters".format(province):
        pd.Series(d_lat_lon_numpy.get(province)[0][d_lat_lon_numpy.get(province)[2]["{}_cluster_label".format(province)] == n] for n in range(d_lat_lon_numpy["{}".format(province)][3]["{}_num_clusters".format(province)]))})

    
    final_clusters = d_lat_lon_numpy.get(province)[4].get("{}_clusters".format(province))
    
    if len(final_clusters.iloc[-1]) == 0:
        print(province)
        print(final_clusters.iloc[-1])
        final_clusters.drop(final_clusters.tail(1).index,inplace=True) 
        
    




city_ON
[]
city_QC
[]
city_BC
[]
city_AB
[]
city_MB
[]
city_NS
[]
city_SK
[]
city_NL
[]
city_YT
[]
city_NT
[]
city_NU
[]
Canada
[]


In [366]:

for province in list(d_lat_lon_numpy.keys()):
    final_clusters = d_lat_lon_numpy.get(province)[4].get("{}_clusters".format(province))
    print(len(final_clusters))
   
    
    if len(final_clusters) == 0:
        print("entering if condition settind dbscan min samples to 1")
        print(province)
        
        print("erasing previous dictionary")
        del d_lat_lon_numpy["{}".format(province)]
        
        print("recreating initial array")
        d_lat_lon_numpy["{}".format(province)] = [d.get(province)[["lat","lng"]].to_numpy()]

        
        #Create DBSCAN object and apply to each latitude/longitude pair
        d_lat_lon_numpy["{}".format(province)].append(
            {"dbs_{}".format(province):DBSCAN(eps=epsilon, min_samples=1,algorithm = 'ball_tree',metric='haversine').fit(np.radians(d_lat_lon_numpy.get(province)[0]))})
        #Retrieve labels obtained from algorithm
        d_lat_lon_numpy["{}".format(province)].append(
            {"{}_cluster_label".format(province):d_lat_lon_numpy.get(province)[1]["dbs_{}".format(province)].labels_})
        #Obtain number of clusters
        d_lat_lon_numpy["{}".format(province)].append(
            {"{}_num_clusters".format(province):len(set(d_lat_lon_numpy.get(province)[2]["{}_cluster_label".format(province)]))})
        #
        d_lat_lon_numpy["{}".format(province)].append(
            {"{}_clusters".format(province):
            pd.Series(d_lat_lon_numpy.get(province)[0][d_lat_lon_numpy.get(province)[2]["{}_cluster_label".format(province)] == n] for n in range(d_lat_lon_numpy["{}".format(province)][3]["{}_num_clusters".format(province)]))})

4
2
3
2
1
1
6
4
1
1
0
entering if condition settind dbscan min samples to 1
city_YT
erasing previous dictionary


KeyError: 'city_YT'

In [349]:
prov = "YT"
d_lat_lon_numpy.get("city_{}".format(prov))[4].get("city_{}_clusters".format(prov))

Series([], dtype: object)

## Finding the point in each cluster closest to the centroid

The objective is to determine a representative point from each cluster. 
The lat/long coordinates of the cluster's centroid is located with shapely (where lat is x and lon is y). Then we proceed to find the member of the cluster with the smallest great circle distance to the centroid.

Special note: DBSCAN clusters may be non-convex, and thus the representative point may lie outside of the cluster.

In [355]:
def get_centermost_point(cluster):
    
    centroid = (MultiPoint(cluster).centroid.x, MultiPoint(cluster).centroid.y)
    centermost_point = min(cluster, key=lambda point: great_circle(point, centroid).m)
    return tuple(centermost_point)

for province in list(d_lat_lon_numpy.keys()):

    final_clusters = d_lat_lon_numpy.get(province)[4].get("{}_clusters".format(province))

    if len(final_clusters) > 0 :


        d_lat_lon_numpy["{}".format(province)].append(
            {"{}_centermost_points".format(province):d_lat_lon_numpy.get(province)[4]["{}_clusters".format(province)].map(get_centermost_point)})

        #unzip the list of centermost points (lat,lon) tuples into separate lat/lon lists
        lats, lons = zip(*d_lat_lon_numpy.get(province)[5]["{}_centermost_points".format(province)])
        #create a pandas dataframe
        rep_points = pd.DataFrame({'lon':lons, 'lat':lats})

        d_lat_lon_numpy["{}".format(province)].append({"{}_centermost_points_numpy".format(province) : rep_points.to_numpy()})


        d_lat_lon_numpy["{}".format(province)].append(
            {"{}_gdf_cluster_samples".format(province):gpd.GeoDataFrame(rep_points, geometry=gpd.points_from_xy(rep_points.lon, rep_points.lat),crs = "EPSG:4326" )})

    


In [359]:
d_lat_lon_numpy.keys()

dict_keys(['city_ON', 'city_QC', 'city_BC', 'city_AB', 'city_MB', 'city_NS', 'city_SK', 'city_NL', 'city_NB', 'city_PE', 'city_YT', 'city_NT', 'city_NU', 'Canada'])

In [128]:
#mean for ontario
mean_lat_on = np.mean(d["city_ON"]["lat"])
mean_lng_on = np.mean(d["city_ON"]["lng"])

#mean for canada
gdf_mean_lat = np.mean(gdf.lat)
gdf_mean_lng = np.mean(gdf.lng)

In [148]:
clusters = {}
cities = {}

for province in provinces:
    clusters["{}".format(province)] = d_lat_lon_numpy.get("{}".format(province))[6].get("{}_centermost_points_numpy".format(province))

    cities["{}".format(province)] = d_lat_lon_numpy.get("{}".format(province))[0]
    

In [149]:
study = np.concatenate([cities["city_ON"],
                            cities["city_QC"],
                            cities["city_NB"],
                            cities["city_NS"]])

study_clusters = np.concatenate([clusters["city_ON"],
                            clusters["city_QC"],
                            clusters["city_NB"],
                            clusters["city_NS"]])


my_map = folium.Map(location=[gdf_mean_lat,gdf_mean_lng], zoom_start=5)


for point in study_clusters :
    loc = [point[1],point[0]]
    folium.Marker(location=loc,icon=folium.Icon(color="red")).add_to(my_map)
    #folium.Circle(radius=40000,location=[point[1],point[0]],color="red").add_to(my_map)

for point in study :
    loc = [point[0],point[1]]
    #folium.Marker(location=loc,icon=folium.Icon(color="blue")).add_to(my_map)
    folium.Circle(radius=4000,location=loc,color="BLUE").add_to(my_map)
    
#folium.GeoJson(data = gdf).add_to(my_map)
    

my_map

# All Together

In [16]:
from sklearn.cluster import DBSCAN
from geopy.distance import great_circle
from shapely.geometry import MultiPoint

canadian_cities = pd.read_csv(r"c:\\Users\\jua12849\\Documents\\GitHub\\GeospatialDataAnalysis\\canadacities.csv")

#create geodataframe containing data with all canadian cities and a point geometry column
geometry = [Point(xy) for xy in zip(canadian_cities["lng"],canadian_cities["lat"])]
gdf = gpd.GeoDataFrame(canadian_cities,crs=datum,geometry=geometry)


#create dictionary with complete data for each province
d = {}
for city in gdf.province_id.unique():
    d["city_{}".format(city)] = gdf.loc[gdf["province_id"]==city]

#obtain province names as well as list of dictionary keys.
provinces = list(d.keys())

#obtain lat/long data for each province and the entire country as a numpy array.
d_lat_lon_numpy = {}
for province in provinces:
    d_lat_lon_numpy["{}".format(province)] = [d.get(province)[["lat","lng"]].to_numpy()]

d_lat_lon_numpy["Canada"] = [gdf[["lat","lng"]].to_numpy()]

kms_per_radian = 6371.0088
#epsilon = 50 / kms_per_radian

#epsilons = [0.009,0.01,0.015,0.019]

epsilon = 0.19

#perform DBSCAN algorithm to each province separately as well as the entire country
for province in list(d_lat_lon_numpy.keys()):
    #Create DBSCAN object and apply to each latitude/longitude pair
    d_lat_lon_numpy["{}".format(province)].append(
        {"dbs_{}".format(province):DBSCAN(eps=epsilon, min_samples=1,algorithm = 'ball_tree',metric='haversine').fit(np.radians(d_lat_lon_numpy.get(province)[0]))})
    #Retrieve labels obtained from algorithm
    d_lat_lon_numpy["{}".format(province)].append(
        {"{}_cluster_label".format(province):d_lat_lon_numpy.get(province)[1]["dbs_{}".format(province)].labels_})
    #Obtain cluster labels
    d_lat_lon_numpy["{}".format(province)].append(
        {"{}_num_clusters".format(province):len(set(d_lat_lon_numpy.get(province)[2]["{}_cluster_label".format(province)]))})
    #
    d_lat_lon_numpy["{}".format(province)].append(
        {"{}_clusters".format(province):
        pd.Series(d_lat_lon_numpy.get(province)[0][d_lat_lon_numpy.get(province)[2]["{}_cluster_label".format(province)] == n] for n in range(d_lat_lon_numpy["{}".format(province)][3]["{}_num_clusters".format(province)]))})
    

def get_centermost_point(cluster):
    centroid = (MultiPoint(cluster).centroid.x, MultiPoint(cluster).centroid.y)
    centermost_point = min(cluster, key=lambda point: great_circle(point, centroid).m)
    return tuple(centermost_point)

for province in list(d_lat_lon_numpy.keys()):

    d_lat_lon_numpy["{}".format(province)].append(
        {"{}_centermost_points".format(province):d_lat_lon_numpy.get(province)[4]["{}_clusters".format(province)].map(get_centermost_point)})

    #unzip the list of centermost points (lat,lon) tuples into separate lat/lon lists
    lats, lons = zip(*d_lat_lon_numpy.get(province)[5]["{}_centermost_points".format(province)])
    #create a pandas dataframe
    rep_points = pd.DataFrame({'lon':lons, 'lat':lats})

    d_lat_lon_numpy["{}".format(province)].append({"{}_centermost_points_numpy".format(province) : rep_points.to_numpy()})


    d_lat_lon_numpy["{}".format(province)].append(
        {"{}_gdf_cluster_samples".format(province):gpd.GeoDataFrame(rep_points, geometry=gpd.points_from_xy(rep_points.lon, rep_points.lat),crs = "EPSG:4326" )})

    

#mean for ontario
mean_lat_on = np.mean(d["city_ON"]["lat"])
mean_lng_on = np.mean(d["city_ON"]["lng"])

#mean for canada
gdf_mean_lat = np.mean(gdf.lat)
gdf_mean_lng = np.mean(gdf.lng)

clusters = {}
cities = {}

for province in provinces:
    clusters["{}".format(province)] = d_lat_lon_numpy.get("{}".format(province))[6].get("{}_centermost_points_numpy".format(province))

    cities["{}".format(province)] = d_lat_lon_numpy.get("{}".format(province))[0]





study = np.concatenate([cities["city_ON"],
                            cities["city_QC"],
                            cities["city_NB"],
                            cities["city_NS"]])

study_clusters = np.concatenate([clusters["city_ON"],
                            clusters["city_QC"],
                            clusters["city_NB"],
                            clusters["city_NS"]])




my_map = folium.Map(location=[gdf_mean_lat,gdf_mean_lng], zoom_start=5)


for point in study_clusters :
    loc = [point[1],point[0]]
    folium.Marker(location=loc,icon=folium.Icon(color="red")).add_to(my_map)
    #folium.Circle(radius=40000,location=[point[1],point[0]],color="red").add_to(my_map)

for point in study :
    loc = [point[0],point[1]]
    #folium.Marker(location=loc,icon=folium.Icon(color="blue")).add_to(my_map)
    folium.Circle(radius=4000,location=loc,color="BLUE").add_to(my_map)
    
#folium.GeoJson(data = gdf).add_to(my_map)
    

my_map   


In [17]:
from sklearn.cluster import DBSCAN
from geopy.distance import great_circle
from shapely.geometry import MultiPoint

canadian_cities = pd.read_csv(r"c:\\Users\\jua12849\\Documents\\GitHub\\GeospatialDataAnalysis\\canadacities.csv")

#create geodataframe containing data with all canadian cities and a point geometry column
geometry = [Point(xy) for xy in zip(canadian_cities["lng"],canadian_cities["lat"])]
gdf = gpd.GeoDataFrame(canadian_cities,crs=datum,geometry=geometry)


#create dictionary with complete data for each province
d = {}
for city in gdf.province_id.unique():
    d["city_{}".format(city)] = gdf.loc[gdf["province_id"]==city]

#obtain province names as well as list of dictionary keys.
provinces = list(d.keys())

#obtain lat/long data for each province and the entire country as a numpy array.
d_lat_lon_numpy = {}
for province in provinces:
    d_lat_lon_numpy["{}".format(province)] = [d.get(province)[["lat","lng"]].to_numpy()]

d_lat_lon_numpy["Canada"] = [gdf[["lat","lng"]].to_numpy()]

kms_per_radian = 6371.0088
#epsilon = 50 / kms_per_radian

epsilons = [0.009,0.01,0.015,0.019,0.19]

maps = {}


#mean for ontario
mean_lat_on = np.mean(d["city_ON"]["lat"])
mean_lng_on = np.mean(d["city_ON"]["lng"])

#mean for canada
gdf_mean_lat = np.mean(gdf.lat)
gdf_mean_lng = np.mean(gdf.lng)

def get_centermost_point(cluster):
    centroid = (MultiPoint(cluster).centroid.x, MultiPoint(cluster).centroid.y)
    centermost_point = min(cluster, key=lambda point: great_circle(point, centroid).m)
    return tuple(centermost_point)

for epsilon in epsilons:
    print(epsilon)
        
    #perform DBSCAN algorithm to each province separately as well as the entire country
    for province in list(d_lat_lon_numpy.keys()):
        #Create DBSCAN object and apply to each latitude/longitude pair
        d_lat_lon_numpy["{}".format(province)].append(
            {"dbs_{}".format(province):DBSCAN(eps=epsilon, min_samples=1,algorithm = 'ball_tree',metric='haversine').fit(np.radians(d_lat_lon_numpy.get(province)[0]))})
        #Retrieve labels obtained from algorithm
        d_lat_lon_numpy["{}".format(province)].append(
            {"{}_cluster_label".format(province):d_lat_lon_numpy.get(province)[1]["dbs_{}".format(province)].labels_})
        #Obtain cluster labels
        d_lat_lon_numpy["{}".format(province)].append(
            {"{}_num_clusters".format(province):len(set(d_lat_lon_numpy.get(province)[2]["{}_cluster_label".format(province)]))})
        #
        d_lat_lon_numpy["{}".format(province)].append(
            {"{}_clusters".format(province):
            pd.Series(d_lat_lon_numpy.get(province)[0][d_lat_lon_numpy.get(province)[2]["{}_cluster_label".format(province)] == n] for n in range(d_lat_lon_numpy["{}".format(province)][3]["{}_num_clusters".format(province)]))})

        d_lat_lon_numpy["{}".format(province)].append(
            {"{}_centermost_points".format(province):d_lat_lon_numpy.get(province)[4]["{}_clusters".format(province)].map(get_centermost_point)})

        #unzip the list of centermost points (lat,lon) tuples into separate lat/lon lists
        lats, lons = zip(*d_lat_lon_numpy.get(province)[5]["{}_centermost_points".format(province)])
        #create a pandas dataframe
        rep_points = pd.DataFrame({'lon':lons, 'lat':lats})

        d_lat_lon_numpy["{}".format(province)].append({"{}_centermost_points_numpy".format(province) : rep_points.to_numpy()})


        d_lat_lon_numpy["{}".format(province)].append(
            {"{}_gdf_cluster_samples".format(province):gpd.GeoDataFrame(rep_points, geometry=gpd.points_from_xy(rep_points.lon, rep_points.lat),crs = "EPSG:4326" )})

    

    clusters = {}
    cities = {}

    for province in provinces:
        clusters["{}".format(province)] = d_lat_lon_numpy.get("{}".format(province))[6].get("{}_centermost_points_numpy".format(province))

        cities["{}".format(province)] = d_lat_lon_numpy.get("{}".format(province))[0]





    study = np.concatenate([cities["city_ON"],
                                cities["city_QC"],
                                cities["city_NB"],
                                cities["city_NS"]])

    study_clusters = np.concatenate([clusters["city_ON"],
                                clusters["city_QC"],
                                clusters["city_NB"],
                                clusters["city_NS"]])


    
    maps["map_{}".format(str(epsilon).split(".")[1])] = folium.Map(location=[gdf_mean_lat,gdf_mean_lng], zoom_start=5) 


    


    for point in study_clusters :
        loc = [point[1],point[0]]
        folium.Marker(location=loc,icon=folium.Icon(color="red")).add_to(maps["map_{}".format(str(epsilon).split(".")[1])])
        #folium.Circle(radius=40000,location=[point[1],point[0]],color="red").add_to(my_map)

    for point in study :
        loc = [point[0],point[1]]
        #folium.Marker(location=loc,icon=folium.Icon(color="blue")).add_to(my_map)
        folium.Circle(radius=4000,location=loc,color="BLUE").add_to(maps["map_{}".format(str(epsilon).split(".")[1])])
        
    #folium.GeoJson(data = gdf).add_to(my_map)
        

#my_map   


0.009
0.01
0.015
0.019
0.19


In [18]:
maps

{'map_009': <folium.folium.Map at 0x1fc3415e748>,
 'map_01': <folium.folium.Map at 0x1fc31dd9708>,
 'map_015': <folium.folium.Map at 0x1fc31e583c8>,
 'map_019': <folium.folium.Map at 0x1fc34da5408>,
 'map_19': <folium.folium.Map at 0x1fc34f50648>}

In [20]:
maps["map_009"]

In [12]:
maps["map_{}".format(str(epsilon).split(".")[1])]

In [184]:
maps.keys()

dict_keys(['map_009', 'map_01', 'map_015', 'map_019'])

In [186]:
maps["map_009"]

In [176]:
maps["map_009"]

In [167]:
maps["map_019"]