# K-Means Clustering of Municipalities Based on Venue Categories in Miami-Dade County


*The Capstone Project of IBM Professional Certificate Courses at Coursera*

**Kangrui Lu**

**Libraries imported**

In [1]:
import requests
import pandas as pd
import numpy as np 
import random 
import json
import time
import matplotlib.cm as cm
import matplotlib.colors as colors

from IPython.display import Image 
from IPython.core.display import HTML 
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium
#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim
print('Folium installed')
print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2020.6.20          |   py36h9f0ad1d_0         151 KB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    ca-certificates-2020.6.20  |       hecda079_0         145 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    branca-0.4.1               |             py_0          26 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    ------------------------------------------------------------
                       

**List of municipalities in Miami-Dade**

In [2]:
# Quietly download the GeoJSON dataset from the ArcGIS open-data portal and save it as miami_data.json.
!wget -q -O 'miami_data.json' https://opendata.arcgis.com/datasets/b671f3bc435d40928bf22b5f44410600_0.geojson

In [3]:
# Parse the downloaded GeoJSON file into a Python object; the file is closed when the block exits.
with open('miami_data.json') as json_data:
    miami_data = json.load(json_data)

In [4]:
# The 'features' entries; each one's 'properties' carries the MUNICID and NAME fields read later.
miami_municipality = miami_data['features']

Construct a DataFrame of the Miami-Dade municipalities

In [5]:
# Column layout of the municipality table: ID and name.
column_names = ['MuniID','Municipality'] 
# Start from an empty frame that has exactly those columns.
df_MIA = pd.DataFrame(columns=column_names)

In [6]:
# Collect one record per GeoJSON feature and build the frame in a single call.
# Growing a DataFrame row by row copies the whole frame on every iteration.
municipality_records = [
    {
        'MuniID': feature['properties']['MUNICID'],
        'Municipality': feature['properties']['NAME'],
    }
    for feature in miami_municipality
]

# dtype=object lets MuniID also hold the string IDs of the rows added to this table later on.
df_MIA = pd.DataFrame(municipality_records, columns=column_names, dtype=object)

df_MIA.head()

Unnamed: 0,MuniID,Municipality
0,5,MIAMI SPRINGS
1,23,NORTH BAY VILLAGE
2,32,MIAMI LAKES
3,24,KEY BISCAYNE
4,26,VIRGINIA GARDENS


**Foursquare Credentials**

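This section of code was later removed for privacy reasons. To re-run the notebook, define `CLIENT_ID`, `CLIENT_SECRET`, `VERSION`, and `LIMIT` here; the venue-exploration cells below depend on them.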

**Get coordinates for those municipalities**

Use the loop below to geocode every municipality and insert the coordinates into `df_MIA`. Note that several latitudes come back as NaN. The dataframe is then modified so that these rows are populated with coordinates obtained from individual `geocode` lookups.

In [None]:
geolocator = Nominatim(user_agent="foursquare_agent")

# Make sure both coordinate columns exist even when no lookup succeeds.
for coord_column in ('lat', 'lon'):
    if coord_column not in df_MIA.columns:
        df_MIA[coord_column] = np.nan

# Iterate by index label so that .at always writes to the row being geocoded.
for idx, municipality in df_MIA['Municipality'].items():
    location = geolocator.geocode(municipality)
    time.sleep(2)  # throttle requests to the public Nominatim service

    if location is None:
        # geocode() found no match: record NaN instead of failing on
        # location.latitude, so the row can be fixed up manually afterwards.
        df_MIA.at[idx, 'lat'] = np.nan
        df_MIA.at[idx, 'lon'] = np.nan
        continue

    df_MIA.at[idx, 'lat'] = location.latitude
    df_MIA.at[idx, 'lon'] = location.longitude

In [10]:
# Look up the index labels at positions 30 through 34 and drop those rows.
# NOTE(review): presumably these are the rows whose geocoding failed — confirm against the data.
rows_to_drop = [df_MIA.index[position] for position in range(30, 35)]
df_MIA = df_MIA.drop(rows_to_drop)

In [11]:
# Manually specified coordinates. A state-qualified Nominatim query such as the
# one below can be used to look a municipality up individually:
#   geolocator = Nominatim(user_agent="foursquare_agent")
#   location = geolocator.geocode('GOLDEN BEACH, FL')
#   print(location.latitude, location.longitude)

row_30 = {'MuniID':'18', 'Municipality':'EL PORTAL', 'lat':25.8553739, 'lon':-80.193103}
row_31 = {'MuniID':'15', 'Municipality':'WEST MIAMI', 'lat':25.7632181, 'lon':-80.2958939}
row_32 = {'MuniID':'3', 'Municipality':'CORAL GABLES', 'lat':25.72149, 'lon':-80.2683838}
row_33 = {'MuniID':'19', 'Municipality':'GOLDEN BEACH', 'lat':25.9660825, 'lon':-80.12097619912299}

# pd.concat is used instead of DataFrame.append, which newer pandas releases no longer provide.
# sort=False keeps df_MIA's column order; ignore_index=True renumbers the rows from 0.
manual_rows = pd.DataFrame([row_30, row_31, row_32, row_33])
df_MIA = pd.concat([df_MIA, manual_rows], ignore_index=True, sort=False)

In [13]:
# Report the table's dimensions, then preview its first rows.
print(df_MIA.shape)
df_MIA.head()

(34, 4)


Unnamed: 0,MuniID,Municipality,lat,lon
0,5,MIAMI SPRINGS,25.821848,-80.292708
1,23,NORTH BAY VILLAGE,25.846207,-80.153935
2,32,MIAMI LAKES,25.911621,-80.321275
3,24,KEY BISCAYNE,25.696835,-80.163526
4,26,VIRGINIA GARDENS,25.810376,-80.302273


**Explore venues in these municipalities**

In [14]:
def getNearbyVenues(names, latitudes, longitudes, radius=5000):
    """Query the Foursquare ``venues/explore`` endpoint around each location.

    Relies on the notebook-level globals ``CLIENT_ID``, ``CLIENT_SECRET``,
    ``VERSION`` and ``LIMIT`` for the API credentials, version and result limit.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Parallel sequences giving a label and a coordinate pair per location;
        they are consumed together with ``zip``.
    radius : int, optional
        Value forwarded as the API's ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns ``Municipality``,
        ``Muni_Lat``, ``Muni_Lon``, ``Venue``, ``Venue_Lat``, ``Venue_Lon``
        and ``Venue_Category``. The frame is empty (but still has these
        columns) when no venue is returned at all.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    column_names = ['Municipality',
                    'Muni_Lat',
                    'Muni_Lon',
                    'Venue',
                    'Venue_Lat',
                    'Venue_Lon',
                    'Venue_Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and URL-encode the query string instead of formatting it by hand.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params,
                                timeout=30)  # do not hang forever on a stalled connection
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        for item in results:
            venue = item['venue']
            # A venue may come back without any category; record None rather than fail.
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor keeps the schema intact even with zero rows.
    nearby_venues = pd.DataFrame(venue_rows, columns=column_names)

    return(nearby_venues)

In [15]:
# Fetch venues around every municipality's coordinates, using the function's default radius.
miami_venues = getNearbyVenues(names=df_MIA['Municipality'],
                                   latitudes=df_MIA['lat'],
                                   longitudes=df_MIA['lon']
                                  )

MIAMI SPRINGS
NORTH BAY VILLAGE
MIAMI LAKES
KEY BISCAYNE
VIRGINIA GARDENS
MIAMI BEACH
NORTH MIAMI BEACH
AVENTURA
SOUTH MIAMI
HIALEAH
OPA-LOCKA
HIALEAH GARDENS
BISCAYNE PARK
SURFSIDE
DORAL
PINECREST
INDIAN CREEK VILLAGE
SUNNY ISLES BEACH
MIAMI
SWEETWATER
MIAMI GARDENS
HOMESTEAD
BAY HARBOR ISLANDS
FLORIDA CITY
CUTLER BAY
MIAMI SHORES
BAL HARBOUR
MEDLEY
PALMETTO BAY
NORTH MIAMI
EL PORTAL
WEST MIAMI
CORAL GABLES
GOLDEN BEACH


In [16]:
# Report how many venue rows and columns were collected, then preview the first rows.
print(miami_venues.shape)
miami_venues.head()

(3225, 7)


Unnamed: 0,Municipality,Muni_Lat,Muni_Lon,Venue,Venue_Lat,Venue_Lon,Venue_Category
0,MIAMI SPRINGS,25.821848,-80.292708,My Little Greek Deli,25.821241,-80.284097,Greek Restaurant
1,MIAMI SPRINGS,25.821848,-80.292708,Bella Bakery,25.825418,-80.286681,Bakery
2,MIAMI SPRINGS,25.821848,-80.292708,La Fresa Francesa,25.824762,-80.28253,French Restaurant
3,MIAMI SPRINGS,25.821848,-80.292708,Siamo,25.824442,-80.285277,Pizza Place
4,MIAMI SPRINGS,25.821848,-80.292708,Harvest Moon Gourmet Bistro,25.821145,-80.282357,Salad Place


In [17]:
# Count the non-null entries in each column per municipality, i.e. how many venues each one returned.
miami_venues.groupby('Municipality').count()

Unnamed: 0_level_0,Muni_Lat,Muni_Lon,Venue,Venue_Lat,Venue_Lon,Venue_Category
Municipality,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AVENTURA,100,100,100,100,100,100
BAL HARBOUR,82,82,82,82,82,82
BAY HARBOR ISLANDS,100,100,100,100,100,100
BISCAYNE PARK,100,100,100,100,100,100
CORAL GABLES,100,100,100,100,100,100
CUTLER BAY,100,100,100,100,100,100
DORAL,100,100,100,100,100,100
EL PORTAL,100,100,100,100,100,100
FLORIDA CITY,95,95,95,95,95,95
GOLDEN BEACH,100,100,100,100,100,100


Frequency of occurrence of each category by municipality

In [18]:
# One indicator column per venue category, with no prefix added to the column names.
miami_onehot = pd.get_dummies(
    miami_venues[['Venue_Category']],
    prefix="",
    prefix_sep="",
)
miami_onehot['Municipality'] = miami_venues['Municipality']

# Rotate the last column to the front while keeping the others in their order.
onehot_columns = list(miami_onehot.columns)
fixed_columns = onehot_columns[-1:] + onehot_columns[:-1]
miami_onehot = miami_onehot[fixed_columns]

# Averaging the indicators per municipality gives each category's share of its venues.
miami_grouped = (
    miami_onehot
    .groupby('Municipality')
    .mean()
    .reset_index()
)
miami_grouped.head()

Unnamed: 0,Municipality,Airport,Airport Lounge,Airport Service,American Restaurant,Antique Shop,Aquarium,Arcade,Arepa Restaurant,Argentinian Restaurant,...,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,AVENTURA,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.02,...,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0
1,BAL HARBOUR,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.0
2,BAY HARBOR ISLANDS,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0
3,BISCAYNE PARK,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0
4,CORAL GABLES,0.0,0.0,0.0,0.07,0.0,0.0,0.0,0.0,0.01,...,0.0,0.0,0.01,0.0,0.0,0.01,0.02,0.01,0.0,0.0


Top 10 venues of each municipality

In [19]:
def most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first element of ``row`` is skipped; the remaining entries are sorted
    by value in descending order and the index labels of the first
    ``num_top_venues`` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    top_labels = ranked.index.values
    return top_labels[:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# Build the column headers "1st Most Common Venue", "2nd Most Common Venue", ...
columns = ['Municipality']
for ind in range(num_top_venues):
    rank = ind + 1
    # Ranks ending in 1, 2 or 3 take 'st', 'nd', 'rd', except 11-13 which take 'th'.
    # Chosen explicitly instead of catching every exception from a list lookup.
    if rank % 10 in (1, 2, 3) and rank % 100 not in (11, 12, 13):
        suffix = indicators[rank % 10 - 1]
    else:
        suffix = 'th'
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

miamimuni_venues_sorted = pd.DataFrame(columns=columns)
miamimuni_venues_sorted['Municipality'] = miami_grouped['Municipality']

# Fill each municipality's row with its top categories, in rank order.
for ind in range(miami_grouped.shape[0]):
    miamimuni_venues_sorted.iloc[ind, 1:] = most_common_venues(miami_grouped.iloc[ind, :], num_top_venues)

miamimuni_venues_sorted.head()

Unnamed: 0,Municipality,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,AVENTURA,Clothing Store,Italian Restaurant,Grocery Store,Cosmetics Shop,Ice Cream Shop,Coffee Shop,American Restaurant,Gym,Department Store,Juice Bar
1,BAL HARBOUR,Café,Hotel,Beach,Resort,Coffee Shop,Shopping Mall,Supermarket,Italian Restaurant,Sushi Restaurant,Bar
2,BAY HARBOR ISLANDS,Beach,Italian Restaurant,Grocery Store,Coffee Shop,Park,Hotel,Burger Joint,Resort,Peruvian Restaurant,Spa
3,BISCAYNE PARK,Italian Restaurant,Grocery Store,Pizza Place,Coffee Shop,Park,Burger Joint,Middle Eastern Restaurant,Pet Store,Deli / Bodega,Bakery
4,CORAL GABLES,Italian Restaurant,American Restaurant,Gym / Fitness Center,Cuban Restaurant,Movie Theater,Ice Cream Shop,Bakery,Bookstore,Golf Course,Park


**K-Means Clustering**

In [20]:
kclusters = 5

# Cluster on the category frequencies only; the name column is not a feature.
# axis is passed by keyword because newer pandas no longer accepts it positionally.
miami_grouped_clustering = miami_grouped.drop('Municipality', axis=1)

# random_state fixes the initialisation; n_init is spelled out so the number of
# restarts does not depend on the installed scikit-learn's default.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(miami_grouped_clustering)

# insert() raises if the column already exists, so remove a stale copy first.
# This keeps the cell re-runnable.
if 'Cluster Labels' in miamimuni_venues_sorted.columns:
    miamimuni_venues_sorted = miamimuni_venues_sorted.drop('Cluster Labels', axis=1)
miamimuni_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Attach the label and top-venue columns to each municipality's ID and coordinates.
miami_merged = df_MIA.join(miamimuni_venues_sorted.set_index('Municipality'), on='Municipality')

miami_merged.head()

Unnamed: 0,MuniID,Municipality,lat,lon,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,5,MIAMI SPRINGS,25.821848,-80.292708,2,Cuban Restaurant,Hotel,Grocery Store,Coffee Shop,Mexican Restaurant,American Restaurant,Food Truck,Café,Latin American Restaurant,Pizza Place
1,23,NORTH BAY VILLAGE,25.846207,-80.153935,3,Beach,Italian Restaurant,Park,Golf Course,Burger Joint,American Restaurant,Japanese Restaurant,Pizza Place,Brazilian Restaurant,Bistro
2,32,MIAMI LAKES,25.911621,-80.321275,2,Coffee Shop,Pharmacy,Bakery,Cuban Restaurant,Ice Cream Shop,Mexican Restaurant,Grocery Store,Pizza Place,Sandwich Place,Wings Joint
3,24,KEY BISCAYNE,25.696835,-80.163526,3,Aquarium,Beach,Park,Italian Restaurant,Argentinian Restaurant,Restaurant,Harbor / Marina,Moving Target,Tennis Stadium,Boat or Ferry
4,26,VIRGINIA GARDENS,25.810376,-80.302273,2,Hotel,Cuban Restaurant,South American Restaurant,Coffee Shop,Arepa Restaurant,Mexican Restaurant,Restaurant,Sushi Restaurant,Pizza Place,Convenience Store


Examine the clusters

In [21]:
# Column at position 1 plus every column from position 5 onward.
profile_columns = miami_merged.columns[[1] + list(range(5, miami_merged.shape[1]))]
in_cluster = miami_merged['Cluster Labels'] == 0

# Transposed so that each selected municipality becomes one column.
cluster1 = miami_merged.loc[in_cluster, profile_columns].T
cluster1

Unnamed: 0,19
Municipality,SWEETWATER
1st Most Common Venue,Hotel
2nd Most Common Venue,Pizza Place
3rd Most Common Venue,Fast Food Restaurant
4th Most Common Venue,Convenience Store
5th Most Common Venue,American Restaurant
6th Most Common Venue,Gas Station
7th Most Common Venue,Discount Store
8th Most Common Venue,Burger Joint
9th Most Common Venue,BBQ Joint


In [22]:
# Column at position 1 plus every column from position 5 onward.
profile_columns = miami_merged.columns[[1] + list(range(5, miami_merged.shape[1]))]
in_cluster = miami_merged['Cluster Labels'] == 1

# Transposed so that each selected municipality becomes one column.
cluster2 = miami_merged.loc[in_cluster, profile_columns].T
cluster2

Unnamed: 0,6,7,8,15,16,17,33
Municipality,NORTH MIAMI BEACH,AVENTURA,SOUTH MIAMI,PINECREST,INDIAN CREEK VILLAGE,SUNNY ISLES BEACH,GOLDEN BEACH
1st Most Common Venue,Coffee Shop,Clothing Store,American Restaurant,Coffee Shop,Coffee Shop,Beach,Clothing Store
2nd Most Common Venue,Sushi Restaurant,Italian Restaurant,Coffee Shop,Grocery Store,Pizza Place,Clothing Store,Furniture / Home Store
3rd Most Common Venue,Grocery Store,Grocery Store,Bakery,Bakery,Brewery,American Restaurant,Cosmetics Shop
4th Most Common Venue,Mexican Restaurant,Cosmetics Shop,Shopping Mall,American Restaurant,American Restaurant,Department Store,Beach
5th Most Common Venue,Clothing Store,Ice Cream Shop,Clothing Store,Park,Hotel,Coffee Shop,Grocery Store
6th Most Common Venue,Chinese Restaurant,Coffee Shop,Grocery Store,Department Store,Park,Grocery Store,American Restaurant
7th Most Common Venue,Ice Cream Shop,American Restaurant,Department Store,Furniture / Home Store,Café,Gym,Japanese Restaurant
8th Most Common Venue,Gym,Gym,Asian Restaurant,Clothing Store,Mexican Restaurant,Ice Cream Shop,Department Store
9th Most Common Venue,Fast Food Restaurant,Department Store,Furniture / Home Store,Pharmacy,Restaurant,Italian Restaurant,Electronics Store


In [23]:
# Column at position 1 plus every column from position 5 onward.
profile_columns = miami_merged.columns[[1] + list(range(5, miami_merged.shape[1]))]
in_cluster = miami_merged['Cluster Labels'] == 2

# Transposed so that each selected municipality becomes one column.
cluster3 = miami_merged.loc[in_cluster, profile_columns].T
cluster3

Unnamed: 0,0,2,4,9,11,14,18,26,27,31,32
Municipality,MIAMI SPRINGS,MIAMI LAKES,VIRGINIA GARDENS,HIALEAH,HIALEAH GARDENS,DORAL,MIAMI,BAL HARBOUR,MEDLEY,WEST MIAMI,CORAL GABLES
1st Most Common Venue,Cuban Restaurant,Coffee Shop,Hotel,Cuban Restaurant,Pizza Place,Grocery Store,Hotel,Café,Cuban Restaurant,Cuban Restaurant,Italian Restaurant
2nd Most Common Venue,Hotel,Pharmacy,Cuban Restaurant,Sandwich Place,Fast Food Restaurant,Italian Restaurant,Italian Restaurant,Hotel,Park,Bakery,American Restaurant
3rd Most Common Venue,Grocery Store,Bakery,South American Restaurant,Bakery,Latin American Restaurant,Pharmacy,Seafood Restaurant,Beach,Hotel,Latin American Restaurant,Gym / Fitness Center
4th Most Common Venue,Coffee Shop,Cuban Restaurant,Coffee Shop,Grocery Store,Cuban Restaurant,Coffee Shop,Park,Resort,South American Restaurant,Café,Cuban Restaurant
5th Most Common Venue,Mexican Restaurant,Ice Cream Shop,Arepa Restaurant,Latin American Restaurant,Sandwich Place,Hotel,American Restaurant,Coffee Shop,Grocery Store,Grocery Store,Movie Theater
6th Most Common Venue,American Restaurant,Mexican Restaurant,Mexican Restaurant,Italian Restaurant,Grocery Store,Latin American Restaurant,Japanese Restaurant,Shopping Mall,Latin American Restaurant,Italian Restaurant,Ice Cream Shop
7th Most Common Venue,Food Truck,Grocery Store,Restaurant,Donut Shop,Pharmacy,Golf Course,Peruvian Restaurant,Supermarket,Mexican Restaurant,Restaurant,Bakery
8th Most Common Venue,Café,Pizza Place,Sushi Restaurant,Coffee Shop,Spa,Japanese Restaurant,Asian Restaurant,Italian Restaurant,Fast Food Restaurant,Seafood Restaurant,Bookstore
9th Most Common Venue,Latin American Restaurant,Sandwich Place,Pizza Place,Pharmacy,Hotel,Fast Food Restaurant,Restaurant,Sushi Restaurant,Coffee Shop,Coffee Shop,Golf Course


In [24]:
# Column at position 1 plus every column from position 5 onward.
profile_columns = miami_merged.columns[[1] + list(range(5, miami_merged.shape[1]))]
in_cluster = miami_merged['Cluster Labels'] == 3

# Transposed so that each selected municipality becomes one column.
cluster4 = miami_merged.loc[in_cluster, profile_columns].T
cluster4

Unnamed: 0,1,3,5,13,22
Municipality,NORTH BAY VILLAGE,KEY BISCAYNE,MIAMI BEACH,SURFSIDE,BAY HARBOR ISLANDS
1st Most Common Venue,Beach,Aquarium,Hotel,Beach,Beach
2nd Most Common Venue,Italian Restaurant,Beach,Beach,Italian Restaurant,Italian Restaurant
3rd Most Common Venue,Park,Park,Bakery,Hotel,Grocery Store
4th Most Common Venue,Golf Course,Italian Restaurant,Grocery Store,Coffee Shop,Coffee Shop
5th Most Common Venue,Burger Joint,Argentinian Restaurant,Sushi Restaurant,Grocery Store,Park
6th Most Common Venue,American Restaurant,Restaurant,Gym / Fitness Center,Burger Joint,Hotel
7th Most Common Venue,Japanese Restaurant,Harbor / Marina,Cocktail Bar,Bakery,Burger Joint
8th Most Common Venue,Pizza Place,Moving Target,Peruvian Restaurant,Resort,Resort
9th Most Common Venue,Brazilian Restaurant,Tennis Stadium,Art Gallery,Peruvian Restaurant,Peruvian Restaurant


In [25]:
# Column at position 1 plus every column from position 5 onward.
profile_columns = miami_merged.columns[[1] + list(range(5, miami_merged.shape[1]))]
in_cluster = miami_merged['Cluster Labels'] == 4

# Transposed so that each selected municipality becomes one column.
cluster5 = miami_merged.loc[in_cluster, profile_columns].T
cluster5

Unnamed: 0,10,12,20,21,23,24,25,28,29,30
Municipality,OPA-LOCKA,BISCAYNE PARK,MIAMI GARDENS,HOMESTEAD,FLORIDA CITY,CUTLER BAY,MIAMI SHORES,PALMETTO BAY,NORTH MIAMI,EL PORTAL
1st Most Common Venue,Sandwich Place,Italian Restaurant,Caribbean Restaurant,Mexican Restaurant,Mexican Restaurant,Sandwich Place,Italian Restaurant,Grocery Store,Grocery Store,Italian Restaurant
2nd Most Common Venue,Seafood Restaurant,Grocery Store,Fast Food Restaurant,Hotel,Fast Food Restaurant,Pharmacy,Park,Burger Joint,Bakery,Pizza Place
3rd Most Common Venue,Discount Store,Pizza Place,Seafood Restaurant,Grocery Store,American Restaurant,Grocery Store,Pizza Place,Coffee Shop,Mexican Restaurant,American Restaurant
4th Most Common Venue,Fast Food Restaurant,Coffee Shop,Furniture / Home Store,American Restaurant,Pizza Place,Park,Grocery Store,Bakery,Chinese Restaurant,Park
5th Most Common Venue,Pizza Place,Park,Sandwich Place,Pizza Place,Hotel,Ice Cream Shop,Caribbean Restaurant,Italian Restaurant,Middle Eastern Restaurant,Grocery Store
6th Most Common Venue,Donut Shop,Burger Joint,Grocery Store,Fast Food Restaurant,Sandwich Place,Gym / Fitness Center,Bistro,Park,Cuban Restaurant,Coffee Shop
7th Most Common Venue,Fried Chicken Joint,Middle Eastern Restaurant,Bakery,Pharmacy,Donut Shop,Wings Joint,Burger Joint,American Restaurant,Coffee Shop,Caribbean Restaurant
8th Most Common Venue,Rental Car Location,Pet Store,Discount Store,Steakhouse,Gas Station,Pizza Place,Gym,Sandwich Place,Pizza Place,Café
9th Most Common Venue,Cuban Restaurant,Deli / Bodega,Park,Donut Shop,Grocery Store,Cuban Restaurant,Donut Shop,Asian Restaurant,Pharmacy,Bistro


**Overview of Miami-Dade population by municipality**

In [26]:
# Load the population-by-city CSV from the project's GitHub repository and preview it.
url = 'https://raw.githubusercontent.com/KL98/Coursera_Capstone/master/MiamiDade_populationbycity.csv'
miami_pop = pd.read_csv(url)
miami_pop.head()

Unnamed: 0,Incorporated Community,Designation,2010 Population
0,Miami,City,399457
1,Hialeah,City,224669
2,Miami Gardens,City,107167
3,Miami Beach,City,87779
4,Homestead,City,60512


In [27]:
convert_dict = {'2010 Population': int}

# Strip the thousands separators so the population strings can be cast to integers.
miami_pop['2010 Population'] = miami_pop['2010 Population'].str.replace(',','')

# Cast, rename the columns to match the municipality table, and drop the unused column.
miami_pop = (
    miami_pop
    .astype(convert_dict)
    .rename(columns={"Incorporated Community": "Municipality", "2010 Population": "Population"})
    .drop(['Designation'], axis=1)
)

# Upper-case the names before merging on them.
miami_pop['Municipality'] = miami_pop['Municipality'].str.upper()

# A left merge keeps every row of df_MIA, whether or not a population row matches.
miami_pop_coor = df_MIA.merge(miami_pop, on='Municipality', how='left')
miami_pop_coor.head()

Unnamed: 0,MuniID,Municipality,lat,lon,Population
0,5,MIAMI SPRINGS,25.821848,-80.292708,13809.0
1,23,NORTH BAY VILLAGE,25.846207,-80.153935,7137.0
2,32,MIAMI LAKES,25.911621,-80.321275,29361.0
3,24,KEY BISCAYNE,25.696835,-80.163526,12344.0
4,26,VIRGINIA GARDENS,25.810376,-80.302273,2375.0


In [28]:
# Remove municipalities that found no population match in the left merge (Population is NaN).
# Selecting by missing value rather than by row position keeps this cell safe to re-run:
# a positional drop would remove one more row on every execution.
miami_pop_coor = miami_pop_coor.dropna(subset=['Population'])

In [29]:
# astype returns a new frame, so the result must be assigned for the cast to persist.
miami_pop_coor = miami_pop_coor.astype({'Population': 'int64'})
miami_pop_coor.dtypes

MuniID           object
Municipality     object
lat             float64
lon             float64
Population        int64
dtype: object

Geographic Choropleth Map

In [30]:
# Quietly download the GeoJSON used as the choropleth's geometry and save it as miami.json.
!wget --quiet https://opendata.arcgis.com/datasets/bd523e71861749959a7f12c9d0388d1c_0.geojson -O miami.json

In [31]:
# Path of the GeoJSON file that is passed to the choropleth as geo_data.
miami_geo = r'miami.json'

In [32]:
# Six evenly spaced integer thresholds between the smallest and largest population.
threshold_scale = np.linspace(miami_pop_coor['Population'].min(),
                              miami_pop_coor['Population'].max(),
                              6, dtype=int)
threshold_scale = threshold_scale.tolist()
# Presumably nudged up by one so the maximum value falls inside the last bin — confirm.
threshold_scale[-1] = threshold_scale[-1] + 1

miami_pop_map = folium.Map(location=[25.7617, -80.1918], zoom_start=10)
# Map.choropleth / threshold_scale is the API of the folium release pinned in the imports cell.
miami_pop_map.choropleth(
    geo_data=miami_geo,
    data=miami_pop_coor,
    columns=['Municipality', 'Population'],
    key_on='feature.properties.NAME',
    threshold_scale=threshold_scale,
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Population by Municipality'
)

# One marker colour per cluster, defined once and shared by the outline and the fill.
cluster_palette = ["blue", "grey", "green", "purple", "pink"]

for lat, lon, poi, cluster in zip(miami_merged['lat'], miami_merged['lon'], miami_merged['Municipality'], miami_merged['Cluster Labels']):
    if pd.isna(cluster):
        # Municipality without a cluster assignment: nothing to colour.
        continue
    cluster = int(cluster)  # the label column is float if any label is missing

    # Palette entry at (label - 1); the modulo makes the wrap-around of label 0
    # onto the last colour explicit.
    marker_color = cluster_palette[(cluster - 1) % len(cluster_palette)]

    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(miami_pop_map)

miami_pop_map