# Buenos Aires analysis

#### Imports

In [1]:
import os

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from sklearn.cluster import KMeans

#### Load the table with the Buenos Aires boroughs and the rest of the cities.

In [311]:
# Read the location table from 'Boroughs.csv' and display it.
# The rendered output below shows the columns Borough, City, LAT, LONG, Population and Radius.
borough = pd.read_csv('Boroughs.csv')
borough

Unnamed: 0,Borough,City,LAT,LONG,Population,Radius
0,Comuna 1,Buenos Aires,-34.608289,-58.376774,205886,1800
1,Comuna 2,Buenos Aires,-34.590761,-58.397481,157932,1500
2,Comuna 3,Buenos Aires,-34.613631,-58.402195,187537,1400
3,Comuna 4,Buenos Aires,-34.645805,-58.391198,218245,3800
4,Comuna 5,Buenos Aires,-34.617382,-58.420265,179005,1900
5,Comuna 6,Buenos Aires,-34.61828,-58.442155,176076,1200
6,Comuna 7,Buenos Aires,-34.638042,-58.452503,220591,1700
7,Comuna 8,Buenos Aires,-34.674402,-58.465629,187237,2200
8,Comuna 9,Buenos Aires,-34.65487,-58.499815,161797,2100
9,Comuna 10,Buenos Aires,-34.626712,-58.50231,166022,2200


#### Visualize the boroughs and the cities on a map. Buenos Aires' boroughs are in red, while the cities are in blue.

In [315]:
# World map centred on (0, 0): Buenos Aires boroughs are drawn in red,
# every other city in blue.
world_map = folium.Map(location=[0, 0], zoom_start=2)

# (outline colour, fill colour) for the two marker groups.
buenos_aires_style = ('red', '#cc323a')
other_city_style = ('blue', '#3186cc')

# add one circle marker per row of the location table.
for lat, lng, label, city in zip(borough['LAT'], borough['LONG'], borough['Borough'], borough['City']):
    # The two groups differ only in colour, so pick the colours and build the marker once.
    color, fill_color = buenos_aires_style if city == 'Buenos Aires' else other_city_style
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(label, parse_html=True),
        color=color,
        fill=True,
        fill_color=fill_color,
        fill_opacity=0.7,
    ).add_to(world_map)

world_map

In [316]:
# City-level map centred on Buenos Aires: Buenos Aires boroughs are drawn in red,
# any other city in blue.
buenosaires_map = folium.Map(location=[-34.613436, -58.437442], zoom_start=12)

# (outline colour, fill colour) for the two marker groups.
buenos_aires_style = ('red', '#cc323a')
other_city_style = ('blue', '#3186cc')

# add one circle marker per row of the location table.
for lat, lng, label, city in zip(borough['LAT'], borough['LONG'], borough['Borough'], borough['City']):
    # The two groups differ only in colour, so pick the colours and build the marker once.
    color, fill_color = buenos_aires_style if city == 'Buenos Aires' else other_city_style
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(label, parse_html=True),
        color=color,
        fill=True,
        fill_color=fill_color,
        fill_opacity=0.7,
    ).add_to(buenosaires_map)

buenosaires_map

#### Use the Foursquare API to retrieve the venues found in each borough and city

In [317]:
#FourSquare Credential
# Credentials are read from the environment so that they are never stored in the notebook.
# NOTE: a key pair was previously hard-coded in this cell; treat it as leaked and rotate it.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
if not (CLIENT_ID and CLIENT_SECRET):
    # Fail loudly here rather than with an opaque API error further down.
    print('Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before requesting venues.')
VERSION = '20180605' # Foursquare API version
LIMIT = 500 # limit value sent with every request; the API may return fewer venues

In [318]:
#Evaluate Boroughs
def getNearbyVenues(names, latitudes, longitudes, radius):
    """Query the Foursquare "explore" endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Label and coordinates of every location to query; consumed in parallel.
    radius : iterable
        Search radius for each location, sent unchanged as the request's
        ``radius`` parameter (presumably metres -- confirm against the API docs).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Borough',
        'Borough Latitude', 'Borough Longitude', 'Venue', 'Venue Latitude',
        'Venue Longitude' and 'Venue Category'. The frame is empty (but keeps
        these columns) when no venue is returned at all.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    columns = ['Borough',
               'Borough Latitude',
               'Borough Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    rows = []
    # `search_radius` keeps the loop from shadowing the `radius` argument.
    for name, lat, lng, search_radius in zip(names, latitudes, longitudes, radius):
        print(name)

        # Let requests build and escape the query string; the timeout keeps a
        # stalled connection from hanging the notebook.
        response = requests.get(
            endpoint,
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': search_radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        response.raise_for_status()
        results = response.json()["response"]["groups"][0]["items"]

        # keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue without any category gets None instead of raising IndexError
                categories[0]['name'] if categories else None))

    # Passing `columns` here keeps the schema even when `rows` is empty.
    nearby_venues = pd.DataFrame(rows, columns=columns)

    return(nearby_venues)

In [234]:
# Fetch the venues around every borough/city, using each row's own search radius.
boro_venues = getNearbyVenues(
    names=borough['Borough'],
    latitudes=borough['LAT'],
    longitudes=borough['LONG'],
    radius=borough['Radius'],
)


Comuna 1
Comuna 2
Comuna 3
Comuna 4
Comuna 5
Comuna 6
Comuna 7
Comuna 8
Comuna 9
Comuna 10
Comuna 11
Comuna 12
Comuna 13
Comuna 14
Comuna 15
Sao Paulo
Rio de Janeiro
Brasilia
Montevideo
Santiago de Chile
Bogota
Ciudad de Mexico
Lima
Asuncion
Rosario
Cordoba
Medellin
Guadalajara
Quito
Monterrey
Madrid
Barcelona
Sevilla
Valencia
Paris
London
Roma
Milano
Napoli
Warszawa
Berlin
Munich
Kiev
Lisboa
Amsterdam


In [319]:
# Print the (rows, columns) shape of the venues table, then preview its first rows.
print(boro_venues.shape)
boro_venues.head()

(4383, 7)


Unnamed: 0,Borough,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Comuna 1,-34.608289,-58.376774,London City,-34.608505,-58.374874,Café
1,Comuna 1,-34.608289,-58.376774,de Dios Editores,-34.609999,-58.379109,Bookstore
2,Comuna 1,-34.608289,-58.376774,Foto Club Buenos Aires,-34.609475,-58.378273,Camera Store
3,Comuna 1,-34.608289,-58.376774,Piazzolla Tango,-34.606099,-58.374899,Theme Restaurant
4,Comuna 1,-34.608289,-58.376774,725 Continental Hotel,-34.606234,-58.376724,Hotel


#### Analyzing the venues data

In [320]:
# one-hot encode the venue categories (one indicator column per category)
boro_onehot = pd.get_dummies(boro_venues[['Venue Category']], prefix="", prefix_sep="")

# append the borough label as the last column ...
boro_onehot['Borough'] = boro_venues['Borough']

# ... then rotate that last column to the front
fixed_columns = list(boro_onehot.columns)
fixed_columns = fixed_columns[-1:] + fixed_columns[:-1]
boro_onehot = boro_onehot[fixed_columns]

print(boro_onehot.shape)
boro_onehot.head()

(4383, 328)


Unnamed: 0,Borough,Acai House,Accessories Store,African Restaurant,Airport Lounge,American Restaurant,Arcade,Argentinian Restaurant,Art Gallery,Art Museum,...,Vietnamese Restaurant,Water Park,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Winery,Women's Store,Yoga Studio,Zoo
0,Comuna 1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Comuna 1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Comuna 1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Comuna 1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Comuna 1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [321]:
# The "Argentinian Restaurant" category is dropped because it correlates very strongly with the Buenos Aires boroughs.
boro_onehot2 = boro_onehot.drop('Argentinian Restaurant', axis='columns')

In [322]:
# average every one-hot column per borough, i.e. the mean occurrence of each category
boro_grouped = boro_onehot2.groupby('Borough', as_index=False).mean()


In [323]:
# Preview the per-borough mean occurrence of each venue category.
boro_grouped.head()

Unnamed: 0,Borough,Acai House,Accessories Store,African Restaurant,Airport Lounge,American Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,...,Vietnamese Restaurant,Water Park,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Winery,Women's Store,Yoga Studio,Zoo
0,Amsterdam,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01
1,Asuncion,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Barcelona,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0
3,Berlin,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.01,...,0.03,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.01,0.0
4,Bogota,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [345]:
# print the 5 most frequent venue categories of every borough
num_top_venues = 5

for boro_name in boro_grouped['Borough']:
    print("----"+boro_name+"----")
    # one borough's row, transposed into a two-column (category, frequency) table
    freqs = boro_grouped[boro_grouped['Borough'] == boro_name].T.reset_index()
    freqs.columns = ['venue','freq']
    # the first transposed row holds the 'Borough' column, not a category
    freqs = freqs.iloc[1:]
    freqs['freq'] = freqs['freq'].astype(float)
    freqs = freqs.round({'freq': 2})
    ranked = freqs.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Amsterdam----
               venue  freq
0              Hotel  0.14
1        Coffee Shop  0.05
2         Restaurant  0.05
3                Bar  0.04
4  French Restaurant  0.04


----Asuncion----
                  venue  freq
0           Pizza Place  0.06
1  Gym / Fitness Center  0.06
2                  Café  0.05
3           Coffee Shop  0.03
4                   Bar  0.03


----Barcelona----
                venue  freq
0               Hotel  0.14
1         Coffee Shop  0.06
2    Tapas Restaurant  0.06
3  Spanish Restaurant  0.05
4               Plaza  0.04


----Berlin----
                 venue  freq
0          Coffee Shop  0.09
1                Hotel  0.05
2       Ice Cream Shop  0.05
3            Bookstore  0.05
4  Indie Movie Theater  0.04


----Bogota----
                venue  freq
0         Coffee Shop  0.09
1               Hotel  0.08
2              Bakery  0.06
3    Asian Restaurant  0.05
4  Italian Restaurant  0.05


----Brasilia----
                  venue  freq
0  Gym /

In [346]:
# Create DataFrame with the 10 most common venues for each Neighborhood

def return_most_common_venues(row, num_top_venues):
    """Return the labels of the `num_top_venues` largest entries of `row`.

    The first element of `row` is skipped (callers pass rows whose first
    entry is the borough name); the remaining entries are ranked in
    descending order and the index labels of the leading ones are returned
    as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

num_top_venues = 10

# ordinal suffixes for ranks 1-3; every later rank gets 'th'
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Borough']
for ind in range(num_top_venues):
    # explicit bounds check instead of a bare `except:` that would hide unrelated errors
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe: one row per borough, one column per rank
boro_venues_sorted = pd.DataFrame(columns=columns)
boro_venues_sorted['Borough'] = boro_grouped['Borough']

# fill every row with that borough's most frequent categories, best first
for ind in range(boro_grouped.shape[0]):
    boro_venues_sorted.iloc[ind, 1:] = return_most_common_venues(boro_grouped.iloc[ind, :], num_top_venues)

boro_venues_sorted

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Amsterdam,Hotel,Restaurant,Coffee Shop,Cocktail Bar,Bar,French Restaurant,Bakery,Café,Beer Bar,Bookstore
1,Asuncion,Pizza Place,Gym / Fitness Center,Café,Soccer Stadium,South American Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Bar,Gymnastics Gym
2,Barcelona,Hotel,Coffee Shop,Tapas Restaurant,Spanish Restaurant,Plaza,Historic Site,Ice Cream Shop,Bookstore,Park,Sandwich Place
3,Berlin,Coffee Shop,Ice Cream Shop,Bookstore,Hotel,Indie Movie Theater,Café,Vietnamese Restaurant,Monument / Landmark,Bakery,Park
4,Bogota,Coffee Shop,Hotel,Bakery,Asian Restaurant,Italian Restaurant,Restaurant,French Restaurant,Park,Shopping Mall,BBQ Joint
5,Brasilia,Gym / Fitness Center,Ice Cream Shop,Burger Joint,Steakhouse,Food Stand,Pastelaria,Café,Hot Dog Joint,Pet Store,Bar
6,Ciudad de Mexico,Ice Cream Shop,Coffee Shop,Bakery,Mexican Restaurant,Art Museum,Art Gallery,Gourmet Shop,Flower Shop,Seafood Restaurant,Tattoo Parlor
7,Comuna 1,Coffee Shop,Café,Theater,Hotel,Hostel,Ice Cream Shop,Italian Restaurant,Pizza Place,Restaurant,Bar
8,Comuna 10,Pizza Place,Café,Deli / Bodega,Plaza,Ice Cream Shop,Restaurant,Sports Club,Gym / Fitness Center,Sandwich Place,BBQ Joint
9,Comuna 11,Café,Ice Cream Shop,Pharmacy,Burger Joint,Restaurant,Coffee Shop,Bus Stop,BBQ Joint,Bakery,Plaza


### Clustering

#### Classify into 2 clusters. Ideally these would be a "European cluster" and a "Latin American cluster".

In [347]:

# set number of clusters
kclusters = 2

# feature matrix: every category column, without the borough label.
# `columns=` replaces the positional axis argument, which pandas 2.0 no longer accepts.
boro_clustering = boro_grouped.drop(columns='Borough')

# run k-means clustering; n_init is pinned to 10 so the result does not shift
# with the scikit-learn version's default
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(boro_clustering)

# check cluster labels generated for the first rows of the dataframe
kmeans.labels_[0:10]

array([1, 0, 1, 1, 0, 0, 0, 0, 0, 0])

In [348]:
# add the cluster label as the first column of the top-10-venues table.
# A label column left over from a previous run of this cell is removed first,
# so that re-running the cell does not fail with "already exists".
if 'Cluster Labels' in boro_venues_sorted.columns:
    boro_venues_sorted = boro_venues_sorted.drop(columns=['Cluster Labels'])
boro_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [349]:
# attach each borough's cluster label and top venues to its row in the
# location table (matched on the 'Borough' name)
venues_by_borough = boro_venues_sorted.set_index('Borough')
boro_merged = borough.join(venues_by_borough, on='Borough')

boro_merged.head()

Unnamed: 0,Borough,City,LAT,LONG,Population,Radius,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Comuna 1,Buenos Aires,-34.608289,-58.376774,205886,1800,0,Coffee Shop,Café,Theater,Hotel,Hostel,Ice Cream Shop,Italian Restaurant,Pizza Place,Restaurant,Bar
1,Comuna 2,Buenos Aires,-34.590761,-58.397481,157932,1500,1,Hotel,Coffee Shop,Ice Cream Shop,Italian Restaurant,Café,Plaza,Park,Salad Place,Bakery,Chocolate Shop
2,Comuna 3,Buenos Aires,-34.613631,-58.402195,187537,1400,0,Café,Japanese Restaurant,Bakery,Ice Cream Shop,Spanish Restaurant,Bar,Grocery Store,Pizza Place,Hotel,Theater
3,Comuna 4,Buenos Aires,-34.645805,-58.391198,218245,3800,0,Japanese Restaurant,Pizza Place,Italian Restaurant,BBQ Joint,Restaurant,Deli / Bodega,Café,Hostel,Bar,Gym
4,Comuna 5,Buenos Aires,-34.617382,-58.420265,179005,1900,0,Ice Cream Shop,Pizza Place,Bakery,Café,Burger Joint,Bar,Coffee Shop,Cheese Shop,Indie Theater,Italian Restaurant


In [351]:
#Visualize results

# world map with one marker per location, coloured by its 2-cluster label
map_clusters_world = folium.Map(location=[0, 0], zoom_start=2)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# A location that has no row in the venues table comes out of the join without
# a label (NaN), which would turn the labels into floats and break the colour
# lookup; such rows are skipped and the labels are cast back to int.
labelled = boro_merged.dropna(subset=['Cluster Labels'])

# add markers to the map
for lat, lon, poi, cluster in zip(labelled['LAT'], labelled['LONG'], labelled['Borough'], labelled['Cluster Labels'].astype(int)):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster-1 is kept from the original colour assignment: label 0 wraps
    # around to the last colour, label 1 takes the first.
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters_world)

map_clusters_world

In [352]:
# map centred on Buenos Aires with one marker per location, coloured by its 2-cluster label
map_clusters_BuenosAires = folium.Map(location=[-34.613436, -58.437442], zoom_start=12)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# A location that has no row in the venues table comes out of the join without
# a label (NaN), which would turn the labels into floats and break the colour
# lookup; such rows are skipped and the labels are cast back to int.
labelled = boro_merged.dropna(subset=['Cluster Labels'])

# add markers to the map
for lat, lon, poi, cluster in zip(labelled['LAT'], labelled['LONG'], labelled['Borough'], labelled['Cluster Labels'].astype(int)):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster-1 is kept from the original colour assignment: label 0 wraps
    # around to the last colour, label 1 takes the first.
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters_BuenosAires)

map_clusters_BuenosAires

In [353]:
# Examine each cluster

#Cluster 1 (label 0): show the first column plus everything from column 5 onwards
in_cluster = boro_merged['Cluster Labels'] == 0
shown_columns = boro_merged.columns[[0] + list(range(5, boro_merged.shape[1]))]
boro_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Radius,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Comuna 1,1800,0,Coffee Shop,Café,Theater,Hotel,Hostel,Ice Cream Shop,Italian Restaurant,Pizza Place,Restaurant,Bar
2,Comuna 3,1400,0,Café,Japanese Restaurant,Bakery,Ice Cream Shop,Spanish Restaurant,Bar,Grocery Store,Pizza Place,Hotel,Theater
3,Comuna 4,3800,0,Japanese Restaurant,Pizza Place,Italian Restaurant,BBQ Joint,Restaurant,Deli / Bodega,Café,Hostel,Bar,Gym
4,Comuna 5,1900,0,Ice Cream Shop,Pizza Place,Bakery,Café,Burger Joint,Bar,Coffee Shop,Cheese Shop,Indie Theater,Italian Restaurant
5,Comuna 6,1200,0,Café,Ice Cream Shop,Bakery,Coffee Shop,Pizza Place,Gym,Brewery,Restaurant,Italian Restaurant,Soccer Field
6,Comuna 7,1700,0,Ice Cream Shop,Café,Pizza Place,Coffee Shop,Pharmacy,Bakery,Athletics & Sports,Restaurant,Korean Restaurant,Fish Market
7,Comuna 8,2200,0,Pizza Place,Fast Food Restaurant,Soccer Stadium,Shopping Mall,Soccer Field,Supermarket,Ice Cream Shop,Gas Station,Park,Bus Stop
8,Comuna 9,2100,0,Pizza Place,Café,BBQ Joint,Dessert Shop,Ice Cream Shop,Plaza,Gym,Coffee Shop,Brewery,Park
9,Comuna 10,2200,0,Pizza Place,Café,Deli / Bodega,Plaza,Ice Cream Shop,Restaurant,Sports Club,Gym / Fitness Center,Sandwich Place,BBQ Joint
10,Comuna 11,1800,0,Café,Ice Cream Shop,Pharmacy,Burger Joint,Restaurant,Coffee Shop,Bus Stop,BBQ Joint,Bakery,Plaza


In [354]:
# Examine each cluster

#Cluster 2 (label 1): show the first column plus everything from column 5 onwards
in_cluster = boro_merged['Cluster Labels'] == 1
shown_columns = boro_merged.columns[[0] + list(range(5, boro_merged.shape[1]))]
boro_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Radius,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Comuna 2,1500,1,Hotel,Coffee Shop,Ice Cream Shop,Italian Restaurant,Café,Plaza,Park,Salad Place,Bakery,Chocolate Shop
18,Montevideo,7000,1,Plaza,Coffee Shop,Hotel,Park,Scenic Lookout,Garden,BBQ Joint,Supermarket,Dessert Shop,Restaurant
30,Madrid,12000,1,Hotel,Restaurant,Plaza,Spanish Restaurant,Art Gallery,Art Museum,Coffee Shop,Tapas Restaurant,Mediterranean Restaurant,Japanese Restaurant
31,Barcelona,7000,1,Hotel,Coffee Shop,Tapas Restaurant,Spanish Restaurant,Plaza,Historic Site,Ice Cream Shop,Bookstore,Park,Sandwich Place
32,Sevilla,5000,1,Tapas Restaurant,Spanish Restaurant,Plaza,Hotel,Gastropub,Theater,Monument / Landmark,Historic Site,Park,Mediterranean Restaurant
33,Valencia,3500,1,Hotel,Ice Cream Shop,Plaza,Paella Restaurant,Italian Restaurant,Coffee Shop,Spanish Restaurant,Restaurant,Historic Site,Tapas Restaurant
34,Paris,11000,1,Plaza,Hotel,Cocktail Bar,Art Museum,Bookstore,French Restaurant,Garden,Japanese Restaurant,Wine Bar,Historic Site
35,London,16000,1,Hotel,Park,Lounge,Art Museum,Coffee Shop,Cocktail Bar,Theater,Bookstore,Food Court,Department Store
36,Roma,8000,1,Plaza,Historic Site,Ice Cream Shop,Monument / Landmark,Hotel,Italian Restaurant,Fountain,Art Museum,Church,Wine Bar
37,Milano,6000,1,Boutique,Hotel,Plaza,Ice Cream Shop,Italian Restaurant,Art Museum,Bookstore,Monument / Landmark,Japanese Restaurant,Wine Bar


#### Classify into 3 clusters. The idea is to let the model pick up other differences and so produce other possible groupings.

In [356]:
# feature matrix: every category column, without the borough label.
# `columns=` replaces the positional axis argument, which pandas 2.0 no longer accepts.
boro_clustering3 = boro_grouped.drop(columns='Borough')

# run k-means clustering with 3 clusters; n_init is pinned to 10 so the result
# does not shift with the scikit-learn version's default
kmeans3 = KMeans(n_clusters=3, n_init=10, random_state=0).fit(boro_clustering3)

# check cluster labels generated for the first rows of the dataframe
kmeans3.labels_[0:10]

array([2, 1, 2, 1, 1, 1, 1, 1, 0, 1])

In [366]:
# swap the 2-cluster labels for the 3-cluster ones, again as the first column
boro_venues_sorted3 = boro_venues_sorted.drop('Cluster Labels', axis='columns')
boro_venues_sorted3.insert(loc=0, column='Cluster Labels', value=kmeans3.labels_)

In [367]:
# attach each borough's 3-cluster label and top venues to its row in the
# location table (matched on the 'Borough' name)
venues_by_borough3 = boro_venues_sorted3.set_index('Borough')
boro_merged3 = borough.join(venues_by_borough3, on='Borough')

boro_merged3.head()

Unnamed: 0,Borough,City,LAT,LONG,Population,Radius,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Comuna 1,Buenos Aires,-34.608289,-58.376774,205886,1800,1,Coffee Shop,Café,Theater,Hotel,Hostel,Ice Cream Shop,Italian Restaurant,Pizza Place,Restaurant,Bar
1,Comuna 2,Buenos Aires,-34.590761,-58.397481,157932,1500,2,Hotel,Coffee Shop,Ice Cream Shop,Italian Restaurant,Café,Plaza,Park,Salad Place,Bakery,Chocolate Shop
2,Comuna 3,Buenos Aires,-34.613631,-58.402195,187537,1400,1,Café,Japanese Restaurant,Bakery,Ice Cream Shop,Spanish Restaurant,Bar,Grocery Store,Pizza Place,Hotel,Theater
3,Comuna 4,Buenos Aires,-34.645805,-58.391198,218245,3800,1,Japanese Restaurant,Pizza Place,Italian Restaurant,BBQ Joint,Restaurant,Deli / Bodega,Café,Hostel,Bar,Gym
4,Comuna 5,Buenos Aires,-34.617382,-58.420265,179005,1900,0,Ice Cream Shop,Pizza Place,Bakery,Café,Burger Joint,Bar,Coffee Shop,Cheese Shop,Indie Theater,Italian Restaurant


In [373]:
# world map with one marker per location, coloured by its 3-cluster label
map_clusters_world3 = folium.Map(location=[0, 0], zoom_start=2)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, 3))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# A location that has no row in the venues table comes out of the join without
# a label (NaN), which would turn the labels into floats and break the colour
# lookup; such rows are skipped and the labels are cast back to int.
labelled = boro_merged3.dropna(subset=['Cluster Labels'])

# add markers to the map
for lat, lon, poi, cluster in zip(labelled['LAT'], labelled['LONG'], labelled['Borough'], labelled['Cluster Labels'].astype(int)):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Direct lookup: for labels 0..2 this selects the same colour as the
    # earlier negative-index form rainbow[cluster-3].
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters_world3)

map_clusters_world3

In [376]:
# map centred on Buenos Aires with one marker per location, coloured by its 3-cluster label
map_clusters_BuenosAires3 = folium.Map(location=[-34.613436, -58.437442], zoom_start=12)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, 3))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# A location that has no row in the venues table comes out of the join without
# a label (NaN), which would turn the labels into floats and break the colour
# lookup; such rows are skipped and the labels are cast back to int.
labelled = boro_merged3.dropna(subset=['Cluster Labels'])

# add markers to the map
for lat, lon, poi, cluster in zip(labelled['LAT'], labelled['LONG'], labelled['Borough'], labelled['Cluster Labels'].astype(int)):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Direct lookup: for labels 0..2 this selects the same colour as the
    # earlier negative-index form rainbow[cluster-3].
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters_BuenosAires3)

map_clusters_BuenosAires3

In [377]:
# Examine each cluster

#Cluster 1 (label 0): show the first column plus everything from column 5 onwards
in_cluster = boro_merged3['Cluster Labels'] == 0
shown_columns = boro_merged3.columns[[0] + list(range(5, boro_merged3.shape[1]))]
boro_merged3.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Radius,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Comuna 5,1900,0,Ice Cream Shop,Pizza Place,Bakery,Café,Burger Joint,Bar,Coffee Shop,Cheese Shop,Indie Theater,Italian Restaurant
5,Comuna 6,1200,0,Café,Ice Cream Shop,Bakery,Coffee Shop,Pizza Place,Gym,Brewery,Restaurant,Italian Restaurant,Soccer Field
6,Comuna 7,1700,0,Ice Cream Shop,Café,Pizza Place,Coffee Shop,Pharmacy,Bakery,Athletics & Sports,Restaurant,Korean Restaurant,Fish Market
7,Comuna 8,2200,0,Pizza Place,Fast Food Restaurant,Soccer Stadium,Shopping Mall,Soccer Field,Supermarket,Ice Cream Shop,Gas Station,Park,Bus Stop
8,Comuna 9,2100,0,Pizza Place,Café,BBQ Joint,Dessert Shop,Ice Cream Shop,Plaza,Gym,Coffee Shop,Brewery,Park
9,Comuna 10,2200,0,Pizza Place,Café,Deli / Bodega,Plaza,Ice Cream Shop,Restaurant,Sports Club,Gym / Fitness Center,Sandwich Place,BBQ Joint
11,Comuna 12,1900,0,Ice Cream Shop,Pizza Place,BBQ Joint,Coffee Shop,Bakery,Athletics & Sports,Park,Plaza,Italian Restaurant,Gym / Fitness Center
15,Sao Paulo,20000,0,Ice Cream Shop,Pizza Place,Brazilian Restaurant,Park,Bookstore,Theater,Art Museum,Gym,Gym / Fitness Center,Cultural Center
38,Napoli,8000,0,Pizza Place,Plaza,Historic Site,Italian Restaurant,Hotel,Café,Art Museum,Wine Bar,Dessert Shop,Castle


In [378]:
#Cluster 2 (label 1): show the first column plus everything from column 5 onwards
in_cluster = boro_merged3['Cluster Labels'] == 1
shown_columns = boro_merged3.columns[[0] + list(range(5, boro_merged3.shape[1]))]
boro_merged3.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Radius,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Comuna 1,1800,1,Coffee Shop,Café,Theater,Hotel,Hostel,Ice Cream Shop,Italian Restaurant,Pizza Place,Restaurant,Bar
2,Comuna 3,1400,1,Café,Japanese Restaurant,Bakery,Ice Cream Shop,Spanish Restaurant,Bar,Grocery Store,Pizza Place,Hotel,Theater
3,Comuna 4,3800,1,Japanese Restaurant,Pizza Place,Italian Restaurant,BBQ Joint,Restaurant,Deli / Bodega,Café,Hostel,Bar,Gym
10,Comuna 11,1800,1,Café,Ice Cream Shop,Pharmacy,Burger Joint,Restaurant,Coffee Shop,Bus Stop,BBQ Joint,Bakery,Plaza
12,Comuna 13,1900,1,Pizza Place,Café,Coffee Shop,Deli / Bodega,Ice Cream Shop,BBQ Joint,Tea Room,Bakery,Bistro,Italian Restaurant
13,Comuna 14,2000,1,Hotel,Coffee Shop,Bakery,Italian Restaurant,Gym,Beer Bar,BBQ Joint,Asian Restaurant,Plaza,Pizza Place
14,Comuna 15,2200,1,Pizza Place,Coffee Shop,Café,Bakery,Restaurant,BBQ Joint,Plaza,Ice Cream Shop,Italian Restaurant,Beer Bar
16,Rio de Janeiro,17000,1,Steakhouse,Ice Cream Shop,Park,Mountain,Stadium,Hotel,Seafood Restaurant,Beach,Shopping Mall,Japanese Restaurant
17,Brasilia,10000,1,Gym / Fitness Center,Ice Cream Shop,Burger Joint,Steakhouse,Food Stand,Pastelaria,Café,Hot Dog Joint,Pet Store,Bar
19,Santiago de Chile,10000,1,Bakery,Pizza Place,Park,Sandwich Place,Coffee Shop,Tea Room,Café,Italian Restaurant,Performing Arts Venue,Pet Store


In [379]:
#Cluster 3 (label 2): show the first column plus everything from column 5 onwards
in_cluster = boro_merged3['Cluster Labels'] == 2
shown_columns = boro_merged3.columns[[0] + list(range(5, boro_merged3.shape[1]))]
boro_merged3.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Radius,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Comuna 2,1500,2,Hotel,Coffee Shop,Ice Cream Shop,Italian Restaurant,Café,Plaza,Park,Salad Place,Bakery,Chocolate Shop
18,Montevideo,7000,2,Plaza,Coffee Shop,Hotel,Park,Scenic Lookout,Garden,BBQ Joint,Supermarket,Dessert Shop,Restaurant
30,Madrid,12000,2,Hotel,Restaurant,Plaza,Spanish Restaurant,Art Gallery,Art Museum,Coffee Shop,Tapas Restaurant,Mediterranean Restaurant,Japanese Restaurant
31,Barcelona,7000,2,Hotel,Coffee Shop,Tapas Restaurant,Spanish Restaurant,Plaza,Historic Site,Ice Cream Shop,Bookstore,Park,Sandwich Place
32,Sevilla,5000,2,Tapas Restaurant,Spanish Restaurant,Plaza,Hotel,Gastropub,Theater,Monument / Landmark,Historic Site,Park,Mediterranean Restaurant
33,Valencia,3500,2,Hotel,Ice Cream Shop,Plaza,Paella Restaurant,Italian Restaurant,Coffee Shop,Spanish Restaurant,Restaurant,Historic Site,Tapas Restaurant
34,Paris,11000,2,Plaza,Hotel,Cocktail Bar,Art Museum,Bookstore,French Restaurant,Garden,Japanese Restaurant,Wine Bar,Historic Site
35,London,16000,2,Hotel,Park,Lounge,Art Museum,Coffee Shop,Cocktail Bar,Theater,Bookstore,Food Court,Department Store
36,Roma,8000,2,Plaza,Historic Site,Ice Cream Shop,Monument / Landmark,Hotel,Italian Restaurant,Fountain,Art Museum,Church,Wine Bar
37,Milano,6000,2,Boutique,Hotel,Plaza,Ice Cream Shop,Italian Restaurant,Art Museum,Bookstore,Monument / Landmark,Japanese Restaurant,Wine Bar
