# <div align="center">Northern, Central and Southern Italy: machine learning applied to metropolitan cities</div>

Davide Di Lucca

January 17, 2021

The first step is importing the libraries needed for the project

In [1]:
!pip install folium
import pandas as pd
import numpy as np
import requests
import json
from pandas.io.json import json_normalize
from geopy.geocoders import Nominatim
import folium

Collecting folium
  Downloading folium-0.12.1-py2.py3-none-any.whl (94 kB)
[K     |████████████████████████████████| 94 kB 4.8 MB/s  eta 0:00:01
[?25hCollecting branca>=0.3.0
  Downloading branca-0.4.2-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.2 folium-0.12.1


## Rome

First, we download the data in json format

In [2]:
rome_request = requests.get('https://raw.githubusercontent.com/blackmad/neighborhoods/master/rome-rioni.geojson')
rome_json_data = json.loads(rome_request.text)

All the relevant data is in the features key, which is basically a list of the neighborhoods

In [3]:
rome_neighborhoods_data = rome_json_data['features']

Let's take a look at the first item in this list.

In [5]:
rome_neighborhoods_data[0]

{'type': 'Feature',
 'properties': {'name': 'Castro Pretorio',
  'created_at': '2013-12-06T00:00:00Z',
  'updated_at': '2013-12-06T00:00:00Z',
  'cartodb_id': 3},
 'geometry': {'type': 'MultiPolygon',
  'coordinates': [[[[12.490683, 41.901926],
     [12.494094, 41.904289],
     [12.501497, 41.909431],
     [12.502613, 41.908473],
     [12.504587, 41.90809],
     [12.508278, 41.909112],
     [12.51081, 41.904098],
     [12.507119, 41.902756],
     [12.510939, 41.896687],
     [12.500811, 41.903395],
     [12.49815, 41.901766],
     [12.501583, 41.899818],
     [12.499566, 41.897645],
     [12.497592, 41.898668],
     [12.497163, 41.898412],
     [12.490683, 41.901926]]]]}}

We then define the dataframe columns and instantiate the dataframe

In [4]:
# define the dataframe columns
rome_column_names = ['Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
rome_neighborhoods = pd.DataFrame(columns=rome_column_names)

In [6]:
for data in rome_neighborhoods_data:
    rome_neighborhood_name = data['properties']['name']
        
    rome_neighborhood_latlon = data['geometry']['coordinates']
    rome_neighborhood_lat = rome_neighborhood_latlon[0][0][0][1]
    rome_neighborhood_lon = rome_neighborhood_latlon[0][0][0][0]
    
    rome_neighborhoods = rome_neighborhoods.append({'Neighborhood': rome_neighborhood_name,
                                                    'Latitude': rome_neighborhood_lat,
                                                    'Longitude': rome_neighborhood_lon}, ignore_index=True)

In [7]:
rome_neighborhoods.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Castro Pretorio,41.901926,12.490683
1,Monti,41.899562,12.487078
2,Esquilino,41.897645,12.499566
3,Sallustiano,41.909431,12.501497
4,Testaccio,41.876543,12.481713


We use the geopy library to get rome's coordinates

In [9]:
rome_address = 'Rome, IT'

rome_geolocator = Nominatim(user_agent="rome_explorer")
rome_location = rome_geolocator.geocode(rome_address)
rome_latitude = rome_location.latitude
rome_longitude = rome_location.longitude
print('The geograpical coordinate of Rome are {}, {}.'.format(rome_latitude, rome_longitude))

The geograpical coordinate of Rome are 41.8933203, 12.4829321.


With the coordinates we just got, we create a map of Rome

In [10]:
# create map of Rome using latitude and longitude values
map_rome = folium.Map(location=[rome_latitude, rome_longitude], zoom_start=14)

# add markers to map
for lat, lng, label in zip(rome_neighborhoods['Latitude'], rome_neighborhoods['Longitude'], rome_neighborhoods['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_rome)  
    
map_rome

We define Foursquare API credentials and URL

In [11]:
CLIENT_ID = 'COCPIDQM4O1WLNDXXIIDIRSYF11PV3Z55SVGGC44LLXWK1P5' # your Foursquare ID
CLIENT_SECRET = 'ENTTNOM0CLPW0MADTEHOEWDECAGSNOFSTN2SYMA3TSC0FP5N' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: COCPIDQM4O1WLNDXXIIDIRSYF11PV3Z55SVGGC44LLXWK1P5
CLIENT_SECRET:ENTTNOM0CLPW0MADTEHOEWDECAGSNOFSTN2SYMA3TSC0FP5N


In [12]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

# create URL
roma_url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    rome_latitude, 
    rome_longitude, 
    radius, 
    LIMIT)
roma_url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=COCPIDQM4O1WLNDXXIIDIRSYF11PV3Z55SVGGC44LLXWK1P5&client_secret=ENTTNOM0CLPW0MADTEHOEWDECAGSNOFSTN2SYMA3TSC0FP5N&v=20180605&ll=41.8933203,12.4829321&radius=500&limit=100'

We perform a HTTP request o the URL we just created to retrieve the results

In [13]:
rome_results = requests.get(roma_url).json()
#rome_results

Let's create a function that extracts the category of the venue

In [14]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [15]:
rome_venues = rome_results['response']['groups'][0]['items']
    
rome_nearby_venues = json_normalize(rome_venues) # flatten JSON

# filter columns
rome_filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
rome_nearby_venues = rome_nearby_venues.loc[:, rome_filtered_columns]

# filter the category for each row
rome_nearby_venues['venue.categories'] = rome_nearby_venues.apply(get_category_type, axis=1)

# clean columns
rome_nearby_venues.columns = [col.split(".")[-1] for col in rome_nearby_venues.columns]

rome_nearby_venues.head()


  app.launch_new_instance()


Unnamed: 0,name,categories,lat,lng
0,Piazza del Campidoglio,Plaza,41.893321,12.482956
1,Terrazza delle Quadrighe,Scenic Lookout,41.894346,12.483336
2,Foro di Cesare,Historic Site,41.894128,12.485232
3,Foro di Traiano,Historic Site,41.894729,12.484871
4,Capitoline Hill (Campidoglio),Scenic Lookout,41.893462,12.483588


We then create a function to get nearby venues for each neighborhood

In [16]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [17]:
rome_venues = getNearbyVenues(names = rome_neighborhoods['Neighborhood'],
                              latitudes = rome_neighborhoods['Latitude'],
                              longitudes = rome_neighborhoods['Longitude']
                              )

Castro Pretorio
Monti
Esquilino
Sallustiano
Testaccio
Ludovisi
Trevi
Colonna
Campo Marzio
Pigna
Sant'Eustachio
Ponte
Parione
Regola
Sant'Angelo
Campitelli
Ripa
Celio
San Saba
Trastevere
Prati
Borgo


In [18]:
rome_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Castro Pretorio,41.901926,12.490683,Palazzo Barberini,41.902713,12.489532,Art Museum
1,Castro Pretorio,41.901926,12.490683,Hotel Artemide,41.900747,12.493785,Hotel
2,Castro Pretorio,41.901926,12.490683,Piazza Barberini,41.903702,12.488574,Plaza
3,Castro Pretorio,41.901926,12.490683,Wonderful Ice Cream,41.901009,12.493615,Ice Cream Shop
4,Castro Pretorio,41.901926,12.490683,La Nazionale Gelateria,41.900076,12.492317,Ice Cream Shop


Now we apply one-hot encoding to convert categorical features to numerical values

In [20]:
# one hot encoding
rome_onehot = pd.get_dummies(rome_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
rome_onehot['Neighborhood'] = rome_venues['Neighborhood'] 

# move neighborhood column to the first column
rome_fixed_columns = [rome_onehot.columns[-1]] + list(rome_onehot.columns[:-1])
rome_onehot = rome_onehot[rome_fixed_columns]

rome_onehot.head()

Unnamed: 0,Neighborhood,Abruzzo Restaurant,Accessories Store,African Restaurant,American Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Bakery,Bar,...,Theater,Toy / Game Store,Train Station,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Wine Bar,Wine Shop,Winery,Women's Store
0,Castro Pretorio,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Castro Pretorio,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Castro Pretorio,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Castro Pretorio,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Castro Pretorio,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Let's group the dataframe by category

In [21]:
rome_grouped = rome_onehot.groupby('Neighborhood').mean().reset_index()
rome_grouped.head()

Unnamed: 0,Neighborhood,Abruzzo Restaurant,Accessories Store,African Restaurant,American Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Bakery,Bar,...,Theater,Toy / Game Store,Train Station,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Wine Bar,Wine Shop,Winery,Women's Store
0,Borgo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.018519,0.0,0.018519,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Campitelli,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.025,...,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0
2,Campo Marzio,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.017544,...,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0
3,Castro Pretorio,0.0,0.0,0.0,0.025,0.0,0.0,0.05,0.0,0.0,...,0.0,0.0,0.0,0.05,0.0,0.0,0.025,0.0,0.0,0.0
4,Celio,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.025,...,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0


We create a function that returns the most common venues for each neighborhood

In [22]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [23]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
rome_neighborhoods_venues_sorted = pd.DataFrame(columns = columns)
rome_neighborhoods_venues_sorted['Neighborhood'] = rome_grouped['Neighborhood']

for ind in np.arange(rome_grouped.shape[0]):
    rome_neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(rome_grouped.iloc[ind, :], num_top_venues)

rome_neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Borgo,Italian Restaurant,Hotel,Café,Plaza,Historic Site,Bed & Breakfast,History Museum,Ice Cream Shop,Restaurant,Pizza Place
1,Campitelli,Historic Site,Italian Restaurant,Plaza,Pub,Restaurant,Monument / Landmark,Temple,Hotel,Roof Deck,Road
2,Campo Marzio,Hotel,Italian Restaurant,Cocktail Bar,Restaurant,Nightclub,Roman Restaurant,Chinese Restaurant,Plaza,Gym,Movie Theater
3,Castro Pretorio,Hotel,Italian Restaurant,Ice Cream Shop,Plaza,Trattoria/Osteria,Fountain,Art Museum,Hostel,Department Store,Boarding House
4,Celio,Historic Site,Italian Restaurant,Plaza,Pub,Restaurant,Monument / Landmark,Temple,Hotel,Roof Deck,Road


We apply machine learning to our dataset. We use knn clustering algorithm to group data into clusters

In [24]:
from sklearn.cluster import KMeans

In [25]:
# set number of clusters
rome_k_num_clusters = 4

rome_grouped_clustering = rome_grouped.drop('Neighborhood', 1)

# run k-means clustering
rome_kmeans = KMeans(n_clusters=rome_k_num_clusters, random_state=0).fit(rome_grouped_clustering)

# check cluster labels generated for each row in the dataframe
rome_kmeans.labels_[0:10]

array([3, 2, 1, 1, 2, 1, 3, 0, 0, 0], dtype=int32)

In [26]:
# add clustering labels
rome_neighborhoods_venues_sorted.insert(0, 'Cluster Labels', rome_kmeans.labels_)

In [27]:
rome_merged = rome_neighborhoods

rome_merged = rome_merged.join(rome_neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

rome_merged.head()

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Castro Pretorio,41.901926,12.490683,1,Hotel,Italian Restaurant,Ice Cream Shop,Plaza,Trattoria/Osteria,Fountain,Art Museum,Hostel,Department Store,Boarding House
1,Monti,41.899562,12.487078,0,Italian Restaurant,Hotel,Ice Cream Shop,Plaza,Sandwich Place,Pizza Place,Art Museum,History Museum,Historic Site,Wine Bar
2,Esquilino,41.897645,12.499566,3,Italian Restaurant,Hotel,Café,Pizza Place,Bed & Breakfast,Bakery,Church,Restaurant,Ice Cream Shop,Trattoria/Osteria
3,Sallustiano,41.909431,12.501497,1,Italian Restaurant,Hotel,Pizza Place,Ice Cream Shop,Turkish Restaurant,Dessert Shop,Pub,Coffee Shop,Seafood Restaurant,Peruvian Restaurant
4,Testaccio,41.876543,12.481713,0,Italian Restaurant,Gastropub,Hotel,Ice Cream Shop,Sandwich Place,Trattoria/Osteria,Roman Restaurant,Restaurant,Café,Salad Place


In [29]:
rome_merged_nonan = rome_merged.dropna(subset=['Cluster Labels'])

Then we plot the clusters to the Rome map to visualize them

In [30]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [31]:
rome_map_clusters = folium.Map(location=[rome_latitude, rome_longitude], zoom_start=14)

# set color scheme for the clusters
x = np.arange(rome_k_num_clusters)
ys = [i + x + (i*x)**2 for i in range(rome_k_num_clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(rome_merged_nonan['Latitude'], rome_merged_nonan['Longitude'], rome_merged_nonan['Neighborhood'], rome_merged_nonan['Cluster Labels']):
    label = folium.Popup('Cluster ' + str(int(cluster) +1) + '\n' + str(poi) , parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)]
        ).add_to(rome_map_clusters)
        
rome_map_clusters

Let's analyze the first cluster:

In [32]:
rome_merged_nonan.loc[rome_merged_nonan['Cluster Labels'] == 0, rome_merged_nonan.columns[[1] + list(range(4, rome_merged_nonan.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,41.899562,Italian Restaurant,Hotel,Ice Cream Shop,Plaza,Sandwich Place,Pizza Place,Art Museum,History Museum,Historic Site,Wine Bar
4,41.876543,Italian Restaurant,Gastropub,Hotel,Ice Cream Shop,Sandwich Place,Trattoria/Osteria,Roman Restaurant,Restaurant,Café,Salad Place
5,41.910885,Italian Restaurant,Pizza Place,Hotel,Ice Cream Shop,Turkish Restaurant,Pub,Monument / Landmark,Restaurant,Dessert Shop,Juice Bar
10,41.900041,Italian Restaurant,Hotel,Plaza,Sandwich Place,Ice Cream Shop,Fountain,Pizza Place,Wine Bar,Bistro,Restaurant
11,41.902916,Italian Restaurant,Hotel,Plaza,Ice Cream Shop,Restaurant,Sandwich Place,Café,Pizza Place,Fountain,Art Museum
12,41.897669,Italian Restaurant,Hotel,Café,Sandwich Place,Plaza,Ice Cream Shop,Pizza Place,Roman Restaurant,Restaurant,Wine Bar
13,41.896847,Italian Restaurant,Roman Restaurant,Café,Wine Bar,Sandwich Place,Restaurant,Pizza Place,Ice Cream Shop,Hotel,Burger Joint
14,41.891863,Italian Restaurant,Pizza Place,Café,Ice Cream Shop,Cocktail Bar,Bakery,Wine Bar,Historic Site,Plaza,Trattoria/Osteria
16,41.891424,Italian Restaurant,Plaza,Café,Hotel,Church,Historic Site,Fountain,Trattoria/Osteria,Monument / Landmark,Temple
19,41.899634,Italian Restaurant,Café,Hotel,Plaza,Castle,Trattoria/Osteria,Ice Cream Shop,Bed & Breakfast,Restaurant,Cocktail Bar


Let's analyze the second cluster:

In [33]:
rome_merged_nonan.loc[rome_merged_nonan['Cluster Labels'] == 1, rome_merged_nonan.columns[[1] + list(range(4, rome_merged_nonan.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,41.901926,Hotel,Italian Restaurant,Ice Cream Shop,Plaza,Trattoria/Osteria,Fountain,Art Museum,Hostel,Department Store,Boarding House
3,41.909431,Italian Restaurant,Hotel,Pizza Place,Ice Cream Shop,Turkish Restaurant,Dessert Shop,Pub,Coffee Shop,Seafood Restaurant,Peruvian Restaurant
6,41.904976,Hotel,Italian Restaurant,Ice Cream Shop,Pizza Place,Café,Trattoria/Osteria,Hostel,Restaurant,Fountain,Coffee Shop
7,41.905104,Italian Restaurant,Hotel,Plaza,Ice Cream Shop,Boutique,Café,Middle Eastern Restaurant,Jewelry Store,Sandwich Place,Historic Site
8,41.909416,Hotel,Italian Restaurant,Cocktail Bar,Restaurant,Nightclub,Roman Restaurant,Chinese Restaurant,Plaza,Gym,Movie Theater


Let's analyze the third cluster:

In [34]:
rome_merged_nonan.loc[rome_merged_nonan['Cluster Labels'] == 2, rome_merged_nonan.columns[[1] + list(range(4, rome_merged_nonan.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,41.896607,Italian Restaurant,Plaza,Historic Site,Hotel,Ice Cream Shop,Pizza Place,Monument / Landmark,Sandwich Place,Church,Restaurant
15,41.890617,Historic Site,Italian Restaurant,Plaza,Pub,Restaurant,Monument / Landmark,Temple,Hotel,Roof Deck,Road
17,41.890617,Historic Site,Italian Restaurant,Plaza,Pub,Restaurant,Monument / Landmark,Temple,Hotel,Roof Deck,Road


Let's analyze the fourth cluster:

In [35]:
rome_merged_nonan.loc[rome_merged_nonan['Cluster Labels'] == 3, rome_merged_nonan.columns[[1] + list(range(4, rome_merged_nonan.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,41.897645,Italian Restaurant,Hotel,Café,Pizza Place,Bed & Breakfast,Bakery,Church,Restaurant,Ice Cream Shop,Trattoria/Osteria
18,41.873444,Historic Site,History Museum,Café,Sports Bar,Italian Restaurant,Park,Pizza Place,Hotel,Pub,Trattoria/Osteria
21,41.89854,Italian Restaurant,Hotel,Café,Plaza,Historic Site,Bed & Breakfast,History Museum,Ice Cream Shop,Restaurant,Pizza Place


We repeat the same process for Bari and for Milan

## Bari

In [36]:
#Downloading the data in json format
bari_request = requests.get('https://raw.githubusercontent.com/blackmad/neighborhoods/master/bari.geojson')
bari_json_data = json.loads(bari_request.text)

In [37]:
#All the relevant data is in the features key, which is basically a list of the neighborhoods
bari_neighborhoods_data = bari_json_data['features']

In [38]:
#Let's take a look at the first item in this list.
bari_neighborhoods_data[0]

{'type': 'Feature',
 'properties': {'name': 'San Paolo',
  'created_at': '2013-12-03T00:00:00Z',
  'updated_at': '2013-12-03T00:00:00Z',
  'cartodb_id': 1},
 'geometry': {'type': 'MultiPolygon',
  'coordinates': [[[[16.750409, 41.123855],
     [16.783419, 41.13387],
     [16.7908, 41.132448],
     [16.796722, 41.139817],
     [16.816978, 41.131737],
     [16.81303, 41.127211],
     [16.813116, 41.12359],
     [16.805906, 41.123203],
     [16.806164, 41.117836],
     [16.795692, 41.112727],
     [16.786553, 41.120472],
     [16.76747, 41.111615],
     [16.761169, 41.111583],
     [16.756963, 41.114145],
     [16.749855, 41.11485],
     [16.751389, 41.118238],
     [16.750409, 41.123855]]]]}}

In [39]:
# define the dataframe columns
bari_column_names = ['Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
bari_neighborhoods = pd.DataFrame(columns=bari_column_names)

In [40]:
for data in bari_neighborhoods_data:
    bari_neighborhood_name = data['properties']['name']
        
    bari_neighborhood_latlon = data['geometry']['coordinates']
    bari_neighborhood_lat = bari_neighborhood_latlon[0][0][0][1]
    bari_neighborhood_lon = bari_neighborhood_latlon[0][0][0][0]
    
    bari_neighborhoods = bari_neighborhoods.append({'Neighborhood': bari_neighborhood_name,
                                                    'Latitude': bari_neighborhood_lat,
                                                    'Longitude': bari_neighborhood_lon}, ignore_index=True)

In [41]:
bari_neighborhoods.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,San Paolo,41.123855,16.750409
1,Catino - San Pio,41.157946,16.727414
2,Santo Spirito,41.158043,16.727972
3,Palese,41.16024,16.763077
4,Murat,41.128892,16.860538


In [42]:
bari_address = 'Bari, IT'

bari_geolocator = Nominatim(user_agent="bari_explorer")
bari_location = bari_geolocator.geocode(bari_address)
bari_latitude = bari_location.latitude
bari_longitude = bari_location.longitude
print('The geograpical coordinates of Bari are {}, {}.'.format(bari_latitude, bari_longitude))

The geograpical coordinates of Bari are 41.1257843, 16.8620293.


In [43]:
# create map of Bari using latitude and longitude values
map_bari = folium.Map(location=[bari_latitude, bari_longitude], zoom_start=12)

# add markers to map
for lat, lng, label in zip(bari_neighborhoods['Latitude'], bari_neighborhoods['Longitude'], bari_neighborhoods['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_bari)  
    
map_bari

In [44]:
# create URL
bari_url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    bari_latitude, 
    bari_longitude, 
    radius, 
    LIMIT)
bari_url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=COCPIDQM4O1WLNDXXIIDIRSYF11PV3Z55SVGGC44LLXWK1P5&client_secret=ENTTNOM0CLPW0MADTEHOEWDECAGSNOFSTN2SYMA3TSC0FP5N&v=20180605&ll=41.1257843,16.8620293&radius=500&limit=100'

In [45]:
bari_results = requests.get(bari_url).json()
#bari_results

In [46]:
bari_venues = bari_results['response']['groups'][0]['items']
    
bari_nearby_venues = json_normalize(bari_venues) # flatten JSON

# filter columns
bari_filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
bari_nearby_venues = bari_nearby_venues.loc[:, bari_filtered_columns]

# filter the category for each row
bari_nearby_venues['venue.categories'] = bari_nearby_venues.apply(get_category_type, axis=1)

# clean columns
bari_nearby_venues.columns = [col.split(".")[-1] for col in bari_nearby_venues.columns]

bari_nearby_venues.head()

  app.launch_new_instance()


Unnamed: 0,name,categories,lat,lng
0,Gianpaolo,Seafood Restaurant,41.126277,16.864611
1,La Muraya,Food,41.125854,16.864414
2,Giampaolo Ristorazione,Restaurant,41.126273,16.865238
3,Ristorante Da Paolo,Seafood Restaurant,41.12535,16.86406
4,Bari - Napoli,Pizza Place,41.125018,16.863795


In [47]:
bari_venues = getNearbyVenues(names = bari_neighborhoods['Neighborhood'],
                              latitudes = bari_neighborhoods['Latitude'],
                              longitudes = bari_neighborhoods['Longitude']
                              )

San Paolo
Catino - San Pio
Santo Spirito
Palese
Murat
Barivecchia
Stanic
Carrassi
Picone
Poggiofranco
Madonnella
San Pasquale
Japigia
Torre a Mare
Loseto
Carbonara
Ceglie
Marconi
San Girolamo
Fesca
Libertá


In [48]:
bari_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,San Paolo,41.123855,16.750409,Babayaga,41.120262,16.750777,Ice Cream Shop
1,Palese,41.16024,16.763077,Ristorante Al Paradise,41.160167,16.766801,Italian Restaurant
2,Palese,41.16024,16.763077,Titolo,41.160093,16.761844,Beach
3,Palese,41.16024,16.763077,Lido pubblico (fondo sabbioso),41.160168,16.762014,Beach
4,Palese,41.16024,16.763077,Palumbo Hotel Bari,41.158376,16.767342,Hotel


In [49]:
# one hot encoding
bari_onehot = pd.get_dummies(bari_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
bari_onehot['Neighborhood'] = bari_venues['Neighborhood'] 

# move neighborhood column to the first column
bari_fixed_columns = [bari_onehot.columns[-1]] + list(bari_onehot.columns[:-1])
bari_onehot = bari_onehot[bari_fixed_columns]

bari_onehot.head()

Unnamed: 0,Neighborhood,Arts & Entertainment,Bakery,Bar,Beach,Bed & Breakfast,Beer Garden,Bike Rental / Bike Share,Bistro,Boat or Ferry,...,Sandwich Place,Seafood Restaurant,Soccer Stadium,Sports Bar,Steakhouse,Theater,Toy / Game Store,Train Station,Vegetarian / Vegan Restaurant,Wine Bar
0,San Paolo,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Palese,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Palese,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Palese,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Palese,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [50]:
bari_grouped = bari_onehot.groupby('Neighborhood').mean().reset_index()
bari_grouped.head()

Unnamed: 0,Neighborhood,Arts & Entertainment,Bakery,Bar,Beach,Bed & Breakfast,Beer Garden,Bike Rental / Bike Share,Bistro,Boat or Ferry,...,Sandwich Place,Seafood Restaurant,Soccer Stadium,Sports Bar,Steakhouse,Theater,Toy / Game Store,Train Station,Vegetarian / Vegan Restaurant,Wine Bar
0,Barivecchia,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,...,0.041667,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Carbonara,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Carrassi,0.033333,0.033333,0.0,0.0,0.033333,0.033333,0.0,0.0,0.0,...,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0
3,Fesca,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0
4,Japigia,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0


In [51]:
# create a new dataframe
bari_neighborhoods_venues_sorted = pd.DataFrame(columns = columns)
bari_neighborhoods_venues_sorted['Neighborhood'] = bari_grouped['Neighborhood']

for ind in np.arange(bari_grouped.shape[0]):
    bari_neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(bari_grouped.iloc[ind, :], num_top_venues)

bari_neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Barivecchia,Café,Pizza Place,Boat or Ferry,Seafood Restaurant,Plaza,History Museum,Food,Ice Cream Shop,Diner,Japanese Restaurant
1,Carbonara,Bed & Breakfast,Miscellaneous Shop,Wine Bar,Clothing Store,Gym / Fitness Center,Gym,Greek Restaurant,Food Truck,Food,Fast Food Restaurant
2,Carrassi,Hotel,Café,Pizza Place,Plaza,Pub,Fast Food Restaurant,Ice Cream Shop,Vegetarian / Vegan Restaurant,Hostel,Gym / Fitness Center
3,Fesca,Sports Bar,Soccer Stadium,Wine Bar,Café,Gym / Fitness Center,Gym,Greek Restaurant,Food Truck,Food,Fast Food Restaurant
4,Japigia,Café,Train Station,Beach,Pub,Wine Bar,Gym / Fitness Center,Gym,Greek Restaurant,Food Truck,Food


In [52]:
# set number of clusters
bari_k_num_clusters = 3

bari_grouped_clustering = bari_grouped.drop('Neighborhood', 1)

# run k-means clustering
bari_kmeans = KMeans(n_clusters=bari_k_num_clusters, random_state=0).fit(bari_grouped_clustering)

# check cluster labels generated for each row in the dataframe
bari_kmeans.labels_[0:100]

array([2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 0, 2, 1, 1], dtype=int32)

In [53]:
# add clustering labels
bari_neighborhoods_venues_sorted.insert(0, 'Cluster Labels', bari_kmeans.labels_)

In [54]:
bari_merged = bari_neighborhoods

bari_merged = bari_merged.join(bari_neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

bari_merged.head()

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,San Paolo,41.123855,16.750409,0.0,Ice Cream Shop,Hostel,History Museum,Gym / Fitness Center,Gym,Greek Restaurant,Food Truck,Food,Fast Food Restaurant,Diner
1,Catino - San Pio,41.157946,16.727414,,,,,,,,,,,
2,Santo Spirito,41.158043,16.727972,,,,,,,,,,,
3,Palese,41.16024,16.763077,2.0,Hotel,Beach,Italian Restaurant,Café,Bar,Bed & Breakfast,Bakery,Gym / Fitness Center,Gym,Greek Restaurant
4,Murat,41.128892,16.860538,2.0,Seafood Restaurant,Plaza,Food,Restaurant,Hotel,Pizza Place,Food Truck,Fast Food Restaurant,Diner,Burger Joint


In [55]:
bari_merged_nonan = bari_merged.dropna(subset=['Cluster Labels'])

In [56]:
bari_map_clusters = folium.Map(location=[bari_latitude, bari_longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(bari_k_num_clusters)
ys = [i + x + (i*x)**2 for i in range(bari_k_num_clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(bari_merged_nonan['Latitude'], bari_merged_nonan['Longitude'], bari_merged_nonan['Neighborhood'], bari_merged_nonan['Cluster Labels']):
    label = folium.Popup('Cluster ' + str(int(cluster) +1) + '\n' + str(poi) , parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)]
        ).add_to(bari_map_clusters)
        
bari_map_clusters

In [57]:
bari_merged_nonan.loc[bari_merged_nonan['Cluster Labels'] == 0, bari_merged_nonan.columns[[1] + list(range(4, bari_merged_nonan.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,41.123855,Ice Cream Shop,Hostel,History Museum,Gym / Fitness Center,Gym,Greek Restaurant,Food Truck,Food,Fast Food Restaurant,Diner


In [58]:
bari_merged_nonan.loc[bari_merged_nonan['Cluster Labels'] == 1, bari_merged_nonan.columns[[1] + list(range(4, bari_merged_nonan.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,41.117513,Pizza Place,Train Station,Food,Steakhouse,Wine Bar,Gym,Greek Restaurant,Food Truck,Fast Food Restaurant,Diner
13,41.09326,Pizza Place,Wine Bar,Hostel,Gym / Fitness Center,Gym,Greek Restaurant,Food Truck,Food,Fast Food Restaurant,Diner
17,41.117513,Pizza Place,Train Station,Food,Steakhouse,Wine Bar,Gym,Greek Restaurant,Food Truck,Fast Food Restaurant,Diner


In [59]:
bari_merged_nonan.loc[bari_merged_nonan['Cluster Labels'] == 2, bari_merged_nonan.columns[[1] + list(range(4, bari_merged_nonan.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,41.16024,Hotel,Beach,Italian Restaurant,Café,Bar,Bed & Breakfast,Bakery,Gym / Fitness Center,Gym,Greek Restaurant
4,41.128892,Seafood Restaurant,Plaza,Food,Restaurant,Hotel,Pizza Place,Food Truck,Fast Food Restaurant,Diner,Burger Joint
5,41.130347,Café,Pizza Place,Boat or Ferry,Seafood Restaurant,Plaza,History Museum,Food,Ice Cream Shop,Diner,Japanese Restaurant
7,41.117448,Hotel,Café,Pizza Place,Plaza,Pub,Fast Food Restaurant,Ice Cream Shop,Vegetarian / Vegan Restaurant,Hostel,Gym / Fitness Center
8,41.117448,Hotel,Café,Pizza Place,Plaza,Pub,Fast Food Restaurant,Ice Cream Shop,Vegetarian / Vegan Restaurant,Hostel,Gym / Fitness Center
9,41.090931,Café,Italian Restaurant,Food Truck,Office,Pub,Wine Bar,Gym,Greek Restaurant,Food,Fast Food Restaurant
10,41.125934,Italian Restaurant,Café,Ice Cream Shop,Restaurant,Plaza,Pizza Place,Theater,Bar,Cupcake Shop,Cocktail Bar
11,41.089314,Burger Joint,Soccer Stadium,Multiplex,Coffee Shop,Café,Gym / Fitness Center,Gym,Greek Restaurant,Food Truck,Food
12,41.118826,Café,Train Station,Beach,Pub,Wine Bar,Gym / Fitness Center,Gym,Greek Restaurant,Food Truck,Food
15,41.088667,Bed & Breakfast,Miscellaneous Shop,Wine Bar,Clothing Store,Gym / Fitness Center,Gym,Greek Restaurant,Food Truck,Food,Fast Food Restaurant


## Milan

In [61]:
#Downloading the data in json format
milan_request = requests.get('https://raw.githubusercontent.com/blackmad/neighborhoods/master/gn-milan.geojson')
milan_json_data = json.loads(milan_request.text)

In [62]:
#All the relevant data is in the features key, which is basically a list of the neighborhoods
milan_neighborhoods_data = milan_json_data['features']

#Let's take a look at the first item in this list.
milan_neighborhoods_data[0]

{'type': 'Feature',
 'properties': {'fclass': 'P',
  'name': 'Umbria - Molise',
  'countryCode': 'IT',
  'geonameid': None,
  'created_at': '2013-11-27T21:33:33+0100',
  'cartodb_id': 25,
  'updated_at': '2013-11-27T21:39:29+0100',
  'fcode': 'PPLX',
  'lat': 45.451582245882435,
  'parents': '3173435',
  'adminCode4': '',
  'lng': 9.219244003944912,
  'adminCode1': '09',
  'adminCode2': 'MI',
  'adminCode3': '015146'},
 'geometry': {'type': 'MultiPolygon',
  'coordinates': [[[[9.224413, 45.449961],
     [9.224448, 45.447392],
     [9.224453, 45.447199],
     [9.224463, 45.445702],
     [9.221928, 45.445586],
     [9.221076, 45.44556],
     [9.220237, 45.445519],
     [9.216728, 45.445374],
     [9.215007, 45.445301],
     [9.213599, 45.44526],
     [9.213599, 45.445102],
     [9.213018, 45.445122],
     [9.212854, 45.445229],
     [9.21208, 45.445717],
     [9.211013, 45.446366],
     [9.210522, 45.446636],
     [9.21037, 45.446718],
     [9.210735, 45.447128],
     [9.211752, 45.44829

In [63]:
# define the dataframe columns
milan_column_names = ['Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
milan_neighborhoods = pd.DataFrame(columns=milan_column_names)

In [64]:
for data in milan_neighborhoods_data:
    milan_neighborhood_name = data['properties']['name']
        
    milan_neighborhood_latlon = data['geometry']['coordinates']
    milan_neighborhood_lat = milan_neighborhood_latlon[0][0][0][1]
    milan_neighborhood_lon = milan_neighborhood_latlon[0][0][0][0]
    
    milan_neighborhoods = milan_neighborhoods.append({'Neighborhood': milan_neighborhood_name,
                                                    'Latitude': milan_neighborhood_lat,
                                                    'Longitude': milan_neighborhood_lon}, ignore_index=True)

In [65]:
milan_neighborhoods.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Umbria - Molise,45.449961,9.224413
1,Guastalla,45.471271,9.206922
2,Bovisa,45.497928,9.172141
3,Forze Armate,45.457958,9.126574
4,Sacco,45.515351,9.1223


In [66]:
milan_address = 'Milan, IT'

milan_geolocator = Nominatim(user_agent="milan_explorer")
milan_location = milan_geolocator.geocode(milan_address)
milan_latitude = milan_location.latitude
milan_longitude = milan_location.longitude
print('The geograpical coordinates of Milan are {}, {}.'.format(milan_latitude, milan_longitude))

The geograpical coordinates of Milan are 45.4668, 9.1905.


In [67]:
# create map of Milan using latitude and longitude values
map_milan = folium.Map(location=[milan_latitude, milan_longitude], zoom_start=12)

# add markers to map
for lat, lng, label in zip(milan_neighborhoods['Latitude'], milan_neighborhoods['Longitude'], milan_neighborhoods['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_milan)  
    
map_milan

In [68]:
# create URL
milan_url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    milan_latitude, 
    milan_longitude, 
    radius, 
    LIMIT)
milan_url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=COCPIDQM4O1WLNDXXIIDIRSYF11PV3Z55SVGGC44LLXWK1P5&client_secret=ENTTNOM0CLPW0MADTEHOEWDECAGSNOFSTN2SYMA3TSC0FP5N&v=20180605&ll=45.4668,9.1905&radius=500&limit=100'

In [69]:
milan_results = requests.get(milan_url).json()
#milan_results

In [70]:
milan_venues = milan_results['response']['groups'][0]['items']
    
milan_nearby_venues = json_normalize(milan_venues) # flatten JSON

# filter columns
milan_filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
milan_nearby_venues = milan_nearby_venues.loc[:, milan_filtered_columns]

# filter the category for each row
milan_nearby_venues['venue.categories'] = milan_nearby_venues.apply(get_category_type, axis=1)

# clean columns
milan_nearby_venues.columns = [col.split(".")[-1] for col in milan_nearby_venues.columns]

milan_nearby_venues.head()

  app.launch_new_instance()


Unnamed: 0,name,categories,lat,lng
0,Galleria Vittorio Emanuele II,Monument / Landmark,45.465577,9.190024
1,Room Mate Giulia Hotel,Hotel,45.46525,9.189396
2,Teatro alla Scala,Opera House,45.467027,9.189686
3,Luini,Bakery,45.465707,9.191431
4,Gallerie d'Italia,Art Gallery,45.467183,9.190056


In [71]:
milan_venues = getNearbyVenues(names = milan_neighborhoods['Neighborhood'],
                              latitudes = milan_neighborhoods['Latitude'],
                              longitudes = milan_neighborhoods['Longitude']
                              )

Umbria - Molise
Guastalla
Bovisa
Forze Armate
Sacco
Ex Om - Morivione
Comasina
Stephenson
Qt 8
Ortomercato
Tre Torri
Maggiore - Musocco
Parco Lambro - Cimiano
Corsica
Gallaratese
S. Siro
Ghisolfa
Baggio
Quarto Cagnino
Lorenteggio
Giambellino
S. Cristoforo
Ronchetto Sul Naviglio
Tibaldi
Cascina Triulza - Expo
Quarto Oggiaro
Affori
Padova
Adriano
Farini
Triulzo Superiore
Citta' Studi
Selinunte
Parco Monlue' - Ponte Lambro
Niguarda - Ca' Granda
Stadera
Figino
Brera
Duomo
Parco Sempione
Scalo Romana
Magenta - S. Vittore
Bovisasca
Lodi - Corvetto
Lambrate
Barona
Bruzzano
Trenno
Gratosoglio - Ticinello
Giardini Porta Venezia
Quinto Romano
Villapizzone
Dergano
Tortona
Parco Agricolo Sud
Parco Nord
Bicocca
Muggiano
Portello
Navigli
XXII Marzo
Buenos Aires - Venezia
Quintosole
Ronchetto Delle Rane
Chiaravalle
Mecenate
Parco Delle Abbazie
Rogoredo
Ripamonti
Viale Monza
Isola
Centrale
Greco
Porta Romana
Loreto
Parco Forlanini - Ortica
Maciachini - Maggiolina
Sarpi
De Angeli - Monte Rosa
Washingto

In [72]:
milan_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Umbria - Molise,45.449961,9.224413,Ristorante La Sirenetta,45.450428,9.221963,Italian Restaurant
1,Umbria - Molise,45.449961,9.224413,Zelig Circus,45.448472,9.225172,Comedy Club
2,Umbria - Molise,45.449961,9.224413,Reebok CrossFit Officine,45.446556,9.222724,Gym
3,Umbria - Molise,45.449961,9.224413,Parco Alessandrini,45.450407,9.22689,Park
4,Umbria - Molise,45.449961,9.224413,S.S.D. Ausonia 1931 s.c.a.r.l.,45.448076,9.228586,Soccer Field


In [73]:
# one hot encoding
milan_onehot = pd.get_dummies(milan_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
milan_onehot['Neighborhood'] = milan_venues['Neighborhood'] 

# move neighborhood column to the first column
milan_fixed_columns = [milan_onehot.columns[-1]] + list(milan_onehot.columns[:-1])
milan_onehot = milan_onehot[milan_fixed_columns]

milan_onehot.head()

Unnamed: 0,Neighborhood,Abruzzo Restaurant,Accessories Store,Adult Education Center,African Restaurant,American Restaurant,Arcade,Argentinian Restaurant,Art Gallery,Art Museum,...,Video Store,Vietnamese Restaurant,Volleyball Court,Warehouse Store,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio
0,Umbria - Molise,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Umbria - Molise,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Umbria - Molise,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Umbria - Molise,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Umbria - Molise,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [74]:
milan_grouped = milan_onehot.groupby('Neighborhood').mean().reset_index()
milan_grouped.head()

Unnamed: 0,Neighborhood,Abruzzo Restaurant,Accessories Store,Adult Education Center,African Restaurant,American Restaurant,Arcade,Argentinian Restaurant,Art Gallery,Art Museum,...,Video Store,Vietnamese Restaurant,Volleyball Court,Warehouse Store,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio
0,Adriano,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Affori,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Baggio,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bande Nere,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Barona,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [81]:
# create a new dataframe
milan_neighborhoods_venues_sorted = pd.DataFrame(columns = columns)
milan_neighborhoods_venues_sorted['Neighborhood'] = milan_grouped['Neighborhood']

for ind in np.arange(milan_grouped.shape[0]):
    milan_neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(milan_grouped.iloc[ind, :], num_top_venues)

milan_neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adriano,Locksmith,Yoga Studio,Fabric Shop,Food Stand,Food Court,Food & Drink Shop,Food,Flower Shop,Flea Market,Fish & Chips Shop
1,Affori,Pizza Place,Café,Italian Restaurant,Peruvian Restaurant,Food Court,Food & Drink Shop,Food,Flower Shop,Flea Market,Fabric Shop
2,Baggio,Bar,Food & Drink Shop,Outdoors & Recreation,Football Stadium,Food Stand,Food Court,Food,Flower Shop,Flea Market,Fish & Chips Shop
3,Bande Nere,Pizza Place,Café,Italian Restaurant,Metro Station,Pharmacy,Restaurant,Flea Market,Park,Sporting Goods Shop,Japanese Restaurant
4,Barona,Café,Dog Run,Park,Yoga Studio,Farm,Food Stand,Food Court,Food & Drink Shop,Food,Flower Shop


In [114]:
# set number of clusters
milan_k_num_clusters = 6

milan_grouped_clustering = milan_grouped.drop('Neighborhood', 1)

# run k-means clustering
milan_kmeans = KMeans(n_clusters = milan_k_num_clusters, random_state=0).fit(milan_grouped_clustering)

# check cluster labels generated for each row in the dataframe
milan_kmeans.labels_[0:200]

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 5, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 1,
       2, 2, 2, 0, 2, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
      dtype=int32)

In [116]:
# add clustering labels
milan_neighborhoods_venues_sorted.insert(0, 'Cluster Labels', milan_kmeans.labels_)

In [117]:
milan_merged = milan_neighborhoods

milan_merged = milan_merged.join(milan_neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

milan_merged.head()

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Umbria - Molise,45.449961,9.224413,2.0,Theater,Flea Market,Comedy Club,Chinese Restaurant,Performing Arts Venue,Tram Station,Rock Club,Nightclub,Park,Gym
1,Guastalla,45.471271,9.206922,2.0,Italian Restaurant,Pizza Place,Café,Plaza,Seafood Restaurant,Ice Cream Shop,Sushi Restaurant,Bistro,Ramen Restaurant,Tapas Restaurant
2,Bovisa,45.497928,9.172141,2.0,Hotel,Pizza Place,Diner,Bus Stop,Snack Place,Japanese Restaurant,Italian Restaurant,Café,Electronics Store,Bar
3,Forze Armate,45.457958,9.126574,2.0,Café,Pizza Place,Metro Station,Japanese Restaurant,Bar,Supermarket,Bus Stop,Arcade,Italian Restaurant,Hockey Arena
4,Sacco,45.515351,9.1223,0.0,Hotel,Restaurant,Café,Tram Station,Supermarket,Farm,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Yoga Studio


In [118]:
milan_merged_nonan = milan_merged.dropna(subset=['Cluster Labels'])

In [119]:
milan_map_clusters = folium.Map(location=[milan_latitude, milan_longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(milan_k_num_clusters)
ys = [i + x + (i*x)**2 for i in range(milan_k_num_clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(milan_merged_nonan['Latitude'], milan_merged_nonan['Longitude'], milan_merged_nonan['Neighborhood'], milan_merged_nonan['Cluster Labels']):
    label = folium.Popup('Cluster ' + str(int(cluster) +1) + '\n' + str(poi) , parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)]
        ).add_to(milan_map_clusters)
        
milan_map_clusters

In [120]:
milan_merged_nonan.loc[milan_merged_nonan['Cluster Labels'] == 0, milan_merged_nonan.columns[[1] + list(range(4, milan_merged_nonan.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,45.515351,Hotel,Restaurant,Café,Tram Station,Supermarket,Farm,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Yoga Studio
24,45.514629,Hotel,Spa,Restaurant,Café,Fast Food Restaurant,Yoga Studio,Fabric Shop,Food & Drink Shop,Food,Flower Shop
84,45.416915,Mediterranean Restaurant,Hotel,Café,Yoga Studio,Falafel Restaurant,Food Stand,Food Court,Food & Drink Shop,Food,Flower Shop


In [121]:
milan_merged_nonan.loc[milan_merged_nonan['Cluster Labels'] == 1, milan_merged_nonan.columns[[1] + list(range(4, milan_merged_nonan.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
63,45.40055,Bed & Breakfast,Yoga Studio,Fountain,Food Truck,Food Stand,Food Court,Food & Drink Shop,Food,Flower Shop,Flea Market


In [106]:
milan_merged_nonan.loc[milan_merged_nonan['Cluster Labels'] == 2, milan_merged_nonan.columns[[1] + list(range(4, milan_merged_nonan.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,45.449961,Theater,Flea Market,Comedy Club,Chinese Restaurant,Performing Arts Venue,Tram Station,Rock Club,Nightclub,Park,Gym
1,45.471271,Italian Restaurant,Pizza Place,Café,Plaza,Seafood Restaurant,Ice Cream Shop,Sushi Restaurant,Bistro,Ramen Restaurant,Tapas Restaurant
2,45.497928,Hotel,Pizza Place,Diner,Bus Stop,Snack Place,Japanese Restaurant,Italian Restaurant,Café,Electronics Store,Bar
3,45.457958,Café,Pizza Place,Metro Station,Japanese Restaurant,Bar,Supermarket,Bus Stop,Arcade,Italian Restaurant,Hockey Arena
5,45.436371,Nightclub,Supermarket,Tram Station,Café,Pizza Place,Gym,Sri Lankan Restaurant,Breakfast Spot,Chinese Restaurant,Gastropub
...,...,...,...,...,...,...,...,...,...,...,...
81,45.451365,Italian Restaurant,Pizza Place,Wine Bar,Bistro,Café,Cocktail Bar,Pub,Bar,Piadineria,Gourmet Shop
82,45.451678,Italian Restaurant,Café,Hotel,Restaurant,Lounge,Ice Cream Shop,Ramen Restaurant,Sushi Restaurant,New American Restaurant,Music Venue
83,45.487221,Pub,Plaza,Pizza Place,Soccer Field,Park,Fabric Shop,Food & Drink Shop,Food,Flower Shop,Flea Market
86,45.466171,Italian Restaurant,Café,Plaza,Ice Cream Shop,Clothing Store,Gym,Historic Site,Hotel,Movie Theater,Sandwich Place


In [122]:
milan_merged_nonan.loc[milan_merged_nonan['Cluster Labels'] == 3, milan_merged_nonan.columns[[1] + list(range(4, milan_merged_nonan.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
50,45.484181,Campground,Yoga Studio,Department Store,Food Truck,Food Stand,Food Court,Food & Drink Shop,Food,Flower Shop,Flea Market


In [123]:
milan_merged_nonan.loc[milan_merged_nonan['Cluster Labels'] == 4, milan_merged_nonan.columns[[1] + list(range(4, milan_merged_nonan.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
35,45.427158,Distribution Center,Yoga Studio,Falafel Restaurant,Food Truck,Food Stand,Food Court,Food & Drink Shop,Food,Flower Shop,Flea Market


In [124]:
milan_merged_nonan.loc[milan_merged_nonan['Cluster Labels'] == 5, milan_merged_nonan.columns[[1] + list(range(4, milan_merged_nonan.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
64,45.416813,Restaurant,Yoga Studio,Fabric Shop,Food Stand,Food Court,Food & Drink Shop,Food,Flower Shop,Flea Market,Fish & Chips Shop


In [None]:
#Bari URL: https://raw.githubusercontent.com/blackmad/neighborhoods/master/bari.geojson
#Milan URL: https://raw.githubusercontent.com/blackmad/neighborhoods/master/gn-milan.geojson