# The Battle of Neighborhoods - Applied Data Science Capstone 

## Project: 

First step: Install the python packages and libraries

In [2]:
#Install packages if needed

#pip install folium
#pip install geopy
#pip install geopandas
#pip install geojson
#pip install lxml
#pip install bs4

from urllib.request import urlopen

import pandas as pd
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt

import folium # map rendering library
from folium import plugins
from folium.plugins import HeatMap

import requests
from bs4 import BeautifulSoup
import numpy as np # library to handle data in a vectorized manner
import json # library to handle JSON files
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import geopandas as gpd



print('Libraries imported.')

Libraries imported.


#### Scraping the dataset from the Web Page of Lisbon's City Council

In [3]:
list_url = "https://geodados-cml.hub.arcgis.com/datasets/miradouros/data"
source = requests.get(list_url).text
soup = BeautifulSoup(source, 'xml')
table=soup.find('table')

In [4]:
#Create dataframe
column_names = ['OBJECTID','COD_SIG','IDTIPO','CAT_NOME','CAT_ID','INF_ID','INF_NOME','INF_MORADA','INF_TELEFONE','INF_FAX','INF_EMAIL','INF_SITE','INF_DESCRICAO','INF_AUTOR_DESCRICAO','INF_FONTE','INF_OBS','INF_ACTIVO','INF_MUNICIPAL','GlobalID']
df = pd.DataFrame(columns = column_names)

In [7]:
for tr_cell in table.find_all('tr'):
    row_data=[]
    for td_cell in tr_cell.find_all('td'):
        row_data.append(td_cell.text.strip())
    if len(row_data)==3:
        df.loc[len(df)] = row_data
df

Unnamed: 0,OBJECTID,COD_SIG,IDTIPO,CAT_NOME,CAT_ID,INF_ID,INF_NOME,INF_MORADA,INF_TELEFONE,INF_FAX,INF_EMAIL,INF_SITE,INF_DESCRICAO,INF_AUTOR_DESCRICAO,INF_FONTE,INF_OBS,INF_ACTIVO,INF_MUNICIPAL,GlobalID
0,1,805531053001,2,Espaços Verdes e Lazer - Miradouros,211819,12514,Miradouro do Panorâmico de Monsanto,Estrada Bela Vista,,,,,"Localizado no Parque Florestal de Monsanto, na...",,DMEVAE,,1,0,6eed37fd-2b1c-4d74-bf46-651b8b94737d
1,2,202512431001,2,Espaços Verdes e Lazer - Miradouros,211819,12522,Experiência Pilar 7,"Avenida da Índia, Ponte 25 de Abril - Pilar 7",+351 211117880,,info@lismarketing.pt,www.visitlisboa.com,,,www.visitlisboa.com,,1,0,783577af-9715-49a8-a797-b6cef6a60d69
2,3,16,7,Espaços Verdes e Lazer - Miradouros,211819,3232,Miradouro de Santo Estêvão,Largo de Santo Estêvão,+351 213 912 600,,daev@cm-lisboa.pt,,"Situado no coração de Alfama, do adro da Igrej...",,-,SG/DMC,1,0,51166c6a-a6bf-4d6a-9fdf-b2490dc737b6
3,4,41,7,Espaços Verdes e Lazer - Miradouros,211819,1948,Miradouro do Castelo de São Jorge,Castelo de São Jorge,+351 218 800 620,,info@castelodesaojorge.pt,www.castelodesaojorge.pt,O Castelo de São Jorge oferece uma vista privi...,Isabel Santos,-,,1,0,8ced2062-5a41-479a-927f-d17cadd6352a
4,5,20,7,Espaços Verdes e Lazer - Miradouros,211819,1958,Miradouro Sophia de Mello Breyner Andresen (Mi...,Largo da Graça,(351) 213 912 600,,daev@cm-lisboa.pt,,Localizado junto à Igreja e antigo Convento da...,Oásis Alfacinhas: Guia Ambiental de Lisboa - I...,-,,1,1,fd96eabc-2570-4094-831c-3c54e5a4133d
5,6,23,7,Espaços Verdes e Lazer - Miradouros,211819,1960,Miradouro de Santa Clara,Campo de Santa Clara,+351 218 533 270,,daev@cm-lisboa.pt,,É um ponto alto da cidade com vista panorâmica...,,-,SG/DMC,1,0,8cf5bdec-709c-45c9-9c61-4be75aeb0146
6,7,32,7,Espaços Verdes e Lazer - Miradouros,211819,3233,Miradouro da Basílica da Estrela,Praça da Estrela\n1200-667 LISBOA,+351 213 960 915,,,,Do zimbório desfruta-se de bela vista: a oeste...,,-,SG-DMC,1,0,48fbfa88-0ee2-449a-a6fa-98bf29696738
7,8,4,7,Espaços Verdes e Lazer - Miradouros,211819,1965,Miradouro do Torel,Rua Júlio de Andradel,,,,,Deste miradouro disfruta-se de uma vista desaf...,,-,SG/DMC,1,0,1cd6490b-e68e-4a34-9a27-c654ef3124e8
8,9,21,7,Espaços Verdes e Lazer - Miradouros,211819,1959,Miradouro das Portas do Sol,Largo das Portas do Sol\n,+351 213 912 600,,daev@cm-lisboa.pt,,"Localizado no Largo das Portas do Sol, daí o s...","http://revelarlx.cm-lisboa.pt, www.guiadacidad...",-,,1,1,229ec52c-ec31-4bf4-a471-b4db75912df9
9,10,11,7,Espaços Verdes e Lazer - Miradouros,211819,1953,Miradouro do Padrão dos Descobrimentos,Avenida de Brasília\n,+351 213 031 950,,info@padraodosdescobrimentos.pt,www.padraodosdescobrimentos.pt,"No terraço existe um espetacular miradouro, do...",,-,,1,1,ce95a050-404d-4678-8cae-fdff207bc983


In [8]:
df.shape

(20, 19)

In [9]:
Lisbon_viewpoints_geodf = gpd.read_file('https://opendata.arcgis.com/datasets/02763aa9ec6b4d719841f713e682b12a_1.geojson')
df = Lisbon_viewpoints_geodf

df

Unnamed: 0,OBJECTID,COD_SIG,IDTIPO,CAT_NOME,CAT_ID,INF_ID,INF_NOME,INF_MORADA,INF_TELEFONE,INF_FAX,INF_EMAIL,INF_SITE,INF_DESCRICAO,INF_AUTOR_DESCRICAO,INF_FONTE,INF_OBS,INF_ACTIVO,INF_MUNICIPAL,GlobalID,geometry
0,1,805531053001,2,Espaços Verdes e Lazer - Miradouros,211819,12514,Miradouro do Panorâmico de Monsanto,Estrada Bela Vista,,,,,"Localizado no Parque Florestal de Monsanto, na...",,DMEVAE,,1,0,6eed37fd-2b1c-4d74-bf46-651b8b94737d,POINT (-9.18461 38.72862)
1,2,202512431001,2,Espaços Verdes e Lazer - Miradouros,211819,12522,Experiência Pilar 7,"Avenida da Índia, Ponte 25 de Abril - Pilar 7",+351 211117880,,info@lismarketing.pt,www.visitlisboa.com,,,www.visitlisboa.com,,1,0,783577af-9715-49a8-a797-b6cef6a60d69,POINT (-9.17955 38.70001)
2,3,16,7,Espaços Verdes e Lazer - Miradouros,211819,3232,Miradouro de Santo Estêvão,Largo de Santo Estêvão,+351 213 912 600,,daev@cm-lisboa.pt,,"Situado no coração de Alfama, do adro da Igrej...",,-,SG/DMC,1,0,51166c6a-a6bf-4d6a-9fdf-b2490dc737b6,POINT (-9.12773 38.71207)
3,4,41,7,Espaços Verdes e Lazer - Miradouros,211819,1948,Miradouro do Castelo de São Jorge,Castelo de São Jorge,+351 218 800 620,,info@castelodesaojorge.pt,www.castelodesaojorge.pt,O Castelo de São Jorge oferece uma vista privi...,Isabel Santos,-,,1,0,8ced2062-5a41-479a-927f-d17cadd6352a,POINT (-9.13421 38.71299)
4,5,20,7,Espaços Verdes e Lazer - Miradouros,211819,1958,Miradouro Sophia de Mello Breyner Andresen (Mi...,Largo da Graça,(351) 213 912 600,,daev@cm-lisboa.pt,,Localizado junto à Igreja e antigo Convento da...,Oásis Alfacinhas: Guia Ambiental de Lisboa - I...,-,,1,1,fd96eabc-2570-4094-831c-3c54e5a4133d,POINT (-9.13158 38.71643)
5,6,23,7,Espaços Verdes e Lazer - Miradouros,211819,1960,Miradouro de Santa Clara,Campo de Santa Clara,+351 218 533 270,,daev@cm-lisboa.pt,,É um ponto alto da cidade com vista panorâmica...,,-,SG/DMC,1,0,8cf5bdec-709c-45c9-9c61-4be75aeb0146,POINT (-9.12469 38.71497)
6,7,32,7,Espaços Verdes e Lazer - Miradouros,211819,3233,Miradouro da Basílica da Estrela,Praça da Estrela\n1200-667 LISBOA,+351 213 960 915,,,,Do zimbório desfruta-se de bela vista: a oeste...,,-,SG-DMC,1,0,48fbfa88-0ee2-449a-a6fa-98bf29696738,POINT (-9.16058 38.71310)
7,8,4,7,Espaços Verdes e Lazer - Miradouros,211819,1965,Miradouro do Torel,Rua Júlio de Andradel,,,,,Deste miradouro disfruta-se de uma vista desaf...,,-,SG/DMC,1,0,1cd6490b-e68e-4a34-9a27-c654ef3124e8,POINT (-9.14129 38.71932)
8,9,21,7,Espaços Verdes e Lazer - Miradouros,211819,1959,Miradouro das Portas do Sol,Largo das Portas do Sol\n,+351 213 912 600,,daev@cm-lisboa.pt,,"Localizado no Largo das Portas do Sol, daí o s...","http://revelarlx.cm-lisboa.pt, www.guiadacidad...",-,,1,1,229ec52c-ec31-4bf4-a471-b4db75912df9,POINT (-9.13037 38.71235)
9,10,11,7,Espaços Verdes e Lazer - Miradouros,211819,1953,Miradouro do Padrão dos Descobrimentos,Avenida de Brasília\n,+351 213 031 950,,info@padraodosdescobrimentos.pt,www.padraodosdescobrimentos.pt,"No terraço existe um espetacular miradouro, do...",,-,,1,1,ce95a050-404d-4678-8cae-fdff207bc983,POINT (-9.20570 38.69361)


In [10]:
df.shape

(20, 20)

The dataset contains a lot of information for each viewpoint of Lisbon. For this problem, the information needed is just the name (INF_NOME), adress (INF_MORADA) and spatial coordinates (geometry), so let's create a dataframe only with this information. 

In [11]:
df_lisbonviews = df [["INF_NOME", "INF_MORADA", "geometry"]]
df_lisbonviews.columns = ['Viewpoint', 'Adress', 'Coordinates']

In [12]:
from shapely.wkt import loads as load_wkt

centroid_list = []

for polygon in df_lisbonviews["Coordinates"]:
    box_str = str(polygon)
    p1 = load_wkt(box_str)
    point = p1.centroid
#     print(type(p1.centroid.x))
#     print(p1.centroid.y)
    centroid_list.append((p1.centroid.y, p1.centroid.x))

lat_centr, lon_centr = zip(*centroid_list)

df_lisbonviews['Latitude'] = lat_centr
df_lisbonviews['Longitude'] = lon_centr

df_lisbonviews.drop(["Coordinates"], axis = 1, inplace = True) 
df_lisbonviews


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super(GeoDataFrame, self).__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,Viewpoint,Adress,Latitude,Longitude
0,Miradouro do Panorâmico de Monsanto,Estrada Bela Vista,38.728618,-9.184607
1,Experiência Pilar 7,"Avenida da Índia, Ponte 25 de Abril - Pilar 7",38.700008,-9.179553
2,Miradouro de Santo Estêvão,Largo de Santo Estêvão,38.71207,-9.127729
3,Miradouro do Castelo de São Jorge,Castelo de São Jorge,38.712991,-9.13421
4,Miradouro Sophia de Mello Breyner Andresen (Mi...,Largo da Graça,38.716427,-9.131584
5,Miradouro de Santa Clara,Campo de Santa Clara,38.71497,-9.124694
6,Miradouro da Basílica da Estrela,Praça da Estrela\n1200-667 LISBOA,38.713097,-9.160581
7,Miradouro do Torel,Rua Júlio de Andradel,38.719323,-9.141286
8,Miradouro das Portas do Sol,Largo das Portas do Sol\n,38.712346,-9.130366
9,Miradouro do Padrão dos Descobrimentos,Avenida de Brasília\n,38.693614,-9.205697


In [13]:
address = 'Lisbon'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Lisbon are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Lisbon are 38.7077507, -9.1365919.


In [14]:
map_lisbonviewpoints = folium.Map(location=[latitude, longitude], zoom_start=15)

# add markers to map
for lat, lng, viewpoint in zip(df_lisbonviews['Latitude'], 
                                          df_lisbonviews['Longitude'],  
                                           df_lisbonviews['Viewpoint']):
    label = '{}'.format(viewpoint)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=10,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=1.0,
        parse_html=False).add_to(map_lisbonviewpoints)  

map_lisbonviewpoints    

In [15]:
map_lisbonviewpoints2 = folium.Map([latitude, longitude], zoom_start=15)

folium.GeoJson(Lisbon_viewpoints_geodf,
    style_function=lambda x: {
        'color' : 'darkred',
        'opacity': 1.0,
        'fillColor' : 'darkred',
        }).add_to(map_lisbonviewpoints2)

map_lisbonviewpoints2

Now it's time to use the foursquare API to examinate the city of Milan

In [16]:
CLIENT_ID = 'GM0MXCVLIDQ2RTLGBJJ0NJBXFYZFYOLBNCNOS102GWZYTXPF' # your Foursquare ID
CLIENT_SECRET = 'IOQ4NZ15N2RBYYRU2Y2CYUNQ0NZWGYWRWYCO3ZDU2XXL1PC5' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

Extract venues for each neighborhood in Milan

In [17]:
def getNearbyRestaurants(names, latitudes, longitudes, radius):
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        # print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&section=food&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        resp = requests.get(url).json()["response"]
        
        if "groups" in resp:
            results = resp['groups'][0]['items']
            # return only relevant information for each nearby venue
            venues_list.append([(
                name, 
                lat, 
                lng, 
                v['venue']['name'], 
                v['venue']['location']['lat'], 
                v['venue']['location']['lng'],  
                v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Viewpoint', 
                  'Viewpoint Latitude', 
                  'Viewpoint Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [18]:
lisbon_viewpoints_venues = getNearbyRestaurants(names=df_lisbonviews['Viewpoint'],
                                         latitudes=df_lisbonviews['Latitude'],
                                         longitudes=df_lisbonviews['Longitude'],
                                         radius=1000)

In [19]:
print(lisbon_viewpoints_venues.shape)
lisbon_viewpoints_venues.head(20)

(1733, 7)


Unnamed: 0,Viewpoint,Viewpoint Latitude,Viewpoint Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Miradouro do Panorâmico de Monsanto,38.728618,-9.184607,Messe Monsanto,38.729066,-9.18907,Portuguese Restaurant
1,Miradouro do Panorâmico de Monsanto,38.728618,-9.184607,Monte Verde,38.726703,-9.194175,Portuguese Restaurant
2,Miradouro do Panorâmico de Monsanto,38.728618,-9.184607,Restaurante Monteverde,38.72504,-9.19078,Portuguese Restaurant
3,Miradouro do Panorâmico de Monsanto,38.728618,-9.184607,Pastelaria Snack Bar Creme e Canela,38.724917,-9.177665,Breakfast Spot
4,Miradouro do Panorâmico de Monsanto,38.728618,-9.184607,Papagaio da Serafina,38.733658,-9.17786,Portuguese Restaurant
5,Experiência Pilar 7,38.700008,-9.179553,Village Underground,38.700861,-9.17815,Café
6,Experiência Pilar 7,38.700008,-9.179553,Rio Maravilha,38.701798,-9.178076,Portuguese Restaurant
7,Experiência Pilar 7,38.700008,-9.179553,5 Oceanos,38.699654,-9.178412,Seafood Restaurant
8,Experiência Pilar 7,38.700008,-9.179553,A Padaria Portuguesa,38.702512,-9.180872,Bakery
9,Experiência Pilar 7,38.700008,-9.179553,1300 Taberna,38.702888,-9.178968,Restaurant


In [20]:
venues_unique = lisbon_viewpoints_venues.groupby(['Viewpoint',
                                         'Venue',         
                                         'Venue Latitude', 
                                         'Venue Longitude', 
                                         'Venue Category']).size().reset_index(name='Counts') 
venues_unique.head(10)

Unnamed: 0,Viewpoint,Venue,Venue Latitude,Venue Longitude,Venue Category,Counts
0,Experiência Pilar 7,1300 Taberna,38.702888,-9.178968,Restaurant,1
1,Experiência Pilar 7,5 Oceanos,38.699654,-9.178412,Seafood Restaurant,1
2,Experiência Pilar 7,A Padaria Portuguesa,38.702512,-9.180872,Bakery,1
3,Experiência Pilar 7,A Praça,38.70304,-9.178872,Italian Restaurant,1
4,Experiência Pilar 7,A Tapadinha,38.706576,-9.17873,Eastern European Restaurant,1
5,Experiência Pilar 7,Alcântara 50,38.705462,-9.173533,Portuguese Restaurant,1
6,Experiência Pilar 7,Café Dias,38.702917,-9.184385,Café,1
7,Experiência Pilar 7,Café na Fábrica,38.703206,-9.179032,Breakfast Spot,1
8,Experiência Pilar 7,Campinas,38.700569,-9.182483,Restaurant,1
9,Experiência Pilar 7,Cantina LX Factory,38.703228,-9.178896,Portuguese Restaurant,1


Now let's see all the points in a map

In [21]:
venues_map = folium.Map([latitude, longitude], zoom_start=12)

for label, lat, lng in zip(venues_unique['Venue'], 
                           venues_unique['Venue Latitude'], 
                           venues_unique['Venue Longitude']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        location=[lat, lng],
        radius=2,
        popup=label,
        color='red',
        fill=False,
        #fill_color='red',
        fill_opacity=0.6,
        parse_html=False
    ).add_to(map_lisbonviewpoints2)
   
from IPython.display import display
display(map_lisbonviewpoints2)

With a group by "Neighborhood", I can easly see how many places I have

In [22]:
resDF = venues_unique.groupby('Viewpoint').size().reset_index(name='Counts') 
resDF

Unnamed: 0,Viewpoint,Counts
0,Experiência Pilar 7,44
1,Miradouro Sophia de Mello Breyner Andresen (Mi...,100
2,Miradouro da Basílica da Estrela,100
3,Miradouro da Penha de França,100
4,Miradouro da Rocha de Conde de Óbidos,88
5,Miradouro da Senhora do Monte,100
6,Miradouro da Torre de Belém,39
7,Miradouro das Portas do Sol,100
8,Miradouro de Santa Catarina,100
9,Miradouro de Santa Clara,100


In [23]:
venueDF = venues_unique.groupby('Venue Category').size().reset_index(name='Counts')
venueDF.sort_values(by=['Counts'], ascending=False).head(10)

Unnamed: 0,Venue Category,Counts
46,Portuguese Restaurant,502
48,Restaurant,177
12,Café,175
34,Indian Restaurant,84
7,Bakery,75
38,Mediterranean Restaurant,64
65,Vegetarian / Vegan Restaurant,46
52,Seafood Restaurant,46
35,Italian Restaurant,41
44,Pizza Place,35


In [24]:
lisbonviews_onehot = pd.get_dummies(venues_unique['Venue Category'])
lisbonviews_onehot.insert(loc=0, column='Viewpoint', value=venues_unique['Viewpoint'])
lisbonviews_grouped = lisbonviews_onehot.groupby('Viewpoint').mean().reset_index()
lisbonviews_grouped.head(10)

Unnamed: 0,Viewpoint,African Restaurant,American Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bistro,Brazilian Restaurant,Breakfast Spot,Burger Joint,Café,Cajun / Creole Restaurant,Chinese Restaurant,Comfort Food Restaurant,Creperie,Deli / Bodega,Diner,Doner Restaurant,Eastern European Restaurant,Empanada Restaurant,Falafel Restaurant,Fast Food Restaurant,Food Court,Food Stand,Food Truck,French Restaurant,Fried Chicken Joint,Gastropub,Greek Restaurant,Hawaiian Restaurant,Himalayan Restaurant,Hot Dog Joint,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Malay Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Pastelaria,Peruvian Restaurant,Pizza Place,Poke Place,Portuguese Restaurant,Ramen Restaurant,Restaurant,Russian Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Snack Place,Soup Place,South American Restaurant,South Indian Restaurant,Spanish Restaurant,Steakhouse,Sushi Restaurant,Swiss Restaurant,Taco Place,Tapas Restaurant,Thai Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wings Joint
0,Experiência Pilar 7,0.0,0.0,0.022727,0.0,0.0,0.045455,0.0,0.068182,0.0,0.0,0.022727,0.0,0.090909,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.045455,0.0,0.0,0.113636,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.159091,0.0,0.227273,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.022727,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Miradouro Sophia de Mello Breyner Andresen (Mi...,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.05,0.02,0.01,0.02,0.0,0.13,0.0,0.03,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.34,0.01,0.06,0.0,0.01,0.01,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.0
2,Miradouro da Basílica da Estrela,0.01,0.0,0.0,0.0,0.01,0.02,0.01,0.04,0.01,0.0,0.04,0.02,0.08,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.05,0.05,0.03,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.26,0.01,0.07,0.0,0.0,0.0,0.05,0.01,0.0,0.01,0.0,0.0,0.04,0.01,0.01,0.0,0.03,0.0,0.0,0.05,0.0,0.0
3,Miradouro da Penha de França,0.0,0.0,0.0,0.02,0.0,0.03,0.01,0.07,0.0,0.01,0.01,0.01,0.16,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.11,0.05,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.03,0.0,0.21,0.01,0.09,0.0,0.0,0.01,0.03,0.02,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.01
4,Miradouro da Rocha de Conde de Óbidos,0.0,0.011364,0.0,0.0,0.0,0.022727,0.0,0.034091,0.0,0.0,0.022727,0.011364,0.056818,0.0,0.0,0.0,0.0,0.011364,0.011364,0.0,0.0,0.0,0.011364,0.011364,0.0,0.0,0.0,0.011364,0.0,0.0,0.022727,0.0,0.011364,0.0,0.022727,0.045455,0.022727,0.0,0.056818,0.022727,0.0,0.0,0.0,0.0,0.011364,0.0,0.238636,0.0,0.136364,0.0,0.0,0.0,0.045455,0.011364,0.022727,0.011364,0.0,0.0,0.022727,0.011364,0.0,0.0,0.034091,0.0,0.011364,0.022727,0.0,0.0
5,Miradouro da Senhora do Monte,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.05,0.02,0.01,0.03,0.0,0.14,0.0,0.03,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.31,0.01,0.06,0.0,0.01,0.01,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.04,0.0,0.0
6,Miradouro da Torre de Belém,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.051282,0.0,0.0,0.0,0.051282,0.179487,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.051282,0.0,0.0,0.025641,0.0,0.0,0.0,0.025641,0.025641,0.0,0.051282,0.0,0.025641,0.0,0.0,0.0,0.025641,0.0,0.025641,0.0,0.230769,0.0,0.102564,0.0,0.0,0.025641,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.051282,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Miradouro das Portas do Sol,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.03,0.01,0.01,0.01,0.11,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.4,0.01,0.07,0.0,0.01,0.01,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.0
8,Miradouro de Santa Catarina,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.04,0.02,0.02,0.02,0.03,0.06,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.04,0.0,0.0,0.02,0.01,0.01,0.01,0.0,0.03,0.01,0.0,0.27,0.0,0.11,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.06,0.0,0.0,0.05,0.0,0.0
9,Miradouro de Santa Clara,0.02,0.0,0.0,0.01,0.0,0.01,0.0,0.05,0.04,0.0,0.03,0.01,0.13,0.0,0.03,0.01,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.35,0.0,0.04,0.0,0.01,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.04,0.01,0.0


This function return the most common venues for each row

In [25]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

extract the first ten venues for every neighborhood

In [26]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Viewpoint']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
lisbonviewpoint_venues_sorted = pd.DataFrame(columns=columns)
lisbonviewpoint_venues_sorted['Viewpoint'] = lisbonviews_grouped['Viewpoint']

for ind in np.arange(lisbonviews_grouped.shape[0]):
    lisbonviewpoint_venues_sorted.iloc[ind, 1:] = return_most_common_venues(lisbonviews_grouped.iloc[ind, :], num_top_venues)

lisbonviewpoint_venues_sorted.head(10)

Unnamed: 0,Viewpoint,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Experiência Pilar 7,Restaurant,Portuguese Restaurant,Mediterranean Restaurant,Café,Bakery,Seafood Restaurant,Italian Restaurant,Pizza Place,BBQ Joint,Sushi Restaurant
1,Miradouro Sophia de Mello Breyner Andresen (Mi...,Portuguese Restaurant,Café,Indian Restaurant,Restaurant,Mediterranean Restaurant,Bakery,Chinese Restaurant,Vegetarian / Vegan Restaurant,Seafood Restaurant,Breakfast Spot
2,Miradouro da Basílica da Estrela,Portuguese Restaurant,Café,Restaurant,Vegetarian / Vegan Restaurant,Seafood Restaurant,Indian Restaurant,Italian Restaurant,Breakfast Spot,Bakery,Steakhouse
3,Miradouro da Penha de França,Portuguese Restaurant,Café,Indian Restaurant,Restaurant,Bakery,Italian Restaurant,Vegetarian / Vegan Restaurant,Seafood Restaurant,Pizza Place,BBQ Joint
4,Miradouro da Rocha de Conde de Óbidos,Portuguese Restaurant,Restaurant,Café,Mediterranean Restaurant,Italian Restaurant,Seafood Restaurant,Tapas Restaurant,Bakery,Breakfast Spot,Mexican Restaurant
5,Miradouro da Senhora do Monte,Portuguese Restaurant,Café,Indian Restaurant,Restaurant,Bakery,Vegetarian / Vegan Restaurant,Seafood Restaurant,Breakfast Spot,Mediterranean Restaurant,Chinese Restaurant
6,Miradouro da Torre de Belém,Portuguese Restaurant,Café,Restaurant,Food Truck,Japanese Restaurant,Sushi Restaurant,Bakery,Burger Joint,Seafood Restaurant,Indian Restaurant
7,Miradouro das Portas do Sol,Portuguese Restaurant,Café,Restaurant,Indian Restaurant,Mediterranean Restaurant,Vegetarian / Vegan Restaurant,Bistro,African Restaurant,Snack Place,Pizza Place
8,Miradouro de Santa Catarina,Portuguese Restaurant,Restaurant,Tapas Restaurant,Café,Vegetarian / Vegan Restaurant,Italian Restaurant,Bakery,Seafood Restaurant,Peruvian Restaurant,Gastropub
9,Miradouro de Santa Clara,Portuguese Restaurant,Café,Mediterranean Restaurant,Bakery,Bistro,Vegetarian / Vegan Restaurant,Restaurant,Indian Restaurant,Chinese Restaurant,Breakfast Spot


### Clustering

The first step is identify the best “K” using a famous analytical approach: the elbow method

In [27]:
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

lisbonviews_clustering = lisbonviews_grouped.drop('Viewpoint', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(lisbonviews_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 1, 3, 4, 3, 1, 4, 1, 3, 1], dtype=int32)

In [28]:
# cluster and top 10 venues for each neighborhood

# add clustering labels
lisbonviewpoint_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

lisbonviewpoint_merged = df_lisbonviews

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
lisbonviewpoint_merged = lisbonviewpoint_merged.join(lisbonviewpoint_venues_sorted.set_index('Viewpoint'), on='Viewpoint')

lisbonviewpoint_venues_sorted.head()

Unnamed: 0,Cluster Labels,Viewpoint,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,Experiência Pilar 7,Restaurant,Portuguese Restaurant,Mediterranean Restaurant,Café,Bakery,Seafood Restaurant,Italian Restaurant,Pizza Place,BBQ Joint,Sushi Restaurant
1,1,Miradouro Sophia de Mello Breyner Andresen (Mi...,Portuguese Restaurant,Café,Indian Restaurant,Restaurant,Mediterranean Restaurant,Bakery,Chinese Restaurant,Vegetarian / Vegan Restaurant,Seafood Restaurant,Breakfast Spot
2,3,Miradouro da Basílica da Estrela,Portuguese Restaurant,Café,Restaurant,Vegetarian / Vegan Restaurant,Seafood Restaurant,Indian Restaurant,Italian Restaurant,Breakfast Spot,Bakery,Steakhouse
3,4,Miradouro da Penha de França,Portuguese Restaurant,Café,Indian Restaurant,Restaurant,Bakery,Italian Restaurant,Vegetarian / Vegan Restaurant,Seafood Restaurant,Pizza Place,BBQ Joint
4,3,Miradouro da Rocha de Conde de Óbidos,Portuguese Restaurant,Restaurant,Café,Mediterranean Restaurant,Italian Restaurant,Seafood Restaurant,Tapas Restaurant,Bakery,Breakfast Spot,Mexican Restaurant


In [29]:
lisbonviewpoint_merged.head()

Unnamed: 0,Viewpoint,Adress,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Miradouro do Panorâmico de Monsanto,Estrada Bela Vista,38.728618,-9.184607,2,Portuguese Restaurant,Breakfast Spot,Wings Joint,Fast Food Restaurant,Doner Restaurant,Eastern European Restaurant,Empanada Restaurant,Falafel Restaurant,Food Court,Deli / Bodega
1,Experiência Pilar 7,"Avenida da Índia, Ponte 25 de Abril - Pilar 7",38.700008,-9.179553,0,Restaurant,Portuguese Restaurant,Mediterranean Restaurant,Café,Bakery,Seafood Restaurant,Italian Restaurant,Pizza Place,BBQ Joint,Sushi Restaurant
2,Miradouro de Santo Estêvão,Largo de Santo Estêvão,38.71207,-9.127729,1,Portuguese Restaurant,Café,Mediterranean Restaurant,Restaurant,Indian Restaurant,Bakery,Creperie,Bistro,Breakfast Spot,Snack Place
3,Miradouro do Castelo de São Jorge,Castelo de São Jorge,38.712991,-9.13421,1,Portuguese Restaurant,Restaurant,Café,Indian Restaurant,Mediterranean Restaurant,Bistro,Chinese Restaurant,Pizza Place,Burger Joint,Seafood Restaurant
4,Miradouro Sophia de Mello Breyner Andresen (Mi...,Largo da Graça,38.716427,-9.131584,1,Portuguese Restaurant,Café,Indian Restaurant,Restaurant,Mediterranean Restaurant,Bakery,Chinese Restaurant,Vegetarian / Vegan Restaurant,Seafood Restaurant,Breakfast Spot


Before to start to analyze all the clusters, let’s take a look on a folium map

In [30]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]
# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(lisbonviewpoint_merged['Latitude'], lisbonviewpoint_merged['Longitude'], lisbonviewpoint_merged['Viewpoint'], lisbonviewpoint_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=10,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Examine Clusters

In [31]:
#Cluster 1
lisbonviewpoint_merged.loc[lisbonviewpoint_merged['Cluster Labels'] == 0, lisbonviewpoint_merged.columns[[0] + list(range(5, lisbonviewpoint_merged.shape[1]))]]

Unnamed: 0,Viewpoint,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Experiência Pilar 7,Restaurant,Portuguese Restaurant,Mediterranean Restaurant,Café,Bakery,Seafood Restaurant,Italian Restaurant,Pizza Place,BBQ Joint,Sushi Restaurant
13,Miradouro de Santo Amaro,Restaurant,Portuguese Restaurant,Café,Mediterranean Restaurant,Italian Restaurant,Seafood Restaurant,Bakery,BBQ Joint,Pizza Place,Burger Joint
14,Miradouro do Parque Eduardo VII,Portuguese Restaurant,Restaurant,Italian Restaurant,Café,Japanese Restaurant,Bakery,Burger Joint,Steakhouse,Sushi Restaurant,Snack Place


In [32]:
#Cluster 2
lisbonviewpoint_merged.loc[lisbonviewpoint_merged['Cluster Labels'] == 1, lisbonviewpoint_merged.columns[[0] + list(range(5, lisbonviewpoint_merged.shape[1]))]]

Unnamed: 0,Viewpoint,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Miradouro de Santo Estêvão,Portuguese Restaurant,Café,Mediterranean Restaurant,Restaurant,Indian Restaurant,Bakery,Creperie,Bistro,Breakfast Spot,Snack Place
3,Miradouro do Castelo de São Jorge,Portuguese Restaurant,Restaurant,Café,Indian Restaurant,Mediterranean Restaurant,Bistro,Chinese Restaurant,Pizza Place,Burger Joint,Seafood Restaurant
4,Miradouro Sophia de Mello Breyner Andresen (Mi...,Portuguese Restaurant,Café,Indian Restaurant,Restaurant,Mediterranean Restaurant,Bakery,Chinese Restaurant,Vegetarian / Vegan Restaurant,Seafood Restaurant,Breakfast Spot
5,Miradouro de Santa Clara,Portuguese Restaurant,Café,Mediterranean Restaurant,Bakery,Bistro,Vegetarian / Vegan Restaurant,Restaurant,Indian Restaurant,Chinese Restaurant,Breakfast Spot
8,Miradouro das Portas do Sol,Portuguese Restaurant,Café,Restaurant,Indian Restaurant,Mediterranean Restaurant,Vegetarian / Vegan Restaurant,Bistro,African Restaurant,Snack Place,Pizza Place
12,Miradouro de Santa Luzia,Portuguese Restaurant,Café,Restaurant,Indian Restaurant,Mediterranean Restaurant,Bistro,Burger Joint,Pizza Place,Snack Place,African Restaurant
19,Miradouro da Senhora do Monte,Portuguese Restaurant,Café,Indian Restaurant,Restaurant,Bakery,Vegetarian / Vegan Restaurant,Seafood Restaurant,Breakfast Spot,Mediterranean Restaurant,Chinese Restaurant


In [33]:
#Cluster 3
lisbonviewpoint_merged.loc[lisbonviewpoint_merged['Cluster Labels'] == 2, lisbonviewpoint_merged.columns[[0] + list(range(5, lisbonviewpoint_merged.shape[1]))]]

Unnamed: 0,Viewpoint,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Miradouro do Panorâmico de Monsanto,Portuguese Restaurant,Breakfast Spot,Wings Joint,Fast Food Restaurant,Doner Restaurant,Eastern European Restaurant,Empanada Restaurant,Falafel Restaurant,Food Court,Deli / Bodega


In [34]:
#Cluster 4
lisbonviewpoint_merged.loc[lisbonviewpoint_merged['Cluster Labels'] == 3, lisbonviewpoint_merged.columns[[0] + list(range(5, lisbonviewpoint_merged.shape[1]))]]

Unnamed: 0,Viewpoint,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Miradouro da Basílica da Estrela,Portuguese Restaurant,Café,Restaurant,Vegetarian / Vegan Restaurant,Seafood Restaurant,Indian Restaurant,Italian Restaurant,Breakfast Spot,Bakery,Steakhouse
7,Miradouro do Torel,Portuguese Restaurant,Restaurant,Café,Vegetarian / Vegan Restaurant,Indian Restaurant,Tapas Restaurant,Breakfast Spot,Pizza Place,Gastropub,Bakery
9,Miradouro do Padrão dos Descobrimentos,Portuguese Restaurant,Restaurant,Café,Food Truck,Mediterranean Restaurant,Sandwich Place,Bakery,BBQ Joint,Sushi Restaurant,Japanese Restaurant
10,Miradouro da Rocha de Conde de Óbidos,Portuguese Restaurant,Restaurant,Café,Mediterranean Restaurant,Italian Restaurant,Seafood Restaurant,Tapas Restaurant,Bakery,Breakfast Spot,Mexican Restaurant
15,Miradouro de São Pedro de Alcântara,Portuguese Restaurant,Restaurant,Café,Indian Restaurant,Tapas Restaurant,Burger Joint,Seafood Restaurant,Peruvian Restaurant,Pizza Place,Russian Restaurant
18,Miradouro de Santa Catarina,Portuguese Restaurant,Restaurant,Tapas Restaurant,Café,Vegetarian / Vegan Restaurant,Italian Restaurant,Bakery,Seafood Restaurant,Peruvian Restaurant,Gastropub


In [35]:
#Cluster 5 
lisbonviewpoint_merged.loc[lisbonviewpoint_merged['Cluster Labels'] == 4, lisbonviewpoint_merged.columns[[0] + list(range(5, lisbonviewpoint_merged.shape[1]))]]

Unnamed: 0,Viewpoint,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Miradouro da Penha de França,Portuguese Restaurant,Café,Indian Restaurant,Restaurant,Bakery,Italian Restaurant,Vegetarian / Vegan Restaurant,Seafood Restaurant,Pizza Place,BBQ Joint
16,Miradouro do Monte Agudo,Portuguese Restaurant,Café,Bakery,Indian Restaurant,Restaurant,BBQ Joint,Seafood Restaurant,Vegetarian / Vegan Restaurant,Italian Restaurant,Pizza Place
17,Miradouro da Torre de Belém,Portuguese Restaurant,Café,Restaurant,Food Truck,Japanese Restaurant,Sushi Restaurant,Bakery,Burger Joint,Seafood Restaurant,Indian Restaurant
