Install all the necessary dependencies

In [2]:
!conda install -c conda-forge geopy --yes
!conda install -c conda-forge folium=0.5.0 --yes

Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    openssl-1.0.2p             |       h470a237_1         3.1 MB  conda-forge
    geopy-1.17.0               |             py_0          49 KB  conda-forge
    certifi-2018.11.29         |        py36_1000         145 KB  conda-forge
    conda-4.5.11               |        py36_1000         651 KB  conda-forge
    ca-certificates-2018.11.29 |       ha4d7672_0         143 KB  conda-forge
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         4.1 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.49-py_0            conda-forge
    geopy:           

In [3]:
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import numpy as np
import requests
import json
from pandas.io.json import json_normalize
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

Import csv file containing the list of Paris neighborhoods and their coordinates

In [4]:
paris_hoods = pd.read_csv('https://opendata.paris.fr/explore/dataset/quartier_paris/download/?format=csv&timezone=Europe/Berlin&use_labels_for_header=true', sep=';')
paris_hoods.head()

Unnamed: 0,N_SQ_QU,C_QU,C_QUINSEE,L_QU,C_AR,N_SQ_AR,PERIMETRE,SURFACE,Geometry X Y,Geometry,OBJECTID,LONGUEUR
0,750000010,10,7510302,Enfants-Rouges,3,750000003,2139.625388,271750.323937,"48.863887392, 2.36312330099","{""type"": ""Polygon"", ""coordinates"": [[[2.367101...",50,2139.535591
1,750000016,16,7510404,Notre-Dame,4,750000004,3283.163371,378252.153674,"48.8528955862, 2.35277501212","{""type"": ""Polygon"", ""coordinates"": [[[2.361313...",56,3282.999717
2,750000018,18,7510502,Jardin-des-Plantes,5,750000005,4052.729521,798389.398463,"48.8419401934, 2.35689388962","{""type"": ""Polygon"", ""coordinates"": [[[2.364561...",58,4052.473226
3,750000025,25,7510701,Saint-Thomas-d'Aquin,7,750000007,3827.253353,826559.43678,"48.8552632694, 2.32558765258","{""type"": ""Polygon"", ""coordinates"": [[[2.322133...",7,3827.053421
4,750000035,35,7510903,Faubourg-Montmartre,9,750000009,2786.541926,417335.080621,"48.8739346918, 2.34325257947","{""type"": ""Polygon"", ""coordinates"": [[[2.340255...",17,2786.448978


Keep only the relevant columns (the ones containing the names and the coordinates)

In [5]:
paris_hoods = paris_hoods.filter(items=['L_QU', 'Geometry X Y'])
paris_hoods.head()

Unnamed: 0,L_QU,Geometry X Y
0,Enfants-Rouges,"48.863887392, 2.36312330099"
1,Notre-Dame,"48.8528955862, 2.35277501212"
2,Jardin-des-Plantes,"48.8419401934, 2.35689388962"
3,Saint-Thomas-d'Aquin,"48.8552632694, 2.32558765258"
4,Faubourg-Montmartre,"48.8739346918, 2.34325257947"


Rename the column containing the names of neighborhoods, split the coordinates column into two separate columns for latitude and longitude, and filter the dataframe again to have only the columns District, Latitude and Longitude.

In [6]:
paris_hoods.rename(columns={'L_QU':'Neighborhood'}, inplace=True)
paris_hoods['Latitude'], paris_hoods['Longitude'] = paris_hoods['Geometry X Y'].str.split(', ', 1).str
paris_hoods = paris_hoods.filter(items=['Neighborhood', 'Latitude', 'Longitude'])
paris_hoods.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Enfants-Rouges,48.863887392,2.36312330099
1,Notre-Dame,48.8528955862,2.35277501212
2,Jardin-des-Plantes,48.8419401934,2.35689388962
3,Saint-Thomas-d'Aquin,48.8552632694,2.32558765258
4,Faubourg-Montmartre,48.8739346918,2.34325257947


Check that the dataframe has all 80 neighborhoods of Paris.

In [7]:
print('The dataframe has {} neighborhoods.'.format(
        paris_hoods.shape[0]
    )
)

The dataframe has 80 neighborhoods.


Get the latitude and longitude of Paris, France.

In [8]:
address = 'Paris, France'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Paris, France, are {}, {}.'.format(latitude, longitude))



The geograpical coordinates of Paris, France, are 48.8566101, 2.3514992.


Create a map of Paris, France, with the neighborhoods superimposed.

In [9]:
map_paris = folium.Map(location=[latitude, longitude], zoom_start=10)

for lat, lng, district in zip(paris_hoods['Latitude'], paris_hoods['Longitude'], paris_hoods['Neighborhood']):
    label = district
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [float(lat), float(lng)],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_paris)  
    
map_paris

Define the Foursquare credentials, in order to use the Foursquare API and explore the Paris neighborhoods

In [10]:
CLIENT_ID = 'BCWF2L2DKA2GIK5Q34XCMQSBGLZRGEGPODRYLDSBRASUXP3I' # your Foursquare ID
CLIENT_SECRET = '2JG5DU425LE24ZYDTJB3CSBUFVEUDAAOTYJE3NFE51DHYEWM' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

Explore the first neighborhood in the list.

In [11]:
district_latitude = paris_hoods.loc[0, 'Latitude'] # neighborhood latitude value
district_longitude = paris_hoods.loc[0, 'Longitude'] # neighborhood longitude value

district_name = paris_hoods.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(district_name, 
                                                               district_latitude, 
                                                               district_longitude))

Latitude and longitude values of Enfants-Rouges are 48.863887392, 2.36312330099.


Define a limit for the number of results returned, and the url for the request to the Foursquare API.

In [12]:
section = 'arts'
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&section={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    district_latitude, 
    district_longitude,
    section)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=BCWF2L2DKA2GIK5Q34XCMQSBGLZRGEGPODRYLDSBRASUXP3I&client_secret=2JG5DU425LE24ZYDTJB3CSBUFVEUDAAOTYJE3NFE51DHYEWM&v=20180605&ll=48.863887392,2.36312330099&section=arts'

Send the request to the Foursquare API.

In [13]:
results = requests.get(url).json()

Define the function that extracts the category for each venue.

In [14]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

Transform the results returned by the Foursquare API into a dataframe with columns for name, category, latitude and longitude.

In [15]:
venues = results['response']['groups'][0]['items']
    
local_venues = json_normalize(venues)

filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
local_venues =local_venues.loc[:, filtered_columns]

local_venues['venue.categories'] = local_venues.apply(get_category_type, axis=1)

local_venues.columns = [col.split(".")[-1] for col in local_venues.columns]

local_venues

Unnamed: 0,name,categories,lat,lng
0,Slow Galerie,Art Gallery,48.86486,2.366554
1,Galerie Les Filles du Calvaire,Art Gallery,48.86296,2.365809
2,Cirque d'Hiver Bouglione,Circus,48.863251,2.366959
3,Galerie Thaddaeus Ropac,Art Gallery,48.860644,2.363725
4,Galerie Chantal Crousel,Art Gallery,48.861425,2.361085
5,Galerie Emmanuel Perrotin,Art Gallery,48.860538,2.365103
6,Galerie Particulière,Art Gallery,48.861127,2.360891
7,Galerie Karsten Greve,Art Gallery,48.860517,2.363767
8,Espace Marais Marais,Art Gallery,48.862808,2.360992
9,Musée de la Chasse et de la Nature,Museum,48.861507,2.358624


Define function for getting all the most common arts venues for all the neighborhoods of Paris.

In [16]:
def getLocalVenues(names, latitudes, longitudes):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&section={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng,
            section)
        
        results = requests.get(url).json()['response']['groups'][0]['items']
        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    local_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    local_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(local_venues)

Call the function above, to get the most common arts venues for each of all neighborhoods of Paris.

In [17]:
paris_venues = getLocalVenues(names=paris_hoods['Neighborhood'],
                                   latitudes=paris_hoods['Latitude'],
                                   longitudes=paris_hoods['Longitude']
                                  )

Enfants-Rouges
Notre-Dame
Jardin-des-Plantes
Saint-Thomas-d'Aquin
Faubourg-Montmartre
Rochechouart
Porte-Saint-Denis
Porte-Saint-Martin
Sainte-Marguerite
Bercy
St-Germain-l'Auxerrois
Combat
Ternes
Epinettes
Javel 15Art
Pont-de-Flandre
Plaisance
Palais-Royal
Maison-Blanche
Parc-de-Montsouris
Notre-Dame-des-Champs
Sainte-Avoie
Saint-Georges
Monnaie
Ecole-Militaire
Picpus
Chaillot
Muette
Mail
Archives
Arsenal
Champs-Elysées
Chaussée-d'Antin
Saint-Vincent-de-Paul
Hôpital-Saint-Louis
Petit-Montrouge
Necker
Charonne
Porte-Dauphine
Plaine de Monceaux
Batignolles
Bonne-Nouvelle
Arts-et-Metiers
Saint-Merri
Saint-Gervais
Saint-Victor
Sorbonne
Odeon
Folie-Méricourt
Bel-Air
Quinze-Vingts
Salpêtrière
Saint-Lambert
Grenelle
Saint-Fargeau
Père-Lachaise
Montparnasse
Grandes-Carrières
La Chapelle
Villette
Amérique
Gare
Roquette
Europe
Belleville
Gaillon
Saint-Ambroise
Croulebarbe
Place-Vendôme
Halles
Gros-Caillou
Madeleine
Val-de-Grace
Vivienne
Invalides
Saint-Germain-des-Prés
Faubourg-du-Roule
Auteuil

Check the size and the first five rows of the new dataframe.

In [18]:
print(paris_venues.shape)
paris_venues.head()

(2098, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Enfants-Rouges,48.863887392,2.36312330099,Slow Galerie,48.86486,2.366554,Art Gallery
1,Enfants-Rouges,48.863887392,2.36312330099,Galerie Les Filles du Calvaire,48.86296,2.365809,Art Gallery
2,Enfants-Rouges,48.863887392,2.36312330099,Cirque d'Hiver Bouglione,48.863251,2.366959,Circus
3,Enfants-Rouges,48.863887392,2.36312330099,Galerie Thaddaeus Ropac,48.860644,2.363725,Art Gallery
4,Enfants-Rouges,48.863887392,2.36312330099,Galerie Chantal Crousel,48.861425,2.361085,Art Gallery


Use one hot encoding to map all arts venues across all neighborhoods of Paris.

In [19]:
# one hot encoding
paris_onehot = pd.get_dummies(paris_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
paris_onehot['Neighborhood'] = paris_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [paris_onehot.columns[-1]] + list(paris_onehot.columns[:-1])
paris_onehot = paris_onehot[fixed_columns]

paris_onehot.head()

Unnamed: 0,Neighborhood,Art Gallery,Art Museum,Arts & Entertainment,Circus,Comedy Club,Concert Hall,Country Dance Club,Dance Studio,Disc Golf,Exhibit,History Museum,Indie Movie Theater,Jazz Club,Laser Tag,Memorial Site,Movie Theater,Multiplex,Museum,Music Venue,Opera House,Outdoor Sculpture,Performing Arts Venue,Piano Bar,Planetarium,Public Art,Racecourse,Rock Club,Rugby Stadium,Science Museum,Street Art,Theater,Tour Provider,Zoo Exhibit
0,Enfants-Rouges,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Enfants-Rouges,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Enfants-Rouges,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Enfants-Rouges,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Enfants-Rouges,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Create a new dataframe grouped by neighborhood and containing the frequencies for each type of arts venue.

In [20]:
paris_grouped = paris_onehot.groupby('Neighborhood').mean().reset_index()
paris_grouped

Unnamed: 0,Neighborhood,Art Gallery,Art Museum,Arts & Entertainment,Circus,Comedy Club,Concert Hall,Country Dance Club,Dance Studio,Disc Golf,Exhibit,History Museum,Indie Movie Theater,Jazz Club,Laser Tag,Memorial Site,Movie Theater,Multiplex,Museum,Music Venue,Opera House,Outdoor Sculpture,Performing Arts Venue,Piano Bar,Planetarium,Public Art,Racecourse,Rock Club,Rugby Stadium,Science Museum,Street Art,Theater,Tour Provider,Zoo Exhibit
0,Amérique,0.0,0.033333,0.0,0.033333,0.0,0.233333,0.0,0.066667,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.133333,0.0,0.033333,0.1,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.033333,0.2,0.0,0.0
1,Archives,0.566667,0.1,0.0,0.033333,0.066667,0.0,0.0,0.033333,0.0,0.0,0.033333,0.033333,0.0,0.0,0.033333,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0
2,Arsenal,0.266667,0.066667,0.0,0.0,0.033333,0.0,0.0,0.1,0.0,0.0,0.0,0.033333,0.033333,0.0,0.033333,0.066667,0.033333,0.1,0.066667,0.033333,0.0,0.033333,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.033333
3,Arts-et-Metiers,0.233333,0.0,0.0,0.0,0.1,0.066667,0.0,0.0,0.0,0.0,0.033333,0.066667,0.0,0.0,0.0,0.033333,0.0,0.1,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0
4,Auteuil,0.227273,0.090909,0.0,0.045455,0.0,0.045455,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.045455,0.136364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.045455,0.045455,0.0,0.0,0.045455,0.090909,0.0
5,Batignolles,0.176471,0.176471,0.0,0.0,0.0,0.117647,0.0,0.058824,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.058824,0.0,0.058824,0.0,0.0,0.058824,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.176471,0.0,0.0
6,Bel-Air,0.1,0.0,0.033333,0.033333,0.033333,0.033333,0.0,0.133333,0.0,0.033333,0.033333,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.2,0.0,0.0,0.066667,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.233333,0.0,0.0
7,Belleville,0.066667,0.066667,0.0,0.0,0.066667,0.1,0.0,0.1,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.033333,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.066667,0.2,0.0,0.0
8,Bercy,0.2,0.04,0.0,0.0,0.0,0.04,0.0,0.12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.12,0.16,0.08,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.04,0.0,0.04,0.0,0.08,0.04,0.0
9,Bonne-Nouvelle,0.1,0.0,0.0,0.0,0.133333,0.066667,0.0,0.0,0.0,0.0,0.033333,0.133333,0.033333,0.0,0.0,0.066667,0.033333,0.066667,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.0,0.0


Display the top 5 arts venues for each neighborhood of Paris.

In [21]:
num_top_venues = 5

for hood in paris_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = paris_grouped[paris_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Amérique----
           venue  freq
0   Concert Hall  0.23
1        Theater  0.20
2  Movie Theater  0.13
3    Music Venue  0.10
4   Dance Studio  0.07


----Archives----
                 venue  freq
0          Art Gallery  0.57
1           Art Museum  0.10
2          Comedy Club  0.07
3              Theater  0.07
4  Indie Movie Theater  0.03


----Arsenal----
          venue  freq
0   Art Gallery  0.27
1  Dance Studio  0.10
2        Museum  0.10
3   Music Venue  0.07
4    Art Museum  0.07


----Arts-et-Metiers----
                 venue  freq
0              Theater  0.33
1          Art Gallery  0.23
2          Comedy Club  0.10
3               Museum  0.10
4  Indie Movie Theater  0.07


----Auteuil----
           venue  freq
0    Art Gallery  0.23
1         Museum  0.14
2  Tour Provider  0.09
3     Racecourse  0.09
4     Art Museum  0.09


----Batignolles----
          venue  freq
0   Art Gallery  0.18
1       Theater  0.18
2    Art Museum  0.18
3  Concert Hall  0.12
4    Public Ar

Define function that returns the most common venues.

In [22]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Create new dataframe that lists the top 5 most common arts venues for each neighborhood of Paris.

In [23]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = paris_grouped['Neighborhood']

for ind in np.arange(paris_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(paris_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Amérique,Concert Hall,Theater,Movie Theater,Music Venue,Indie Movie Theater
1,Archives,Art Gallery,Art Museum,Theater,Comedy Club,History Museum
2,Arsenal,Art Gallery,Dance Studio,Museum,Art Museum,Movie Theater
3,Arts-et-Metiers,Theater,Art Gallery,Museum,Comedy Club,Indie Movie Theater
4,Auteuil,Art Gallery,Museum,Art Museum,Racecourse,Tour Provider
5,Batignolles,Art Gallery,Theater,Art Museum,Concert Hall,Outdoor Sculpture
6,Bel-Air,Theater,Music Venue,Dance Studio,Art Gallery,Performing Arts Venue
7,Belleville,Theater,Music Venue,Concert Hall,Dance Studio,Rock Club
8,Bercy,Art Gallery,Museum,Dance Studio,Multiplex,Music Venue
9,Bonne-Nouvelle,Theater,Comedy Club,Indie Movie Theater,Art Gallery,Concert Hall


Use k-means to group the neighborhoods into 5 clusters.

In [24]:
kclusters = 5

paris_grouped_clustering = paris_grouped.drop('Neighborhood', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(paris_grouped_clustering)

kmeans.labels_[0:10] 

array([0, 3, 2, 1, 2, 2, 0, 0, 2, 0], dtype=int32)

Create a new dataframe that also includes the cluster labels in addition to the top 5 most common arts venues for each neighborhood of Paris.

In [25]:
paris_merged = paris_hoods

paris_merged['Cluster Labels'] = kmeans.labels_

paris_merged = paris_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

paris_merged.head()

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Enfants-Rouges,48.863887392,2.36312330099,0,Art Gallery,Theater,Museum,Comedy Club,History Museum
1,Notre-Dame,48.8528955862,2.35277501212,3,Art Gallery,Art Museum,Indie Movie Theater,Comedy Club,History Museum
2,Jardin-des-Plantes,48.8419401934,2.35689388962,2,Museum,Science Museum,Indie Movie Theater,Zoo Exhibit,Art Museum
3,Saint-Thomas-d'Aquin,48.8552632694,2.32558765258,1,Art Gallery,Art Museum,Exhibit,Indie Movie Theater,History Museum
4,Faubourg-Montmartre,48.8739346918,2.34325257947,2,Theater,Dance Studio,Comedy Club,Museum,Indie Movie Theater


Visualize the 5 clusters grouping the neighborhoods of Paris.

In [26]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(paris_merged['Latitude'], paris_merged['Longitude'], paris_merged['Neighborhood'], paris_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [float(lat), float(lon)],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Explore each individual cluster.

Explore cluster 1.

In [27]:
paris_merged.loc[paris_merged['Cluster Labels'] == 0, paris_merged.columns[[0] + list(range(4, paris_merged.shape[1]))]].reset_index(drop=True)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Enfants-Rouges,Art Gallery,Theater,Museum,Comedy Club,History Museum
1,Porte-Saint-Denis,Theater,Comedy Club,Indie Movie Theater,Music Venue,Art Gallery
2,Porte-Saint-Martin,Theater,Art Gallery,Comedy Club,Indie Movie Theater,Music Venue
3,Bercy,Art Gallery,Museum,Dance Studio,Multiplex,Music Venue
4,Ternes,Movie Theater,Multiplex,Circus,Comedy Club,Disc Golf
5,Pont-de-Flandre,Concert Hall,Multiplex,Performing Arts Venue,Movie Theater,Music Venue
6,Sainte-Avoie,Art Gallery,Comedy Club,Art Museum,Museum,Jazz Club
7,Monnaie,Indie Movie Theater,Theater,Art Gallery,Jazz Club,Piano Bar
8,Muette,Museum,Art Museum,Art Gallery,Theater,Circus
9,Charonne,Theater,Dance Studio,Music Venue,Memorial Site,Comedy Club


Explore cluster 2.

In [28]:
paris_merged.loc[paris_merged['Cluster Labels'] == 1, paris_merged.columns[[0] + list(range(4, paris_merged.shape[1]))]].reset_index(drop=True)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Saint-Thomas-d'Aquin,Art Gallery,Art Museum,Exhibit,Indie Movie Theater,History Museum
1,Epinettes,Art Gallery,Theater,Performing Arts Venue,Art Museum,Comedy Club
2,Javel 15Art,Art Gallery,Music Venue,Multiplex,Dance Studio,Outdoor Sculpture
3,Parc-de-Montsouris,Multiplex,Theater,Dance Studio,Outdoor Sculpture,Comedy Club
4,Notre-Dame-des-Champs,Theater,Art Gallery,Indie Movie Theater,Art Museum,Multiplex
5,Ecole-Militaire,Art Gallery,Art Museum,History Museum,Concert Hall,Movie Theater
6,Chaillot,Art Museum,Museum,Movie Theater,Art Gallery,Performing Arts Venue
7,Arsenal,Art Gallery,Dance Studio,Museum,Art Museum,Movie Theater
8,Champs-Elysées,Art Gallery,Theater,Movie Theater,Museum,Performing Arts Venue
9,Petit-Montrouge,Multiplex,Theater,Indie Movie Theater,Jazz Club,Outdoor Sculpture


Explore cluster 3.

In [29]:
paris_merged.loc[paris_merged['Cluster Labels'] == 2, paris_merged.columns[[0] + list(range(4, paris_merged.shape[1]))]].reset_index(drop=True)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Jardin-des-Plantes,Museum,Science Museum,Indie Movie Theater,Zoo Exhibit,Art Museum
1,Faubourg-Montmartre,Theater,Dance Studio,Comedy Club,Museum,Indie Movie Theater
2,Rochechouart,Theater,Art Gallery,Music Venue,Comedy Club,Art Museum
3,Sainte-Marguerite,Theater,Dance Studio,Music Venue,Comedy Club,Art Gallery
4,St-Germain-l'Auxerrois,Exhibit,Art Museum,Theater,Museum,Art Gallery
5,Combat,Music Venue,Theater,Art Gallery,Street Art,Dance Studio
6,Palais-Royal,Theater,Art Museum,Exhibit,Multiplex,Outdoor Sculpture
7,Picpus,Theater,Music Venue,Art Gallery,Dance Studio,Arts & Entertainment
8,Mail,Theater,Comedy Club,Art Gallery,Concert Hall,Dance Studio
9,Archives,Art Gallery,Art Museum,Theater,Comedy Club,History Museum


Explore cluster 4.

In [30]:
paris_merged.loc[paris_merged['Cluster Labels'] == 3, paris_merged.columns[[0] + list(range(4, paris_merged.shape[1]))]].reset_index(drop=True)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Notre-Dame,Art Gallery,Art Museum,Indie Movie Theater,Comedy Club,History Museum
1,Maison-Blanche,Theater,Multiplex,Dance Studio,Music Venue,Comedy Club
2,Saint-Georges,Theater,Comedy Club,Music Venue,Movie Theater,Multiplex
3,Belleville,Theater,Music Venue,Concert Hall,Dance Studio,Rock Club
4,Gaillon,Theater,Art Gallery,Multiplex,Concert Hall,Opera House
5,Croulebarbe,Art Gallery,Indie Movie Theater,Movie Theater,Theater,Dance Studio
6,Place-Vendôme,Theater,Art Gallery,Art Museum,Multiplex,Concert Hall
7,Madeleine,Art Gallery,Theater,Art Museum,Museum,Movie Theater


Explore cluster 5.

In [31]:
paris_merged.loc[paris_merged['Cluster Labels'] == 4, paris_merged.columns[[0] + list(range(4, paris_merged.shape[1]))]].reset_index(drop=True)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Plaisance,Theater,Comedy Club,Jazz Club,Multiplex,Art Museum
1,Plaine de Monceaux,Art Museum,Multiplex,Outdoor Sculpture,Circus,Concert Hall
2,Saint-Merri,Art Gallery,Theater,Jazz Club,Art Museum,Comedy Club
3,Saint-Victor,Museum,Science Museum,Indie Movie Theater,History Museum,Piano Bar
4,Folie-Méricourt,Theater,Music Venue,Art Gallery,Comedy Club,Concert Hall
5,Halles,Art Gallery,Theater,Art Museum,Jazz Club,Concert Hall
6,Invalides,Art Museum,History Museum,Theater,Art Gallery,Music Venue
7,Faubourg-du-Roule,Art Gallery,Movie Theater,Multiplex,Concert Hall,Indie Movie Theater
8,Auteuil,Art Gallery,Museum,Art Museum,Racecourse,Tour Provider
