# Capstone Project

## Neighbourhoods Prices and Venues Analysis in Paris

In [45]:
# Libraries used throughout the notebook.
import numpy as np  # numerical helpers
import pandas as pd  # tabular data structures

# Load the table holding one price and one pair of coordinates per arrondissement.
df = pd.read_csv('Arrondissement_Price_Paris.csv')

# Preview the first rows.
df.head(n=5)

Unnamed: 0,Arrondissement,Price,Latitude,Longitude
0,1,8800,48.8649,2.331
1,2,9750,48.8677,2.3439
2,3,11310,48.8642,2.3591
3,4,11380,48.8542,2.3582
4,5,11890,48.8409,2.3517


In [46]:
# Install folium (pinned to version 0.5.0 from conda-forge) with a shell command,
# then import it; folium is the library used to draw every map in this notebook.

!conda install -c conda-forge folium=0.5.0 --yes
import folium
print('Folium installed and imported!')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Folium installed and imported!


In [48]:
# Download the GeoJSON document that delimits each arrondissement of Paris
# and parse it into a Python object.

from urllib.request import urlopen
import json

with urlopen(
    'https://www.data.gouv.fr/fr/datasets/r/4765fe48-35fd-4536-b029-4727380ce23c'
) as response:
    arrondissements = json.load(response)

In [49]:
# Coordinates used as the centre of the map.
Paris_latitude = 48.8534
Paris_longitude = 2.3488

# Base folium map centred on those coordinates, opened at zoom level 12.
map_Paris = folium.Map(
    location=[Paris_latitude, Paris_longitude],
    zoom_start=12,
)


In [50]:
# Shade each arrondissement outline by the 'Price' column of df (legend:
# house price per m2).
# NOTE(review): key_on assumes each GeoJSON feature's properties.objectid equals
# the 'Arrondissement' value in df -- confirm against the downloaded file.
map_Paris.choropleth(
    data=df,
    columns=['Arrondissement', 'Price'],
    geo_data=arrondissements,
    key_on='feature.properties.objectid',
    legend_name='House Price per m2',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
)

# display map
map_Paris

In [7]:
# Load the table listing every borough (quartier) with its arrondissement
# number and its coordinates.
df_Borough = pd.read_csv('Arrondissement_Borough_Paris_2.csv')

# Preview the first rows.
df_Borough.head(n=5)

Unnamed: 0,Arrondissement,Borough,Latitude,Longitude
0,1,Saint-Germain-l'Auxerrois,48.8594,2.3409
1,1,Halles,48.8631,2.3434
2,1,Palais-Royal,48.8636,2.3353
3,1,Place-Vendôme,48.8674,2.3295
4,2,Gaillon,48.8707,2.3323


In [8]:
import folium

# Overlay one blue circle marker per borough on the existing map_Paris (this
# cell does not create a new map). Each popup shows "<arrondissement>,<borough>".
for lat, lng, Arrondissement, Borough in zip(
        df_Borough['Latitude'],
        df_Borough['Longitude'],
        df_Borough['Arrondissement'],
        df_Borough['Borough']):
    # parse_html=True makes folium treat the label as plain text, which matters
    # for names containing apostrophes or accents.
    label = folium.Popup('{},{}'.format(Arrondissement, Borough), parse_html=True)
    # parse_html is a Popup option, not a CircleMarker one, so it is only
    # passed to the Popup above.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_Paris)

map_Paris

In [9]:
# Define Foursquare API credentials.
# The id and secret are read from environment variables so that they never
# appear in the notebook source or in its saved outputs. Any key that was
# previously committed in this notebook should be revoked and regenerated.
import os
import requests

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # Foursquare Secret
VERSION = '20180605' # Foursquare API version

# Fail early with an actionable message instead of sending unauthenticated requests.
if not CLIENT_ID or not CLIENT_SECRET:
    raise RuntimeError(
        'Set the FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET environment '
        'variables before running this notebook.'
    )

# Deliberately do not echo the credentials: cell outputs are saved with the notebook.
print('Foursquare credentials loaded from the environment.')

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


In [10]:
# Send a request to Foursquare API for each neighbourhood
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare "explore" endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        One label and one pair of coordinates per location; they are walked
        in parallel with zip().
    radius : int, default 500
        Value sent as the ``radius`` query parameter of every request.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Borough',
        'Borough Latitude', 'Borough Longitude', 'Venue', 'Venue Latitude',
        'Venue Longitude' and 'Venue Category'. The frame is empty (but keeps
        these columns) when no venue is returned at all.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers with an error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET and VERSION, and prints
    each name as a progress indicator.
    """
    LIMIT = 100
    column_names = ['Borough',
                    'Borough Latitude',
                    'Borough Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and encode the query string instead of formatting
        # the URL (and the secret) by hand; the timeout prevents a stalled
        # connection from hanging the notebook forever.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        # Surface API errors explicitly rather than as a KeyError further down.
        response.raise_for_status()

        groups = response.json().get('response', {}).get('groups', [])
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # A venue may come back without any category; record it as missing
            # instead of failing on categories[0].
            categories = venue.get('categories') or []
            category = categories[0]['name'] if categories else None
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category))

    # Passing the columns to the constructor keeps the schema even when there
    # are no rows; assigning .columns afterwards fails on an empty frame.
    return pd.DataFrame(rows, columns=column_names)

# Collect the venues around every borough listed in df_Borough
# (default search radius of getNearbyVenues).
Paris_venues = getNearbyVenues(
    names=df_Borough['Borough'],
    latitudes=df_Borough['Latitude'],
    longitudes=df_Borough['Longitude'],
)

Saint-Germain-l'Auxerrois
Halles
Palais-Royal
Place-Vendôme
Gaillon
Vivienne
Mail
Bonne-Nouvelle
Arts et Métiers
Enfants-Rouges
Archives
Sainte-Avoye
Saint-Merri
Saint Gervais
Arsenal
Notre Dame
Saint Victor
Jardin des Plantes
Val-de-Grâce
Sorbonne
Monnaie
Odéon
Notre-Dame-des-Champs
Saint-Germain-des-Prés
Saint Thomas d'Aquin
Invalides
École Militaire
Gros caillou
Champs-Élysées
Faubourg-du-Roule
Madeleine
Europe
Saint Georges
Chaussée-d'Antin
Faubourg-Montmartre
Rochechouart
Saint-Vincent-de-Paul
Porte-Saint-Denis
Porte-Saint-Martin
Hôpital Saint-Louis
Folie-Méricourt
Saint-Ambroise
Roquette
sainte-marguerite
Bel-Air
Picpus
Bercy
Quinze-Vingts
Salpêtrière
Gare
Maison-Blanche
Croulebarbe
Montparnasse
Parc-de-Montsouris
Petit-Montrouge
Plaisance
Saint-Lambert
Necker
Grenelle
Javel
Auteuil
Muette
Porte-Dauphine
Chaillot
Ternes
Plaine-de-Monceaux
Batignolles
Épinettes
Grandes-Carrières
Clignancourt
Goutte-d'Or
Chapelle
Villette
Pont-de-Flandre
Amérique
Combat
Belleville
Saint-Fargeau
Pèr

In [24]:
# Number of venues returned for each borough (non-null count per column).
Paris_venues.groupby(by='Borough').count()

Unnamed: 0_level_0,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Amérique,14,14,14,14,14,14
Archives,100,100,100,100,100,100
Arsenal,100,100,100,100,100,100
Arts et Métiers,100,100,100,100,100,100
Auteuil,44,44,44,44,44,44
...,...,...,...,...,...,...
Villette,44,44,44,44,44,44
Vivienne,100,100,100,100,100,100
sainte-marguerite,57,57,57,57,57,57
École Militaire,52,52,52,52,52,52


In [29]:
# Report how many distinct venue categories appear across all boroughs.
category_count = len(Paris_venues['Venue Category'].unique())
print('There are {} uniques categories.'.format(category_count))

There are 296 uniques categories.


In [30]:
# One-hot encode the venue categories: one indicator column per category,
# named after the category itself (no prefix).
Paris_onehot = pd.get_dummies(
    Paris_venues[['Venue Category']],
    prefix="",
    prefix_sep="",
)

# Attach the borough each venue row belongs to.
Paris_onehot['Borough'] = Paris_venues['Borough']

# Rotate the last column to the front so the borough leads the table.
fixed_columns = list(Paris_onehot.columns)
fixed_columns = fixed_columns[-1:] + fixed_columns[:-1]
Paris_onehot = Paris_onehot[fixed_columns]

Paris_onehot.head()

Unnamed: 0,Borough,Accessories Store,Afghan Restaurant,African Restaurant,Alsatian Restaurant,American Restaurant,Antique Shop,Arcade,Argentinian Restaurant,Art Gallery,...,Udon Restaurant,Used Bookstore,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio,Zoo Exhibit
0,Saint-Germain-l'Auxerrois,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Saint-Germain-l'Auxerrois,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Saint-Germain-l'Auxerrois,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Saint-Germain-l'Auxerrois,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Saint-Germain-l'Auxerrois,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [35]:
# Average the indicator columns per borough: each value becomes the share of
# that borough's venues falling in the category. The borough name is moved
# back from the index into a regular column.
Paris_grouped = (
    Paris_onehot
    .groupby('Borough')
    .mean()
    .reset_index()
)
Paris_grouped

Unnamed: 0,Borough,Accessories Store,Afghan Restaurant,African Restaurant,Alsatian Restaurant,American Restaurant,Antique Shop,Arcade,Argentinian Restaurant,Art Gallery,...,Udon Restaurant,Used Bookstore,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio,Zoo Exhibit
0,Amérique,0.0,0.0,0.0,0.0,0.000000,0.0,0.00,0.00,0.000000,...,0.00,0.0,0.00,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0
1,Archives,0.0,0.0,0.0,0.0,0.000000,0.0,0.00,0.00,0.040000,...,0.00,0.0,0.00,0.0,0.000000,0.020000,0.000000,0.0,0.0,0.0
2,Arsenal,0.0,0.0,0.0,0.0,0.000000,0.0,0.00,0.00,0.010000,...,0.00,0.0,0.02,0.0,0.000000,0.010000,0.010000,0.0,0.0,0.0
3,Arts et Métiers,0.0,0.0,0.0,0.0,0.000000,0.0,0.00,0.01,0.020000,...,0.00,0.0,0.01,0.0,0.030000,0.040000,0.020000,0.0,0.0,0.0
4,Auteuil,0.0,0.0,0.0,0.0,0.000000,0.0,0.00,0.00,0.022727,...,0.00,0.0,0.00,0.0,0.000000,0.000000,0.045455,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,Villette,0.0,0.0,0.0,0.0,0.022727,0.0,0.00,0.00,0.000000,...,0.00,0.0,0.00,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0
76,Vivienne,0.0,0.0,0.0,0.0,0.010000,0.0,0.01,0.00,0.000000,...,0.02,0.0,0.00,0.0,0.010000,0.040000,0.010000,0.0,0.0,0.0
77,sainte-marguerite,0.0,0.0,0.0,0.0,0.000000,0.0,0.00,0.00,0.000000,...,0.00,0.0,0.00,0.0,0.035088,0.052632,0.000000,0.0,0.0,0.0
78,École Militaire,0.0,0.0,0.0,0.0,0.000000,0.0,0.00,0.00,0.000000,...,0.00,0.0,0.00,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0


In [36]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest values in a row, skipping its first entry.

    The first element of ``row`` is ignored (the caller passes the borough
    name there); the remaining entries are ordered from largest to smallest
    and the index labels of the leading ``num_top_venues`` are returned as an
    array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    top_labels = ranked.index.values
    return top_labels[:num_top_venues]

In [37]:
num_top_venues = 10


def _ordinal(rank):
    """Return rank with its English ordinal suffix (1st, 2nd, 3rd, 4th, 11th, 21st...)."""
    if 10 <= rank % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(rank % 10, 'th')
    return '{}{}'.format(rank, suffix)


# Column headers: the borough followed by one column per rank. The suffix is
# computed explicitly instead of relying on a bare except around a list lookup.
columns = ['Borough'] + [
    '{} Most Common Venue'.format(_ordinal(rank))
    for rank in range(1, num_top_venues + 1)
]

# Rank the categories of every borough, then build the frame in one go rather
# than filling it row by row.
top_venues_per_borough = [
    return_most_common_venues(Paris_grouped.iloc[position, :], num_top_venues)
    for position in range(Paris_grouped.shape[0])
]
neighborhoods_venues_sorted = pd.DataFrame(
    top_venues_per_borough,
    index=Paris_grouped.index,
    columns=columns[1:],
)
neighborhoods_venues_sorted.insert(0, 'Borough', Paris_grouped['Borough'])

neighborhoods_venues_sorted.head()

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Amérique,French Restaurant,Supermarket,Pool,Health Food Store,Plaza,Bistro,Bus Stop,Café,Theater,Park
1,Archives,French Restaurant,Clothing Store,Coffee Shop,Art Gallery,Cocktail Bar,Pastry Shop,Burger Joint,Bistro,Tea Room,Sandwich Place
2,Arsenal,French Restaurant,Hotel,Plaza,Bakery,Bar,Italian Restaurant,Tapas Restaurant,Seafood Restaurant,Pizza Place,Cocktail Bar
3,Arts et Métiers,French Restaurant,Hotel,Restaurant,Cocktail Bar,Italian Restaurant,Coffee Shop,Bar,Wine Bar,Vietnamese Restaurant,Moroccan Restaurant
4,Auteuil,French Restaurant,Supermarket,Italian Restaurant,Japanese Restaurant,Wine Shop,Café,Bistro,Bakery,Market,Bike Rental / Bike Share


In [41]:
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 5

# Keep only the numeric category frequencies for clustering. The keyword form
# is used because the positional axis argument of DataFrame.drop was removed
# in pandas 2.0.
Paris_grouped_clustering = Paris_grouped.drop(columns='Borough')

# run k-means clustering; random_state fixes the initialisation and n_init is
# given explicitly so results do not depend on the scikit-learn version's default
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(Paris_grouped_clustering)

# check cluster labels generated for the first rows of the dataframe
kmeans.labels_[0:10]

array([0, 1, 1, 1, 0, 4, 1, 3, 1, 1], dtype=int32)

In [43]:
# add clustering labels
# kmeans.labels_ holds one label per row of Paris_grouped, the frame the
# 'Borough' column of neighborhoods_venues_sorted was copied from, so the two
# line up by position. The labels go on a fresh frame (any existing
# 'Cluster Labels' column is dropped first), which keeps this cell re-runnable
# and leaves neighborhoods_venues_sorted untouched.
venues_with_clusters = neighborhoods_venues_sorted.drop(columns='Cluster Labels', errors='ignore')
venues_with_clusters.insert(0, 'Cluster Labels', kmeans.labels_)

# attach the cluster label and the most common venues to each borough's
# arrondissement and latitude/longitude; join returns a new frame, so
# df_Borough itself is not modified
Paris_merged = df_Borough.join(venues_with_clusters.set_index('Borough'), on='Borough')

Paris_merged.head(50)

Unnamed: 0,Arrondissement,Borough,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1,Saint-Germain-l'Auxerrois,48.8594,2.3409,1,French Restaurant,Café,Plaza,Hotel,Clothing Store,Cosmetics Shop,Bar,Bakery,Pizza Place,Wine Bar
1,1,Halles,48.8631,2.3434,1,French Restaurant,Pizza Place,Bakery,Hotel,Tea Room,Historic Site,Café,Clothing Store,Plaza,Miscellaneous Shop
2,1,Palais-Royal,48.8636,2.3353,1,Japanese Restaurant,Hotel,French Restaurant,Café,Coffee Shop,Plaza,Wine Bar,Udon Restaurant,Art Museum,Ramen Restaurant
3,1,Place-Vendôme,48.8674,2.3295,2,Hotel,French Restaurant,Boutique,Japanese Restaurant,Hotel Bar,Plaza,Bookstore,Cocktail Bar,Italian Restaurant,Pastry Shop
4,2,Gaillon,48.8707,2.3323,1,Hotel,French Restaurant,Japanese Restaurant,Coffee Shop,Italian Restaurant,Cocktail Bar,Plaza,Creperie,Salad Place,Pastry Shop
5,2,Vivienne,48.8688,2.3386,1,French Restaurant,Japanese Restaurant,Bistro,Wine Bar,Italian Restaurant,Korean Restaurant,Ramen Restaurant,Café,Bookstore,Bakery
6,2,Mail,48.8689,2.3447,1,Cocktail Bar,French Restaurant,Hotel,Italian Restaurant,Wine Bar,Thai Restaurant,Bistro,Coffee Shop,Bakery,Women's Store
7,2,Bonne-Nouvelle,48.8675,2.3491,1,Hotel,Cocktail Bar,French Restaurant,Italian Restaurant,Bakery,Wine Bar,Bar,Pizza Place,Coffee Shop,Pastry Shop
8,3,Arts et Métiers,48.8667,2.3569,1,French Restaurant,Hotel,Restaurant,Cocktail Bar,Italian Restaurant,Coffee Shop,Bar,Wine Bar,Vietnamese Restaurant,Moroccan Restaurant
9,3,Enfants-Rouges,48.8642,2.3632,1,Café,Art Gallery,Wine Bar,Coffee Shop,Bistro,Italian Restaurant,Hotel,French Restaurant,Creperie,Cocktail Bar


In [44]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# set color scheme for the clusters: kclusters colors sampled evenly from the
# rainbow colormap, converted to hex strings
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# A borough with no match in the venue table gets a missing cluster label from
# the join; it cannot be colored, so it is left out of this layer.
clustered_boroughs = Paris_merged.dropna(subset=['Cluster Labels'])

# add markers to the map (they are drawn on top of what map_Paris already holds)
for lat, lon, poi, cluster in zip(
        clustered_boroughs['Latitude'],
        clustered_boroughs['Longitude'],
        clustered_boroughs['Borough'],
        clustered_boroughs['Cluster Labels']):
    # Missing values elsewhere in the column turn the labels into floats;
    # cast back to int so they can index the color list.
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster - 1 is kept from the original color assignment: cluster 0 wraps
    # around to the last color, so every cluster still has its own color.
    marker_color = rainbow[cluster - 1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_Paris)

map_Paris