# The Battle of the Neighborhoods

For my capstone project, I want to explore the arrondissements and famous landmarks and Foursquare venues of Paris in order to create a travel itinerary which takes into account travel time, must-see attractions, and local hot spots.

#### First, I will install the necessary packages for our analysis

In [2]:
import requests
from bs4 import BeautifulSoup
import csv
import json
import xml
import pandas as pd
import numpy as np

# !conda install -c conda-forge folium=0.5.0 --yes
import folium
from folium import plugins

import json
from pprint import pprint

# !conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe


#### Get the HTML of the entire Wikipedia page of the 20 Arrondissements of Paris as a string

In [3]:
url = requests.get('https://en.wikipedia.org/wiki/Arrondissements_of_Paris#Arrondissements').text

#### Apply Beautiful Soup to make Soup of the URL, then apply Prettify to view the nested tags and look for the tag that contains the table with arrondissement information

In [4]:
soup = BeautifulSoup(url, 'lxml')
print(soup.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   Arrondissements of Paris - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"Arrondissements_of_Paris","wgTitle":"Arrondissements of Paris","wgCurRevisionId":915289231,"wgRevisionId":915289231,"wgArticleId":1175882,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["CS1 French-language sources (fr)","Articles with short description","Articles with hAudio microformats","Use dmy dates from March 2016","Arrondissements of Paris","Government of Paris"],"wgBreakFrames":!1,"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","

In [5]:
arrond_table = soup.find("table", class_ = 'wikitable sortable')
arrond_table_rows = arrond_table.find_all('tr')


#### Extract the columns Arrondissement, Name, Area, Population, Density

In [6]:
information = []
for row in arrond_table_rows:
    info = row.text.split('\n')[1:-1]
    information.append(info)

del information[0][1::2] # There are extra column headers "" that we don't need, so delete them    

information

[['Arrondissement (R for Right Bank, L for Left Bank)',
  'Name',
  'Area (km2)',
  'Population(March 1999 census)',
  'Population(July 2005 estimate)',
  'Density (2005)(inhabitants per km2)',
  'Peak of population',
  'Mayor'],
 ['1st (Ie) R',
  'Louvre',
  '1.826\xa0km2 (0.705\xa0sq\xa0mi)',
  '16,888',
  '17,700',
  '9,693',
  'before 1861',
  'Jean-François Legaret (LR)'],
 ['2nd (IIe) R',
  'Bourse',
  '0.992\xa0km2 (0.383\xa0sq\xa0mi)',
  '19,585',
  '20,700',
  '20,867',
  'before 1861',
  'Jacques Boutault (EELV)'],
 ['3rd (IIIe) R',
  'Temple',
  '1.171\xa0km2 (0.452\xa0sq\xa0mi)',
  '34,248',
  '35,100',
  '29,974',
  'before 1861',
  'Pierre Aidenbaum (PS)'],
 ['4th (IVe) R',
  'Hôtel-de-Ville',
  '1.601\xa0km2 (0.618\xa0sq\xa0mi)',
  '30,675',
  '28,600',
  '17,864',
  'before 1861',
  'Ariel Weil (PS)'],
 ['5th (Ve) L',
  'Panthéon',
  '2.541\xa0km2 (0.981\xa0sq\xa0mi)',
  '58,849',
  '60,600',
  '23,849',
  '1911',
  'Florence Berthout (LR)'],
 ['6th (VIe) L',
  'Luxembo

#### Turn above information into a Pandas dataframe

In [7]:
arrond_df = pd.DataFrame(information[1:], columns=information[0])
# where information[1:] contains each row of neighborhoods
# and columns = information[0] gives the column names

arrond_df.head(25)

Unnamed: 0,"Arrondissement (R for Right Bank, L for Left Bank)",Name,Area (km2),Population(March 1999 census),Population(July 2005 estimate),Density (2005)(inhabitants per km2),Peak of population,Mayor
0,1st (Ie) R,Louvre,1.826 km2 (0.705 sq mi),16888,17700,9693,before 1861,Jean-François Legaret (LR)
1,2nd (IIe) R,Bourse,0.992 km2 (0.383 sq mi),19585,20700,20867,before 1861,Jacques Boutault (EELV)
2,3rd (IIIe) R,Temple,1.171 km2 (0.452 sq mi),34248,35100,29974,before 1861,Pierre Aidenbaum (PS)
3,4th (IVe) R,Hôtel-de-Ville,1.601 km2 (0.618 sq mi),30675,28600,17864,before 1861,Ariel Weil (PS)
4,5th (Ve) L,Panthéon,2.541 km2 (0.981 sq mi),58849,60600,23849,1911,Florence Berthout (LR)
5,6th (VIe) L,Luxembourg,2.154 km2 (0.832 sq mi),44919,45200,20984,1911,Jean-Pierre Lecoq (LR)
6,7th (VIIe) L,Palais-Bourbon,4.088 km2 (1.578 sq mi),56985,55400,13552,1926,Rachida Dati (LR)
7,8th (VIIIe) R,Élysée,3.881 km2 (1.498 sq mi),39314,38700,9972,1891,Jeanne d'Hauteserre (LR)
8,9th (IXe) R,Opéra,2.179 km2 (0.841 sq mi),55838,58500,26847,1901,Delphine Bürkli (LR)
9,10th (Xe) R,Entrepôt,2.892 km2 (1.117 sq mi),89612,88800,30705,1881,Alexandra Cordebard (PS)


In [8]:
# Add a column with just the Arrondissement numbers

arrond_number = ['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th','10th',
                '11th', '12th', '13th', '14th', '15th', '16th', '17th', '18th', '19th', '20th']

Arr = pd.DataFrame({'Arrondissement': arrond_number})

arrond_df = pd.concat([arrond_df, Arr], axis = 1)

arrond_df.head()

Unnamed: 0,"Arrondissement (R for Right Bank, L for Left Bank)",Name,Area (km2),Population(March 1999 census),Population(July 2005 estimate),Density (2005)(inhabitants per km2),Peak of population,Mayor,Arrondissement
0,1st (Ie) R,Louvre,1.826 km2 (0.705 sq mi),16888,17700,9693,before 1861,Jean-François Legaret (LR),1st
1,2nd (IIe) R,Bourse,0.992 km2 (0.383 sq mi),19585,20700,20867,before 1861,Jacques Boutault (EELV),2nd
2,3rd (IIIe) R,Temple,1.171 km2 (0.452 sq mi),34248,35100,29974,before 1861,Pierre Aidenbaum (PS),3rd
3,4th (IVe) R,Hôtel-de-Ville,1.601 km2 (0.618 sq mi),30675,28600,17864,before 1861,Ariel Weil (PS),4th
4,5th (Ve) L,Panthéon,2.541 km2 (0.981 sq mi),58849,60600,23849,1911,Florence Berthout (LR),5th


In [9]:
arrond_df.dtypes

Arrondissement (R for Right Bank, L for Left Bank)    object
Name                                                  object
Area (km2)                                            object
Population(March 1999 census)                         object
Population(July 2005 estimate)                        object
Density (2005)(inhabitants per km2)                   object
Peak of population                                    object
Mayor                                                 object
Arrondissement                                        object
dtype: object

#### Here's a dataframe of the top Paris architectural and historical landmarks I've gathered manually from the Architecture Digest website.

In [10]:
landmarks = ['Cathédrale Notre-Dame de Paris', 'Arc de Triomphe', 'Centre Pompidou',
             'Sacré-Cœur Basilica', 'Eiffel Tower',
             'The Louvre', 'La Grande Arche', 'Les Invalides', 'Fondation Louis Vuitton',
             'Panthéon', 'Picasso Museum', 'Cinémathèque Française',
             'Opéra Garnier', 'The Grand Palais', 'Hôtel de Ville',
             'Fondation Cartier', 'Jeu de Paume', 'Jardin des Plantes',
             'Fondation Jerôme Seydoux-Pathé', 'Île de la Cité',
             'The Palais de Tokyo', 'Philharmonie de Paris', 'Musée d’Orsay',
             'Palais du Luxembourg', 'Montmartre', 'Champs-Élysées',
             'Palace of Versailles', 'Tuileries Garden', 'Moulin Rouge',
             'Disneyland Paris', 'Place de la Concorde', 'Catacombs of Paris',
             'Rodin Museum', 'Île de la Cité', 'Orangerie Museum', 'Pont des Arts',
             'Île Saint-Louis', 'Place Vendôme', 'Saint-Jacques Tower', 'La Conciergerie',
            ]

landmarks_df = pd.DataFrame({'Landmark': landmarks})


#### I will find each landmark's latitude and longitude information using the Nominatim package. 

In [11]:
def landmark_loc(list_of_landmarks):
    for place in list_of_landmarks:
        address = (place)
        geolocator = Nominatim(user_agent="Paris_explorer")
        location = geolocator.geocode(address)
        latitude = location.latitude
        longitude = location.longitude
        print('{''}, {}, {},'.format(place,latitude,longitude))

landmark_loc(landmarks)

Cathédrale Notre-Dame de Paris, 48.85293705, 2.3500501225,
Arc de Triomphe, 48.8737791, 2.29503722603767,
Centre Pompidou, 48.8605918, 2.35247431766534,
Sacré-Cœur Basilica, 48.88680575, 2.34301534488351,
Eiffel Tower, 48.8582602, 2.29449905431968,
The Louvre, 48.8611473, 2.33802768704666,
La Grande Arche, 43.1844505, 5.5748974,
Les Invalides, 48.8570374, 2.3118779,
Fondation Louis Vuitton, 48.87665005, 2.26333809580246,
Panthéon, 48.8454189, 2.3525815263157,
Picasso Museum, 41.3851066, 2.18120508674285,
Cinémathèque Française, 48.8369987, 2.38263520273537,
Opéra Garnier, 48.87203085, 2.3317901855896,
The Grand Palais, 48.86616135, 2.31222295966943,
Hôtel de Ville, 43.94928305, 4.80525979935093,
Fondation Cartier, 48.8373738, 2.33191092011202,
Jeu de Paume, 48.8658156, 2.32408776132074,
Jardin des Plantes, 48.8432224, 2.35950895709484,
Fondation Jerôme Seydoux-Pathé, 48.8334765, 2.35490681339522,
Île de la Cité, 48.85470505, 2.3474846899114,
The Palais de Tokyo, 48.8639905, 2.296521513

#### I will turn the above information into a dataframe (need to find a way to do this more efficiently)

In [12]:
landmarks_loc = ['Cathédrale Notre-Dame de Paris', 48.85293695, 2.35005149954546,
'Arc de Triomphe', 48.8737791, 2.29503722603767,
'Centre Pompidou', 48.8605918, 2.35247431766534,
'Sacré-Cœur Basilica', 48.88680575, 2.34301534488351,
'Eiffel Tower', 48.8582602, 2.29449905431968,
'The Louvre', 48.8611473, 2.33802768704666,
'La Grande Arche', 48.8930151, 2.2376206,
'Les Invalides', 48.8570374, 2.3118779,
'Fondation Louis Vuitton', 48.87665005, 2.26333809580246,
'Panthéon', 48.8454189, 2.35258442491778,
'Picasso Museum', 41.3851066, 2.18120508674285,
'Cinémathèque Française', 48.8369987, 2.38263520273537,
'Opéra Garnier', 48.87203085, 2.3317901855896,
'The Grand Palais', 48.86616135, 2.31222295966943,
'Hôtel de Ville', 43.9492974, 4.80526012041732,
'Fondation Cartier', 48.8373182, 2.33193184219906,
'Jeu de Paume', 48.8658156, 2.32408776132074,
'Jardin des Plantes', 48.8432224, 2.35950895709484,
'Fondation Jerôme Seydoux-Pathé', 48.8334765, 2.35490681339522,
'Île de la Cité', 48.85470505, 2.3474846899114,
'The Palais de Tokyo', 48.8639905, 2.29652151313114,
'Philharmonie de Paris', 48.89192125, 2.39413864295307,
'Musée d’Orsay', 48.85991785, 2.32658283469656,
'Palais du Luxembourg', 48.84852845, 2.33683647732996,
'Montmartre', 46.8021347, -0.7198939,
'Champs-Élysées', 48.8707573, 2.3053312,
'Palace of Versailles', 48.80442525, 2.1202852709926,
'Tuileries Garden', 48.86366215, 2.32683861305756,
'Moulin Rouge', 48.8840787, 2.3324082,
'Disneyland Paris', 48.8711359, 2.77612651196751,
'Place de la Concorde', 29.9002052, -90.0095146,
'Catacombs of Paris', 48.833964, 2.3324924,
'Rodin Museum', 39.9619295, -75.1739508,
'Île de la Cité', 48.85470505, 2.3474846899114,
'Orangerie Museum', 48.8637655, 2.32266020780693,
'Pont des Arts', 48.8578114, 2.3372459,
'Île Saint-Louis', 48.85161305, 2.35673414953666,
'Place Vendôme', 48.8674634, 2.32942811682519,
'Saint-Jacques Tower', 48.8580032, 2.34892171290132,
'La Conciergerie', 48.8559492, 2.3460263]

landmarks_df = pd.DataFrame(np.array(landmarks_loc).reshape(40, 3), columns = ["Landmark","Latitude","Longitude"])

# Change the latitude and longitude to a float
landmarks_df['Latitude'] = landmarks_df.Latitude.astype(float)
landmarks_df['Longitude'] = landmarks_df.Longitude.astype(float)


#### Get latitude/longitude for each arrondissement

In [13]:
# turn numbered arrondissements into a list
arrond_list = arrond_df['Arrondissement'].unique()
arrond_list

def landmark_loc(list_of_arronds):
    for place in list_of_arronds:
        address = (place + ' Arrondissement')
        geolocator = Nominatim(user_agent="arrond_explorer")
        location = geolocator.geocode(address)
        latitude = location.latitude
        longitude = location.longitude
        print('{}, {}, {},'.format(place,latitude,longitude))

landmark_loc(arrond_list)

1st, 48.8646144, 2.334396,
2nd, 48.868743, 2.341688,
3rd, 48.864212, 2.360936,
4th, 48.8562021, 2.3556193,
5th, 48.8460591, 2.3445228,
6th, 48.8504333, 2.3329507,
7th, 48.8570281, 2.3201953,
8th, 48.8773158, 2.3172611,
9th, 48.876019, 2.339962,
10th, 48.876106, 2.35991,
11th, 48.858416, 2.379703,
12th, 48.8396154, 2.3957517,
13th, 48.8323973, 2.3555829,
14th, 48.8330589, 2.3269591,
15th, 48.8413705, 2.3003827,
16th, 48.8631709, 2.2757648,
17th, 48.8842239, 2.3223639,
18th, 48.8900117, 2.3464668,
19th, 48.889343, 2.38436,
20th, 48.865042, 2.398929,


In [14]:
# turn numbered arrondissements into a list
arrond_list = arrond_df['Arrondissement'].unique()
arrond_list

def landmark_loc(list_of_arronds):
    LL = []
    for place in list_of_arronds:
        address = (place + ' Arrondissement')
        geolocator = Nominatim(user_agent="arrond_explorer")
        location = geolocator.geocode(address)
        latitude = location.latitude
        longitude = location.longitude
        LL.append(place)
        LL.append(latitude)
        LL.append(longitude)
    print(LL)


LL = landmark_loc(arrond_list)

['1st', 48.8646144, 2.334396, '2nd', 48.868743, 2.341688, '3rd', 48.864212, 2.360936, '4th', 48.8562021, 2.3556193, '5th', 48.8460591, 2.3445228, '6th', 48.8504333, 2.3329507, '7th', 48.8570281, 2.3201953, '8th', 48.8773158, 2.3172611, '9th', 48.876019, 2.339962, '10th', 48.876106, 2.35991, '11th', 48.858416, 2.379703, '12th', 48.8396154, 2.3957517, '13th', 48.8323973, 2.3555829, '14th', 48.8330589, 2.3269591, '15th', 48.8413705, 2.3003827, '16th', 48.8631709, 2.2757648, '17th', 48.8842239, 2.3223639, '18th', 48.8900117, 2.3464668, '19th', 48.889343, 2.38436, '20th', 48.865042, 2.398929]


In [15]:
# not sure why the LL above couldn't be reshaped, so copied them below to insert into a dataframe converter

LL = ['1st', 48.8646144, 2.334396, '2nd', 48.868743, 2.341688, '3rd', 48.864212, 2.360936, 
      '4th', 48.8562021, 2.3556193, '5th', 48.8460591, 2.3445228, '6th', 48.8504333, 2.3329507, 
      '7th', 48.8570281, 2.3201953, '8th', 48.8773158, 2.3172611, '9th', 48.876019, 2.339962, 
      '10th', 48.876106, 2.35991, '11th', 48.858416, 2.379703, '12th', 48.8396154, 2.3957517, 
      '13th', 48.8323973, 2.3555829, '14th', 48.8330589, 2.3269591, '15th', 48.8413705, 2.3003827, 
      '16th', 48.8631709, 2.2757648, '17th', 48.8842239, 2.3223639, '18th', 48.8900117, 2.3464668, 
      '19th', 48.889343, 2.38436, '20th', 48.865042, 2.398929]


In [16]:
arrond_LL = pd.DataFrame(np.array(LL).reshape(20,3), columns = ["Arrondissement","Latitude","Longitude"])

# Change the latitude and longitude to a float
arrond_LL['Latitude'] = arrond_LL.Latitude.astype(float)
arrond_LL['Longitude'] = arrond_LL.Longitude.astype(float)

arrond_LL

Unnamed: 0,Arrondissement,Latitude,Longitude
0,1st,48.864614,2.334396
1,2nd,48.868743,2.341688
2,3rd,48.864212,2.360936
3,4th,48.856202,2.355619
4,5th,48.846059,2.344523
5,6th,48.850433,2.332951
6,7th,48.857028,2.320195
7,8th,48.877316,2.317261
8,9th,48.876019,2.339962
9,10th,48.876106,2.35991


#### Next, I will upload the GEO JSON file of Paris and attempt to create a choroplath map

In [17]:
paris = {'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.350948, 48.863407],
       [2.350155, 48.86199],
       [2.34459, 48.854052],
       [2.339003, 48.857565],
       [2.339953, 48.858226],
       [2.332909, 48.85935],
       [2.320902, 48.863057],
       [2.325926, 48.869581],
       [2.327989, 48.869929],
       [2.330662, 48.868191],
       [2.350948, 48.863407]]]]},
   'properties': {'cartodb_id': 54,
    'insee': 75101,
    'nom': '1er Arrondissement',
    'wikipedia': 'fr:1er arrondissement de Paris',
    'surf_m2': 1814728}},
  {'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.327989, 48.869929],
       [2.340045, 48.871981],
       [2.347892, 48.870675],
       [2.354261, 48.86933],
       [2.350948, 48.863407],
       [2.330662, 48.868191],
       [2.327989, 48.869929]]]]},
   'properties': {'cartodb_id': 7,
    'insee': 75102,
    'nom': '2e Arrondissement',
    'wikipedia': 'fr:2e arrondissement de Paris',
    'surf_m2': 991370}},
  {'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.350155, 48.86199],
       [2.350948, 48.863407],
       [2.354261, 48.86933],
       [2.363818, 48.867505],
       [2.366734, 48.863101],
       [2.368513, 48.855733],
       [2.36176, 48.857248],
       [2.356905, 48.860064],
       [2.350155, 48.86199]]]]},
   'properties': {'cartodb_id': 90,
    'insee': 75103,
    'nom': '3e Arrondissement',
    'wikipedia': 'fr:3e arrondissement de Paris',
    'surf_m2': 1172150}},
  {'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.368513, 48.855733],
       [2.369137, 48.853162],
       [2.366504, 48.847371],
       [2.36443, 48.84614],
       [2.35923, 48.849456],
       [2.34459, 48.854052],
       [2.350155, 48.86199],
       [2.356905, 48.860064],
       [2.36176, 48.857248],
       [2.368513, 48.855733]]]]},
   'properties': {'cartodb_id': 73,
    'insee': 75104,
    'nom': '4e Arrondissement',
    'wikipedia': 'fr:4e arrondissement de Paris',
    'surf_m2': 1600062}},
  {'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.36443, 48.84614],
       [2.366029, 48.84493],
       [2.361817, 48.839962],
       [2.351836, 48.836786],
       [2.342072, 48.838364],
       [2.336728, 48.839655],
       [2.34459, 48.854052],
       [2.35923, 48.849456],
       [2.36443, 48.84614]]]]},
   'properties': {'cartodb_id': 56,
    'insee': 75105,
    'nom': '5e Arrondissement',
    'wikipedia': 'fr:5e arrondissement de Paris',
    'surf_m2': 2539915}},
  {'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.332909, 48.85935],
       [2.339953, 48.858226],
       [2.339003, 48.857565],
       [2.34459, 48.854052],
       [2.336728, 48.839655],
       [2.324692, 48.843607],
       [2.316573, 48.846824],
       [2.328421, 48.851827],
       [2.333267, 48.858266],
       [2.332909, 48.85935]]]]},
   'properties': {'cartodb_id': 10,
    'insee': 75106,
    'nom': '6e Arrondissement',
    'wikipedia': 'fr:6e arrondissement de Paris',
    'surf_m2': 2164575}},
  {'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.320902, 48.863057],
       [2.332909, 48.85935],
       [2.333267, 48.858266],
       [2.328421, 48.851827],
       [2.316573, 48.846824],
       [2.313728, 48.845934],
       [2.310526, 48.847987],
       [2.307339, 48.847139],
       [2.289824, 48.858229],
       [2.295157, 48.861992],
       [2.301591, 48.863474],
       [2.318543, 48.863778],
       [2.320902, 48.863057]]]]},
   'properties': {'cartodb_id': 57,
    'insee': 75107,
    'nom': '7e Arrondissement',
    'wikipedia': 'fr:7e arrondissement de Paris',
    'surf_m2': 4090694}},
  {'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.327182, 48.883481],
       [2.325926, 48.869581],
       [2.320902, 48.863057],
       [2.318543, 48.863778],
       [2.301591, 48.863474],
       [2.29504, 48.873779],
       [2.298157, 48.87808],
       [2.327182, 48.883481]]]]},
   'properties': {'cartodb_id': 36527,
    'insee': 75108,
    'nom': '8e Arrondissement',
    'wikipedia': 'fr:8e arrondissement de Paris',
    'surf_m2': 3881428}},
  {'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.325926, 48.869581],
       [2.327182, 48.883481],
       [2.327428, 48.883522],
       [2.329443, 48.884554],
       [2.339816, 48.882025],
       [2.349504, 48.883725],
       [2.347892, 48.870675],
       [2.340045, 48.871981],
       [2.327989, 48.869929],
       [2.325926, 48.869581]]]]},
   'properties': {'cartodb_id': 36529,
    'insee': 75109,
    'nom': '9e Arrondissement',
    'wikipedia': 'fr:9e arrondissement de Paris',
    'surf_m2': 2178559}},
  {'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.363818, 48.867505],
       [2.354261, 48.86933],
       [2.347892, 48.870675],
       [2.349504, 48.883725],
       [2.364686, 48.884368],
       [2.368465, 48.884068],
       [2.370194, 48.882718],
       [2.370278, 48.877992],
       [2.37699, 48.872062],
       [2.363818, 48.867505]]]]},
   'properties': {'cartodb_id': 71,
    'insee': 75110,
    'nom': '10e Arrondissement',
    'wikipedia': 'fr:10e arrondissement de Paris',
    'surf_m2': 2892691}},
  {'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.363818, 48.867505],
       [2.37699, 48.872062],
       [2.3873, 48.863085],
       [2.389583, 48.858532],
       [2.394444, 48.85657],
       [2.398433, 48.851345],
       [2.399118, 48.848098],
       [2.378827, 48.850629],
       [2.369137, 48.853162],
       [2.368513, 48.855733],
       [2.366734, 48.863101],
       [2.363818, 48.867505]]]]},
   'properties': {'cartodb_id': 72,
    'insee': 75111,
    'nom': '11e Arrondissement',
    'wikipedia': 'fr:11e arrondissement de Paris',
    'surf_m2': 3666492}},
  {'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.399118, 48.848098],
       [2.415993, 48.846614],
       [2.411227, 48.833867],
       [2.416111, 48.833761],
       [2.422176, 48.835814],
       [2.419424, 48.842487],
       [2.422106, 48.844498],
       [2.42476, 48.84177],
       [2.427563, 48.841509],
       [2.437192, 48.840891],
       [2.437941, 48.844569],
       [2.440512, 48.844346],
       [2.440766, 48.845916],
       [2.446526, 48.84575],
       [2.447732, 48.844802],
       [2.46174, 48.842694],
       [2.467232, 48.839094],
       [2.469703, 48.835556],
       [2.46523, 48.831151],
       [2.46455, 48.827941],
       [2.466178, 48.827333],
       [2.465761, 48.826282],
       [2.462803, 48.819028],
       [2.461308, 48.818298],
       [2.458633, 48.817012],
       [2.43735, 48.818219],
       [2.436968, 48.819363],
       [2.43418, 48.819281],
       [2.430239, 48.823403],
       [2.419946, 48.824152],
       [2.410244, 48.825192],
       [2.402488, 48.829647],
       [2.390256, 48.825726],
       [2.366029, 48.84493],
       [2.36443, 48.84614],
       [2.366504, 48.847371],
       [2.369137, 48.853162],
       [2.378827, 48.850629],
       [2.399118, 48.848098]]]]},
   'properties': {'cartodb_id': 5,
    'insee': 75112,
    'nom': '12e Arrondissement',
    'wikipedia': 'fr:12e arrondissement de Paris',
    'surf_m2': 16319745}},
  {'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.366029, 48.84493],
       [2.390256, 48.825726],
       [2.364276, 48.816334],
       [2.356613, 48.815981],
       [2.352399, 48.818534],
       [2.34692, 48.815865],
       [2.343974, 48.815766],
       [2.344629, 48.81954],
       [2.341436, 48.823668],
       [2.342072, 48.838364],
       [2.351836, 48.836786],
       [2.361817, 48.839962],
       [2.366029, 48.84493]]]]},
   'properties': {'cartodb_id': 6,
    'insee': 75113,
    'nom': '13e Arrondissement',
    'wikipedia': 'fr:13e arrondissement de Paris',
    'surf_m2': 7144131}},
  {'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.336728, 48.839655],
       [2.342072, 48.838364],
       [2.341436, 48.823668],
       [2.344629, 48.81954],
       [2.343974, 48.815766],
       [2.331733, 48.817011],
       [2.332371, 48.818213],
       [2.314133, 48.822263],
       [2.301318, 48.825125],
       [2.319948, 48.84052],
       [2.321391, 48.83979],
       [2.324692, 48.843607],
       [2.336728, 48.839655]]]]},
   'properties': {'cartodb_id': 30,
    'insee': 75114,
    'nom': '14e Arrondissement',
    'wikipedia': 'fr:14e arrondissement de Paris',
    'surf_m2': 5605448}},
  {'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.316573, 48.846824],
       [2.324692, 48.843607],
       [2.321391, 48.83979],
       [2.319948, 48.84052],
       [2.301318, 48.825125],
       [2.289388, 48.828324],
       [2.279023, 48.832459],
       [2.272713, 48.827947],
       [2.267806, 48.82785],
       [2.2673, 48.831559],
       [2.27003, 48.833008],
       [2.267948, 48.834576],
       [2.26296, 48.833899],
       [2.275715, 48.846963],
       [2.289824, 48.858229],
       [2.307339, 48.847139],
       [2.310526, 48.847987],
       [2.313728, 48.845934],
       [2.316573, 48.846824]]]]},
   'properties': {'cartodb_id': 21,
    'insee': 75115,
    'nom': '15e Arrondissement',
    'wikipedia': 'fr:15e arrondissement de Paris',
    'surf_m2': 8510577}},
  {'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.29504, 48.873779],
       [2.301591, 48.863474],
       [2.295157, 48.861992],
       [2.289824, 48.858229],
       [2.275715, 48.846963],
       [2.26296, 48.833899],
       [2.262776, 48.833918],
       [2.255154, 48.834805],
       [2.251649, 48.838906],
       [2.251219, 48.842891],
       [2.252538, 48.84558],
       [2.242437, 48.847734],
       [2.239324, 48.850107],
       [2.224169, 48.853442],
       [2.225689, 48.859437],
       [2.228244, 48.865145],
       [2.23212, 48.869553],
       [2.240463, 48.871888],
       [2.245623, 48.876364],
       [2.254815, 48.874081],
       [2.258999, 48.880267],
       [2.27749, 48.877963],
       [2.279801, 48.878653],
       [2.29504, 48.873779]]]]},
   'properties': {'cartodb_id': 29,
    'insee': 75116,
    'nom': '16e Arrondissement',
    'wikipedia': 'fr:16e arrondissement de Paris',
    'surf_m2': 16371053}},
  {'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.327182, 48.883481],
       [2.298157, 48.87808],
       [2.29504, 48.873779],
       [2.279801, 48.878653],
       [2.280906, 48.882878],
       [2.284459, 48.885638],
       [2.291504, 48.889459],
       [2.295047, 48.889869],
       [2.303793, 48.894171],
       [2.31989, 48.900459],
       [2.330183, 48.901028],
       [2.32559, 48.887498],
       [2.327428, 48.883522],
       [2.327182, 48.883481]]]]},
   'properties': {'cartodb_id': 36526,
    'insee': 75117,
    'nom': '17e Arrondissement',
    'wikipedia': 'fr:17e arrondissement de Paris',
    'surf_m2': 5670222}},
  {'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.370292, 48.90186],
       [2.370388, 48.896633],
       [2.37179, 48.895423],
       [2.364686, 48.884368],
       [2.349504, 48.883725],
       [2.339816, 48.882025],
       [2.329443, 48.884554],
       [2.327428, 48.883522],
       [2.32559, 48.887498],
       [2.330183, 48.901028],
       [2.351985, 48.901493],
       [2.365608, 48.901763],
       [2.370292, 48.90186]]]]},
   'properties': {'cartodb_id': 13,
    'insee': 75118,
    'nom': '18e Arrondissement',
    'wikipedia': 'fr:18e arrondissement de Paris',
    'surf_m2': 5994849}},
  {'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.370292, 48.90186],
       [2.384429, 48.902156],
       [2.389426, 48.901219],
       [2.395527, 48.898262],
       [2.398959, 48.889546],
       [2.400073, 48.883818],
       [2.403792, 48.881447],
       [2.40929, 48.880275],
       [2.410836, 48.878423],
       [2.402577, 48.876009],
       [2.389972, 48.875368],
       [2.37699, 48.872062],
       [2.370278, 48.877992],
       [2.370194, 48.882718],
       [2.368465, 48.884068],
       [2.364686, 48.884368],
       [2.37179, 48.895423],
       [2.370388, 48.896633],
       [2.370292, 48.90186]]]]},
   'properties': {'cartodb_id': 14,
    'insee': 75119,
    'nom': '19e Arrondissement',
    'wikipedia': 'fr:19e arrondissement de Paris',
    'surf_m2': 6785521}},
  {'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.415993, 48.846614],
       [2.399118, 48.848098],
       [2.398433, 48.851345],
       [2.394444, 48.85657],
       [2.389583, 48.858532],
       [2.3873, 48.863085],
       [2.37699, 48.872062],
       [2.389972, 48.875368],
       [2.402577, 48.876009],
       [2.410836, 48.878423],
       [2.413429, 48.87315],
       [2.415281, 48.85518],
       [2.41636, 48.849235],
       [2.415993, 48.846614]]]]},
   'properties': {'cartodb_id': 31,
    'insee': 75120,
    'nom': '20e Arrondissement',
    'wikipedia': 'fr:20e arrondissement de Paris',
    'surf_m2': 5980553}}]}

paris

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.350948, 48.863407],
       [2.350155, 48.86199],
       [2.34459, 48.854052],
       [2.339003, 48.857565],
       [2.339953, 48.858226],
       [2.332909, 48.85935],
       [2.320902, 48.863057],
       [2.325926, 48.869581],
       [2.327989, 48.869929],
       [2.330662, 48.868191],
       [2.350948, 48.863407]]]]},
   'properties': {'cartodb_id': 54,
    'insee': 75101,
    'nom': '1er Arrondissement',
    'wikipedia': 'fr:1er arrondissement de Paris',
    'surf_m2': 1814728}},
  {'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[2.327989, 48.869929],
       [2.340045, 48.871981],
       [2.347892, 48.870675],
       [2.354261, 48.86933],
       [2.350948, 48.863407],
       [2.330662, 48.868191],
       [2.327989, 48.869929]]]]},
   'properties': {'cartodb_id': 7,
    'insee': 75102,
    'nom': '2e Arrondissement',
    'wi

### Exploration of the data
I will explore the data by making
- maps (choropleth, superimposed landmarks etc)
- utilizing Foursquare data to find venues by arrondissement
- cluster by K-Means
- other visualizations (restaurants, museums, cafes etc by arrondissement)


#### Then, I will create a map template of Paris using its latitude/longitude 

In [18]:
address = ('Paris, France')
geolocator = Nominatim(user_agent="Paris_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('{''}, {}, {},'.format(address,latitude,longitude))

Paris, France, 48.8566101, 2.3514992,


In [19]:
paris_lat = 48.8566101
paris_long = 2.3514992

paris_map = folium.Map(location=[paris_lat, paris_long], zoom_start = 12)
paris_map

#### Generate a map that show the borders of the arrondissement and labels

In [20]:
arronds_map = folium.Map(location = [paris_lat, paris_long], zoom_start = 14)

arronds_map.choropleth(
    geo_data=paris,
    key_on='feature.properties.nom',
    fill_color='PuBu', 
    fill_opacity=0.1, 
    line_opacity=0.8,
)

#add neighborhood markers to the Paris map
for lat, long, arrond in zip(arrond_LL['Latitude'], arrond_LL['Longitude'], 
                                 arrond_LL['Arrondissement']):
    label = '{}'.format(arrond)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius = 7, 
        popup = label,
        color = 'red',
        fill = True,
        fill_color = 'blue',
        fill_opacity = 0.7,
        parse_html = False).add_to(arronds_map)
        
# display map
arronds_map



#### Generate choropleth map using the geo json file and the population information

In [21]:
# make new list so that "nom" in geojson matches the arrond in the data

arrond_new = ['1er Arrondissement', '2e Arrondissement','3e Arrondissement',
             '4e Arrondissement', '5e Arrondissement', '6e Arrondissement', 
             '7e Arrondissement', '8e Arrondissement','9e Arrondissement',
             '10e Arrondissement', '11e Arrondissement', '12e Arrondissement', 
             '13e Arrondissement', '14e Arrondissement', '15e Arrondissement',
             '16e Arrondissement', '17e Arrondissement', '18e Arrondissement', 
             '19e Arrondissement', '20e Arrondissement']

# this is the population list. Was going to take this from the dataframe, but it had commas and was an object.o
population_2005 = [17700, 20700, 35100, 28600, 60600,
45200,55400,38700,58500,88800,152500,138300,181300,134700,
232400,149500,160300,188700,187200,191800]


pop_2005 = pd.DataFrame({'Arrondissement': arrond_new, 'Population': population_2005})


#### Here is a choropleth map of Paris, where darker shades signify a higher Parisan population

In [22]:
Paris_map = folium.Map(location = [paris_lat, paris_long], zoom_start = 14)

Paris_map.choropleth(
    geo_data=paris,
    data=pop_2005,
    columns=['Arrondissement', 'Population'],
    key_on='feature.properties.nom',
    fill_color='Reds', 
    fill_opacity=0.5, 
    line_opacity=0.2,
    legend_name='Population by Arrondissement in Paris 2005'
)


# display map
Paris_map

#### Map of Parisian arrondissement (labeled) along with population information

In [23]:
paris_map_v2 = folium.Map(location = [paris_lat, paris_long], zoom_start = 12)

paris_map_v2.choropleth(
    geo_data=paris,
    data=pop_2005,
    columns=['Arrondissement', 'Population'],
    key_on='feature.properties.nom',
    fill_color='PuBu', 
    fill_opacity=0.5, 
    line_opacity=0.2,
    legend_name='Population by Arrondissement in Paris 2005'
)

#add neighborhood markers to the Paris map
for lat, long, arrond in zip(arrond_LL['Latitude'], arrond_LL['Longitude'], 
                                 arrond_LL['Arrondissement']):
    label = '{}'.format(arrond)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius = 7, 
        popup = label,
        color = 'red',
        fill = True,
        fill_color = 'blue',
        fill_opacity = 0.7,
        parse_html = False).add_to(paris_map_v2)
        

# display map
paris_map_v2

#### Another option for a choroplath map with arrond long/lat points

In [24]:
from folium import IFrame

paris_map_v3 = folium.Map(location = [paris_lat, paris_long], zoom_start = 12)

paris_map_v3.choropleth(
    geo_data=paris,
    data=pop_2005,
    columns=['Arrondissement', 'Population'],
    key_on='feature.properties.nom',
    fill_color='PuBu', 
    fill_opacity=0.5, 
    line_opacity=0.2,
    legend_name='Population by Arrondissement in Paris 2005'
)

#add neighborhood markers to the Paris map
for lat, long, arrond in zip(arrond_LL['Latitude'], arrond_LL['Longitude'], 
                                 arrond_LL['Arrondissement']):
    label = '{}'.format(arrond)
    iframe = folium.IFrame(label, width=700, height=450)
    label = folium.Popup(iframe, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius = 7, 
        color = 'red',
        fill = True,
        fill_color = 'blue',
        fill_opacity = 0.7,
        parse_html = False).add_to(paris_map_v3)
        

# display map
paris_map_v3



#### Map of Paris with markers of top tourists landmarks

In [25]:
# create a map of Paris
map_landmarks = folium.Map(location = [paris_lat, paris_long], zoom_start = 10)

map_landmarks.choropleth(
    geo_data=paris,
    key_on='feature.properties.nom',
    fill_color='PuBu', 
    fill_opacity=0.1, 
    line_opacity=0.8,
)

#add neighborhood markers to the Paris map
for lat, long, landmark in zip(landmarks_df['Latitude'], landmarks_df['Longitude'], 
                                 landmarks_df['Landmark']):
    label = '{}'.format(landmark)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius = 7, 
        popup = label,
        color = 'red',
        fill = True,
        fill_color = 'blue',
        fill_opacity = 0.7,
        parse_html = False).add_to(map_landmarks)
        
map_landmarks

#### Map of Paris with landmarks and arrondissements

In [26]:
# create a map of Paris
map_landmarks2 = folium.Map(location = [paris_lat, paris_long], zoom_start = 10)

map_landmarks2.choropleth(
    geo_data=paris,
    data=pop_2005,
    columns=['Arrondissement', 'Population'],
    key_on='feature.properties.nom',
    fill_color='PuBu', 
    fill_opacity=0.1, 
    line_opacity=0.5,
)

#add landmarks markers to the Paris map
for lat, long, landmark in zip(landmarks_df['Latitude'], landmarks_df['Longitude'], 
                                 landmarks_df['Landmark']):
    label = '{}'.format(landmark)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius = 7, 
        popup = label,
        color = 'red',
        fill = True,
        fill_color = 'blue',
        fill_opacity = 0.7,
        parse_html = False).add_to(map_landmarks2)
    
#add neighborhood markers to the Paris map
for lat, long, arrond in zip(arrond_LL['Latitude'], arrond_LL['Longitude'], 
                                 arrond_LL['Arrondissement']):
    label = '{}'.format(arrond)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius = 5, 
        popup = label,
        color = 'white',
        fill = True,
        fill_color = 'green',
        fill_opacity = 0.5,
        parse_html = False).add_to(map_landmarks2)
        
map_landmarks2

#### Let's bring in the Foursquare data

In [27]:
CLIENT_ID = 'NBGXQTF5JJ5EQZHXHXZCG2AABE4C20W5JXQ25XXSXNM34F0C' # your Foursquare ID
CLIENT_SECRET = 'NW2Q1KMTFNFR4EJCCS1W5OUCA5INYO1Z0QPJUG3KOPH2DXJS' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version


#### Now, let's get the top 250 venues from each of the arrondissement with the most landmarks,  the 1st, 4th, 7th, and 8th. From online research, other vistor favorites are the 5th, 18th, 6th, and 3rd arrondissements.

In [28]:
# Make dataframe of the most popular arrondissement


popular_arrond = arrond_LL[arrond_LL['Arrondissement'].isin(
    ['1st', '3rd', '4th', '5th', '6th', '7th', '8th', '18th'])]

popular_arrond

Unnamed: 0,Arrondissement,Latitude,Longitude
0,1st,48.864614,2.334396
2,3rd,48.864212,2.360936
3,4th,48.856202,2.355619
4,5th,48.846059,2.344523
5,6th,48.850433,2.332951
6,7th,48.857028,2.320195
7,8th,48.877316,2.317261
17,18th,48.890012,2.346467


In [29]:
# Example call of 1st arrondissement

arrond_1_lat = arrond_LL.iloc[0,1]

arrond_1_long = arrond_LL.iloc[0,2]

LIMIT = 250

radius = 2000



url1 = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    arrond_1_lat, 
    arrond_1_long, 
    VERSION, 
    radius, 
    LIMIT)

url1

'https://api.foursquare.com/v2/venues/explore?client_id=NBGXQTF5JJ5EQZHXHXZCG2AABE4C20W5JXQ25XXSXNM34F0C&client_secret=NW2Q1KMTFNFR4EJCCS1W5OUCA5INYO1Z0QPJUG3KOPH2DXJS&ll=48.8646144,2.334396&v=20180605&radius=2000&limit=250'

#### Send the GET request and examine the results

In [31]:
results1 = requests.get(url1).json()
results1

{'meta': {'code': 200, 'requestId': '5d9d66f5c53093002cf213c4'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Palais-Royal',
  'headerFullLocation': 'Palais-Royal, Paris',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 242,
  'suggestedBounds': {'ne': {'lat': 48.88261441800002,
    'lng': 2.3617071938645045},
   'sw': {'lat': 48.846614381999984, 'lng': 2.3070848061354954}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4f0b706de5e8207020057ed7',
       'name': 'Sanukiya',
       'location': {'address': "9 rue d'Argenteuil",
        'lat': 48.86471330060748,
        'lng': 2.333805151283741,
        'labeledLatLngs': [{'label': 'display',
          'lat': 48.8

#### Get Foursquare data for most popular arrondissements and their top venues

In [32]:


def getNearbyVenues(names, latitudes, longitudes, radius, LIMIT):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],
            v['venue']['categories'][0]['name']) for v in results])
            

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Arrondissement', 
                  'Arrondissement Latitude', 
                  'Arrondissement Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [33]:
popular_arrond_venues = getNearbyVenues(names=popular_arrond['Arrondissement'],
                                   latitudes=popular_arrond['Latitude'],
                                   longitudes=popular_arrond['Longitude'],
                                   radius=2000, LIMIT=300
                                  )


1st
3rd
4th
5th
6th
7th
8th
18th


#### On second thought, I'm going to run the Foursquare gathering data for all of the arrondissement

In [34]:
arrond_venues = getNearbyVenues(names=arrond_LL['Arrondissement'],
                                   latitudes=arrond_LL['Latitude'],
                                   longitudes=arrond_LL['Longitude'],
                                   radius=2000, LIMIT=300
                                  )


1st
2nd
3rd
4th
5th
6th
7th
8th
9th
10th
11th
12th
13th
14th
15th
16th
17th
18th
19th
20th


#### Here's the output from the code above, with 800 venues

In [35]:
print(arrond_venues.shape)
arrond_venues

(2000, 7)


Unnamed: 0,Arrondissement,Arrondissement Latitude,Arrondissement Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,1st,48.864614,2.334396,Sanukiya,48.864713,2.333805,Udon Restaurant
1,1st,48.864614,2.334396,Brasserie Réjane,48.865486,2.334824,Restaurant
2,1st,48.864614,2.334396,Jardin du Palais Royal,48.864941,2.337728,Garden
3,1st,48.864614,2.334396,Comédie-Française,48.863088,2.336612,Theater
4,1st,48.864614,2.334396,Ellsworth,48.865528,2.337057,French Restaurant
5,1st,48.864614,2.334396,Le Roch Hotel & Spa Paris,48.8662,2.332995,Hotel
6,1st,48.864614,2.334396,Palais Royal,48.863236,2.337127,Historic Site
7,1st,48.864614,2.334396,Les Arts Décoratifs,48.863077,2.333393,Art Museum
8,1st,48.864614,2.334396,LouLou,48.862804,2.3335,Italian Restaurant
9,1st,48.864614,2.334396,Kunitoraya,48.865973,2.33688,Udon Restaurant


#### There are some duplicates of venues, so we will get rid of the duplicates and keep one copy

In [36]:
arrond_venues = arrond_venues.drop_duplicates(subset=['Venue'], keep = 'first')

#### Here's the venues returned by arrondissement

In [37]:
arrond_venues.groupby('Arrondissement').count()


Unnamed: 0_level_0,Arrondissement Latitude,Arrondissement Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Arrondissement,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
10th,35,35,35,35,35,35
11th,73,73,73,73,73,73
12th,82,82,82,82,82,82
13th,87,87,87,87,87,87
14th,81,81,81,81,81,81
15th,79,79,79,79,79,79
16th,94,94,94,94,94,94
17th,39,39,39,39,39,39
18th,52,52,52,52,52,52
19th,93,93,93,93,93,93


#### Next, I'm going to analysize each arrondissement's venues/


In [38]:
# one hot encoding
arrond_onehot = pd.get_dummies(arrond_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
arrond_onehot['Arrondissement'] = arrond_venues['Arrondissement'] 

# move neighborhood column to the first column
fixed_columns = [arrond_onehot.columns[-1]] + list(arrond_onehot.columns[:-1])
arrond_onehot = arrond_onehot[fixed_columns]

arrond_onehot.head()


Unnamed: 0,Arrondissement,Accessories Store,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auvergne Restaurant,BBQ Joint,Bagel Shop,Bakery,Bar,Basketball Court,Basketball Stadium,Basque Restaurant,Beach Bar,Bed & Breakfast,Beer Bar,Beer Garden,Beer Store,Bike Rental / Bike Share,Bistro,Boat or Ferry,Bookstore,Botanical Garden,Boutique,Boxing Gym,Brasserie,Brazilian Restaurant,Breakfast Spot,Breton Restaurant,Brewery,Bridge,Bubble Tea Shop,Burger Joint,Butcher,Café,Cambodian Restaurant,Canal,Candy Store,Caribbean Restaurant,Cemetery,Champagne Bar,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Corsican Restaurant,Cosmetics Shop,Creperie,Cultural Center,Cupcake Shop,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Drive-in Theater,Electronics Store,Ethiopian Restaurant,Event Space,Exhibit,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flower Shop,Food & Drink Shop,Food Truck,Fountain,French Restaurant,Furniture / Home Store,Garden,Gastropub,General Entertainment,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym / Fitness Center,Historic Site,History Museum,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Island,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jiangxi Restaurant,Juice Bar,Karaoke Bar,Kids Store,Korean Restaurant,Lebanese Restaurant,Library,Liquor Store,Lounge,Market,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Multiplex,Museum,Music Venue,New American Restaurant,Noodle House,Opera House,Organic Grocery,Park,Pastry Shop,Pedestrian Plaza,Performing Arts Venue,Persian Restaurant,Peruvian Restaurant,Pharmacy,Pizza Place,Planetarium,Playground,Plaza,Pool,Pop-Up Shop,Portuguese Restaurant,Provençal Restaurant,Pub,Radio Station,Ramen Restaurant,Record Shop,Recording Studio,Restaurant,Rock Club,Roof Deck,Russian Restaurant,Salad Place,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,Science Museum,Seafood Restaurant,Shanxi Restaurant,Shopping Mall,Shopping Plaza,Southwestern French Restaurant,Souvenir Shop,Souvlaki Shop,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Street Art,Street Food Gathering,Supermarket,Sushi Restaurant,Szechuan Restaurant,Taco Place,Tailor Shop,Tapas Restaurant,Tea Room,Tech Startup,Thai Restaurant,Theater,Theme Park Ride / Attraction,Toy / Game Store,Trail,Trattoria/Osteria,Turkish Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Vineyard,Wine Bar,Wine Shop,Women's Store,Yoga Studio,Zoo
0,1st,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
1,1st,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1st,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1st,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1st,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category


In [39]:
arrond_grouped = arrond_onehot.groupby('Arrondissement').mean().reset_index()
arrond_grouped

Unnamed: 0,Arrondissement,Accessories Store,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auvergne Restaurant,BBQ Joint,Bagel Shop,Bakery,Bar,Basketball Court,Basketball Stadium,Basque Restaurant,Beach Bar,Bed & Breakfast,Beer Bar,Beer Garden,Beer Store,Bike Rental / Bike Share,Bistro,Boat or Ferry,Bookstore,Botanical Garden,Boutique,Boxing Gym,Brasserie,Brazilian Restaurant,Breakfast Spot,Breton Restaurant,Brewery,Bridge,Bubble Tea Shop,Burger Joint,Butcher,Café,Cambodian Restaurant,Canal,Candy Store,Caribbean Restaurant,Cemetery,Champagne Bar,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Corsican Restaurant,Cosmetics Shop,Creperie,Cultural Center,Cupcake Shop,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Drive-in Theater,Electronics Store,Ethiopian Restaurant,Event Space,Exhibit,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flower Shop,Food & Drink Shop,Food Truck,Fountain,French Restaurant,Furniture / Home Store,Garden,Gastropub,General Entertainment,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym / Fitness Center,Historic Site,History Museum,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Island,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jiangxi Restaurant,Juice Bar,Karaoke Bar,Kids Store,Korean Restaurant,Lebanese Restaurant,Library,Liquor Store,Lounge,Market,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Multiplex,Museum,Music Venue,New American Restaurant,Noodle House,Opera House,Organic Grocery,Park,Pastry Shop,Pedestrian Plaza,Performing Arts Venue,Persian Restaurant,Peruvian Restaurant,Pharmacy,Pizza Place,Planetarium,Playground,Plaza,Pool,Pop-Up Shop,Portuguese Restaurant,Provençal Restaurant,Pub,Radio Station,Ramen Restaurant,Record Shop,Recording Studio,Restaurant,Rock Club,Roof Deck,Russian Restaurant,Salad Place,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,Science Museum,Seafood Restaurant,Shanxi Restaurant,Shopping Mall,Shopping Plaza,Southwestern French Restaurant,Souvenir Shop,Souvlaki Shop,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Street Art,Street Food Gathering,Supermarket,Sushi Restaurant,Szechuan Restaurant,Taco Place,Tailor Shop,Tapas Restaurant,Tea Room,Tech Startup,Thai Restaurant,Theater,Theme Park Ride / Attraction,Toy / Game Store,Trail,Trattoria/Osteria,Turkish Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Vineyard,Wine Bar,Wine Shop,Women's Store,Yoga Studio,Zoo
0,10th,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.085714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.085714,0.0,0.0,0.028571,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.028571,0.0,0.0,0.057143,0.028571,0.0,0.0,0.0,0.028571,0.028571,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.085714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.085714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.028571,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,11th,0.0,0.0,0.0,0.0,0.0,0.013699,0.0,0.013699,0.0,0.0,0.0,0.0,0.027397,0.041096,0.0,0.0,0.0,0.0,0.013699,0.027397,0.0,0.013699,0.0,0.041096,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027397,0.0,0.0,0.0,0.0,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,0.041096,0.054795,0.013699,0.0,0.0,0.0,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013699,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013699,0.013699,0.013699,0.0,0.0,0.0,0.0,0.013699,0.0,0.0,0.136986,0.0,0.0,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,0.027397,0.0,0.0,0.0,0.013699,0.0,0.0,0.0,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013699,0.0,0.0,0.0,0.013699,0.0,0.0,0.013699,0.0,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,0.027397,0.0,0.0,0.0,0.0,0.013699,0.0,0.0,0.0,0.0,0.0,0.013699,0.0,0.013699,0.0,0.0,0.0,0.0,0.027397,0.0,0.0,0.0,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013699,0.013699,0.013699,0.013699,0.0,0.0,0.0,0.013699,0.0,0.0,0.0,0.027397,0.0,0.0,0.068493,0.0,0.0,0.0,0.0
2,12th,0.0,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.0,0.0,0.012195,0.036585,0.04878,0.0,0.0,0.0,0.0,0.0,0.02439,0.02439,0.0,0.0,0.04878,0.0,0.012195,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.0,0.012195,0.0,0.0,0.0,0.02439,0.0,0.0,0.012195,0.012195,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012195,0.012195,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.085366,0.012195,0.036585,0.02439,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.012195,0.0,0.0,0.0,0.036585,0.0,0.012195,0.0,0.0,0.0,0.0,0.012195,0.012195,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.012195,0.0,0.02439,0.02439,0.012195,0.0,0.0,0.0,0.0,0.036585,0.0,0.012195,0.012195,0.0,0.0,0.0,0.012195,0.0,0.0,0.036585,0.0,0.0,0.012195,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.012195,0.0,0.02439,0.0,0.0,0.0,0.0
3,13th,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.057471,0.022989,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.011494,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.011494,0.011494,0.034483,0.022989,0.0,0.0,0.0,0.0,0.0,0.0,0.022989,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.114943,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.08046,0.0,0.0,0.011494,0.0,0.0,0.0,0.045977,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.011494,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.022989,0.0,0.0,0.0,0.0,0.011494,0.0,0.022989,0.0,0.0,0.022989,0.011494,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022989,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022989,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.103448,0.0,0.0,0.0,0.0,0.0,0.011494
4,14th,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.0,0.0,0.012346,0.0,0.0,0.012346,0.024691,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.012346,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.024691,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.012346,0.0,0.0,0.0,0.012346,0.209877,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.012346,0.0,0.012346,0.0,0.0,0.135802,0.0,0.0,0.0,0.0,0.0,0.0,0.049383,0.049383,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.012346,0.0,0.012346,0.0,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.0,0.024691,0.012346,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.0,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.012346,0.012346,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.024691,0.0,0.012346,0.024691,0.0,0.024691,0.0
5,15th,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037975,0.0,0.0,0.0,0.012658,0.0,0.0,0.012658,0.0,0.0,0.012658,0.037975,0.0,0.012658,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.025316,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.189873,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.037975,0.012658,0.0,0.0,0.012658,0.0,0.126582,0.0,0.0,0.025316,0.0,0.0,0.0,0.012658,0.050633,0.0,0.0,0.0,0.0,0.0,0.0,0.050633,0.025316,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.037975,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037975,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025316,0.0,0.0,0.0,0.0,0.0,0.0
6,16th,0.0,0.0,0.0,0.0,0.0,0.031915,0.0,0.0,0.0,0.0,0.0,0.0,0.042553,0.0,0.0,0.0,0.0,0.010638,0.010638,0.0,0.0,0.0,0.0,0.010638,0.0,0.010638,0.0,0.0,0.0,0.010638,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.010638,0.0,0.010638,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010638,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.255319,0.0,0.021277,0.0,0.0,0.0,0.010638,0.0,0.0,0.021277,0.0,0.0,0.010638,0.0,0.010638,0.12766,0.021277,0.0,0.0,0.0,0.010638,0.0,0.053191,0.010638,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010638,0.0,0.0,0.0,0.0,0.0,0.0,0.010638,0.0,0.0,0.010638,0.0,0.0,0.0,0.021277,0.010638,0.0,0.0,0.0,0.0,0.010638,0.0,0.0,0.0,0.010638,0.0,0.0,0.021277,0.0,0.010638,0.042553,0.0,0.0,0.0,0.0,0.010638,0.010638,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.010638,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010638,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010638,0.010638,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,17th,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.051282,0.025641,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.179487,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.076923,0.051282,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.025641,0.0,0.025641,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.051282,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.025641,0.0,0.0,0.0,0.0
8,18th,0.0,0.019231,0.019231,0.019231,0.0,0.0,0.0,0.0,0.0,0.0,0.019231,0.0,0.019231,0.076923,0.0,0.0,0.0,0.0,0.0,0.019231,0.019231,0.038462,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.019231,0.0,0.0,0.019231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.192308,0.0,0.0,0.019231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057692,0.019231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.057692,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.019231,0.0,0.0,0.0,0.019231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019231,0.0,0.0,0.019231,0.019231,0.0,0.0,0.0
9,19th,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010753,0.010753,0.0,0.0,0.010753,0.010753,0.043011,0.0,0.010753,0.0,0.0,0.010753,0.021505,0.010753,0.010753,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.010753,0.0,0.0,0.0,0.010753,0.0,0.0,0.010753,0.0,0.032258,0.0,0.021505,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.010753,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010753,0.010753,0.0,0.0,0.010753,0.0,0.0,0.010753,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.139785,0.0,0.010753,0.010753,0.010753,0.0,0.0,0.0,0.0,0.0,0.010753,0.0,0.0,0.021505,0.0,0.010753,0.0,0.0,0.0,0.0,0.0,0.0,0.021505,0.021505,0.010753,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010753,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010753,0.0,0.043011,0.0,0.032258,0.0,0.0,0.0,0.010753,0.032258,0.0,0.0,0.010753,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.021505,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.021505,0.0,0.0,0.0,0.0,0.0,0.021505,0.010753,0.010753,0.0,0.010753,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010753,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010753,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010753,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### It's hard to see all the frequencies in this table, so I will print the top 10 venues for each neighborhood

In [40]:
num_top_venues = 10

for hood in arrond_grouped['Arrondissement']:
    print("----"+hood+"----")
    temp = arrond_grouped[arrond_grouped['Arrondissement'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----10th----
                venue  freq
0          Restaurant  0.09
1         Coffee Shop  0.09
2         Pizza Place  0.09
3   French Restaurant  0.09
4  Italian Restaurant  0.06
5              Bakery  0.06
6   Indian Restaurant  0.03
7             Theater  0.03
8  Dim Sum Restaurant  0.03
9   Shanxi Restaurant  0.03


----11th----
               venue  freq
0  French Restaurant  0.14
1           Wine Bar  0.07
2       Cocktail Bar  0.05
3             Bistro  0.04
4     Clothing Store  0.04
5                Bar  0.04
6             Bakery  0.03
7           Beer Bar  0.03
8               Café  0.03
9        Pizza Place  0.03


----12th----
               venue  freq
0  French Restaurant  0.09
1             Bistro  0.05
2                Bar  0.05
3             Garden  0.04
4               Park  0.04
5             Bakery  0.04
6              Hotel  0.04
7              Plaza  0.04
8           Beer Bar  0.02
9          Gastropub  0.02


----13th----
                   venue  freq
0      Fr

#### Put the top venues information into a dataframe that's sorted in descending order

In [41]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

#### Create a new dataframe and display the top 10 venues

In [42]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Arrondissement']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
arrond_venues_sorted = pd.DataFrame(columns=columns)
arrond_venues_sorted['Arrondissement'] = arrond_grouped['Arrondissement']

for ind in np.arange(arrond_grouped.shape[0]):
    arrond_venues_sorted.iloc[ind, 1:] = return_most_common_venues(arrond_grouped.iloc[ind, :], num_top_venues)

arrond_venues_sorted

Unnamed: 0,Arrondissement,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,10th,Pizza Place,Coffee Shop,Restaurant,French Restaurant,Bakery,Italian Restaurant,Dim Sum Restaurant,Shanxi Restaurant,Café,Cambodian Restaurant
1,11th,French Restaurant,Wine Bar,Cocktail Bar,Clothing Store,Bistro,Bar,Italian Restaurant,Bakery,Pizza Place,Beer Bar
2,12th,French Restaurant,Bistro,Bar,Plaza,Bakery,Garden,Hotel,Park,Creperie,Multiplex
3,13th,French Restaurant,Vietnamese Restaurant,Hotel,Bakery,Italian Restaurant,Café,Asian Restaurant,Sushi Restaurant,Thai Restaurant,Bar
4,14th,French Restaurant,Hotel,Italian Restaurant,Japanese Restaurant,Creperie,Bistro,Yoga Studio,Wine Shop,Bar,Vietnamese Restaurant
5,15th,French Restaurant,Hotel,Japanese Restaurant,Korean Restaurant,Park,Gym / Fitness Center,Bistro,Plaza,Bakery,Creperie
6,16th,French Restaurant,Hotel,Italian Restaurant,Plaza,Bakery,Art Museum,Burger Joint,Hotel Bar,Restaurant,Café
7,17th,French Restaurant,Bakery,Hotel,Hotel Bar,Bar,Restaurant,Bistro,Street Art,Museum,Farmers Market
8,18th,French Restaurant,Bistro,Bar,Plaza,Italian Restaurant,Burger Joint,Restaurant,Beer Store,Pizza Place,Candy Store
9,19th,French Restaurant,Multiplex,Bar,Cocktail Bar,Bistro,Pizza Place,Music Venue,Park,Concert Hall,Café


#### Cluster the arrondissements of Paris

In [43]:
# run K-means to cluster the arrondissements into 5 clusters

# set number of clusters
kclusters = 5

arrond_grouped_clustering = arrond_LL.drop('Arrondissement', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(arrond_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 2, 2, 3, 3, 3, 0, 0, 2], dtype=int32)

In [44]:
# add clustering labels
arrond_venues_sorted.insert(0, 'Cluster Label', kmeans.labels_)

arrond_merge = arrond_LL

# merge grouped with data to add latitude/longitude for each arrondissement
arrond_merge = arrond_merge.join(arrond_venues_sorted.set_index('Arrondissement'), on='Arrondissement')

# arrond_merge = arrond_merge.drop('Cluster',1) # accidentally added added another cluster column so this was to get rid of that extra column

arrond_merge.head(20) # check the last columns!



Unnamed: 0,Arrondissement,Latitude,Longitude,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1st,48.864614,2.334396,4,Hotel,Plaza,French Restaurant,Art Museum,Cocktail Bar,Theater,Boutique,Italian Restaurant,Historic Site,Garden
1,2nd,48.868743,2.341688,3,Hotel,Sandwich Place,Seafood Restaurant,Cocktail Bar,French Restaurant,Ice Cream Shop,Gastropub,Plaza,Concert Hall,Corsican Restaurant
2,3rd,48.864212,2.360936,3,French Restaurant,Art Gallery,Italian Restaurant,Bookstore,Wine Bar,Seafood Restaurant,Cocktail Bar,Restaurant,Tea Room,Breakfast Spot
3,4th,48.856202,2.355619,1,Plaza,Japanese Restaurant,French Restaurant,Cocktail Bar,Bakery,Ice Cream Shop,Bookstore,Burger Joint,Restaurant,Church
4,5th,48.846059,2.344523,1,French Restaurant,Plaza,Coffee Shop,Wine Bar,Fountain,Hotel,Seafood Restaurant,Bistro,Italian Restaurant,Bookstore
5,6th,48.850433,2.332951,0,Hotel,French Restaurant,Garden,Coffee Shop,Department Store,Park,Cupcake Shop,Scenic Lookout,Tailor Shop,Creperie
6,7th,48.857028,2.320195,0,Garden,French Restaurant,Historic Site,Tea Room,Plaza,Accessories Store,Pastry Shop,Pedestrian Plaza,Cocktail Bar,Chocolate Shop
7,8th,48.877316,2.317261,2,Hotel,French Restaurant,Park,Wine Bar,Plaza,Italian Restaurant,Coffee Shop,Japanese Restaurant,Bar,Cosmetics Shop
8,9th,48.876019,2.339962,4,French Restaurant,Italian Restaurant,Bakery,Cocktail Bar,Vegetarian / Vegan Restaurant,Japanese Restaurant,Bar,Fish & Chips Shop,Music Venue,Deli / Bodega
9,10th,48.876106,2.35991,0,Pizza Place,Coffee Shop,Restaurant,French Restaurant,Bakery,Italian Restaurant,Dim Sum Restaurant,Shanxi Restaurant,Café,Cambodian Restaurant


#### Examining the different arrondissement clusters

##### Cluster 1

In [45]:
cluster1 = arrond_merge.loc[arrond_merge['Cluster Label'] == 0]

cluster1

Unnamed: 0,Arrondissement,Latitude,Longitude,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,6th,48.850433,2.332951,0,Hotel,French Restaurant,Garden,Coffee Shop,Department Store,Park,Cupcake Shop,Scenic Lookout,Tailor Shop,Creperie
6,7th,48.857028,2.320195,0,Garden,French Restaurant,Historic Site,Tea Room,Plaza,Accessories Store,Pastry Shop,Pedestrian Plaza,Cocktail Bar,Chocolate Shop
9,10th,48.876106,2.35991,0,Pizza Place,Coffee Shop,Restaurant,French Restaurant,Bakery,Italian Restaurant,Dim Sum Restaurant,Shanxi Restaurant,Café,Cambodian Restaurant
10,11th,48.858416,2.379703,0,French Restaurant,Wine Bar,Cocktail Bar,Clothing Store,Bistro,Bar,Italian Restaurant,Bakery,Pizza Place,Beer Bar
16,17th,48.884224,2.322364,0,French Restaurant,Bakery,Hotel,Hotel Bar,Bar,Restaurant,Bistro,Street Art,Museum,Farmers Market
17,18th,48.890012,2.346467,0,French Restaurant,Bistro,Bar,Plaza,Italian Restaurant,Burger Joint,Restaurant,Beer Store,Pizza Place,Candy Store


##### Cluster 2

In [46]:
cluster2 = arrond_merge.loc[arrond_merge['Cluster Label'] == 1]

cluster2

Unnamed: 0,Arrondissement,Latitude,Longitude,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,4th,48.856202,2.355619,1,Plaza,Japanese Restaurant,French Restaurant,Cocktail Bar,Bakery,Ice Cream Shop,Bookstore,Burger Joint,Restaurant,Church
4,5th,48.846059,2.344523,1,French Restaurant,Plaza,Coffee Shop,Wine Bar,Fountain,Hotel,Seafood Restaurant,Bistro,Italian Restaurant,Bookstore


##### Cluster 3

In [47]:
cluster3 = arrond_merge.loc[arrond_merge['Cluster Label'] == 2]

cluster3

Unnamed: 0,Arrondissement,Latitude,Longitude,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,8th,48.877316,2.317261,2,Hotel,French Restaurant,Park,Wine Bar,Plaza,Italian Restaurant,Coffee Shop,Japanese Restaurant,Bar,Cosmetics Shop
11,12th,48.839615,2.395752,2,French Restaurant,Bistro,Bar,Plaza,Bakery,Garden,Hotel,Park,Creperie,Multiplex
12,13th,48.832397,2.355583,2,French Restaurant,Vietnamese Restaurant,Hotel,Bakery,Italian Restaurant,Café,Asian Restaurant,Sushi Restaurant,Thai Restaurant,Bar
18,19th,48.889343,2.38436,2,French Restaurant,Multiplex,Bar,Cocktail Bar,Bistro,Pizza Place,Music Venue,Park,Concert Hall,Café


##### Cluster 4

In [48]:
cluster4 = arrond_merge.loc[arrond_merge['Cluster Label'] == 3]

cluster4

Unnamed: 0,Arrondissement,Latitude,Longitude,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,2nd,48.868743,2.341688,3,Hotel,Sandwich Place,Seafood Restaurant,Cocktail Bar,French Restaurant,Ice Cream Shop,Gastropub,Plaza,Concert Hall,Corsican Restaurant
2,3rd,48.864212,2.360936,3,French Restaurant,Art Gallery,Italian Restaurant,Bookstore,Wine Bar,Seafood Restaurant,Cocktail Bar,Restaurant,Tea Room,Breakfast Spot
13,14th,48.833059,2.326959,3,French Restaurant,Hotel,Italian Restaurant,Japanese Restaurant,Creperie,Bistro,Yoga Studio,Wine Shop,Bar,Vietnamese Restaurant
14,15th,48.84137,2.300383,3,French Restaurant,Hotel,Japanese Restaurant,Korean Restaurant,Park,Gym / Fitness Center,Bistro,Plaza,Bakery,Creperie
15,16th,48.863171,2.275765,3,French Restaurant,Hotel,Italian Restaurant,Plaza,Bakery,Art Museum,Burger Joint,Hotel Bar,Restaurant,Café


##### Cluster 5

In [49]:
cluster5 = arrond_merge.loc[arrond_merge['Cluster Label'] == 4]

cluster5

Unnamed: 0,Arrondissement,Latitude,Longitude,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1st,48.864614,2.334396,4,Hotel,Plaza,French Restaurant,Art Museum,Cocktail Bar,Theater,Boutique,Italian Restaurant,Historic Site,Garden
8,9th,48.876019,2.339962,4,French Restaurant,Italian Restaurant,Bakery,Cocktail Bar,Vegetarian / Vegan Restaurant,Japanese Restaurant,Bar,Fish & Chips Shop,Music Venue,Deli / Bodega
19,20th,48.865042,2.398929,4,Bar,French Restaurant,Bookstore,Restaurant,Pizza Place,Japanese Restaurant,Bakery,Thai Restaurant,Moroccan Restaurant,Music Venue


In [50]:
## run K-means to cluster the landmarks into 5 clusters

# set number of clusters
kclusters = 8

landmark_clustering = landmarks_df.drop('Landmark', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(landmark_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:20]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 0, 0],
      dtype=int32)

#### Create a dataframe with the clusters and landmarks

#### Looking at the results, it's not very useful because there aren't really any distinct clusters. I will have to manually group these landmarks together when I create the travel itinerary

In [51]:
# add clustering labels

landmarks_df.insert(0, 'Cluster Label', kmeans.labels_)

landmarks_merge = landmarks_df

# landmarks_merge = landmarks_merge.drop('Cluster',1) # accidentally added added another cluster column so this was to get rid of that extra column

landmarks_merge = landmarks_merge.sort_values(['Cluster Label'], ascending=False)

landmarks_merge


Unnamed: 0,Cluster Label,Landmark,Latitude,Longitude
26,7,Palace of Versailles,48.804425,2.120285
29,6,Disneyland Paris,48.871136,2.776127
14,5,Hôtel de Ville,43.949297,4.80526
24,4,Montmartre,46.802135,-0.719894
10,3,Picasso Museum,41.385107,2.181205
30,2,Place de la Concorde,29.900205,-90.009515
32,1,Rodin Museum,39.961929,-75.173951
23,0,Palais du Luxembourg,48.848528,2.336836
25,0,Champs-Élysées,48.870757,2.305331
27,0,Tuileries Garden,48.863662,2.326839


#### Printing the full list of venues below

In [52]:
arrond_venues

Unnamed: 0,Arrondissement,Arrondissement Latitude,Arrondissement Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,1st,48.864614,2.334396,Sanukiya,48.864713,2.333805,Udon Restaurant
1,1st,48.864614,2.334396,Brasserie Réjane,48.865486,2.334824,Restaurant
2,1st,48.864614,2.334396,Jardin du Palais Royal,48.864941,2.337728,Garden
3,1st,48.864614,2.334396,Comédie-Française,48.863088,2.336612,Theater
4,1st,48.864614,2.334396,Ellsworth,48.865528,2.337057,French Restaurant
5,1st,48.864614,2.334396,Le Roch Hotel & Spa Paris,48.8662,2.332995,Hotel
6,1st,48.864614,2.334396,Palais Royal,48.863236,2.337127,Historic Site
7,1st,48.864614,2.334396,Les Arts Décoratifs,48.863077,2.333393,Art Museum
8,1st,48.864614,2.334396,LouLou,48.862804,2.3335,Italian Restaurant
9,1st,48.864614,2.334396,Kunitoraya,48.865973,2.33688,Udon Restaurant


#### I'm going to create separate dataframes for different categories below so it'll be easier to call when I create maps to display different activities

In [53]:
# Things to Do
museum = arrond_venues.loc[arrond_venues['Venue Category'].isin(['Museum','Art Museum', 'Science Museum', 'Exhibit', 'Art Gallery', 'History Museum'])]

outdoor_sights = arrond_venues.loc[arrond_venues['Venue Category'].isin(['Bridge', 'Street Art','Plaza', 'Pedestrian Plaza', 'Scenic Lookout', 'Outdoor Sculpture', 'Theme Park Ride / Attraction', 'Canal', 'Park', 'Garden', 'Botanical Garden' ])]
                                   
fashion_store = arrond_venues.loc[arrond_venues['Venue Category'].isin(['Department Store','Boutique',"Men's Store","Women's Store",'Cosmetics Shop', 'Clothing Store', 'Jewelry Store', 'Tailor Shop', 'Gift Shop', 'Shopping Mall'])]

night_life = arrond_venues.loc[arrond_venues['Venue Category'].isin(['Nightclub', 'Hookah Bar', 'Jazz Club', 'Rock Club'])]

bike = arrond_venues.loc[arrond_venues['Venue Category'] == 'Bike Rental / Bike Share']                                   

relax = arrond_venues.loc[arrond_venues['Venue Category'].isin(['Spa', 'Massage Studio'])]


# Things to Eat and Drink
asian_food = arrond_venues.loc[arrond_venues['Venue Category'].isin(['Udon Restaurant', 'Japanese Restaurant', 'Asian Restaurant', 
                                                                     'Szechuan Restaurant', 'Korean Restaurant', 'Vietnamese Restaurant',
                                                                    'Thai Restaurant', 'Cambodian Restaurant', 'Sushi Restaurant',
                                                                    'Jiangxi Restaurant', 'Chinese Restaurant', 'Indian Restaurant'])]

world_food = arrond_venues.loc[arrond_venues['Venue Category'].isin(['Modern European Restaurant', 'Italian Restaurant', 'Argentinian Restaurant', 'American Restaurant', 'Scandinavian Restaurant', 'Trattoria/Osteria' , 
                                                                     'Israeli Restaurant', 'Vegetarian / Vegan Restaurant', 'Seafood Restaurant', 'Lebanese Restaurant', 'Ethiopian Restaurant', 'Mexican Restaurant',
                                                                    'Breton Restaurant','Corsican Restaurant', 'Mediterranean Restaurant', 'Russian Restaurant', 'Tapas Restaurant', 'Falafel Restaurant', 'Moroccan Restaurant',
                                                                    'Turkish Restaurant', 'Portuguese Restaurant', 'Greek Restaurant', 'Brazilian Restaurant', 'Peruvian Restaurant', 'Comfort Food Restaurant', 'Steakhouse',
                                                                    'Basque Restaurant', 'Middle Eastern Restaurant', 'Arepa Restaurant', 'Southern / Soul Food Restaurant', 'African Restaurant', 'Gluten-free Restaurant'])]
                               
french = arrond_venues.loc[arrond_venues['Venue Category'].isin(['French Restaurant', 'Creperie','Cheese Shop', 'Provençal Restaurant', 'Bistro', 'Brasserie', 'Auvergne Restaurant'])]

dessert = arrond_venues.loc[arrond_venues['Venue Category'].isin(['Ice Cream Shop','Pastry Shop', 'Bakery', 'Dessert Shop', 'Donut Shop', 'Chocolate Shop', 'Cupcake Shop'])] 
                           
drink = arrond_venues.loc[arrond_venues['Venue Category'].isin(['Cocktail Bar', 'Beer Bar','Beer Store', 'Roof Deck', 'Wine Bar', 'Tea Room', 'Liquor Store', 'Wine Shop', 
                                                                'Bar', 'Gastropub', 'Speakeasy', 'Lounge', 'Beer Garden', 'Irish Pub', 'Champagne Bar', 'Hotel Bar', 'Vineyard'])]
cafe = arrond_venues.loc[arrond_venues['Venue Category'].isin(['Café', 'Breakfast Spot', 'Coffe Shop', 'Tea Room'])]
                          
fast_food = arrond_venues.loc[arrond_venues['Venue Category'].isin(['Burger Joint', 'Hot Dog Joint', 'Sandwich Place', 'Pizza Place', 
                                                                    'Fish & Chips Shop', 'Deli / Bodega', 'Taco Place', 'Food & Drink Shop',
                                                                   'BBQ Joint', 'Bagel Shop', 'Fast Food Restaurant', 'Diner', 'Food Truck'])]                 

market = arrond_venues.loc[arrond_venues['Venue Category'].isin(['Farmers Market','Cheese Shop', 'Organic Grocery', 'Market', 'Supermarket'])]


# Things to See
historic_site = arrond_venues.loc[arrond_venues['Venue Category'].isin(['Historic Site', 'Monument / Landmark'])]


# Things to hear
perform = arrond_venues.loc[arrond_venues['Venue Category'].isin(['Theater', 'Indie Movie Theater', 'Opera House', 'Comedy Club', 'Concert Hall', 'Music Venue', 'Performing Arts Venue'])]                 


# Places to live 
lodging = arrond_venues.loc[arrond_venues['Venue Category'].isin(['Hotel', 'Hostel'])]

#### The following function shows venues as points along with points designating arrondissement

In [54]:
# create a map of Paris

def map_with_arrond_and_points(points_df, arrondll):

    map_name = folium.Map(location = [48.8566101, 2.3514992], zoom_start = 12)
    
    map_name.choropleth(
        geo_data=paris,
        data=pop_2005,
        columns=['Arrondissement', 'Population'],
        key_on='feature.properties.nom',
        fill_color='PuBu', 
        fill_opacity=0.1, 
        line_opacity=0.5,)

#add points of interest markers to the Paris map
    for lat, long, points, cat in zip(points_df['Venue Latitude'], points_df['Venue Longitude'], 
                                 points_df['Venue'], points_df['Venue Category']):
        label = '{}, {}'.format(points, cat)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, long],
            radius = 7, 
            popup = label,
            color = 'yellow',
            fill = True,
            fill_color = 'blue',
            fill_opacity = 0.7,
            parse_html = False).add_to(map_name)
        
#add arrond markers to the Paris map
    for lat, long, arrond in zip(arrondll['Latitude'], arrondll['Longitude'], 
                                 arrondll['Arrondissement']):
        label = '{}'.format(arrond)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, long],
            radius = 5, 
            popup = label,
            color = 'white',
            fill = True,
            fill_color = 'green',
            fill_opacity = 0.5,
            parse_html = False).add_to(map_name)
        
        
    return map_name

#### This shows French Restaurants

In [55]:
map_with_arrond_and_points(points_df = french, arrondll = arrond_LL)



#### This shows French cafés (including cafés, coffee shops, breakfast spots)

In [56]:
map_with_arrond_and_points(points_df = cafe, arrondll = arrond_LL)

#### The following function shows venues as clusters along with points designating arrondissement

In [57]:
def map_with_arrond_and_points_v1(points_df, arrondll):

# Add the boundaries of the arrondissement
    map_name = folium.Map(location = [48.8566101, 2.3514992], zoom_start = 12)
    
    map_name.choropleth(
        geo_data=paris,
        data=pop_2005,
        columns=['Arrondissement', 'Population'],
        key_on='feature.properties.nom',
        fill_color='PuBu', 
        fill_opacity=0.1, 
        line_opacity=0.5,)

    # instantiate a mark cluster object for the incidents in the dataframe
    venues = plugins.MarkerCluster().add_to(map_name)

    # loop through the dataframe and add each data point to the mark cluster
    for lat, lng, name, cat in zip(points_df['Venue Latitude'], points_df['Venue Longitude'], 
                                 points_df['Venue'], points_df['Venue Category']):
        label = '{}, {}'.format(name, cat)
        label = folium.Popup(label, parse_html=True)
        folium.Marker(
            location=[lat, lng],
            icon=None,
            popup=label).add_to(venues)
        
        
#add arrond markers to the Paris map
    for lat, long, arrond in zip(arrondll['Latitude'], arrondll['Longitude'], 
                                 arrondll['Arrondissement']):
        label = '{}'.format(arrond)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, long],
            radius = 5, 
            popup = label,
            color = 'white',
            fill = True,
            fill_color = 'green',
            fill_opacity = 0.5,
            parse_html = False).add_to(map_name)
        
    return map_name

 


#### This is a function for a map showing the major landmarks of Paris along with the nearby venues in clusters

In [58]:
def map_with_arrond_and_points_v2(points_df, arrondll, landmarks):

# Add the boundaries of the arrondissement
    map_name = folium.Map(location = [48.8566101, 2.3514992], zoom_start = 12)
    
    map_name.choropleth(
        geo_data=paris,
        data=pop_2005,
        columns=['Arrondissement', 'Population'],
        key_on='feature.properties.nom',
        fill_color='PuBu', 
        fill_opacity=0.1, 
        line_opacity=0.5,)

    # instantiate a mark cluster object for the incidents in the dataframe
    venues = plugins.MarkerCluster().add_to(map_name)

    # loop through the dataframe and add each data point to the mark cluster
    for lat, lng, name, cat in zip(points_df['Venue Latitude'], points_df['Venue Longitude'], 
                                 points_df['Venue'], points_df['Venue Category']):
        label = '{}, {}'.format(name, cat)
        label = folium.Popup(label, parse_html=True)
        folium.Marker(
            location=[lat, lng],
            icon=None,
            popup=label).add_to(venues)
        
        
#add arrond markers to the Paris map
    for lat, long, arrond in zip(arrondll['Latitude'], arrondll['Longitude'], 
                                 arrondll['Arrondissement']):
        label = '{}'.format(arrond)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, long],
            radius = 5, 
            popup = label,
            color = 'white',
            fill = True,
            fill_color = 'green',
            fill_opacity = 0.5,
            parse_html = False).add_to(map_name)
        
#add landmarks of interest markers to the Paris map
    for lat, long, landmark in zip(landmarks['Latitude'], landmarks['Longitude'], 
                                 landmarks['Landmark']):
        label = '{}'.format(landmark)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, long],
            radius = 7, 
            popup = label,
            color = 'red',
            fill = True,
            fill_color = 'blue',
            fill_opacity = 0.7,
            parse_html = False).add_to(map_name)
        
    return map_name


#### Here's a map with clusters of French restaurants around famous landmarks

In [59]:
map_with_arrond_and_points_v2(points_df = french, arrondll = arrond_LL, landmarks = landmarks_df)

#### Here's a map of clusters of musuems and their proximity to famous landmarks

In [60]:
map_with_arrond_and_points_v2(points_df = museum, arrondll = arrond_LL, landmarks = landmarks_df)

#### Here's a map showing the local markets and their proximity to famous landmarks

In [61]:
map_with_arrond_and_points_v2(points_df = market, arrondll = arrond_LL, landmarks = landmarks_df)

#### Got a sweet tooth? Here's a cluster of dessert places near famous landmarks

In [62]:
map_with_arrond_and_points_v2(points_df = dessert, arrondll = arrond_LL, landmarks = landmarks_df)

#### Need some rest after a long day of exploring? Here are some lodging options

In [63]:
map_with_arrond_and_points_v2(points_df = lodging, arrondll = arrond_LL, landmarks = landmarks_df)

#### Here are some historical sites near famous landmarks

In [64]:
map_with_arrond_and_points_v1(points_df = historic_site, arrondll = arrond_LL)

#### Here are some outdoor sights (aka parks, gardens, plazas, scenic viewpoints etc)

In [65]:
map_with_arrond_and_points_v1(points_df = outdoor_sights, arrondll = arrond_LL)

#### Here's a map of night life venues near famous landmarks (clubs, etc)

In [66]:
map_with_arrond_and_points_v2(points_df = night_life, arrondll = arrond_LL, landmarks = landmarks_df)

#### Here's a map of bars, liquor stores, cocktail lounges, etc and their proximity from famous landmarks

In [67]:
map_with_arrond_and_points_v2(points_df = drink, arrondll = arrond_LL, landmarks = landmarks_df)

#### Here's a map of cafés and their proximity to famous landmarks

In [68]:
map_with_arrond_and_points_v2(points_df = cafe, arrondll = arrond_LL, landmarks = landmarks_df)

#### Here's a map of fast food establishments (pizza, burgers, hot dog) and their proximity to famous landmarks

In [69]:
map_with_arrond_and_points_v2(points_df = fast_food, arrondll = arrond_LL, landmarks = landmarks_df)

#### Here's a map of bars, cocktail lounges, etc point by point (not clustered)

In [70]:
map_with_arrond_and_points(drink, arrond_LL)

#### Here's a map of stores, boutiques, shops etc by point (not clustered)

In [71]:
map_with_arrond_and_points(fashion_store, arrond_LL)

#### Here's a map of global food options (all other foods besides fast food and French cuisine) and their proximity to famous landmarks

In [72]:
map_with_arrond_and_points_v2(points_df = world_food, arrondll = arrond_LL, landmarks = landmarks_df)

#### Here's a map of performance venues (opera houses, theaters, music venues etc) and their proximity to famous landmarks

In [73]:
map_with_arrond_and_points_v2(points_df = perform, arrondll = arrond_LL, landmarks = landmarks_df)