In [None]:
# The code was removed by Watson Studio for sharing.

In [None]:
# [latitude, longitude] pair used as the centre of the folium map of Paris.
PARIS_COORDINATES = [48.866667, 2.333333]
# Foursquare category id for seafood restaurants, see
# https://developer.foursquare.com/docs/build-with-foursquare/categories/
FOURSQUARE_CATEGORY = '4bf58dd8d48988d1ce941735'
# Search radius around each neighborhood centre, chosen so that every neighborhood is covered.
# NOTE(review): presumably expressed in meters (Foursquare `radius` parameter) - confirm.
FOURSQUARE_RADIUS = 1500
# Maximum number of venues requested per Foursquare query.
FOURSQUARE_LIMIT = 100
# True: query Foursquare again; False: reuse the data saved in the project.
# Useful to avoid reaching the maximum number of queries per day.
FOURSQUARE_UPDATE = True

In [None]:
# The code was removed by Watson Studio for sharing.

## Get data about Paris neighborhoods

A quarter (*quartier*) is the administrative subdivision of an arrondissement. Each arrondissement is divided
into four administrative quarters, so Paris has eighty administrative quarters in total.

https://en.wikipedia.org/wiki/Demographics_of_Paris#/media/File:Paris_Historical_Population.png

**TODO:** be careful about working on a copy of a DataFrame versus a new DataFrame; review the Coursera material.

### Import libraries

In [None]:
from io import StringIO
print('StringIO imported.')
import json
print('json imported.')

import pandas as pd
print('pandas library imported.')
import numpy as np
print('numpy library imported.')
import requests
print('requests library imported.')
from bs4 import BeautifulSoup
print('BeautifulSoup library imported.')

print('All libraries imported.')

### Get the data about neighborhoods of Paris

Get a csv from the site of the City of Paris: https://opendata.paris.fr/explore/dataset/quartier_paris/export/

In [None]:
!wget --quiet https://opendata.paris.fr/explore/dataset/quartier_paris/download/?format=csv -O paris_neighborhoods.csv

print('CSV file downloaded.')

In [None]:
# Load the semicolon-delimited export downloaded in the previous cell.
paris_neighborhoods = pd.read_csv(
    'paris_neighborhoods.csv',
    delimiter=';',
)

print('The dataframe shape is {}.'.format(paris_neighborhoods.shape))

paris_neighborhoods.head()

In [None]:
# Keep a copy of the raw export in the project storage, replacing any previous version.
project.save_data(
    data=paris_neighborhoods.to_csv(index=False),
    file_name='paris_opendata.csv',
    overwrite=True,
)

print('File paris_opendata.csv saved.')

### Cleaning the dataframe

Drop the columns we will not use

In [None]:
# Remove the columns that the rest of the notebook never reads.
paris_neighborhoods = paris_neighborhoods.drop(columns=['n_sq_qu', 'geom', 'n_sq_ar'])

print('The dataframe shape is {}.'.format(paris_neighborhoods.shape))
paris_neighborhoods.head()

Rename the columns

In [None]:
# Positional rename: the list must follow the order of the remaining columns.
paris_neighborhoods.columns = [
    'NeighborhoodNumber',
    'NeighborhoodCode',
    'Neighborhood',
    'DistrictNumber',
    'Perimeter',
    'Area',
    'Coordinates',
]

print('The dataframe shape is {}.'.format(paris_neighborhoods.shape))
paris_neighborhoods.head()

Split the coordinates in 2 columns

In [None]:
# "lat,lon" strings become two separate (still textual) columns, then the source column is removed.
paris_neighborhoods[['Latitude', 'Longitude']] = (
    paris_neighborhoods['Coordinates'].str.split(',', expand=True)
)
paris_neighborhoods = paris_neighborhoods.drop(columns=['Coordinates'])

print('The dataframe shape is {}.'.format(paris_neighborhoods.shape))
paris_neighborhoods.head()

Convert area to square kilometers

In [None]:
# Scale the area down by a factor of one million.
# NOTE(review): presumably the source unit is square meters - confirm against the dataset description.
paris_neighborhoods['Area'] = paris_neighborhoods['Area'] / 1e6

print('The dataframe shape is {}.'.format(paris_neighborhoods.shape))
paris_neighborhoods.head()

Get more information from Wikipedia
Scrape the wikipedia page about the neighborhoods of Paris: https://en.wikipedia.org/wiki/Quarters_of_Paris

In [None]:
# Fetch the Wikipedia page; fail fast on a network stall or an HTTP error
# instead of parsing an error page.
result = requests.get("https://en.wikipedia.org/wiki/Quarters_of_Paris", timeout=30)
result.raise_for_status()

# The quarters are listed in the first table of the page.
soup = BeautifulSoup(result.content, 'lxml')
table = soup.find_all('table')[0]
# Wrap the markup in StringIO: passing literal HTML to read_html is deprecated.
paris_neighborhoods_wikipedia = pd.read_html(StringIO(str(table)))[0]

print('The dataframe shape is {}.'.format(paris_neighborhoods_wikipedia.shape))
paris_neighborhoods_wikipedia.head()

Drop the columns we will not use

In [None]:
# Remove the Wikipedia columns that are not used afterwards.
paris_neighborhoods_wikipedia = paris_neighborhoods_wikipedia.drop(
    columns=['Quartiers(Quarters).1', 'Area(hectares)[3]', 'Map']
)

print('The dataframe shape is {}.'.format(paris_neighborhoods_wikipedia.shape))
paris_neighborhoods_wikipedia.head()

Rename the columns

In [None]:
# Positional rename of the three remaining Wikipedia columns.
paris_neighborhoods_wikipedia.columns = [
    'District',
    'NeighborhoodNumber',
    'Population',
]

print('The dataframe shape is {}.'.format(paris_neighborhoods_wikipedia.shape))
paris_neighborhoods_wikipedia.head()

Split the district's official name (the number of the "arrondissement") from its "also called" name (which is not commonly used in Paris)

In [None]:
# Everything before the first "(" stays in District; the remainder goes to DistrictName.
paris_neighborhoods_wikipedia[['District', 'DistrictName']] = (
    paris_neighborhoods_wikipedia['District'].str.split('(', expand=True)
)

print('The dataframe shape is {}.'.format(paris_neighborhoods_wikipedia.shape))
paris_neighborhoods_wikipedia.head()

Make the district name more beautiful

In [None]:
# Turn 'Called "Name")' into 'Arrondissement Name'.
# Both replacements are plain-text substitutions, so regex=False is passed explicitly:
# the result is then the same whatever the pandas default for `regex` is
# (the previous pattern "\"\)" only matched when it was interpreted as a regular expression).
paris_neighborhoods_wikipedia['DistrictName'] = (
    paris_neighborhoods_wikipedia['DistrictName']
    .str.replace('Called "', 'Arrondissement ', regex=False)
    .str.replace('")', '', regex=False)
)

print('The dataframe shape is {}.'.format(paris_neighborhoods_wikipedia.shape))
paris_neighborhoods_wikipedia.head()

Remove the two-letter suffix from the neighborhood number and cast it to integer

In [None]:
# Strip the last two characters of each value, then store the result as integers
# so it can be used as a merge key.
paris_neighborhoods_wikipedia['NeighborhoodNumber'] = (
    paris_neighborhoods_wikipedia['NeighborhoodNumber'].str[:-2].astype(int)
)

print('The dataframe shape is {}.'.format(paris_neighborhoods_wikipedia.shape))
paris_neighborhoods_wikipedia.head()

In [None]:
# Inner join of the open-data rows with the Wikipedia rows on the neighborhood number.
paris_neighborhoods = paris_neighborhoods.merge(
    paris_neighborhoods_wikipedia,
    on='NeighborhoodNumber',
)

print('The dataframe shape is {}.'.format(paris_neighborhoods.shape))
paris_neighborhoods.head()

Add the postal code

In [None]:
# First word of District without its last two characters
# (presumably an ordinal such as "1st" -> "1"; confirm against the scraped data),
# left-padded with zeros to two characters and prefixed with "750".
district_first_word = paris_neighborhoods['District'].str.split(' ').str[0]
paris_neighborhoods['PostalCode'] = '750' + district_first_word.str[:-2].str.rjust(2, '0')

print('The dataframe shape is {}.'.format(paris_neighborhoods.shape))
paris_neighborhoods.head()

Reorganize columns

In [None]:
# Identification columns first, then demographics, then geometry.
paris_neighborhoods = paris_neighborhoods[[
    'NeighborhoodCode',
    'NeighborhoodNumber',
    'Neighborhood',
    'DistrictNumber',
    'District',
    'DistrictName',
    'PostalCode',
    'Population',
    'Area',
    'Perimeter',
    'Latitude',
    'Longitude',
]]

paris_neighborhoods.head()

Check the type of each column

In [None]:
# Display the dtype of every column; PostalCode, Latitude and Longitude were built from strings.
paris_neighborhoods.dtypes

Cast the PostalCode column to integer and the Latitude and Longitude columns to float

In [None]:
# The three columns below were produced by string operations; give them numeric dtypes.
paris_neighborhoods = paris_neighborhoods.astype({
    'PostalCode': int,
    'Latitude': float,
    'Longitude': float,
})

paris_neighborhoods.dtypes

In [None]:
# Store the cleaned neighborhoods table in the project storage, replacing any previous version.
project.save_data(
    data=paris_neighborhoods.to_csv(index=False),
    file_name='paris_neighborhoods.csv',
    overwrite=True,
)

print('File paris_neighborhoods.csv saved.')

## Some visual analysis of the neighborhoods

### Choropleth map

In [None]:
try:
    import folium
except:
    print('folium not installed, installing folium.')
    !conda install -c conda-forge shapely
    print('folium installed.')
    import folium
print('folium imported.')

print('All libraries imported.')

In [None]:
!wget --quiet https://opendata.paris.fr/explore/dataset/quartier_paris/download/?format=geojson -O paris_neighborhoods_geo.json

print('GeoJSON file downloaded.')

with open('paris_neighborhoods_geo.json') as geojson:
    paris_neighborhoods_geo = json.load(geojson)
project.save_data(file_name='paris_neighborhoods_geo.json', data=json.dumps(paris_neighborhoods_geo), overwrite=True)
print('File paris_neighborhoods_geo.json saved.')

In [None]:
# Rebind the name to the path of the local GeoJSON file, which is what the choropleth below receives as geo_data.
# NOTE(review): this replaces the parsed dict loaded in the previous cell under the same name.
paris_neighborhoods_geo = r'paris_neighborhoods_geo.json'

In [None]:
# Base map centred on Paris.
paris_map = folium.Map(location=PARIS_COORDINATES, zoom_start=13, tiles='OpenStreetMap')

# Colour each neighborhood by population; rows are matched to GeoJSON features
# through NeighborhoodCode <-> properties.c_quinsee.
choropleth = folium.Choropleth(
    geo_data=paris_neighborhoods_geo,
    data=paris_neighborhoods,
    columns=['NeighborhoodCode', 'Population'],
    key_on='feature.properties.c_quinsee',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Neighborhoods of Paris by population in 1999'
).add_to(paris_map)

# Show the 'l_qu' property of the feature on hover. The GeoJson layer already belongs
# to the choropleth, which is on the map, so it must not be added to the map again:
# add_child() returns the layer itself and a trailing add_to() would attach it twice.
choropleth.geojson.add_child(
    folium.features.GeoJsonTooltip(['l_qu'], labels=False)
)

# display map
paris_map

### Scatter plot

In [None]:
import matplotlib.pyplot as plt
print('pyplot library imported.')

print('All libraries imported.')

In [None]:
# Keep only the columns needed to compare population with area.
scatter_plot_df = paris_neighborhoods.loc[:, ['Neighborhood', 'Population', 'Area']]

print('The dataframe shape is {}.'.format(scatter_plot_df.shape))
scatter_plot_df.head()

In [None]:
# One point per neighborhood: area on the x-axis, population on the y-axis.
scatter_axes = scatter_plot_df.plot.scatter(x='Area', y='Population', figsize=(20, 12), color='darkblue')

scatter_axes.set_title('Population in 1999 compared to area for each neighborhood of Paris')
scatter_axes.set_xlabel('Area (km²)')
scatter_axes.set_ylabel('Population')

plt.show()

In [None]:
# Population density per neighborhood, indexed by (District, Neighborhood).
# The frame is built in one chain of copying operations so that adding the Density
# column never writes into a slice of paris_neighborhoods (no SettingWithCopyWarning).
bar_chart_df = (
    paris_neighborhoods[['Neighborhood', 'District', 'Population', 'Area']]
    .set_index(['District', 'Neighborhood'])
    .assign(Density=lambda frame: frame['Population'] / frame['Area'])
)

print('The dataframe shape is {}.'.format(bar_chart_df.shape))
bar_chart_df.head()

In [None]:
# Density only, lowest first (so the densest neighborhood ends up at the top of a horizontal bar chart).
bar_chart_neighborhood_df = bar_chart_df.loc[:, ['Density']].sort_values('Density')

print('The dataframe shape is {}.'.format(bar_chart_neighborhood_df.shape))
bar_chart_neighborhood_df.head()

In [None]:
# Horizontal bars: the values (density) run along the x-axis and the
# neighborhoods are listed on the y-axis, so the labels follow that layout.
bar_chart_neighborhood_df.plot(kind='barh', figsize=(20, 24))

plt.xlabel('Density (inhabitants per km²)')
plt.ylabel('Neighborhood')
plt.title('Density of neighborhoods of Paris in 1999')
plt.legend().remove()

plt.show()

In [None]:
# Sum population and area per district, then recompute the density from those totals
# (the summed per-neighborhood Density column is overwritten).
bar_chart_district_df = (
    bar_chart_df
    .groupby('District')
    .sum()
    .assign(Density=lambda totals: totals['Population'] / totals['Area'])
    [['Density']]
    .sort_values(by=['Density'], ascending=True)
)

print('The dataframe shape is {}.'.format(bar_chart_district_df.shape))
bar_chart_district_df.head()

In [None]:
# Horizontal bars: density runs along the x-axis, districts are listed on the y-axis.
bar_chart_district_df.plot(kind='barh', figsize=(20, 6))

plt.xlabel('Density (inhabitants per km²)')
plt.ylabel('District')
plt.title('Density of districts of Paris in 1999')
plt.legend().remove()

plt.show()

## Analyse the location of seafood restaurants

In [None]:
def get_venues(latitude, longitude, category=FOURSQUARE_CATEGORY, radius=FOURSQUARE_RADIUS, limit=FOURSQUARE_LIMIT):
    """Search Foursquare for venues of a category around a point.

    Parameters
    ----------
    latitude, longitude : coordinates of the centre of the search.
    category : Foursquare category id sent as ``categoryId``.
    radius : search radius sent as ``radius``.
    limit : maximum number of venues requested, sent as ``limit``.

    Returns
    -------
    pandas.DataFrame with the columns ``id``, ``Name``, ``Latitude`` and
    ``Longitude``, one row per venue returned by the API. Venues lacking
    one of these fields are left out.

    Raises
    ------
    requests.HTTPError when Foursquare answers with an error status.
    requests.Timeout when no answer arrives within 30 seconds.
    KeyError when the answer has no ``response.venues`` entry.
    """
    url = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&ll={},{}&categoryId={}&radius={}&limit={}&intent=browse'.format(
            FOURSQUARE_CLIENT_ID,
            FOURSQUARE_CLIENT_SECRET,
            FOURSQUARE_VERSION,
            latitude,
            longitude,
            category,
            radius,
            limit)

    # A timeout keeps a stalled connection from hanging the notebook, and the status
    # check reports API errors (e.g. quota exceeded) as such instead of as a KeyError.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    results = response.json()

    rows = []
    for venue in results['response']['venues']:
        try:
            rows.append([
                venue['id'],
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
            ])
        except KeyError:
            # Best effort: a venue without id, name or coordinates cannot be used later on.
            continue

    return pd.DataFrame(rows, columns=['id', 'Name', 'Latitude', 'Longitude'])

In [None]:
if FOURSQUARE_UPDATE:
    # Query Foursquare around the centre of every neighborhood and concatenate the
    # results once (growing a dataframe inside the loop copies it at every iteration).
    venues_per_neighborhood = [
        get_venues(neighborhood['Latitude'], neighborhood['Longitude'])
        for _, neighborhood in paris_neighborhoods.iterrows()
    ]
    venues = pd.concat(venues_per_neighborhood, ignore_index=True, sort=False)
    # Search circles overlap, so the same venue can be returned for several neighborhoods.
    venues = venues.drop_duplicates(subset='id', keep='first')
    project.save_data(file_name='venues.csv', data=venues.to_csv(index=False), overwrite=True)
    print('File venues.csv saved.')
else:
    # Reuse the venues saved by a previous run instead of spending API quota.
    venues_file = project.get_file('venues.csv')
    venues = pd.read_csv(venues_file)
    print('File venues.csv loaded.')

print('The dataframe shape is {}.'.format(venues.shape))
venues.head()

In [None]:
# Display the dtype of each venue column.
venues.dtypes

In [None]:
# Summary statistics of the venue columns.
venues.describe()

In [None]:
try:
    from shapely.geometry import Point, shape
except:
    print('shapely not installed, installing folium.')
    !conda install -c conda-forge shapely
    print('shapely installed.')
    from shapely.geometry import Point, shape
print('shapely.geometry imported.')

print('All libraries imported.')

In [None]:
paris_neighborhoods_geo_file = project.get_file('paris_neighborhoods_geo.json')
paris_neighborhoods_geo = pd.read_json(paris_neighborhoods_geo_file)
print('File paris_neighborhoods_geo.json loaded.')

# Build every neighborhood polygon once, paired with its code, instead of once per venue.
neighborhood_shapes = [
    (feature['properties']['c_quinsee'], shape(feature['geometry']))
    for feature in paris_neighborhoods_geo['features']
]

# Keep the venues that fall inside a neighborhood polygon, tagged with the code of the
# first polygon that contains them. Rows are collected in a list and turned into a
# dataframe once (DataFrame.append no longer exists in recent pandas versions).
located_venues = []
for _, venue in venues.iterrows():
    # GeoJSON coordinates are (x, y), i.e. (longitude, latitude).
    venue_location = Point(venue['Longitude'], venue['Latitude'])

    for neighborhood_code, neighborhood_shape in neighborhood_shapes:
        if venue_location.within(neighborhood_shape):
            located_venues.append({
                'id': venue['id'],
                'Name': venue['Name'],
                'Latitude': venue['Latitude'],
                'Longitude': venue['Longitude'],
                'NeighborhoodCode': neighborhood_code,
            })
            break

seafood_restaurants_paris = pd.DataFrame(
    located_venues,
    columns=['id', 'Name', 'Latitude', 'Longitude', 'NeighborhoodCode'],
)

print('The dataframe shape is {}.'.format(seafood_restaurants_paris.shape))
seafood_restaurants_paris.head()

In [None]:
# Store the located restaurants in the project storage, replacing any previous version.
project.save_data(
    data=seafood_restaurants_paris.to_csv(index=False),
    file_name='seafood_restaurants_paris.csv',
    overwrite=True,
)
print('File seafood_restaurants_paris.csv saved.')

In [None]:
# One blue circle per restaurant, grouped in a single layer added on top of the existing map.
seafood_restaurants = folium.map.FeatureGroup()

marker_rows = seafood_restaurants_paris[['Latitude', 'Longitude', 'Name']].values
for latitude, longitude, venueName in marker_rows:
    restaurant_marker = folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.6,
        popup=venueName
    )
    seafood_restaurants.add_child(restaurant_marker)

paris_map.add_child(seafood_restaurants)

paris_map

In [None]:
# Outer join: every neighborhood is kept, with one row per restaurant located in it
# (or a single row with a missing id when it has none).
bar_chart_df = (
    pd.merge(
        paris_neighborhoods[['NeighborhoodCode', 'Neighborhood', 'District']],
        seafood_restaurants_paris[['NeighborhoodCode', 'id']],
        on='NeighborhoodCode',
        how='outer',
    )
    .drop(columns=['NeighborhoodCode'])
    .set_index(['District', 'Neighborhood'])
)

print('The dataframe shape is {}.'.format(bar_chart_df.shape))
bar_chart_df.head()

In [None]:
# count() ignores missing ids, so neighborhoods without any restaurant get 0.
bar_chart_neighborhood_df = (
    bar_chart_df
    .groupby(['District', 'Neighborhood'])
    .count()
    .rename(columns={'id': 'VenueCount'})
    .sort_values(by=['VenueCount'], ascending=True)
)

print('The dataframe shape is {}.'.format(bar_chart_neighborhood_df.shape))
bar_chart_neighborhood_df.head()

In [None]:
# Horizontal bar chart of the restaurant count of each neighborhood.
venue_count_axes = bar_chart_neighborhood_df.plot.barh(figsize=(20, 24))

venue_count_axes.set_xlabel('Number of seafood restaurants')
venue_count_axes.set_ylabel('Neighborhood')
venue_count_axes.set_title('Number of seafood restaurants per neighborhood')
venue_count_axes.legend().remove()

plt.show()

In [None]:
# Restaurants per million inhabitants for each neighborhood.
# The ratio is added with assign() inside the chain, so no column is ever written
# into a slice of the merged frame (no SettingWithCopyWarning).
bar_chart_neighborhood_df2 = (
    pd.merge(bar_chart_neighborhood_df, paris_neighborhoods, on='Neighborhood')
    [['Neighborhood', 'VenueCount', 'District', 'Population']]
    .assign(VenuePerMillionInhabitants=lambda frame: frame['VenueCount'] / frame['Population'] * 1e6)
    [['Neighborhood', 'VenuePerMillionInhabitants', 'District']]
    .set_index(['District', 'Neighborhood'])
    .sort_values(by=['VenuePerMillionInhabitants'], ascending=True)
)

print('The dataframe shape is {}.'.format(bar_chart_neighborhood_df2.shape))
bar_chart_neighborhood_df2.head()

In [None]:
# Horizontal bar chart of the per-capita restaurant count of each neighborhood.
per_capita_axes = bar_chart_neighborhood_df2.plot.barh(figsize=(20, 24))

per_capita_axes.set_xlabel('Number of seafood restaurants per million inhabitants')
per_capita_axes.set_ylabel('Neighborhood')
per_capita_axes.set_title('Number of seafood restaurants per million inhabitants per neighborhood')
per_capita_axes.legend().remove()

plt.show()

In [None]:
# Number of restaurants per district (missing ids are not counted), lowest first.
bar_chart_district_df = (
    bar_chart_df
    .groupby(['District'])
    .count()
    .rename(columns={'id': 'VenueCount'})
    .sort_values(by=['VenueCount'], ascending=True)
)

print('The dataframe shape is {}.'.format(bar_chart_district_df.shape))
bar_chart_district_df.head()

In [None]:
# Horizontal bar chart of the restaurant count of each district.
district_count_axes = bar_chart_district_df.plot.barh(figsize=(20, 6))

district_count_axes.set_xlabel('Number of seafood restaurants')
district_count_axes.set_ylabel('District')
district_count_axes.set_title('Number of seafood restaurants per district')
district_count_axes.legend().remove()

plt.show()

In [None]:
# Sum restaurant counts and population per district, then derive the number of
# restaurants per million inhabitants from those totals.
bar_chart_district_df2 = (
    pd.merge(bar_chart_neighborhood_df, paris_neighborhoods, on='Neighborhood')
    [['VenueCount', 'District', 'Population']]
    .groupby(['District'])
    .sum()
    .assign(VenuePerMillionInhabitants=lambda totals: totals['VenueCount'] / totals['Population'] * 1e6)
    [['VenuePerMillionInhabitants']]
    .sort_values(by=['VenuePerMillionInhabitants'], ascending=True)
)

print('The dataframe shape is {}.'.format(bar_chart_district_df2.shape))
bar_chart_district_df2.head()

In [None]:
# Horizontal bar chart of the per-capita restaurant count of each district.
district_per_capita_axes = bar_chart_district_df2.plot.barh(figsize=(20, 6))

district_per_capita_axes.set_xlabel('Number of seafood restaurants per million inhabitants')
district_per_capita_axes.set_ylabel('District')
district_per_capita_axes.set_title('Number of seafood restaurants per million inhabitants per district')
district_per_capita_axes.legend().remove()

plt.show()

## More details regarding seafood restaurants

In [None]:
def get_venue_details(venue_id, category=FOURSQUARE_CATEGORY):
    """Fetch the details of one Foursquare venue.

    Parameters
    ----------
    venue_id : Foursquare id of the venue.
    category : Foursquare category id checked against the venue's categories;
        ``None`` disables that check.

    Returns
    -------
    dict with the keys ``id``, ``Name``, ``Price``, ``PriceDescription``,
    ``Rating``, ``Likes``, ``TipsCount`` and ``IsPrimaryCategory``. A field
    missing from the API answer is reported as NaN. ``IsPrimaryCategory`` is
    True when one of the venue's categories has the id ``category`` and is
    flagged ``primary``, False otherwise, and NaN when ``category`` is None
    or the venue has no usable category list.

    Raises
    ------
    requests.HTTPError when Foursquare answers with an error status.
    requests.Timeout when no answer arrives within 30 seconds.
    KeyError when the answer has no ``response.venue`` entry.
    """
    url = 'https://api.foursquare.com/v2/venues/{}?&client_id={}&client_secret={}&v={}'.format(
            venue_id,
            FOURSQUARE_CLIENT_ID,
            FOURSQUARE_CLIENT_SECRET,
            FOURSQUARE_VERSION)

    # A timeout keeps a stalled connection from hanging the notebook, and the status
    # check reports API errors (e.g. quota exceeded) as such instead of as a KeyError.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    venue_data = response.json()['response']['venue']

    def _field(*keys):
        """Return the value found by following ``keys`` in venue_data, or NaN if a key is missing."""
        value = venue_data
        try:
            for key in keys:
                value = value[key]
        except KeyError:
            return np.nan
        return value

    if category is None:
        is_primary_category = np.nan
    else:
        try:
            # A category entry without a 'primary' key is treated as not primary
            # rather than making the whole answer undetermined.
            is_primary_category = any(
                venue_category['id'] == category and venue_category.get('primary') is True
                for venue_category in venue_data['categories']
            )
        except KeyError:
            is_primary_category = np.nan

    return {
        'id': venue_id,
        'Name': _field('name'),
        'Price': _field('price', 'tier'),
        'PriceDescription': _field('price', 'message'),
        'Rating': _field('rating'),
        'Likes': _field('likes', 'count'),
        'TipsCount': _field('tips', 'count'),
        'IsPrimaryCategory': is_primary_category,
    }

In [None]:
seafood_restaurants_paris_file = project.get_file('seafood_restaurants_paris.csv')
seafood_restaurants_paris = pd.read_csv(seafood_restaurants_paris_file)
print('File seafood_restaurants_paris.csv loaded.')

if FOURSQUARE_UPDATE:
    # One detail request per restaurant. Rows are collected in a list and turned into a
    # dataframe once (DataFrame.append no longer exists in recent pandas versions).
    detail_records = []
    for _, restaurant in seafood_restaurants_paris.iterrows():
        venue_details = get_venue_details(restaurant['id'])
        detail_records.append({
            'id': restaurant['id'],
            'Name': venue_details['Name'],
            'Latitude': restaurant['Latitude'],
            'Longitude': restaurant['Longitude'],
            'Price': venue_details['Price'],
            'PriceDescription': venue_details['PriceDescription'],
            'Rating': venue_details['Rating'],
            'Likes': venue_details['Likes'],
            'TipsCount': venue_details['TipsCount'],
            'IsPrimaryCategory': venue_details['IsPrimaryCategory'],
            'NeighborhoodCode': restaurant['NeighborhoodCode'],
        })

    seafood_restaurants_paris_details = pd.DataFrame(
        detail_records,
        columns=['id', 'Name', 'Latitude', 'Longitude', 'Price', 'PriceDescription', 'Rating', 'Likes', 'TipsCount', 'IsPrimaryCategory', 'NeighborhoodCode'],
    )

    project.save_data(file_name='seafood_restaurants_paris_details.csv', data=seafood_restaurants_paris_details.to_csv(index=False), overwrite=True)
    print('File seafood_restaurants_paris_details.csv saved.')
else:
    # Reuse the details saved by a previous run instead of spending API quota.
    seafood_restaurants_paris_details_file = project.get_file('seafood_restaurants_paris_details.csv')
    seafood_restaurants_paris_details = pd.read_csv(seafood_restaurants_paris_details_file)
    print('File seafood_restaurants_paris_details.csv loaded.')

print('The dataframe shape is {}.'.format(seafood_restaurants_paris_details.shape))
seafood_restaurants_paris_details.head()