# Final Project Capstone : Comparing Cities - Data

## Data Introduction

### Library importation

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


### New York Neighborhood and geolocalisation data

In [2]:
new_york_neighborhoods=pd.read_csv('new_york.csv')
new_york_neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [3]:
print('New york has {} boroughs and {} neighborhoods.'.format(
        len(new_york_neighborhoods['Borough'].unique()),
        new_york_neighborhoods.shape[0]
    )
)

New york has 5 boroughs and 306 neighborhoods.


### Toronto Neighborhood and geolocalisation Data

In [4]:
toronto_neighborhoods=pd.read_csv('toronto.csv')
toronto_neighborhoods.head()

Unnamed: 0,Postal_Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [5]:
print('Toronto has {} boroughs and {} neighborhoods.'.format(
        len(toronto_neighborhoods['Borough'].unique()),
        toronto_neighborhoods.shape[0]
    )
)

Toronto has 4 boroughs and 39 neighborhoods.


### Paris Neighborhood and geolocalisation data 

In [6]:
with open('quartier_paris.json') as json_data:
    paris_data_prep = json.load(json_data)

In [8]:
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
paris_neighborhoods = pd.DataFrame(columns=column_names)

for el in paris_data_prep:
    paris_neighborhoods=paris_neighborhoods.append({
        'Borough':el['fields']['c_ar'],
        'Neighborhood':el['fields']['l_qu'],
        'Latitude':el['fields']['geom_x_y'][0],
        'Longitude':el['fields']['geom_x_y'][1]

    }, ignore_index=True)

In [9]:
paris_neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,9,Rochechouart,48.879812,2.344861
1,12,Bercy,48.835209,2.38621
2,1,Halles,48.862289,2.344899
3,4,Arsenal,48.851585,2.364768
4,5,Jardin-des-Plantes,48.84194,2.356894


In [10]:
print('Paris has {} boroughs and {} neighborhoods.'.format(
        len(paris_neighborhoods['Borough'].unique()),
        paris_neighborhoods.shape[0]
    )
)

Paris has 20 boroughs and 80 neighborhoods.


## Introducing Venues on data

In this part, we will add to our datasets Venues data collected from FourSquare. for each eighborhood we will explore it and get all the venus their.
We will explore data and prepare it for clustering.

### Get NearbyVenues

In [11]:
def getNearbyVenues(data, radius=500,LIMIT=1000):
    names=data['Neighborhood']
    latitudes=data['Latitude']
    longitudes=data['Longitude']
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [12]:
CLIENT_ID = '0JN40RP4PTX2RPXDYLY1FJMFT4NT1YWJA3PGYGJ13DAUAPZ2' # your Foursquare ID
CLIENT_SECRET = 'NFQZAIV11AYTNZILJXHP2MBWWUVKRJLE34RW02ZZ3FZTXJDK' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 0JN40RP4PTX2RPXDYLY1FJMFT4NT1YWJA3PGYGJ13DAUAPZ2
CLIENT_SECRET:NFQZAIV11AYTNZILJXHP2MBWWUVKRJLE34RW02ZZ3FZTXJDK


In [13]:
new_york_venues = getNearbyVenues(new_york_neighborhoods)
new_york_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Wakefield,40.894705,-73.847201,Lollipops Gelato,40.894123,-73.845892,Dessert Shop
1,Wakefield,40.894705,-73.847201,Carvel Ice Cream,40.890487,-73.848568,Ice Cream Shop
2,Wakefield,40.894705,-73.847201,Walgreens,40.896528,-73.8447,Pharmacy
3,Wakefield,40.894705,-73.847201,Rite Aid,40.896649,-73.844846,Pharmacy
4,Wakefield,40.894705,-73.847201,Dunkin',40.890459,-73.849089,Donut Shop


In [14]:
paris_venues = getNearbyVenues(paris_neighborhoods)
paris_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Rochechouart,48.879812,2.344861,Mamiche,48.880112,2.343699,Bakery
1,Rochechouart,48.879812,2.344861,Pizza di Loretta,48.880634,2.344011,Pizza Place
2,Rochechouart,48.879812,2.344861,Les 36 Corneil,48.878997,2.345501,Wine Bar
3,Rochechouart,48.879812,2.344861,La Ferme Saint Hubert,48.878908,2.345428,Cheese Shop
4,Rochechouart,48.879812,2.344861,Le Barbe à Papa,48.879654,2.347438,French Restaurant


In [15]:
toronto_venues = getNearbyVenues(toronto_neighborhoods)
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park , Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park , Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park , Harbourfront",43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
3,"Regent Park , Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
4,"Regent Park , Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


In [16]:
print('There are {} uniques categories in new york city.'.format(len(new_york_venues['Venue Category'].unique())))

There are 429 uniques categories in new york city.


In [17]:
print('There are {} uniques categories in paris city.'.format(len(paris_venues['Venue Category'].unique())))

There are 295 uniques categories in paris city.


In [18]:
print('There are {} uniques categories in toronto city.'.format(len(toronto_venues['Venue Category'].unique())))

There are 232 uniques categories in toronto city.
