<a href="https://colab.research.google.com/github/Enell261/Coursera_Capstone/blob/main/Clustering_NY_neighbourhoods.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

The idea is to convert addresses to their coordinates, use the Foursquare API to get the most interesting venues in each neighbourhood, and then use these venues to cluster similar neighbourhoods.

In [None]:
#import the relevant dependencies

import json # library to handle JSON files
import os # read API credentials from environment variables
from getpass import getpass # prompt for credentials that are not set in the environment

import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [None]:
# Download the New York neighbourhood dataset (hosted by IBM) to newyork_data.json.
# wget flags: -q silences wget's own output, -O sets the name of the saved file.

!wget -q -O 'newyork_data.json' https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs/newyork_data.json
print('Data downloaded!')

Data downloaded!


In [None]:
# open the json file
# JSON is UTF-8 encoded; pass the encoding explicitly so the result does not
# depend on the platform's default locale encoding.
with open('newyork_data.json', encoding='utf-8') as json_data:
    newyork_data = json.load(json_data)

newyork_data 

{'bbox': [-74.2492599487305,
  40.5033187866211,
  -73.7061614990234,
  40.9105606079102],
 'crs': {'properties': {'name': 'urn:ogc:def:crs:EPSG::4326'}, 'type': 'name'},
 'features': [{'geometry': {'coordinates': [-73.84720052054902,
     40.89470517661],
    'type': 'Point'},
   'geometry_name': 'geom',
   'id': 'nyu_2451_34572.1',
   'properties': {'annoangle': 0.0,
    'annoline1': 'Wakefield',
    'annoline2': None,
    'annoline3': None,
    'bbox': [-73.84720052054902,
     40.89470517661,
     -73.84720052054902,
     40.89470517661],
    'borough': 'Bronx',
    'name': 'Wakefield',
    'stacked': 1},
   'type': 'Feature'},
  {'geometry': {'coordinates': [-73.82993910812398, 40.87429419303012],
    'type': 'Point'},
   'geometry_name': 'geom',
   'id': 'nyu_2451_34572.2',
   'properties': {'annoangle': 0.0,
    'annoline1': 'Co-op',
    'annoline2': 'City',
    'annoline3': None,
    'bbox': [-73.82993910812398,
     40.87429419303012,
     -73.82993910812398,
     40.874294193

In [None]:
# The data we are interested in is under the 'features' key (lower case):
# a list with one GeoJSON-style feature dict per neighbourhood.

neighbourhood_data = newyork_data['features']


In [None]:
# Inspect the first feature to see which fields each entry provides
neighbourhood_data[0]

{'geometry': {'coordinates': [-73.84720052054902, 40.89470517661],
  'type': 'Point'},
 'geometry_name': 'geom',
 'id': 'nyu_2451_34572.1',
 'properties': {'annoangle': 0.0,
  'annoline1': 'Wakefield',
  'annoline2': None,
  'annoline3': None,
  'bbox': [-73.84720052054902,
   40.89470517661,
   -73.84720052054902,
   40.89470517661],
  'borough': 'Bronx',
  'name': 'Wakefield',
  'stacked': 1},
 'type': 'Feature'}

Transform the json neighborhood data to a pandas dataframe

In [None]:
# Define the column names for the pandas dataframe
# NOTE(review): this list spells the column 'Neighborhood', while the map of
# New York reads a column spelled 'Neighbourhood' - keep the two in sync.

columns = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']

# instantiate the dataframe
# Start from an empty frame that only declares the columns.
neighbourhoods = pd.DataFrame(columns=columns)

In [None]:
# loop through the data and populate the dataframe

# Collect one record per feature and build the frame in a single call:
# DataFrame.append was removed in pandas 2.0, and appending row by row copies
# the whole frame on every iteration. Rebuilding the frame from scratch also
# makes this cell safe to re-run (no duplicated rows).
records = []
for data in neighbourhood_data:
  borough = data['properties']['borough']
  neighbourhood_name = data['properties']['name']

  # the coordinates are stored as [longitude, latitude]
  lati_long = data['geometry']['coordinates']
  latitude = lati_long[1]
  longitude = lati_long[0]

  # The name is stored under both spellings: 'Neighborhood' is the declared
  # column (it used to stay empty) and 'Neighbourhood' is the column the
  # New York map cell reads.
  records.append({'Borough': borough,
                  'Neighborhood': neighbourhood_name,
                  'Latitude': latitude,
                  'Longitude': longitude,
                  'Neighbourhood': neighbourhood_name})

neighbourhoods = pd.DataFrame(
    records,
    columns=['Borough', 'Neighborhood', 'Latitude', 'Longitude', 'Neighbourhood'])

In [None]:
# Preview the first rows of the neighbourhood frame
neighbourhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Neighbourhood
0,Bronx,,40.894705,-73.847201,Wakefield
1,Bronx,,40.874294,-73.829939,Co-op City
2,Bronx,,40.887556,-73.827806,Eastchester
3,Bronx,,40.895437,-73.905643,Fieldston
4,Bronx,,40.890834,-73.912585,Riverdale
5,Bronx,,40.881687,-73.902818,Kingsbridge
6,Manhattan,,40.876551,-73.91066,Marble Hill
7,Bronx,,40.898273,-73.867315,Woodlawn
8,Bronx,,40.877224,-73.879391,Norwood
9,Bronx,,40.881039,-73.857446,Williamsbridge


Use Geopy to get the coordinates of New York City

In [None]:
# Look up the coordinates of the city with the Nominatim geocoder;
# the result is kept in `latitude` / `longitude` for the map below.
geolocator = Nominatim(user_agent='ny_explorer')
address = 'New York City, NY'
location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude


Create a map of New York with the neighbourhoods superimposed


In [None]:
# Base map centred on the geocoded coordinates
map_ny = folium.Map(location = [latitude, longitude], zoom_start=10)

# One circle marker per neighbourhood, with a "<neighbourhood>,<borough>" popup
marker_fields = zip(*(neighbourhoods[col]
                      for col in ('Latitude', 'Longitude', 'Borough', 'Neighbourhood')))
for lat, lng, borough, neigh in marker_fields:
  popup_text = '{},{}'.format(neigh, borough)
  marker = folium.CircleMarker([lat,lng],
                               radius = 5,
                               popup=folium.Popup(popup_text, parse_html=True),
                               color = 'red',
                               fill = False,
                               fill_color='#3186cc',
                               parse_html=False
                               )
  marker.add_to(map_ny)

map_ny

For simplicity, we will focus on Manhattan

In [None]:
# get data corresponding to Manhattan

# The frame built above is named `neighbourhoods`; the previous spelling
# `neighborhoods` is not defined anywhere and raised NameError on a fresh kernel.
manhattan_data = neighbourhoods[neighbourhoods['Borough'] == 'Manhattan'].reset_index(drop=True)

manhattan_data.head()


Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Neighbourhood
0,Manhattan,,40.876551,-73.91066,Marble Hill
1,Manhattan,,40.715618,-73.994279,Chinatown
2,Manhattan,,40.851903,-73.9369,Washington Heights
3,Manhattan,,40.867684,-73.92121,Inwood
4,Manhattan,,40.823604,-73.949688,Hamilton Heights
5,Manhattan,,40.816934,-73.957385,Manhattanville
6,Manhattan,,40.815976,-73.943211,Central Harlem
7,Manhattan,,40.792249,-73.944182,East Harlem
8,Manhattan,,40.775639,-73.960508,Upper East Side
9,Manhattan,,40.77593,-73.947118,Yorkville


In [None]:
# Geocode Manhattan; this overwrites `latitude` / `longitude`, which the
# Manhattan map and the Foursquare request below read.
geolocator = Nominatim(user_agent="ny_explorer")
address = 'Manhattan, NY'
location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 40.7896239, -73.9598939.


In [None]:
# create map of Manhattan using latitude and longitude values
map_manhattan = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
# Read the names from the 'Neighbourhood' column: that is the column the
# populate loop writes the neighbourhood name to (and the one the New York
# map reads), so the popups show real names instead of missing values.
for lat, lng, label in zip(manhattan_data['Latitude'], manhattan_data['Longitude'], manhattan_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_manhattan) 

In [None]:
# Render the Manhattan map as the cell output
map_manhattan

Use the Foursquare API to access places in Manhattan

In [None]:
# Foursquare credentials are secrets: read them from the environment (or ask
# for them interactively) instead of hard-coding them, and never print them,
# because cell outputs are saved together with the notebook.
# NOTE(review): the key pair that used to be hard-coded in this cell has been
# published with the notebook and should be revoked and replaced.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID') or getpass('Foursquare CLIENT_ID: ')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET') or getpass('Foursquare CLIENT_SECRET: ')
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Foursquare credentials loaded.')

Your credentails:
CLIENT_ID: PJX3PJ4FIED23NYXJEMYAHHJH0S4UHKZRL214FEKVLMT13FW
CLIENT_SECRET:HHUGJ5DXTGCJ35ORN5DJXXCM5ZPS42ESFGJ4TX2ICJULGLAX


In [None]:
# get url for Manhattan from Foursquare

LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

# create URL
url_template = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'
url = url_template.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude, 
    longitude, 
    radius, 
    LIMIT)

# Show the request URL with the credentials masked: displaying `url` itself
# would write the client secret into the saved notebook output.
url_template.format(
    '<CLIENT_ID>', 
    '<CLIENT_SECRET>', 
    VERSION, 
    latitude, 
    longitude, 
    radius, 
    LIMIT)

'https://api.foursquare.com/v2/venues/explore?&client_id=PJX3PJ4FIED23NYXJEMYAHHJH0S4UHKZRL214FEKVLMT13FW&client_secret=HHUGJ5DXTGCJ35ORN5DJXXCM5ZPS42ESFGJ4TX2ICJULGLAX&v=20180605&ll=40.7896239,-73.9598939&radius=500&limit=100'

In [None]:
# Get the json file for the Manhattan data from Foursquare

# requests has no default timeout, so an unresponsive server would block the
# cell forever; raise_for_status() reports HTTP errors (bad credentials, quota)
# here instead of as a confusing KeyError when the payload is unpacked later.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()

In [None]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category of a venue, or None.

    Parameters
    ----------
    row : mapping or pandas.Series
        A venue record that holds its list of categories under the key
        'categories' or, when that key is absent, under 'venue.categories'.

    Returns
    -------
    str or None
        The 'name' of the first category, or None when the category list is
        empty or None.

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present in `row`.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and must not be hidden by a bare `except`.
        categories_list = row['venue.categories']

    if not categories_list:
        return None
    return categories_list[0]['name']

In [None]:
venues = results['response']['groups'][0]['items']

# pd.json_normalize is the supported entry point; importing json_normalize
# from pandas.io.json has been deprecated since pandas 1.0 and emits a warning.
nearby_venues = pd.json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns: keep only the last dotted component ('venue.location.lat' -> 'lat')
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,name,categories,lat,lng
0,Central Park Tennis Center,Tennis Court,40.789313,-73.961862
1,East Meadow,Field,40.79016,-73.955498
2,North Meadow Recreation Center,Recreation Center,40.791216,-73.959661
3,Oldest Tree in Central Park,Park,40.789188,-73.957867
4,Central Park - 96th Street Playground,Playground,40.787813,-73.956257


In [None]:
# Report how many venues the request returned (the frame has one row per venue)
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

31 venues were returned by Foursquare.


Explore the neighbourhood

In [None]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Collect the venues Foursquare returns around each given location.

    One request is sent to the Foursquare ``venues/explore`` endpoint per
    location, using the module-level CLIENT_ID, CLIENT_SECRET, VERSION and
    LIMIT values. Each name is printed before its request as a progress log.

    Parameters
    ----------
    names : iterable
        Label of each location (stored in the 'Neighborhood' column).
    latitudes, longitudes : iterable
        Coordinates of each location, aligned with `names`.
    radius : int, optional
        Value passed as the ``radius`` query parameter (default 500).

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame is
        empty (but keeps these columns) when no venue was found.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers a request with an HTTP error status.
    """
    column_names = ['Neighborhood', 
                    'Neighborhood Latitude', 
                    'Neighborhood Longitude', 
                    'Venue', 
                    'Venue Latitude', 
                    'Venue Longitude', 
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request; the timeout keeps one stalled request from
        # blocking the whole loop, and HTTP errors are reported immediately
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']
        
        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            # a venue may come without categories; record None instead of
            # failing with IndexError on the empty list
            categories = venue.get('categories') or []
            category = categories[0]['name'] if categories else None
            venue_rows.append((
                name, 
                lat, 
                lng, 
                venue['name'], 
                venue['location']['lat'], 
                venue['location']['lng'],  
                category))

    # passing the column names to the constructor also works for zero rows,
    # where assigning `.columns` afterwards raised a length-mismatch ValueError
    nearby_venues = pd.DataFrame(venue_rows, columns=column_names)
    
    return nearby_venues