# Comparing Neighborhoods of New York City and Toronto
## Introduction

Is there a way to assess a neighborhood in one city that is analagous to a neighborhood in another city? And if so, to which accuracy? Building such a model could be useful to real estate investors whom cannot travel but would still like to assess the neighborhood.

## Data

I will use the new york data we used in week 3 and the toronto dataset in week 4. 

In [183]:
import pandas as pd
import numpy as np
import json
import requests
from sklearn.cluster import KMeans
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
from geopy.geocoders import Nominatim

In [184]:
# Toronto
toronto = pd.read_csv("Geospatial_Coordinates.csv")

# New York
newyork = pd.read_excel("new_york.xlsx", index_col = [0])
manhattan = newyork[newyork.Borough == "Manhattan"]

In [185]:
manhattan.shape

(40, 4)

### Foursquare login

In [186]:
CLIENT_ID = '***' # your Foursquare ID
CLIENT_SECRET = '***' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

In [191]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### Get venue data

In [188]:
# Get all venues in manhatten
manhattan_venues = getNearbyVenues(names=manhattan['Neighborhood'],
                                   latitudes=manhattan['Latitude'],
                                   longitudes=manhattan['Longitude']
                                  )

# One hot encoding
manhattan_onehot = pd.get_dummies(manhattan_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
manhattan_onehot['Neighborhood'] = manhattan_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [manhattan_onehot.columns[-1]] + list(manhattan_onehot.columns[:-1])
manhattan_onehot = manhattan_onehot[fixed_columns]

# Group by neighborhood
manhattan_grouped = manhattan_onehot.groupby(by = "Neighborhood").mean().reset_index()

KeyError: 'groups'

In [54]:
# Get all venues in Toronto
toronto_venues = getNearbyVenues(names = toronto['Postal Code'],
                                   latitudes = toronto['Latitude'],
                                   longitudes = toronto['Longitude']
                                  )

# One hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

# Group by neighborhood
toronto_grouped = toronto_onehot.groupby(by = "Neighborhood").mean().reset_index()

## Clustering

In [72]:
# set number of clusters
k = 5
manhattan_grouped = manhattan_onehot.groupby(by = "Neighborhood").mean().reset_index()
manhattan_grouped_ = manhattan_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters = k, random_state = 0).fit(manhattan_grouped_)

# add clustering labels
manhattan_grouped.insert(0, 'Cluster Labels', kmeans.labels_)

manhattan_merged = manhattan

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
manhattan_merged = manhattan_merged.join(manhattan_grouped.set_index('Neighborhood'),
                                         on='Neighborhood')

In [79]:
manhattan_merged.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,...,Video Store,Vietnamese Restaurant,Volleyball Court,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
6,Manhattan,Marble Hill,40.876551,-73.91066,1,0.0,0.0,0.0,0.0,0.041667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667
100,Manhattan,Chinatown,40.715618,-73.994279,0,0.0,0.0,0.0,0.0,0.04,...,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
101,Manhattan,Washington Heights,40.851903,-73.9369,0,0.012346,0.0,0.0,0.0,0.012346,...,0.0,0.0,0.0,0.0,0.0,0.012346,0.024691,0.0,0.012346,0.0
102,Manhattan,Inwood,40.867684,-73.92121,3,0.0,0.0,0.0,0.0,0.018868,...,0.0,0.0,0.0,0.0,0.0,0.037736,0.018868,0.0,0.0,0.018868
103,Manhattan,Hamilton Heights,40.823604,-73.949688,3,0.0,0.015873,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.0,0.031746


In [80]:
address = 'Manhattan, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(manhattan_merged['Latitude'], manhattan_merged['Longitude'], manhattan_merged['Neighborhood'], manhattan_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [119]:
# Merging Toronto and New York dataset
_ = newyork.drop(columns = "Borough")
_["City"] = "Manhattan"
tor = pd.read_excel("toronto.xlsx", index_col = [0])
tor.drop(columns = ["PostalCode", "Borough"], inplace = True)
tor.rename(columns = {"Neighbourhood":"Neighborhood"}, inplace = True)
tor["City"] = "Toronto"
torman = pd.concat([_, tor])

In [98]:
# Get all venues in manhatten
torman_venues = getNearbyVenues(names = torman['Neighborhood'],
                                   latitudes = torman['Latitude'],
                                   longitudes = torman['Longitude']
                                  )

# One hot encoding
torman_onehot = pd.get_dummies(torman_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
torman_onehot['Neighborhood'] = torman_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [torman_onehot.columns[-1]] + list(torman_onehot.columns[:-1])
torman_onehot = torman_onehot[fixed_columns]

# Group by neighborhood
torman_grouped = torman_onehot.groupby(by = "Neighborhood").mean().reset_index()

In [122]:
# set number of clusters
k = 10
torman_grouped_ = torman_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters = k, random_state = 0).fit(torman_grouped_)

# add clustering labels
#torman_grouped.insert(0, 'Cluster Labels', kmeans.labels_)

torman_merged = torman

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
torman_merged = torman_merged.join(torman_grouped.set_index('Neighborhood'),
                                         on='Neighborhood')

#### I want to find similar neighborhoods to Somerville, Manhattan in Toronto where the number of stores for women is a key indicator:

In [171]:
def find_similar_nbs(neighborhood, city, sort_values_by):
    cluster = torman_merged[torman_merged.loc[:, "Neighborhood"] == neighborhood]["Cluster Labels"].item()
    not_city =  torman_merged[torman_merged.loc[:, "City"] != city]
    similars = not_city.loc[not_city.loc[:, "Cluster Labels"] == cluster].reset_index(drop = True)
    similars_sorted = similars.sort_values(by = "Yoga Studio")
    return similars_sorted, similars_sorted.Neighborhood

In [175]:
map_similar = find_similar_nbs("Somerville", "Manhattan", "Women's Store")[0]
find_similar_nbs("Somerville", "Manhattan", "Women's Store")[1]

0                                  Caledonia-Fairbanks
1        East Toronto, Broadview North (Old East York)
2                              Willowdale, Newtonbrook
3                                        Lawrence Park
4                                               Weston
5                                      York Mills West
6    Milliken, Agincourt North, Steeles East, L'Amo...
Name: Neighborhood, dtype: object

In [180]:
# Create map of New York using latitude and longitude values
map_toronto_similar = folium.Map(location=[map_similar.Latitude[0], map_similar.Longitude[0]], zoom_start = 10)

# Add markers to map
for lat, lng, neighborhood in zip(map_similar['Latitude'],
                                   map_similar['Longitude'],
                                   map_similar['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto_similar)  
    
map_toronto_similar