# Neighborhoods of New York City and Toronto

In [2]:
# Load primary libraries
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
import json # library to handle JSON files

# transforming a JSON file into a pandas DataFrame
from pandas.io.json import json_normalize

!conda install -c conda-forge beautifulsoup4 --yes # get BeautifulSoup package
from bs4 import BeautifulSoup # to parse html file

from pathlib import Path #To check if file exists

!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

Solving environment: done


  current version: 4.5.11
  latest version: 4.7.11

Please update conda by running

    $ conda update -n base -c defaults conda



## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          90 KB

The following NEW packages will be INSTALLED:

    geographiclib: 1.49-py_0   conda-forge
    geopy:         1.20.0-py_0 conda-forge


Downloading and Extracting Packages
geopy-1.20.0         | 57 KB     | ##################################### | 100% 
geographiclib-1.49   | 32 KB     | ##

# D1. Define coordinates for New York City and Toronto

In [9]:
geolocator = Nominatim(user_agent="on_explorer")


def _geocode_city(address):
    """Look up `address` with the shared geolocator and return the location object.

    Raises:
        ValueError: if the geocoder returns no match, so the failure names the
            address instead of surfacing later as an AttributeError on None.
    """
    location = geolocator.geocode(address)
    if location is None:
        raise ValueError('Could not geocode address: {!r}'.format(address))
    return location


# New York City centre coordinates
n_address = 'New York City, NY'
n_location = _geocode_city(n_address)
n_latitude = n_location.latitude
n_longitude = n_location.longitude

# Toronto centre coordinates
t_address = 'Toronto, ON'
t_location = _geocode_city(t_address)
t_latitude = t_location.latitude
t_longitude = t_location.longitude

# create map of New York city using latitude and longitude values
map_ny = folium.Map(location=[n_latitude, n_longitude], zoom_start=10)

# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[t_latitude, t_longitude], zoom_start=10)


# D2a. Get borough and neighborhood data for New York City

In [65]:
# Get from online : New york data
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset

# Get data from json file
with open('newyork_data.json') as json_data:
    ny_data = json.load(json_data)

# Get list of neighborhood from feature tag
ny_neighborhoods_data = ny_data['features']

# define the dataframe columns
ny_column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
ny_neighborhoods = pd.DataFrame(columns=ny_column_names)

for data in ny_neighborhoods_data:
    ny_borough = ny_neighborhood_name = data['properties']['borough'] 
    ny_neighborhood_name = data['properties']['name']
        
    ny_neighborhood_latlon = data['geometry']['coordinates']
    ny_neighborhood_lat = ny_neighborhood_latlon[1]
    ny_neighborhood_lon = ny_neighborhood_latlon[0]
    
    ny_neighborhoods = ny_neighborhoods.append({'Borough': ny_borough,
                                          'Neighborhood': ny_neighborhood_name,
                                          'Latitude': ny_neighborhood_lat,
                                          'Longitude': ny_neighborhood_lon}, ignore_index=True)
ny_neighborhoods.head()


Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


# D2b. Get Borough and neighborhood data for Toronto

In [66]:
# get data from wikipedia for all postal codes located with Toronto city
# A timeout keeps the cell from hanging forever, and raise_for_status() stops
# an HTTP error page from being parsed as if it were the postal-code table.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
source = response.text

# convert to beautiful soup
soup = BeautifulSoup(source, 'html.parser')

def find_in_list(lst, value):
    """Return the index of the first occurrence of `value` in `lst`, or -1 if absent.

    Only the "not found" ValueError from `list.index` is translated to -1;
    any other error (for example `lst` not being a list) is left to propagate.
    """
    try:
        return lst.index(value)
    except ValueError:
        return -1

# find the table in the page
table = soup.find('table', class_="wikitable sortable")
if table is None:
    raise ValueError("Could not find the 'wikitable sortable' postal-code table in the page")

# define lists to store values in each table column
pc=[] # list for postcode
bh=[] # list for borough
nd=[] # list for neighborhood

# loop through each row in the wiki table
for row in table.find_all("tr"):
    # get the column values in each row
    col = row.find_all("td")

    # Rows without three <td> cells (presumably the header row) carry no data.
    # Skip them explicitly instead of swallowing every exception.
    if len(col) < 3:
        continue

    postcode = col[0].text
    borough = col[1].text
    neighborhood = col[2].text.replace('\n','') # clean column value

    # rule #1: Ignore cells with a borough that is Not assigned.
    if borough == "Not assigned":
        continue

    # rule #2: when multiple neighborhoods exist in one postal code area,
    #          combine the neighborhoods with commas.
    # find_in_list returns -1 when the code is new, so index 0 (the first
    # stored postal code) is a valid match and must be merged too.
    idx = find_in_list(pc, postcode)
    if idx >= 0:
        nd[idx] = nd[idx] + "," + neighborhood
    else:
        pc.append(postcode)
        bh.append(borough)
        # rule #3: If a cell has a borough but a 'Not assigned' neighborhood,
        #          then the neighborhood will be the same as the borough.
        nd.append(borough if neighborhood == "Not assigned" else neighborhood)

# define column names
column=['PostalCode', 'Borough', 'Neighborhood']

# create a dataframe from the data
tto=pd.DataFrame({column[0]:pc,column[1]:bh, column[2]:nd})

# save dataframe to csv file
tto.to_csv('temp_toronto_postcodes_borough_neighborhood.csv',index=None)

# coordinates dataframe, with its key column renamed to match `tto`
coordDf = pd.read_csv('Toronto_Geospatial_Coordinates.csv').rename(columns={"Postal Code": "PostalCode"})

# Merge the two dataframes on postal code, then drop the key column
t_neighborhoods = pd.merge(tto, coordDf, on='PostalCode').drop(columns=['PostalCode'])
t_neighborhoods.head()


Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,North York,Parkwoods,43.753259,-79.329656
1,North York,Victoria Village,43.725882,-79.315572
2,Downtown Toronto,"Harbourfront,Regent Park",43.65426,-79.360636
3,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
4,Queen's Park,Queen's Park,43.662301,-79.389494


# M1a. Create a map of New York city with neighborhoods superimposed on top.

In [34]:
# Base map centred on the New York City coordinates
map_ny = folium.Map(location=[n_latitude, n_longitude], zoom_start=12)

# One circle marker per neighborhood; the popup reads "<neighborhood>, <borough>"
for hood in ny_neighborhoods[['Latitude', 'Longitude', 'Borough', 'Neighborhood']].itertuples(index=False):
    popup_text = '{}, {}'.format(hood.Neighborhood, hood.Borough)
    marker = folium.CircleMarker(
        [hood.Latitude, hood.Longitude],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_ny)

map_ny

# M2a. Create a map of Toronto with neighborhoods superimposed on top.

In [35]:
# Base map centred on the Toronto coordinates
map_toronto = folium.Map(location=[t_latitude, t_longitude], zoom_start=10)

# One circle marker per neighborhood; the popup reads "<neighborhood>, <borough>"
for hood in t_neighborhoods[['Latitude', 'Longitude', 'Borough', 'Neighborhood']].itertuples(index=False):
    popup_text = '{}, {}'.format(hood.Neighborhood, hood.Borough)
    marker = folium.CircleMarker(
        [hood.Latitude, hood.Longitude],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto

# D3. Use Foursquare and acquire data for the following features.
#### a> Pizza Place (Food)
#### b> Coffee Place
#### c> Parks / Trails
#### d> Nightlife
#### e> Monument / Landmark

In [67]:
import os

# Foursquare credentials are read from the environment so that no secret is
# stored in the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET
# before starting the kernel; never commit real keys.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

# Only warn here: the credentials are not needed when the venue CSV caches
# already exist, so a hard failure would break an otherwise valid re-run.
if not (CLIENT_ID and CLIENT_SECRET):
    print('Warning: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; Foursquare API calls will fail.')


In [140]:
def get_category_type(row):
    """Return the name of the first category of a venue record, or None.

    The category list is read from row['categories'], falling back to
    row['venue.categories'] when the first key is missing. Only the
    missing-key case triggers the fallback; other errors propagate.

    Returns:
        The 'name' of the first category, or None when the list is empty.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare 'explore' endpoint around each neighborhood.

    Args:
        names: iterable of neighborhood names.
        latitudes: iterable of neighborhood latitudes, aligned with `names`.
        longitudes: iterable of neighborhood longitudes, aligned with `names`.
        radius: search radius passed to the API (default 500).

    Returns:
        DataFrame with one row per returned venue and the columns
        'Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude',
        'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category'.
        The columns are present even when no venue is returned.
        'Venue Category' is None for a venue with an empty category list.

    Raises:
        requests.HTTPError: if the API answers with an error status.

    Uses the module-level CLIENT_ID, CLIENT_SECRET and VERSION.
    """
    LIMIT = 100
    column_names = ['Neighborhood',
                    'Neighborhood Latitude',
                    'Neighborhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests build and encode the query string; the timeout keeps a
        # stalled connection from blocking the whole loop.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            categories = venue['categories']
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue may have no category; do not index into an empty list
                categories[0]['name'] if categories else None))

    # Passing columns= keeps the schema stable even when venue_rows is empty
    nearby_venues = pd.DataFrame(venue_rows, columns=column_names)

    return(nearby_venues)

def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of `row`, best first.

    The first element of `row` is skipped (callers pass rows whose first field
    is the neighborhood name); the remaining values are sorted in descending
    order and the index labels of the first `num_top_venues` are returned.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

def get_city_venues(csvName, nhoods, lats, longs):
    """Return per-neighborhood venue-category frequencies for one city.

    The raw venue list is cached in `csvName`: when that file does not exist
    it is fetched with getNearbyVenues() and written; the data is then always
    read back from the file.

    Args:
        csvName: path of the CSV cache of raw venue rows.
        nhoods: neighborhood names (used only when the cache is missing).
        lats: neighborhood latitudes (used only when the cache is missing).
        longs: neighborhood longitudes (used only when the cache is missing).

    Returns:
        DataFrame with a 'Neighborhood' column followed by one column per
        venue category, holding the mean of the one-hot encoding per
        neighborhood.
    """
    cache_file = Path(csvName)
    if not cache_file.is_file():
        city_venues = getNearbyVenues(names=nhoods,
                                      latitudes=lats,
                                      longitudes=longs)
        city_venues.to_csv(csvName, index=False)

    # read data from file
    city_venues = pd.read_csv(csvName)

    print('There are {} uniques categories.'.format(len(city_venues['Venue Category'].unique())))

    # one hot encoding of the venue category
    city_onehot = pd.get_dummies(city_venues[['Venue Category']], prefix="", prefix_sep="")

    # A venue category that is itself called 'Neighborhood' would otherwise be
    # overwritten by the neighborhood-name column below and silently lost.
    if 'Neighborhood' in city_onehot.columns:
        city_onehot = city_onehot.rename(columns={'Neighborhood': 'Neighborhood (Venue Category)'})

    # put the neighborhood name in the first column
    city_onehot.insert(0, 'Neighborhood', city_venues['Neighborhood'])

    # group rows by neighborhood, taking the mean frequency of each category
    city_grouped = city_onehot.groupby('Neighborhood').mean().reset_index()
    return city_grouped

def get_nHood_Venue_Sorted(city_grouped, num_top_venues=10):
    """Build a table of the most common venue categories per neighborhood.

    Args:
        city_grouped: DataFrame whose first column is 'Neighborhood' and whose
            remaining columns are per-category frequencies.
        num_top_venues: how many top categories to list per neighborhood
            (default 10).

    Returns:
        DataFrame indexed like `city_grouped`, with a 'Neighborhood' column
        followed by '1st Most Common Venue', '2nd Most Common Venue', ...
        When there are fewer categories than `num_top_venues`, the remaining
        cells are None.
    """
    def _ordinal(position):
        # 11th, 12th and 13th are the exceptions to the st/nd/rd rule
        if 11 <= position % 100 <= 13:
            suffix = 'th'
        else:
            suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(position % 10, 'th')
        return '{}{}'.format(position, suffix)

    # create columns according to number of top venues
    columns = ['Neighborhood'] + [
        '{} Most Common Venue'.format(_ordinal(position))
        for position in range(1, num_top_venues + 1)
    ]

    # Collect all rows first and build the frame once
    rows = []
    for _, grouped_row in city_grouped.iterrows():
        top_venues = list(return_most_common_venues(grouped_row, num_top_venues))
        # pad so every row has exactly num_top_venues venue cells
        top_venues += [None] * (num_top_venues - len(top_venues))
        rows.append([grouped_row['Neighborhood']] + top_venues)

    neighborhoods_venues_sorted = pd.DataFrame(rows, columns=columns, index=city_grouped.index)
    return neighborhoods_venues_sorted


# D4a. Get venues data for New York city

In [141]:
# Venue-category frequencies per New York neighborhood (raw venues cached in the CSV)
ny_grouped = get_city_venues(
    csvName='ny_raw_foursquareData.csv',
    nhoods=ny_neighborhoods['Neighborhood'],
    lats=ny_neighborhoods['Latitude'],
    longs=ny_neighborhoods['Longitude'],
)

# Rank the categories within each neighborhood and preview the result
ny_neighborhoods_venues_sorted = get_nHood_Venue_Sorted(ny_grouped)
ny_neighborhoods_venues_sorted.head()

There are 427 uniques categories.


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Allerton,Pizza Place,Deli / Bodega,Spa,Supermarket,Dessert Shop,Bakery,Martial Arts Dojo,Intersection,Gas Station,Spanish Restaurant
1,Annadale,Dance Studio,Food,Pizza Place,Restaurant,Bakery,Sports Bar,American Restaurant,Train Station,Park,Diner
2,Arden Heights,Coffee Shop,Bus Stop,Pizza Place,Pharmacy,Women's Store,Fish & Chips Shop,Eye Doctor,Factory,Falafel Restaurant,Farm
3,Arlington,Grocery Store,Deli / Bodega,Intersection,American Restaurant,Coffee Shop,Bus Stop,Women's Store,Falafel Restaurant,Farm,Farmers Market
4,Arrochar,Deli / Bodega,Italian Restaurant,Bus Stop,Bagel Shop,Polish Restaurant,Pizza Place,Sandwich Place,Athletics & Sports,Mediterranean Restaurant,Hotel


# D4b. Get venues data for Toronto

In [142]:
# Venue-category frequencies per Toronto neighborhood (raw venues cached in the CSV)
t_grouped = get_city_venues(
    csvName='toronto_raw_foursquareData.csv',
    nhoods=t_neighborhoods['Neighborhood'],
    lats=t_neighborhoods['Latitude'],
    longs=t_neighborhoods['Longitude'],
)

# Rank the categories within each neighborhood and preview the result
t_neighborhoods_venues_sorted = get_nHood_Venue_Sorted(t_grouped)
t_neighborhoods_venues_sorted.head()

There are 274 uniques categories.


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond",Coffee Shop,Café,Thai Restaurant,Steakhouse,Bar,Hotel,Asian Restaurant,Gym,American Restaurant,Breakfast Spot
1,Agincourt,Lounge,Breakfast Spot,Skating Rink,Clothing Store,Drugstore,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop
2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",Park,Playground,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Drugstore
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",Grocery Store,Beer Store,Fried Chicken Joint,Fast Food Restaurant,Pharmacy,Coffee Shop,Pizza Place,Sandwich Place,Doner Restaurant,Diner
4,"Alderwood,Long Branch",Pizza Place,Coffee Shop,Skating Rink,Sandwich Place,Pub,Dance Studio,Pharmacy,Gym,Comic Shop,Electronics Store


# D5. Cluster Neighborhoods

In [143]:
# set number of clusters
kclusters = 4

def get_Clusters(city_grouped,neighborhoods_venues_sorted, neighborhoods, latitude,longitude):
    """Cluster neighborhoods with k-means and attach the labels to the neighborhood table.

    Args:
        city_grouped: DataFrame with a 'Neighborhood' column plus one numeric
            column per venue category; every column except 'Neighborhood' is
            used as a clustering feature.
        neighborhoods_venues_sorted: per-neighborhood top-venue table. A
            'Cluster Labels' column is written into this frame (side effect).
        neighborhoods: DataFrame with at least a 'Neighborhood' column; it is
            not modified.
        latitude, longitude: unused here; kept so existing calls keep working.

    Returns:
        `neighborhoods` joined with the labelled top-venue table on
        'Neighborhood', with 'Cluster Labels' as int.

    Uses the module-level `kclusters` as the number of clusters.
    """
    # keyword form: the positional axis argument was removed in pandas 2.0
    city_grouped_clustering = city_grouped.drop(columns='Neighborhood')

    # run k-means clustering
    kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(city_grouped_clustering)

    # Always write the fresh labels. Inserting only when the column is absent
    # left stale labels in place whenever this function was re-run.
    if 'Cluster Labels' in neighborhoods_venues_sorted:
        neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
    else:
        neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

    # add the cluster label and top venues to each neighborhood row
    city_merged = neighborhoods.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

    # NOTE(review): neighborhoods with no venue data have no label after the
    # join and are assigned label 0 here, as before. Confirm whether they
    # should be dropped instead.
    city_merged['Cluster Labels'] = city_merged['Cluster Labels'].fillna(0).astype(int)

    return city_merged
    
    #city_merged.head() # check the last columns!

def generate_Map(city_merged, latitude,longitude):
    """Return a folium map with one circle marker per row of `city_merged`, coloured by cluster.

    `city_merged` must provide 'Latitude', 'Longitude', 'Neighborhood' and an
    integer 'Cluster Labels' column. The palette has `kclusters` entries
    (module-level constant).
    """
    map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

    # one hex colour per cluster, evenly spaced along the rainbow colormap
    palette = [colors.rgb2hex(rgba) for rgba in cm.rainbow(np.linspace(0, 1, kclusters))]

    marker_fields = zip(city_merged['Latitude'],
                        city_merged['Longitude'],
                        city_merged['Neighborhood'],
                        city_merged['Cluster Labels'])
    for lat, lon, poi, cluster in marker_fields:
        # cluster-1 means label 0 wraps around to the last palette entry
        marker_colour = palette[cluster-1]
        popup = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
        marker = folium.CircleMarker(
            [lat, lon],
            radius=5,
            popup=popup,
            color=marker_colour,
            fill=True,
            fill_color=marker_colour,
            fill_opacity=0.7)
        marker.add_to(map_clusters)

    return map_clusters

# D5a. Cluster Neighborhoods for New York city

In [144]:
# Label every New York neighborhood with its k-means cluster, then draw the clusters
ny_merged = get_Clusters(
    city_grouped=ny_grouped,
    neighborhoods_venues_sorted=ny_neighborhoods_venues_sorted,
    neighborhoods=ny_neighborhoods,
    latitude=n_latitude,
    longitude=n_longitude,
)
generate_Map(city_merged=ny_merged, latitude=n_latitude, longitude=n_longitude)

# D5b. Cluster Neighborhoods for Toronto

In [145]:
# Label every Toronto neighborhood with its k-means cluster, then draw the clusters
t_merged = get_Clusters(
    city_grouped=t_grouped,
    neighborhoods_venues_sorted=t_neighborhoods_venues_sorted,
    neighborhoods=t_neighborhoods,
    latitude=t_latitude,
    longitude=t_longitude,
)
generate_Map(city_merged=t_merged, latitude=t_latitude, longitude=t_longitude)

# Examine Clusters
Now, we will examine each cluster and determine the discriminating venue categories that distinguish each cluster. Based on the defining categories, you can then assign a name to each cluster.

##### Cluster 1 : New York City 

In [146]:
# Rows with cluster label 0; keep column 1 (Neighborhood) and every column from position 4 onward
ny_c1 = ny_merged[ny_merged['Cluster Labels'] == 0].iloc[:, [1] + list(range(4, ny_merged.shape[1]))]
print(ny_c1.shape)
ny_c1.head()

(131, 12)


Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Wakefield,0,Sandwich Place,Laundromat,Pharmacy,Food Truck,Dessert Shop,Gas Station,Donut Shop,Pizza Place,Food,Ice Cream Shop
1,Co-op City,0,Baseball Field,Bus Station,Pharmacy,Ice Cream Shop,Pizza Place,Grocery Store,Fast Food Restaurant,Discount Store,Chinese Restaurant,Mattress Store
2,Eastchester,0,Caribbean Restaurant,Deli / Bodega,Diner,Metro Station,Bus Station,Convenience Store,Chinese Restaurant,Fast Food Restaurant,Bakery,Seafood Restaurant
5,Kingsbridge,0,Pizza Place,Sandwich Place,Supermarket,Discount Store,Bar,Mexican Restaurant,Spanish Restaurant,Bakery,Fried Chicken Joint,Latin American Restaurant
7,Woodlawn,0,Playground,Bar,Deli / Bodega,Pizza Place,Food & Drink Shop,Park,Liquor Store,Train Station,Donut Shop,Moving Target


##### Cluster 2 : New York City 

In [147]:
# Rows with cluster label 1; keep column 1 (Neighborhood) and every column from position 4 onward
ny_c2 = ny_merged[ny_merged['Cluster Labels'] == 1].iloc[:, [1] + list(range(4, ny_merged.shape[1]))]
print(ny_c2.shape)
ny_c2.head()

(149, 12)


Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Fieldston,1,River,Playground,Plaza,Women's Store,Financial or Legal Service,Exhibit,Eye Doctor,Factory,Falafel Restaurant,Farm
4,Riverdale,1,Park,Plaza,Bank,Bus Station,Food Truck,Locksmith,Home Service,Gym,Playground,Women's Store
6,Marble Hill,1,Sandwich Place,Coffee Shop,Discount Store,Yoga Studio,Pizza Place,Steakhouse,Spa,Supplement Shop,Gym,Tennis Stadium
9,Williamsbridge,1,Nightclub,Metro Station,Caribbean Restaurant,Soup Place,Bar,Fish Market,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant
12,City Island,1,Harbor / Marina,Ice Cream Shop,Seafood Restaurant,Thrift / Vintage Store,Spanish Restaurant,Music Venue,Italian Restaurant,French Restaurant,Park,Baseball Field


##### Cluster 3 : New York City 

In [148]:
# Rows with cluster label 2; keep column 1 (Neighborhood) and every column from position 4 onward
ny_c3 = ny_merged[ny_merged['Cluster Labels'] == 2].iloc[:, [1] + list(range(4, ny_merged.shape[1]))]
print(ny_c3.shape)
ny_c3.head()

(3, 12)


Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
192,Somerville,2,Park,Women's Store,Exhibit,Factory,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant
203,Todt Hill,2,Park,Women's Store,Exhibit,Factory,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant
303,Bayswater,2,Playground,Park,Women's Store,Fish & Chips Shop,Eye Doctor,Factory,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant


##### Cluster 4 : New York City 

In [149]:
# Rows with cluster label 3; keep column 1 (Neighborhood) and every column from position 4 onward
ny_c4 = ny_merged[ny_merged['Cluster Labels'] == 3].iloc[:, [1] + list(range(4, ny_merged.shape[1]))]
print(ny_c4.shape)
ny_c4.head()

(23, 12)


Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
77,Manhattan Beach,3,Bus Stop,Café,Sandwich Place,Harbor / Marina,Beach,Ice Cream Shop,Pizza Place,Playground,Financial or Legal Service,Factory
89,Ocean Hill,3,Deli / Bodega,Food,Southern / Soul Food Restaurant,Playground,Bus Stop,Fried Chicken Joint,Grocery Store,Bakery,Coffee Shop,Seafood Restaurant
193,Brookville,3,Deli / Bodega,Women's Store,Flower Shop,Factory,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant
198,New Brighton,3,Bus Stop,Deli / Bodega,Park,Bowling Alley,Discount Store,Convenience Store,Pizza Place,Playground,Fish & Chips Shop,Falafel Restaurant
202,Grymes Hill,3,American Restaurant,Bus Stop,Moving Target,Dog Run,Women's Store,Flower Shop,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant


##### Cluster 5 : New York City 

In [150]:
# Rows with cluster label 4; with kclusters = 4 the labels run 0-3, so this selection is expected to be empty
ny_c5 = ny_merged[ny_merged['Cluster Labels'] == 4].iloc[:, [1] + list(range(4, ny_merged.shape[1]))]
print(ny_c5.shape)
ny_c5.head()

(0, 12)


Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


##### Cluster 1 : Toronto

In [153]:
# Rows with cluster label 0; keep column 1 (Neighborhood) and every column from position 4 onward
t_c1 = t_merged[t_merged['Cluster Labels'] == 0].iloc[:, [1] + list(range(4, t_merged.shape[1]))]
print(t_c1.shape)
t_c1.head()

(17, 12)


Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Parkwoods,0,Fast Food Restaurant,Food & Drink Shop,Park,Grocery Store,Falafel Restaurant,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
5,Islington Avenue,0,,,,,,,,,,
10,Glencairn,0,Park,Sushi Restaurant,Pizza Place,Pub,Japanese Restaurant,Dog Run,Dim Sum Restaurant,Diner,Discount Store,Dive Bar
21,Caledonia-Fairbanks,0,Park,Fast Food Restaurant,Market,Women's Store,Gourmet Shop,Golf Course,Grocery Store,Empanada Restaurant,Electronics Store,Eastern European Restaurant
35,East Toronto,0,Park,Pizza Place,Coffee Shop,Convenience Store,Ethiopian Restaurant,Event Space,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dessert Shop


##### Cluster 2 : Toronto

In [154]:
# Rows with cluster label 1; keep column 1 (Neighborhood) and every column from position 4 onward
t_c2 = t_merged[t_merged['Cluster Labels'] == 1].iloc[:, [1] + list(range(4, t_merged.shape[1]))]
print(t_c2.shape)
t_c2.head()

(1, 12)


Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
45,"Silver Hills,York Mills",1,Cafeteria,Drugstore,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Women's Store,College Stadium


##### Cluster 3 : Toronto

In [155]:
# Rows with cluster label 2; keep column 1 (Neighborhood) and every column from position 4 onward
t_c3 = t_merged[t_merged['Cluster Labels'] == 2].iloc[:, [1] + list(range(4, t_merged.shape[1]))]
print(t_c3.shape)
t_c3.head()

(1, 12)


Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
45,"Silver Hills,York Mills",1,Cafeteria,Drugstore,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Women's Store,College Stadium


##### Cluster 4 : Toronto

In [156]:
# Rows with cluster label 3; keep column 1 (Neighborhood) and every column from position 4 onward
t_c4 = t_merged[t_merged['Cluster Labels'] == 3].iloc[:, [1] + list(range(4, t_merged.shape[1]))]
print(t_c4.shape)
t_c4.head()

(1, 12)


Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,"Rouge,Malvern",3,Fast Food Restaurant,Drugstore,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Hakka Restaurant


##### Cluster 5 : Toronto

In [157]:
# Rows with cluster label 4; with kclusters = 4 the labels run 0-3, so this selection is expected to be empty
t_c5 = t_merged[t_merged['Cluster Labels'] == 4].iloc[:, [1] + list(range(4, t_merged.shape[1]))]
print(t_c5.shape)
t_c5.head()

(0, 12)


Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


# Create a map of New York with neighborhoods superimposed on top.

In [None]:

# Create the map in this cell: `map_newyork` was never defined, so adding
# markers to it raised NameError.
map_newyork = folium.Map(location=[n_latitude, n_longitude], zoom_start=12)

# add markers to map
for lat, lng, borough, neighborhood in zip(ny_neighborhoods['Latitude'], ny_neighborhoods['Longitude'], ny_neighborhoods['Borough'], ny_neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)

# display map
map_newyork
    



In [None]:
from folium import plugins

# start again with a clean copy of the map of New York City
ny_map = folium.Map(location = [n_latitude, n_longitude], zoom_start = 12)

# marker-cluster layer that groups nearby neighborhood markers when zoomed out
neighborhood_markers = plugins.MarkerCluster().add_to(ny_map)

# Add one marker per neighborhood. The earlier version looped over
# `df_incidents` and displayed `sanfran_map`, neither of which exists here.
for lat, lng, label in zip(ny_neighborhoods['Latitude'], ny_neighborhoods['Longitude'], ny_neighborhoods['Neighborhood']):
    folium.Marker(
        location=[lat, lng],
        icon=None,
        popup=folium.Popup(label, parse_html=True),
    ).add_to(neighborhood_markers)

# display map
ny_map