# Comparison of Toronto & NYC Commerce Centres

## 1. Web-scrapping Wiki

In [None]:
#Organising
import pandas as pd

#Calculating
import numpy as np

#Webscrapping data
import requests

In [None]:
#WEBSCRAPPING 
import urllib.request

#ORGANISING FOR WEBSCRAPPING
from bs4 import BeautifulSoup 

In [None]:
#Three Columns!
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

In [None]:
Page = urllib.request.urlopen(url)

In [None]:
# Get the Page to it's coded format--
soup = BeautifulSoup(Page, "lxml")

In [None]:
tables = soup.find("table", class_= "wikitable sortable")

In [None]:
#Three Columns = Three Iterators
PostCode = []
Borough = []
Nbhd = []

for row in tables.findAll("tr"):
    cells = row.findAll("td")
    if len(cells) == 3:
        PostCode.append(cells[0].find(text=True))
        Borough.append(cells[1].find(text=True))
        Nbhd.append(cells[2].find(text=True))

In [None]:
#Making DataFrames
df = pd.DataFrame(PostCode, columns = ["Postal Code"])
df["Borough"] = Borough
df["Neighbourhood"] = Nbhd

In [None]:
#Removing all "\n" in DataFrame
#regex = True --> Regular Expression Syntax
df = df.replace('\n', ' ', regex = True)
#Removing all the "Not Assigned"
df = df[~df.Borough.str.contains("Not assigned")]

In [None]:
#Renumbering the data set
df = df.reset_index(drop = True)

In [None]:
print(df.shape)

End of Web Scrapping Canada Postal Codes

## 2: Getting Latitude & Longitude.

In [None]:
#Necessary tools. Getting geocoder & json
!pip install geocoder
import geocoder
import json
import csv

Used given data set as geocoder takes too long and had multiple timeouts.

In [None]:
#Using other data for Geographical Coordinates for postal code
!wget q- O- 'latlng_data.json' http://cocl.us/Geospatial_data

In [None]:
#Get the Latitude & Longitudinal Data
with open('Geospatial_data') as csv_data:
    lldf = pd.read_csv(csv_data)

In [None]:
#Sorting two dataframes
df.sort_values("Postal Code", inplace = True)
df = df.reset_index(drop = True)
print(df)
lldf.sort_values("Postal Code", inplace = True)
print(lldf)

In [None]:
#Combining two dataframes into one
lldf = lldf.rename(columns = {"Postal Code":"PS"})
gdf = pd.concat([df, lldf], axis = 1)
gdf.drop(columns = ["PS"], inplace = True)
gdf

End of adding latitude/longditude portion of the project

## 3. Clustering Data in Toronto vs NYC:

1. I will be using folium to get the map of Toronto and NYC.

2. I will create a list of restaurants/cafes in NYC & Toronto using Foursquare

3. I will cluster the restaurants and cafes using K-means cluster for Toronto & NYC.

4. Using the clusters, I will compare the density of business part of both NYC and Toronto to each other.

In [95]:
#Getting NYC Data, courtesy of NYU Spatial Data Repository (download link below)
# https://geo.nyu.edu/catalog/nyu_2451_34572
path = "/Users/Dansong/Desktop/newyork_data.json"
with open(path) as json_data:
    newyork_data = json.load(json_data)
newyork_data

{'type': 'FeatureCollection',
 'totalFeatures': 306,
 'features': [{'type': 'Feature',
   'id': 'nyu_2451_34572.1',
   'geometry': {'type': 'Point',
    'coordinates': [-73.84720052054902, 40.89470517661]},
   'geometry_name': 'geom',
   'properties': {'name': 'Wakefield',
    'stacked': 1,
    'annoline1': 'Wakefield',
    'annoline2': None,
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.84720052054902,
     40.89470517661,
     -73.84720052054902,
     40.89470517661]}},
  {'type': 'Feature',
   'id': 'nyu_2451_34572.2',
   'geometry': {'type': 'Point',
    'coordinates': [-73.82993910812398, 40.87429419303012]},
   'geometry_name': 'geom',
   'properties': {'name': 'Co-op City',
    'stacked': 2,
    'annoline1': 'Co-op',
    'annoline2': 'City',
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.82993910812398,
     40.87429419303012,
     -73.82993910812398,
     40.87429419303012]}},
  {'type': 'Feature',
 

In [97]:
#Making Panda Dataset for NYC
nycrawdata = newyork_data["features"]
column_name = ["Borough", "Neighbourhood", "Latitude", "Longitude"]

nyc_df = pd.DataFrame(columns = column_name)

for data in nycrawdata:
    boro = data["properties"]["borough"]
    nhbd = data["properties"]["name"]
    lati = data["geometry"]["coordinates"][1]
    long = data["geometry"]["coordinates"][0]
    
    nyc_df = nyc_df.append({"Borough": boro,
                           "Neighbourhood": nhbd,
                           "Latitude": lati,
                           "Longitude": long}, ignore_index = True)

nyc_df

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585
...,...,...,...,...
301,Manhattan,Hudson Yards,40.756658,-74.000111
302,Queens,Hammels,40.587338,-73.805530
303,Queens,Bayswater,40.611322,-73.765968
304,Queens,Queensbridge,40.756091,-73.945631


In [None]:
#importing Folium
!conda install -c conda-forge folium=0.5.0 --yes
import folium

In [None]:
#Drawing Map
maptrt = folium.Map(location = [43.6532, -79.3832], zoom_start = 10)

#Marker for Toronto's Neighbourhoods
for lat, long, label in zip(gdf["Latitude"], gdf["Longitude"], gdf["Neighbourhood"]):
    label = folium.Popup(label, parse_html = True)
    folium.CircleMarker(
    [lat, long],
    radius = 5,
    popup = label,
    color = "blue",
    fill = True,
    fill_color = "#3186cc",
    fill_opacity = 0.8,
    parse_html = False).add_to(maptrt)
    
maptrt

In [None]:
#Preparation for Foursquare for neighbourhood data

CLIENT_ID = '5XXEGPJ5FYIEZLLKQSWC23131XARNI2QWG2CBSEBOONPHPUU'
CLIENT_SECRET = 'R5VD2CBYASPNM5WPPIKG1HO10ASGEE3J2UXKWKLEZHUL4R1X'
VERSION = '20180605'

In [83]:
#Setting up getting Venue Details w/ Foursquare
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        results = requests.get(url).json()["response"]['groups'][0]['items']

        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [84]:
#Getting Venues for Toronto
Trt_venu = getNearbyVenues(gdf["Neighbourhood"], gdf["Latitude"], gdf["Longitude"])

Malvern, Rouge 
Rouge Hill, Port Union, Highland Creek 
Guildwood, Morningside, West Hill 
Woburn 
Cedarbrae 
Scarborough Village 
Kennedy Park, Ionview, East Birchmount Park 
Golden Mile, Clairlea, Oakridge 
Cliffside, Cliffcrest, Scarborough Village West 
Birch Cliff, Cliffside West 
Dorset Park, Wexford Heights, Scarborough Town Centre 
Wexford, Maryvale 
Agincourt 
Clarks Corners, Tam O'Shanter, Sullivan 
Milliken, Agincourt North, Steeles East, L'Amoreaux East 
Steeles West, L'Amoreaux West 
Upper Rouge 
Hillcrest Village 
Fairview, Henry Farm, Oriole 
Bayview Village 
York Mills, Silver Hills 
Willowdale, Newtonbrook 
Willowdale, Willowdale East 
York Mills West 
Willowdale, Willowdale West 
Parkwoods 
Don Mills 
Don Mills 
Bathurst Manor, Wilson Heights, Downsview North 
Northwood Park, York University 
Downsview 
Downsview 
Downsview 
Downsview 
Victoria Village 
Parkview Hill, Woodbine Gardens 
Woodbine Heights 
The Beaches 
Leaside 
Thorncliffe Park 
East Toronto, Broadview N

In [98]:
#Getting Venues for NYC
Nyc_venu = getNearbyVenues(nyc_df["Neighbourhood"], nyc_df["Latitude"], nyc_df["Longitude"])

Wakefield
Co-op City
Eastchester
Fieldston
Riverdale
Kingsbridge
Marble Hill
Woodlawn
Norwood
Williamsbridge
Baychester
Pelham Parkway
City Island
Bedford Park
University Heights
Morris Heights
Fordham
East Tremont
West Farms
High  Bridge
Melrose
Mott Haven
Port Morris
Longwood
Hunts Point
Morrisania
Soundview
Clason Point
Throgs Neck
Country Club
Parkchester
Westchester Square
Van Nest
Morris Park
Belmont
Spuyten Duyvil
North Riverdale
Pelham Bay
Schuylerville
Edgewater Park
Castle Hill
Olinville
Pelham Gardens
Concourse
Unionport
Edenwald
Bay Ridge
Bensonhurst
Sunset Park
Greenpoint
Gravesend
Brighton Beach
Sheepshead Bay
Manhattan Terrace
Flatbush
Crown Heights
East Flatbush
Kensington
Windsor Terrace
Prospect Heights
Brownsville
Williamsburg
Bushwick
Bedford Stuyvesant
Brooklyn Heights
Cobble Hill
Carroll Gardens
Red Hook
Gowanus
Fort Greene
Park Slope
Cypress Hills
East New York
Starrett City
Canarsie
Flatlands
Mill Island
Manhattan Beach
Coney Island
Bath Beach
Borough Park
Dyker

In [100]:
print(Trt_venu.shape)
print(Nyc_venu.shape)

(2152, 7)
(10124, 7)


### Clustering for NYC

In [117]:
#Getting dummy values for all the categories of shops for NYC

#Making analyis easier by making 'have place/not have place' as 0/1
dummy_nyc = pd.get_dummies(Nyc_venu[["Venue Category"]], prefix = "", prefix_sep="")
dummy_nyc["Neighbourhood"] = Nyc_venu["Neighbourhood"]

#Reorganisng dataframe, cleaning the columns
fixed_col = [dummy_nyc.columns[-1]] + list(dummy_nyc.columns[:-1])
dummy_nyc = dummy_nyc[fixed_col]

#Sorting out the dummies by area
grp_nyc_ven = dummy_nyc.groupby("Neighbourhood").mean().reset_index()

#Getting the 5 most popular places for each neighbourhood :D
for nhbd in grp_nyc_ven["Neighbourhood"]:
    print("----"+nhbd+"----")
    temp = grp_nyc_ven[grp_nyc_ven["Neighbourhood"] == nhbd].T.reset_index()
    temp.columns = ["venue","freq"]
    temp = temp.iloc[1:]
    temp["freq"] = temp["freq"].astype(float)
    temp = temp.round({"freq": 2})
    print(temp.sort_values("freq", ascending=False).reset_index(drop=True).head(5))
    print("\n")

----Allerton----
                  venue  freq
0           Pizza Place  0.15
1         Deli / Bodega  0.12
2           Supermarket  0.08
3      Department Store  0.04
4  Used Auto Dealership  0.04


----Annadale----
                 venue  freq
0          Pizza Place  0.31
1  American Restaurant  0.15
2           Restaurant  0.08
3        Train Station  0.08
4       Cosmetics Shop  0.08


----Arden Heights----
               venue  freq
0           Pharmacy  0.25
1        Coffee Shop  0.25
2        Pizza Place  0.25
3             Lawyer  0.25
4  Accessories Store  0.00


----Arlington----
                 venue  freq
0        Deli / Bodega   0.2
1             Bus Stop   0.2
2  American Restaurant   0.2
3         Intersection   0.2
4        Grocery Store   0.2


----Arrochar----
                venue  freq
0            Bus Stop  0.17
1       Deli / Bodega  0.09
2  Italian Restaurant  0.09
3          Bagel Shop  0.09
4  Athletics & Sports  0.04


----Arverne----
            venue  freq
0

           venue  freq
0  Deli / Bodega  0.17
1       Pharmacy  0.17
2          Diner  0.17
3           Bank  0.17
4    Pizza Place  0.17


----Castleton Corners----
            venue  freq
0     Pizza Place  0.20
1            Bank  0.13
2   Go Kart Track  0.07
3  Ice Cream Shop  0.07
4       Mini Golf  0.07


----Central Harlem----
                 venue  freq
0   African Restaurant  0.07
1   Chinese Restaurant  0.05
2   Seafood Restaurant  0.05
3  American Restaurant  0.05
4       Cosmetics Shop  0.05


----Charleston----
            venue  freq
0  Cosmetics Shop  0.07
1   Big Box Store  0.07
2     Coffee Shop  0.07
3          Bakery  0.03
4     Pizza Place  0.03


----Chelsea----
                 venue  freq
0          Coffee Shop  0.09
1          Art Gallery  0.05
2    French Restaurant  0.03
3   Italian Restaurant  0.03
4  American Restaurant  0.03


----Chinatown----
                venue  freq
0  Chinese Restaurant  0.08
1              Bakery  0.06
2        Cocktail Bar  0.05
3 

                        venue  freq
0                        Food  0.33
1            Sculpture Garden  0.33
2  Construction & Landscaping  0.33
3        Pakistani Restaurant  0.00
4                   Pet Store  0.00


----Erasmus----
                  venue  freq
0  Caribbean Restaurant  0.19
1    Chinese Restaurant  0.10
2     Convenience Store  0.05
3     Health Food Store  0.05
4  Gym / Fitness Center  0.05


----Far Rockaway----
                venue  freq
0         Pizza Place  0.13
1       Deli / Bodega  0.13
2  Chinese Restaurant  0.10
3      Breakfast Spot  0.06
4                Bank  0.06


----Fieldston----
                  venue  freq
0                 River  0.25
1  Medical Supply Store  0.25
2           Bus Station  0.25
3                 Plaza  0.25
4     Accessories Store  0.00


----Financial District----
                 venue  freq
0          Coffee Shop  0.10
1          Pizza Place  0.05
2         Cocktail Bar  0.04
3  American Restaurant  0.04
4                 Caf

              venue  freq
0              Bank   0.2
1    Sandwich Place   0.1
2       Bridal Shop   0.1
3  Asian Restaurant   0.1
4        Nail Salon   0.1


----Hunters Point----
                 venue  freq
0                 Café  0.07
1   Italian Restaurant  0.06
2  Japanese Restaurant  0.04
3          Coffee Shop  0.04
4                  Bar  0.03


----Hunts Point----
                venue  freq
0                Food  0.13
1      Farmers Market  0.07
2         Pizza Place  0.07
3  Seafood Restaurant  0.07
4              Bakery  0.07


----Inwood----
                venue  freq
0  Mexican Restaurant  0.07
1              Lounge  0.05
2          Restaurant  0.05
3                Café  0.05
4                Park  0.03


----Jackson Heights----
                       venue  freq
0  Latin American Restaurant  0.11
1        Peruvian Restaurant  0.08
2  South American Restaurant  0.06
3          Mobile Phone Shop  0.05
4                     Bakery  0.05


----Jamaica Center----
          

                 venue  freq
0        Grocery Store  0.11
1        Metro Station  0.11
2          Pizza Place  0.11
3  Fried Chicken Joint  0.11
4                 Park  0.05


----New Springville----
                venue  freq
0         Coffee Shop  0.09
1         Pizza Place  0.09
2          Bagel Shop  0.09
3  Chinese Restaurant  0.09
4       Deli / Bodega  0.04


----Noho----
                venue  freq
0         Coffee Shop  0.05
1  Italian Restaurant  0.05
2         Pizza Place  0.04
3  Mexican Restaurant  0.04
4         Art Gallery  0.04


----North Corona----
                  venue  freq
0         Deli / Bodega  0.19
1           Pizza Place  0.10
2  Gym / Fitness Center  0.10
3                Bakery  0.10
4  Caribbean Restaurant  0.05


----North Riverdale----
                venue  freq
0         Pizza Place  0.14
1  Chinese Restaurant  0.10
2                Bank  0.10
3  Italian Restaurant  0.10
4        Burger Joint  0.05


----North Side----
         venue  freq
0  Coffee 

                       venue  freq
0                      Beach  0.17
1             Ice Cream Shop  0.06
2                  BBQ Joint  0.04
3  Latin American Restaurant  0.04
4                 Bagel Shop  0.04


----Rockaway Park----
            venue  freq
0           Beach  0.21
1      Donut Shop  0.08
2     Pizza Place  0.08
3           Diner  0.04
4  Sandwich Place  0.04


----Roosevelt Island----
                  venue  freq
0                  Park  0.07
1   Monument / Landmark  0.04
2  Gym / Fitness Center  0.04
3        Scenic Lookout  0.04
4         Metro Station  0.04


----Rosebank----
                 venue  freq
0        Grocery Store  0.07
1   Italian Restaurant  0.07
2         Gourmet Shop  0.04
3               Museum  0.04
4  Filipino Restaurant  0.04


----Rosedale----
               venue  freq
0        Bus Station  0.15
1  Accessories Store  0.08
2     Baseball Field  0.08
3      Jewelry Store  0.08
4           Pharmacy  0.08


----Rossville----
           venue  fre

                venue  freq
0                Café  0.06
1              Bakery  0.04
2  Chinese Restaurant  0.03
3       Grocery Store  0.03
4   Mobile Phone Shop  0.03


----Weeksville----
                venue  freq
0      Discount Store  0.13
1            Bus Line  0.07
2                Café  0.07
3      History Museum  0.07
4  Chinese Restaurant  0.07


----West Brighton----
                venue  freq
0         Coffee Shop  0.08
1      Cosmetics Shop  0.05
2                Bank  0.05
3                 Bar  0.05
4  Italian Restaurant  0.05


----West Farms----
              venue  freq
0       Bus Station  0.20
1              Park  0.10
2     Metro Station  0.10
3  Basketball Court  0.05
4    Sandwich Place  0.05


----West Village----
                     venue  freq
0       Italian Restaurant  0.09
1  New American Restaurant  0.05
2      American Restaurant  0.05
3             Cocktail Bar  0.04
4                     Park  0.04


----Westchester Square----
                  venue 

In [119]:
# GETTING ORDERING FUNCTION FOR VENUES!
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [123]:
num_top_venues = 10
indicators = ["st", "nd", "rd"]

columns = ["Neighbourhood"]
for ind in np.arange(num_top_venues):
    try:
        columns.append("{}{} Most Common Venue".format(ind+1, indicators[ind]))
    except:
        columns.append("{}th Most Common Venue".format(ind+1))


nyc_venu_sort = pd.DataFrame(columns=columns)
nyc_venu_sort["Neighbourhood"] = grp_nyc_ven["Neighbourhood"]

for ind in np.arange(grp_nyc_ven.shape[0]):
    nyc_venu_sort.iloc[ind, 1:] = return_most_common_venues(grp_nyc_ven.iloc[ind,
                                                                            :],
                                                                          num_top_venues)
nyc_venu_sort.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Allerton,Pizza Place,Deli / Bodega,Supermarket,Department Store,Bus Station,Breakfast Spot,Gas Station,Food,Fast Food Restaurant,Check Cashing Service
1,Annadale,Pizza Place,American Restaurant,Restaurant,Liquor Store,Train Station,Cosmetics Shop,Diner,Park,Pharmacy,Falafel Restaurant
2,Arden Heights,Lawyer,Pizza Place,Coffee Shop,Pharmacy,Fountain,Filipino Restaurant,Exhibit,Eye Doctor,Fried Chicken Joint,Factory
3,Arlington,Deli / Bodega,American Restaurant,Bus Stop,Intersection,Grocery Store,Flea Market,Factory,Falafel Restaurant,Farm,Farmers Market
4,Arrochar,Bus Stop,Italian Restaurant,Bagel Shop,Deli / Bodega,Nail Salon,Polish Restaurant,Outdoors & Recreation,Pizza Place,Sandwich Place,Athletics & Sports


### Clustering for Toronto

In [118]:
#Doing the same for Toronto [Copy/Paste above and changed names]

#Making analyis easier by making 'have place/not have place' as 0/1
dummy_trt = pd.get_dummies(Trt_venu[["Venue Category"]], prefix = "", prefix_sep="")
dummy_trt["Neighbourhood"] = Trt_venu["Neighbourhood"]

#Reorganisng dataframe, cleaning the columns
fixed_cols = [dummy_trt.columns[-1]] + list(dummy_trt.columns[:-1])
dummy_trt = dummy_trt[fixed_cols]

#Sorting out the dummies by area
grp_trt_ven = dummy_trt.groupby("Neighbourhood").mean().reset_index()

#Getting the 5 most popular places for each neighbourhood :D
for nhbd in grp_trt_ven["Neighbourhood"]:
    print("----"+nhbd+"----")
    temp = grp_trt_ven[grp_trt_ven["Neighbourhood"] == nhbd].T.reset_index()
    temp.columns = ["venue","freq"]
    temp = temp.iloc[1:]
    temp["freq"] = temp["freq"].astype(float)
    temp = temp.round({"freq": 2})
    print(temp.sort_values("freq", ascending=False).reset_index(drop=True).head(5))
    print("\n")

----Agincourt ----
                       venue  freq
0  Latin American Restaurant  0.25
1             Breakfast Spot  0.25
2                     Lounge  0.25
3               Skating Rink  0.25
4  Middle Eastern Restaurant  0.00


----Alderwood, Long Branch ----
                venue  freq
0         Pizza Place  0.22
1         Coffee Shop  0.11
2        Skating Rink  0.11
3      Sandwich Place  0.11
4  Athletics & Sports  0.11


----Bathurst Manor, Wilson Heights, Downsview North ----
         venue  freq
0  Coffee Shop  0.09
1         Bank  0.09
2    Pet Store  0.05
3  Bridal Shop  0.05
4         Park  0.05


----Bayview Village ----
                 venue  freq
0  Japanese Restaurant  0.25
1                 Bank  0.25
2   Chinese Restaurant  0.25
3                 Café  0.25
4               Museum  0.00


----Bedford Park, Lawrence Manor East ----
                venue  freq
0      Sandwich Place  0.07
1          Restaurant  0.07
2         Coffee Shop  0.07
3  Italian Restaurant  0.0

                      venue  freq
0      Fast Food Restaurant   1.0
1             Movie Theater   0.0
2            Massage Studio   0.0
3            Medical Center   0.0
4  Mediterranean Restaurant   0.0


----Milliken, Agincourt North, Steeles East, L'Amoreaux East ----
                       venue  freq
0                 Playground   0.5
1                       Park   0.5
2          Accessories Store   0.0
3  Middle Eastern Restaurant   0.0
4        Monument / Landmark   0.0


----Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West ----
                  venue  freq
0        Hardware Store  0.07
1         Tanning Salon  0.07
2        Discount Store  0.07
3        Sandwich Place  0.07
4  Fast Food Restaurant  0.07


----Moore Park, Summerhill East ----
               venue  freq
0        Summer Camp  0.25
1                Gym  0.25
2               Park  0.25
3         Restaurant  0.25
4  Accessories Store  0.00


----New Toronto, Mimico South



----York Mills West ----
                 venue  freq
0    Convenience Store   0.5
1                 Park   0.5
2    Accessories Store   0.0
3   Miscellaneous Shop   0.0
4  Moroccan Restaurant   0.0




In [125]:
columns = ["Neighbourhood"]
for ind in np.arange(num_top_venues):
    try:
        columns.append("{}{} Most Common Venue".format(ind+1, indicators[ind]))
    except:
        columns.append("{}th Most Common Venue".format(ind+1))


trt_venu_sort = pd.DataFrame(columns=columns)
trt_venu_sort["Neighbourhood"] = grp_trt_ven["Neighbourhood"]

for ind in np.arange(grp_trt_ven.shape[0]):
    trt_venu_sort.iloc[ind, 1:] = return_most_common_venues(grp_trt_ven.iloc[ind,
                                                                            :],
                                                                          num_top_venues)
trt_venu_sort.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Lounge,Breakfast Spot,Latin American Restaurant,Skating Rink,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
1,"Alderwood, Long Branch",Pizza Place,Pharmacy,Gym,Sandwich Place,Coffee Shop,Skating Rink,Athletics & Sports,Pub,Distribution Center,Dim Sum Restaurant
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Fried Chicken Joint,Shopping Mall,Sandwich Place,Diner,Deli / Bodega,Restaurant,Middle Eastern Restaurant,Supermarket
3,Bayview Village,Café,Bank,Japanese Restaurant,Chinese Restaurant,Yoga Studio,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
4,"Bedford Park, Lawrence Manor East",Coffee Shop,Sandwich Place,Italian Restaurant,Restaurant,Pizza Place,Breakfast Spot,Comfort Food Restaurant,Juice Bar,Fast Food Restaurant,Butcher


In [134]:
#Using K-Means Cluster to... cluster both
from sklearn.cluster import KMeans

grp_nyc_clustr = grp_nyc_ven.drop("Neighbourhood", 1)
km_nyc = KMeans(n_clusters= 5, random_state=0).fit(grp_nyc_clustr)

grp_trt_clustr = grp_trt_ven.drop("Neighbourhood", 1)
km_trt = KMeans(n_clusters= 5, random_state=0).fit(grp_trt_clustr)

print(km_nyc.labels_[0:5])
print(km_trt.labels_[0:5])

[0 0 0 1 1]
[0 0 0 0 0]


In [136]:
#Data for all
nyc_venu_sort.insert(0, "Cluster Labels", km_nyc.labels_)

nycdf_merg = nyc_df
nycdf_merg = nycdf_merg.join(nyc_venu_sort.set_index('Neighbourhood'), on='Neighbourhood')

nycdf_merg.head()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bronx,Wakefield,40.894705,-73.847201,3,Pharmacy,Donut Shop,Laundromat,Deli / Bodega,Sandwich Place,Dessert Shop,Gas Station,Food,Ice Cream Shop,Financial or Legal Service
1,Bronx,Co-op City,40.874294,-73.829939,3,Baseball Field,Bus Station,Fast Food Restaurant,Post Office,Park,Pharmacy,Bagel Shop,Grocery Store,Pizza Place,Basketball Court
2,Bronx,Eastchester,40.887556,-73.827806,3,Bus Station,Caribbean Restaurant,Deli / Bodega,Diner,Food & Drink Shop,Cosmetics Shop,Pizza Place,Bowling Alley,Bakery,Convenience Store
3,Bronx,Fieldston,40.895437,-73.905643,3,Medical Supply Store,Plaza,River,Bus Station,Event Space,Exhibit,Eye Doctor,Factory,Falafel Restaurant,Farm
4,Bronx,Riverdale,40.890834,-73.912585,3,Bus Station,Park,Baseball Field,Bank,Gym,Plaza,Medical Supply Store,Financial or Legal Service,Eye Doctor,Factory


In [139]:
trt_venu_sort.insert(0, "Cluster Label", km_trt.labels_)

trtdf_merg = gdf
trtdf_merg = trtdf_merg.join(trt_venu_sort.set_index('Neighbourhood'), on='Neighbourhood')

trtdf_merg.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Label,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,0.0,0.0,Fast Food Restaurant,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Yoga Studio,Dessert Shop
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,0.0,0.0,Bar,Golf Course,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,0.0,0.0,Breakfast Spot,Electronics Store,Intersection,Restaurant,Medical Center,Rental Car Location,Bank,Mexican Restaurant,Yoga Studio,Distribution Center
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0.0,0.0,Coffee Shop,Soccer Field,Korean Restaurant,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0.0,0.0,Bank,Hakka Restaurant,Lounge,Caribbean Restaurant,Athletics & Sports,Fried Chicken Joint,Gas Station,Thai Restaurant,Bakery,Dog Run


## The End