# Segmenting and Clustering Neighborhoods in Toronto

## Section 1.


## a) Use the BeautifulSoup package or any other way you are comfortable with to transform the data in the table on the Wikipedia page into the above pandas dataframe

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [2]:
import requests
webseitdata=requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

In [3]:
# parse data from the html into a beautifulsoup object
from bs4 import BeautifulSoup
soup = BeautifulSoup(webseitdata,'html.parser')
#print(soup.prettify())

In [4]:
# create three lists to store table data
postalCodeList = []
boroughList = []
neighborhoodList = []

In [5]:
for row in soup.find('table').find_all('tr'):
    cells = row.find_all('td')
    if (len(cells)>0):
        postalCodeList.append(cells[0].text)
        boroughList.append(cells[1].text)
        neighborhoodList.append(cells[2].text)

In [6]:
postalCodeList=list(map(lambda x:x.rstrip("\n"),postalCodeList))
boroughList=list(map(lambda x:x.rstrip("\n"),boroughList))
neighborhoodList=list(map(lambda x:x.rstrip("\n"),neighborhoodList))

In [11]:
df=pd.DataFrame({"PostalCode":postalCodeList, "Borough":boroughList,"Neighborhood":neighborhoodList})
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


## b) Only processing the cells that have an assigned borough. Ignoring the cells with a borough that is Not assigned. Droping row where borough is "Not assigned


In [10]:
df_clean1=df[df.Borough!="Not assigned"].reset_index(drop=True)
df_clean1.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


## c) If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough

In [12]:
# group neighborhoods in the same borough
toronto_df_grouped = df_clean1.groupby(["PostalCode", "Borough"], as_index=False).agg(lambda x: ", ".join(x))
toronto_df_grouped.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


## d) For Neighborhood="Not assigned", make the value the same as Borough

In [13]:
toronto_df_grouped["Neighborhood"]=toronto_df_grouped["Neighborhood"].replace("Not assigned",toronto_df_grouped["Borough"])
toronto_df_grouped.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [16]:
### 6. Check whether the data looks same as the list reqired
test_list = ["M5G", "M2H", "M4B", "M1J", "M4G", "M4M", "M1R", "M9V", "M9L", "M5V", "M1B", "M5A"]

test_df=pd.DataFrame(columns=toronto_df_grouped.columns)

for postalcode in test_list:
    test_df=test_df.append(toronto_df_grouped[toronto_df_grouped["PostalCode"]==postalcode],ignore_index=True)
test_df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M5G,Downtown Toronto,Central Bay Street
1,M2H,North York,Hillcrest Village
2,M4B,East York,"Parkview Hill, Woodbine Gardens"
3,M1J,Scarborough,Scarborough Village
4,M4G,East York,Leaside
5,M4M,East Toronto,Studio District
6,M1R,Scarborough,"Wexford, Maryvale"
7,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."
8,M9L,North York,Humber Summit
9,M5V,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Har..."


## e) Finally, print the number of rows of the cleaned dataframe

In [17]:
toronto_df_grouped.shape

(103, 3)

# Section 2

## Use the Geocoder package or the csv file to create dataframe with longitude and latitude values

### We will be using a csv file that has the geographical coordinates of each postal code: http://cocl.us/Geospatial_data

In [18]:
geo_url="http://cocl.us/Geospatial_data"
geo_data=pd.read_csv(geo_url)

In [19]:
#geo_data.columns
geo_data.columns=['PostalCode', 'Latitude', 'Longitude']

In [20]:
toronto_df2=pd.merge(toronto_df_grouped,geo_data,how='inner',on="PostalCode")
toronto_df2

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


## Explore and cluster the neighborhoods in Toronto

In [21]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(toronto_df2['Borough'].unique()),
        toronto_df2.shape[0]
    )
)

The dataframe has 10 boroughs and 103 neighborhoods.


In [32]:
!pip install conda
!pip install geopy
!pip install folium 
from geopy.geocoders import Nominatim
import geopy
# convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Collecting folium
  Downloading folium-0.11.0-py2.py3-none-any.whl (93 kB)
Collecting branca>=0.3.0
  Downloading branca-0.4.1-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0
Libraries imported.


In [33]:
#get Latitute and longitude of toronto

address = 'Toronto, ON'

geolocator = Nominatim(user_agent="ON")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of toronto City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of toronto City are 43.6534817, -79.3839347.


In [34]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)
# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_df2['Latitude'], toronto_df2['Longitude'], toronto_df2['Borough'], toronto_df2['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto) 
map_toronto

In [35]:
## Step 3c. Foursquare to explore
df_borough_toronto=toronto_df2[toronto_df2["Borough"].str.contains("Toronto")].reset_index(drop=True)
df_borough_toronto.size

195

In [36]:
df_borough_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [37]:
df_borough_toronto["Borough"].unique()

array(['East Toronto', 'Central Toronto', 'Downtown Toronto',
       'West Toronto'], dtype=object)

In [38]:
df_borough_toronto["color"]=df_borough_toronto["Borough"].map({'East Toronto':"green", 'Central Toronto':"red", 'Downtown Toronto':"blue",
       'West Toronto':"black"})

In [39]:
df_borough_toronto.shape

(39, 6)

In [40]:
# create map of Torronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)
# add markers to map
for lat, lng, borough, neighborhood,clr in zip(df_borough_toronto['Latitude'], df_borough_toronto['Longitude'], df_borough_toronto['Borough'], df_borough_toronto['Neighborhood'],df_borough_toronto["color"]):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=clr,
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto) 
map_toronto

In [41]:
CLIENT_ID = '2VKTSJV0IYURC5MRWD0IAEWK413VB2JMAXPN1EULK2SRHXAJ' # your Foursquare ID
CLIENT_SECRET = '53ST1JAMROP4R10XLEN4JJ0I2E3QORNDX1UW4DBTOCZDIRGI' # your Foursquare Secret
VERSION = '20180604'

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 2VKTSJV0IYURC5MRWD0IAEWK413VB2JMAXPN1EULK2SRHXAJ
CLIENT_SECRET:53ST1JAMROP4R10XLEN4JJ0I2E3QORNDX1UW4DBTOCZDIRGI


In [42]:
#first neigbourhood
neighborhood_latitude1=df_borough_toronto["Latitude"][0]
neighborhood_longitude1=df_borough_toronto["Longitude"][0]
neighborhood_name1=df_borough_toronto["Neighborhood"][0]

print (f"{neighborhood_name1} has lognitude and latitude as : [{neighborhood_latitude1},{neighborhood_longitude1}]")

The Beaches has lognitude and latitude as : [43.67635739999999,-79.2930312]


In [43]:
# Setup API URL to explore venues near by
LIMIT=100
RADIUS=500
url=f"https://api.foursquare.com/v2/venues/explore?client_id={CLIENT_ID}&client_secret={CLIENT_SECRET}&ll={neighborhood_latitude1},{neighborhood_longitude1}&v={VERSION}&radius={RADIUS}&limit={LIMIT}"
neighborhood_json = requests.get(url).json()["response"]["groups"][0]["items"]

In [44]:
# Serializing json
import json
json_object = json.dumps(neighborhood_json, indent = 4)

In [45]:
#save data as json file to explore
with open("jsonData.json","w") as f:
    f.write(json_object)

In [46]:
venues=neighborhood_json

In [47]:
#flatten Json
from pandas.io.json import json_normalize
nearby_venues=json_normalize(venues)

  This is separate from the ipykernel package so we can avoid doing imports until


In [48]:
filtered_columns=['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']

In [49]:
nearby_venues=nearby_venues.loc[:,filtered_columns]

In [50]:
def getCategory_type(row):
    try:
        category_list=row["name"]
    except:
        category_list=row["venue.categories"]
    if len(category_list)==0:
        return None
    else:
        return category_list[0]["name"]

In [51]:
nearby_venues["categories"]= [x[0]["name"] for x in nearby_venues["venue.categories"]]

In [52]:
nearby_venues.drop(["venue.categories"],axis=1,inplace=True)

In [53]:
nearby_venues

Unnamed: 0,venue.name,venue.location.lat,venue.location.lng,categories
0,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,Grover Pub and Grub,43.679181,-79.297215,Pub
3,Upper Beaches,43.680563,-79.292869,Neighborhood


In [54]:
#we have just explored one neighbourhood and its nearby venues and their category now we will be doing for all nerwighborhood in
df_borough_toronto

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,color
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,green
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,green
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572,green
3,M4M,East Toronto,Studio District,43.659526,-79.340923,green
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,red
5,M4P,Central Toronto,Davisville North,43.712751,-79.390197,red
6,M4R,Central Toronto,"North Toronto West, Lawrence Park",43.715383,-79.405678,red
7,M4S,Central Toronto,Davisville,43.704324,-79.38879,red
8,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,red
9,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,red


In [55]:
CLIENT_ID = '2VKTSJV0IYURC5MRWD0IAEWK413VB2JMAXPN1EULK2SRHXAJ' # your Foursquare ID
CLIENT_SECRET = '53ST1JAMROP4R10XLEN4JJ0I2E3QORNDX1UW4DBTOCZDIRGI' # your Foursquare Secret
VERSION = '20180604' # Foursquare API version

# Setup API URL to explore venues near by
LIMIT=100
RADIUS=500

def getNearByVenues(neighbourhood_name,lat,long):
    venues_list=[]

    for name, lat, lng in zip(neighbourhood_name,lat,long):
        print(name)
        
        url=f"https://api.foursquare.com/v2/venues/explore?client_id={CLIENT_ID}&client_secret={CLIENT_SECRET}&ll={lat},{lng}&v={VERSION}&radius={RADIUS}&limit={LIMIT}"
        neighborhood_json = requests.get(url).json()["response"]["groups"][0]["items"]
        venues_list.append([(
            name,
            lat,
            lng,
            v["venue"]["name"],
            v["venue"]["location"]["lat"],
            v["venue"]["location"]["lng"],
            v["venue"]["categories"][0]["name"]) for v in neighborhood_json])
        #appending list of  venuedetails as list into another list venues list
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns=['Neighborhood', 
                    'Neighborhood Latitude', 
                    'Neighborhood Longitude', 
                    'Venue', 
                    'Venue Latitude', 
                    'Venue Longitude', 
                    'Venue Category']
    return (nearby_venues)

In [56]:
#Get all Tor
toronto_venues_df = getNearByVenues(df_borough_toronto['Neighborhood'],df_borough_toronto['Latitude'],df_borough_toronto['Longitude'])

The Beaches
The Danforth West, Riverdale
India Bazaar, The Beaches West
Studio District
Lawrence Park
Davisville North
North Toronto West,  Lawrence Park
Davisville
Moore Park, Summerhill East
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
Rosedale
St. James Town, Cabbagetown
Church and Wellesley
Regent Park, Harbourfront
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North & West, Forest Hill Road Park
The Annex, North Midtown, Yorkville
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Stn A PO Boxes
First Canadian Place, Underground city
Christie
Dufferin, Dovercourt Village
Little Portugal, Trinity
Brockton, Parkdale Village, Exhibition Place
High

In [57]:
toronto_venues_df.shape
#toronto_venues_df.head()

(1627, 7)

In [58]:
toronto_venues_df.groupby("Neighborhood").count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,58,58,58,58,58,58
"Brockton, Parkdale Village, Exhibition Place",24,24,24,24,24,24
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",14,14,14,14,14,14
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",15,15,15,15,15,15
Central Bay Street,65,65,65,65,65,65
Christie,17,17,17,17,17,17
Church and Wellesley,74,74,74,74,74,74
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
Davisville,35,35,35,35,35,35
Davisville North,8,8,8,8,8,8


In [59]:
#analyze the neighbourhoood
#creating dummy for each venue category

torento_onehot=pd.get_dummies(toronto_venues_df[["Venue Category"]], prefix="", prefix_sep="")

In [60]:
torento_onehot.shape

(1627, 237)

In [61]:
torento_onehot["Neighborhood"]=toronto_venues_df["Neighborhood"]

In [62]:
torento_onehot.head()

Unnamed: 0,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [63]:
torento_onehot.columns.get_loc("Neighborhood")

165

In [65]:
torento_onehot.columns[165]

'Neighborhood'

In [66]:
fixed_columns=[torento_onehot.columns[165]]+list(torento_onehot.columns[0:165])+list(torento_onehot.columns[166:])

In [67]:
len(fixed_columns)

237

In [68]:
torento_onehot=torento_onehot[fixed_columns]

In [69]:
torento_onehot.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [71]:
torento_onehot_grouped=torento_onehot.groupby("Neighborhood").mean().reset_index()
torento_onehot_grouped

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.066667,0.066667,0.066667,0.133333,0.2,0.066667,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.015385,0.0,0.0,0.015385,0.0,0.0,0.015385
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027027
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,...,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [73]:
for  hood in torento_onehot_grouped["Neighborhood"]:
    print(f"-------{hood}----")
    temp=torento_onehot_grouped[torento_onehot_grouped["Neighborhood"]==hood].T.reset_index()
    temp.columns=["venue","freq"]
    temp=temp[1:]
    temp["freq"]=round(temp["freq"].astype(float),2)
    print(temp.sort_values(by="freq",axis=0,ascending=False).reset_index(drop=True).head(10))
    dict1={}
    print("\n")

-------Berczy Park----
                venue  freq
0         Coffee Shop  0.09
1        Cocktail Bar  0.03
2  Seafood Restaurant  0.03
3         Cheese Shop  0.03
4      Farmers Market  0.03
5          Restaurant  0.03
6                Café  0.03
7            Beer Bar  0.03
8              Bakery  0.03
9              Lounge  0.02


-------Brockton, Parkdale Village, Exhibition Place----
                   venue  freq
0                   Café  0.12
1            Coffee Shop  0.08
2              Nightclub  0.08
3         Breakfast Spot  0.08
4            Yoga Studio  0.04
5     Italian Restaurant  0.04
6              Pet Store  0.04
7  Performing Arts Venue  0.04
8             Restaurant  0.04
9           Climbing Gym  0.04


-------Business reply mail Processing Centre, South Central Letter Processing Plant Toronto----
                  venue  freq
0    Light Rail Station  0.14
1  Gym / Fitness Center  0.07
2               Brewery  0.07
3         Garden Center  0.07
4                Garde

In [74]:
dict1={}

for  hood in torento_onehot_grouped["Neighborhood"]:
    val=[]
    #print(f"-------{hood}----")
    temp=torento_onehot_grouped[torento_onehot_grouped["Neighborhood"]==hood].T.reset_index()
    temp.columns=["venue","freq"]
    temp=temp[1:]
    temp["freq"]=round(temp["freq"].astype(float),2)
    val=list(temp.sort_values(by="freq",axis=0,ascending=False).reset_index(drop=True).head(10)["venue"])
    dict1[hood]=val

In [75]:
cols=["No."+str(x)+"_common_Place" for x in range(1,11)]

In [76]:
neighborhoods_venues_sorted=pd.DataFrame(dict1).T

In [77]:
neighborhoods_venues_sorted.columns=cols

In [78]:
neighborhoods_venues_sorted.insert(0,"Neighborhood",list(neighborhoods_venues_sorted.index))

In [79]:
neighborhoods_venues_sorted.reset_index(drop=True,inplace=True)

In [80]:
neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,No.1_common_Place,No.2_common_Place,No.3_common_Place,No.4_common_Place,No.5_common_Place,No.6_common_Place,No.7_common_Place,No.8_common_Place,No.9_common_Place,No.10_common_Place
0,Berczy Park,Coffee Shop,Cocktail Bar,Seafood Restaurant,Cheese Shop,Farmers Market,Restaurant,Café,Beer Bar,Bakery,Lounge
1,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Nightclub,Breakfast Spot,Yoga Studio,Italian Restaurant,Pet Store,Performing Arts Venue,Restaurant,Climbing Gym
2,"Business reply mail Processing Centre, South C...",Light Rail Station,Gym / Fitness Center,Brewery,Garden Center,Garden,Fast Food Restaurant,Farmers Market,Park,Comic Shop,Restaurant
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Airport Lounge,Rental Car Location,Airport Food Court,Airport Gate,Airport Terminal,Plane,Harbor / Marina,Airport,Sculpture Garden
4,Central Bay Street,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Burger Joint,Salad Place,Japanese Restaurant,Department Store,Bubble Tea Shop,Discount Store


In [81]:
torento_onehot_grouped.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.066667,0.066667,0.066667,0.133333,0.2,0.066667,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.015385,0.0,0.0,0.015385,0.0,0.0,0.015385


In [82]:
#Training
from  sklearn.cluster import KMeans

#set no of clusters
n_cluster=5
#set gtraining Data
training_Data=torento_onehot_grouped.drop("Neighborhood",axis=1)
#Training the model
cluster_kmean=KMeans(n_clusters=n_cluster,random_state=0).fit(training_Data)
cluster_kmean

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=5, n_init=10, n_jobs=None, precompute_distances='auto',
       random_state=0, tol=0.0001, verbose=0)

In [83]:
#check the labels
cluster_kmean.labels_

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [84]:
#adding cluster into venues tables
neighborhoods_venues_sorted.insert(0,"cluster_lablel",cluster_kmean.labels_)

In [85]:
neighborhoods_venues_sorted

Unnamed: 0,cluster_lablel,Neighborhood,No.1_common_Place,No.2_common_Place,No.3_common_Place,No.4_common_Place,No.5_common_Place,No.6_common_Place,No.7_common_Place,No.8_common_Place,No.9_common_Place,No.10_common_Place
0,0,Berczy Park,Coffee Shop,Cocktail Bar,Seafood Restaurant,Cheese Shop,Farmers Market,Restaurant,Café,Beer Bar,Bakery,Lounge
1,0,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Nightclub,Breakfast Spot,Yoga Studio,Italian Restaurant,Pet Store,Performing Arts Venue,Restaurant,Climbing Gym
2,0,"Business reply mail Processing Centre, South C...",Light Rail Station,Gym / Fitness Center,Brewery,Garden Center,Garden,Fast Food Restaurant,Farmers Market,Park,Comic Shop,Restaurant
3,0,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Airport Lounge,Rental Car Location,Airport Food Court,Airport Gate,Airport Terminal,Plane,Harbor / Marina,Airport,Sculpture Garden
4,0,Central Bay Street,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Burger Joint,Salad Place,Japanese Restaurant,Department Store,Bubble Tea Shop,Discount Store
5,0,Christie,Grocery Store,Café,Park,Baby Store,Italian Restaurant,Nightclub,Diner,Athletics & Sports,Coffee Shop,Candy Store
6,0,Church and Wellesley,Coffee Shop,Japanese Restaurant,Gay Bar,Sushi Restaurant,Restaurant,Yoga Studio,Bubble Tea Shop,Hotel,Café,Men's Store
7,0,"Commerce Court, Victoria Hotel",Coffee Shop,Café,Restaurant,Hotel,Gym,American Restaurant,Japanese Restaurant,Seafood Restaurant,Bar,Deli / Bodega
8,0,Davisville,Pizza Place,Sandwich Place,Dessert Shop,Café,Gym,Coffee Shop,Sushi Restaurant,Italian Restaurant,Park,Indoor Play Area
9,0,Davisville North,Hotel,Breakfast Spot,Department Store,Food & Drink Shop,Sandwich Place,Park,Gym,Pizza Place,Mediterranean Restaurant,Men's Store


In [86]:
torento_merged=toronto_df2.copy()

In [87]:
torento_merged=pd.merge(torento_merged,neighborhoods_venues_sorted,on="Neighborhood")

In [88]:
torento_merged.set_index("PostalCode",drop=True,inplace=True)

In [89]:
torento_merged

Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude,cluster_lablel,No.1_common_Place,No.2_common_Place,No.3_common_Place,No.4_common_Place,No.5_common_Place,No.6_common_Place,No.7_common_Place,No.8_common_Place,No.9_common_Place,No.10_common_Place
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Trail,Health Food Store,Pub,Afghan Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Miscellaneous Shop,Middle Eastern Restaurant,Mexican Restaurant
M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,0,Greek Restaurant,Coffee Shop,Italian Restaurant,Furniture / Home Store,Ice Cream Shop,Restaurant,Fruit & Vegetable Store,Frozen Yogurt Shop,Bookstore,Liquor Store
M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572,0,Park,Burrito Place,Food & Drink Shop,Sushi Restaurant,Sandwich Place,Restaurant,Italian Restaurant,Movie Theater,Light Rail Station,Pub
M4M,East Toronto,Studio District,43.659526,-79.340923,0,Café,Coffee Shop,Brewery,American Restaurant,Bakery,Gastropub,Gay Bar,Cheese Shop,Clothing Store,Comfort Food Restaurant
M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,0,Park,Lawyer,Bus Line,Swim School,Afghan Restaurant,Moroccan Restaurant,Martial Arts School,Mediterranean Restaurant,Men's Store,Metro Station
M4P,Central Toronto,Davisville North,43.712751,-79.390197,0,Hotel,Breakfast Spot,Department Store,Food & Drink Shop,Sandwich Place,Park,Gym,Pizza Place,Mediterranean Restaurant,Men's Store
M4R,Central Toronto,"North Toronto West, Lawrence Park",43.715383,-79.405678,0,Clothing Store,Coffee Shop,Yoga Studio,Sporting Goods Shop,Ice Cream Shop,Fast Food Restaurant,Metro Station,Mexican Restaurant,Diner,Park
M4S,Central Toronto,Davisville,43.704324,-79.38879,0,Pizza Place,Sandwich Place,Dessert Shop,Café,Gym,Coffee Shop,Sushi Restaurant,Italian Restaurant,Park,Indoor Play Area
M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,1,Tennis Court,Afghan Restaurant,Moroccan Restaurant,Lounge,Market,Martial Arts School,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant
M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,0,Coffee Shop,Pub,Restaurant,Fried Chicken Joint,Liquor Store,Pizza Place,Sushi Restaurant,Vietnamese Restaurant,Light Rail Station,American Restaurant


In [90]:
torento_merged.columns

Index(['Borough', 'Neighborhood', 'Latitude', 'Longitude', 'cluster_lablel',
       'No.1_common_Place', 'No.2_common_Place', 'No.3_common_Place',
       'No.4_common_Place', 'No.5_common_Place', 'No.6_common_Place',
       'No.7_common_Place', 'No.8_common_Place', 'No.9_common_Place',
       'No.10_common_Place'],
      dtype='object')

In [91]:
# create map of Torronto using latitude and longitude values
map_toronto = folium.Map(location=[torento_merged["Latitude"][0], torento_merged["Longitude"][0]], zoom_start=10)

# set color scheme for the clusters
x = np.arange(n_cluster)
ys = [i + x + (i*x)**2 for i in range(n_cluster)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to map
for lat, lng,neighborhood,cluster_label in zip(torento_merged['Latitude'], torento_merged['Longitude'], torento_merged['Neighborhood'],torento_merged["cluster_lablel"]):
    
    label = folium.Popup(str(neighborhood)+"cluster\n"+str(cluster_label), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=rainbow[cluster_label],
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto) 
map_toronto

# Examine Clusters

## Cluster 1

In [92]:
torento_merged[torento_merged["cluster_lablel"]== 0]

Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude,cluster_lablel,No.1_common_Place,No.2_common_Place,No.3_common_Place,No.4_common_Place,No.5_common_Place,No.6_common_Place,No.7_common_Place,No.8_common_Place,No.9_common_Place,No.10_common_Place
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Trail,Health Food Store,Pub,Afghan Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Miscellaneous Shop,Middle Eastern Restaurant,Mexican Restaurant
M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,0,Greek Restaurant,Coffee Shop,Italian Restaurant,Furniture / Home Store,Ice Cream Shop,Restaurant,Fruit & Vegetable Store,Frozen Yogurt Shop,Bookstore,Liquor Store
M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572,0,Park,Burrito Place,Food & Drink Shop,Sushi Restaurant,Sandwich Place,Restaurant,Italian Restaurant,Movie Theater,Light Rail Station,Pub
M4M,East Toronto,Studio District,43.659526,-79.340923,0,Café,Coffee Shop,Brewery,American Restaurant,Bakery,Gastropub,Gay Bar,Cheese Shop,Clothing Store,Comfort Food Restaurant
M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,0,Park,Lawyer,Bus Line,Swim School,Afghan Restaurant,Moroccan Restaurant,Martial Arts School,Mediterranean Restaurant,Men's Store,Metro Station
M4P,Central Toronto,Davisville North,43.712751,-79.390197,0,Hotel,Breakfast Spot,Department Store,Food & Drink Shop,Sandwich Place,Park,Gym,Pizza Place,Mediterranean Restaurant,Men's Store
M4R,Central Toronto,"North Toronto West, Lawrence Park",43.715383,-79.405678,0,Clothing Store,Coffee Shop,Yoga Studio,Sporting Goods Shop,Ice Cream Shop,Fast Food Restaurant,Metro Station,Mexican Restaurant,Diner,Park
M4S,Central Toronto,Davisville,43.704324,-79.38879,0,Pizza Place,Sandwich Place,Dessert Shop,Café,Gym,Coffee Shop,Sushi Restaurant,Italian Restaurant,Park,Indoor Play Area
M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,0,Coffee Shop,Pub,Restaurant,Fried Chicken Joint,Liquor Store,Pizza Place,Sushi Restaurant,Vietnamese Restaurant,Light Rail Station,American Restaurant
M4X,Downtown Toronto,"St. James Town, Cabbagetown",43.667967,-79.367675,0,Pizza Place,Coffee Shop,Italian Restaurant,Bakery,Market,Pub,Restaurant,Café,Snack Place,Beer Store


## Cluster 2

In [93]:
torento_merged[torento_merged["cluster_lablel"]== 1]

Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude,cluster_lablel,No.1_common_Place,No.2_common_Place,No.3_common_Place,No.4_common_Place,No.5_common_Place,No.6_common_Place,No.7_common_Place,No.8_common_Place,No.9_common_Place,No.10_common_Place
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,1,Tennis Court,Afghan Restaurant,Moroccan Restaurant,Lounge,Market,Martial Arts School,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant


## Cluster 3

In [94]:
torento_merged[torento_merged["cluster_lablel"]== 2]

Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude,cluster_lablel,No.1_common_Place,No.2_common_Place,No.3_common_Place,No.4_common_Place,No.5_common_Place,No.6_common_Place,No.7_common_Place,No.8_common_Place,No.9_common_Place,No.10_common_Place
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
M5N,Central Toronto,Roselawn,43.711695,-79.416936,2,Garden,Afghan Restaurant,Movie Theater,Market,Martial Arts School,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant


## Cluster 4

In [95]:

torento_merged[torento_merged["cluster_lablel"]== 3]

Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude,cluster_lablel,No.1_common_Place,No.2_common_Place,No.3_common_Place,No.4_common_Place,No.5_common_Place,No.6_common_Place,No.7_common_Place,No.8_common_Place,No.9_common_Place,No.10_common_Place
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
M5P,Central Toronto,"Forest Hill North & West, Forest Hill Road Park",43.696948,-79.411307,3,Jewelry Store,Mexican Restaurant,Trail,Sushi Restaurant,Afghan Restaurant,Moroccan Restaurant,Market,Martial Arts School,Mediterranean Restaurant,Men's Store


## Cluster 5

In [96]:
torento_merged[torento_merged["cluster_lablel"]== 4]

Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude,cluster_lablel,No.1_common_Place,No.2_common_Place,No.3_common_Place,No.4_common_Place,No.5_common_Place,No.6_common_Place,No.7_common_Place,No.8_common_Place,No.9_common_Place,No.10_common_Place
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,4,Park,Playground,Trail,Afghan Restaurant,Moroccan Restaurant,Market,Martial Arts School,Mediterranean Restaurant,Men's Store,Metro Station


### Observations: 

Most of the Neighbourhoods fall into Cluster 1 which are business areas with a lot of Cafes, restaurants and bars. Cluster 2,3 and 5 are just individual Tennis Courts, Garden and Parks respectively. While Cluster 4 is a Jewlery Store.