# Toronto Neighbour Hoods
### *Important Note: This notebook contains all the code of the project and all steps
### *Also I have decided to not limit the boroughs to the ones including the word "Toronto", i will run it on all the database


# Part 1


First we import modules that we might need

In [2]:
import numpy as np
import pandas as pd
import requests
import json
import random
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from datetime import date
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium # map rendering library

Next we Grap the data and format it

In [3]:
URL = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
#Getting the data
rawDataRES = requests.get(URL)
rawData = rawDataRES.text
#Removing unimportant Parts
rawData =rawData.split("<table class=\"wikitable sortable\">\n<tbody>")[1]
rawData = rawData.split("</tbody></table>")[0]
rawData = rawData[79:]
rawData = rawData.replace("<td>","")
rawData = rawData.replace(",","~")
rawData = rawData.replace("\n</td>\n",",")
rawData = rawData.replace("\n</td></tr>\n<tr>","")
rawData = rawData.replace("\n</td></tr>","")


# Part 2

Then we start to put it in pandas dataframe

In [10]:
columnNames = ["PostalCode","Borough","Neighborhood","Latitude","Longitude"]
df = pd.DataFrame(columns=columnNames)
coodrinates = pd.read_csv("coordinates.csv")


for entry in rawData.splitlines():
    entry = entry.split(",")
    code = entry[0]
    borough = entry[1]
    neighbours = entry[2]
    if borough == "Not assigned":
        continue
    if neighbours == "Not assigned":
        neighbours = borough
    elif len(neighbours.split("~")) >1:
        neighbours = neighbours.replace("~",",")
    ll = coodrinates[coodrinates["Postal Code"] == code][["Latitude","Longitude"]]
    latitude = ll.iloc[0,0]
    longitude = ll.iloc[0,1]
    data = {
        "PostalCode":code,
        "Borough":borough,
        "Neighborhood":neighbours,
        "Latitude":latitude,
        "Longitude":longitude
    }
    df = df.append(data,ignore_index=True)

Finally , we view it

In [11]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [9]:
df.shape

(103, 3)

# Part 3

Now we need to get the coordinates of Toronto

In [13]:
address = "Toronto, Ontario"
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronot are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronot are 43.6534817, -79.3839347.


In [17]:
# create map using latitude and longitude values
torontoMap = folium.Map(location=[latitude, longitude], zoom_start=10.5)

# add markers to map
for lat, lng, label in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(torontoMap)  
    
torontoMap

In [22]:
client_id="XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
client_secret="XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
version = str(date.today()).replace("-","")

Following the same approach as in the NewYork analysis but with some variations

In [32]:
def getNearbyVenues(names,latitudes,longitudes,radius=500,LIMIT=100):
    venues_list = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        url = f'https://api.foursquare.com/v2/venues/explore?&client_id={client_id}&client_secret={client_secret}&v={version}&ll={lat},{lng}&radius={radius}&limit={LIMIT}'
            
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [33]:
torontoVenues = getNearbyVenues(names=df['Neighborhood'],
    latitudes=df["Latitude"], longitudes=df["Longitude"])

Lets check what we got

In [34]:
torontoVenues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.332140,Park
1,Parkwoods,43.753259,-79.329656,649 Variety,43.754513,-79.331942,Convenience Store
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant
...,...,...,...,...,...,...,...
2151,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,Islington Florist & Nursery,43.630156,-79.518718,Flower Shop
2152,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,Koala Tan Tanning Salon & Sunless Spa,43.631370,-79.519006,Tanning Salon
2153,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,Value Village,43.631269,-79.518238,Thrift / Vintage Store
2154,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,Kingsway Boxing Club,43.627254,-79.526684,Gym


In [37]:
# Now we create one hot encoding
toronto_onehot = pd.get_dummies(torontoVenues[["Venue Category"]])
toronto_onehot['Neighborhood']=torontoVenues["Neighborhood"]
fixedColumns = [toronto_onehot.columns[-1]]+list(toronto_onehot.columns[:-1])
toronto_onehot=toronto_onehot[fixedColumns]

toronto_onehot.head()

Unnamed: 0,Neighborhood,Venue Category_Accessories Store,Venue Category_Afghan Restaurant,Venue Category_Airport,Venue Category_Airport Food Court,Venue Category_Airport Lounge,Venue Category_Airport Service,Venue Category_Airport Terminal,Venue Category_American Restaurant,Venue Category_Antique Shop,...,Venue Category_Vegetarian / Vegan Restaurant,Venue Category_Video Game Store,Venue Category_Video Store,Venue Category_Vietnamese Restaurant,Venue Category_Warehouse Store,Venue Category_Wine Bar,Venue Category_Wine Shop,Venue Category_Wings Joint,Venue Category_Women's Store,Venue Category_Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [38]:
toronto_onehot.shape

(2156, 273)

In [42]:
toronto_grouped = toronto_onehot.groupby("Neighborhood").mean().reset_index()
toronto_grouped

Unnamed: 0_level_0,Venue Category_Accessories Store,Venue Category_Afghan Restaurant,Venue Category_Airport,Venue Category_Airport Food Court,Venue Category_Airport Lounge,Venue Category_Airport Service,Venue Category_Airport Terminal,Venue Category_American Restaurant,Venue Category_Antique Shop,Venue Category_Aquarium,...,Venue Category_Vegetarian / Vegan Restaurant,Venue Category_Video Game Store,Venue Category_Video Store,Venue Category_Vietnamese Restaurant,Venue Category_Warehouse Store,Venue Category_Wine Bar,Venue Category_Wine Shop,Venue Category_Wings Joint,Venue Category_Women's Store,Venue Category_Yoga Studio
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,...,0.0,0.0,0.000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Willowdale, Willowdale East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.000,0.029412,0.0,0.0,0.0,0.0,0.0,0.0
"Willowdale, Willowdale West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.125,0.000000,0.0,0.0,0.0,0.0,0.0,0.0


In [43]:
toronto_grouped.shape

(95, 272)

Code used to get most common venues

In [77]:
def return_most_common_venues(row, num_top_venues=10):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now to get the top 10 most Coomon Venues (Code was derivied from the Newyork Notebook)

In [91]:
columns = ['Neighborhood']
for ind in np.arange(10):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))
neighborhood_Venues_Sorted = pd.DataFrame(columns=columns)
neighborhood_Venues_Sorted["Neighborhood"] = toronto_grouped.index
neighborhood_Venues_Sorted

Unnamed: 0,Neighborhood,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,,,,,,,,,,
1,"Alderwood, Long Branch",,,,,,,,,,
2,"Bathurst Manor, Wilson Heights, Downsview North",,,,,,,,,,
3,Bayview Village,,,,,,,,,,
4,"Bedford Park, Lawrence Manor East",,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
90,"Willowdale, Willowdale East",,,,,,,,,,
91,"Willowdale, Willowdale West",,,,,,,,,,
92,Woburn,,,,,,,,,,
93,Woodbine Heights,,,,,,,,,,


In [92]:
for i in range(neighborhood_Venues_Sorted.shape[0]):
    row = toronto_grouped.iloc[i,:-1]
    neighborhood_Venues_Sorted.iloc[i,1:]=return_most_common_venues(row)

In [93]:
neighborhood_Venues_Sorted

Unnamed: 0,Neighborhood,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Venue Category_Lounge,Venue Category_Latin American Restaurant,Venue Category_Breakfast Spot,Venue Category_Skating Rink,Venue Category_Dumpling Restaurant,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Drugstore
1,"Alderwood, Long Branch",Venue Category_Pizza Place,Venue Category_Gym,Venue Category_Coffee Shop,Venue Category_Pharmacy,Venue Category_Sandwich Place,Venue Category_Pub,Venue Category_Pool,Venue Category_Dog Run,Venue Category_Dim Sum Restaurant,Venue Category_Diner
2,"Bathurst Manor, Wilson Heights, Downsview North",Venue Category_Bank,Venue Category_Coffee Shop,Venue Category_Frozen Yogurt Shop,Venue Category_Ice Cream Shop,Venue Category_Supermarket,Venue Category_Middle Eastern Restaurant,Venue Category_Sushi Restaurant,Venue Category_Restaurant,Venue Category_Shopping Mall,Venue Category_Mobile Phone Shop
3,Bayview Village,Venue Category_Japanese Restaurant,Venue Category_Café,Venue Category_Bank,Venue Category_Chinese Restaurant,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Women's Store
4,"Bedford Park, Lawrence Manor East",Venue Category_Italian Restaurant,Venue Category_Coffee Shop,Venue Category_Restaurant,Venue Category_Sandwich Place,Venue Category_Hobby Shop,Venue Category_Pharmacy,Venue Category_Pizza Place,Venue Category_Pub,Venue Category_Café,Venue Category_Butcher
...,...,...,...,...,...,...,...,...,...,...,...
90,"Willowdale, Willowdale East",Venue Category_Ramen Restaurant,Venue Category_Sandwich Place,Venue Category_Sushi Restaurant,Venue Category_Pizza Place,Venue Category_Café,Venue Category_Restaurant,Venue Category_Coffee Shop,Venue Category_Ice Cream Shop,Venue Category_Hotel,Venue Category_Steakhouse
91,"Willowdale, Willowdale West",Venue Category_Bank,Venue Category_Pharmacy,Venue Category_Pizza Place,Venue Category_Coffee Shop,Venue Category_Women's Store,Venue Category_Donut Shop,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run
92,Woburn,Venue Category_Coffee Shop,Venue Category_Soccer Field,Venue Category_Korean Restaurant,Venue Category_Dumpling Restaurant,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Drugstore
93,Woodbine Heights,Venue Category_Park,Venue Category_Bus Stop,Venue Category_Skating Rink,Venue Category_Dance Studio,Venue Category_Video Store,Venue Category_Beer Store,Venue Category_Athletics & Sports,Venue Category_Curling Ice,Venue Category_Doner Restaurant,Venue Category_Distribution Center


## Now the Most important Part in the PART 3
# Clustring

In [139]:
kclusters = 5
toronto_grouped_clusters = toronto_grouped.drop("Neighborhood",1)
toronto_grouped_clusters
kmeans = KMeans(n_clusters=kclusters,random_state=0).fit(toronto_grouped_clusters)

kmeans.labels_[0:100]

array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1], dtype=int32)

In [150]:
neighborhood_Venues_Sorted.insert(0,'ClusterLables',kmeans.labels_)
df_merged = df

df_merged = df_merged.join(neighborhood_Venues_Sorted.set_index('Neighborhood'), on='Neighborhood')
df_merged

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,ClusterLables,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,2.0,Venue Category_Food & Drink Shop,Venue Category_Park,Venue Category_Convenience Store,Venue Category_Women's Store,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Drugstore
1,M4A,North York,Victoria Village,43.725882,-79.315572,1.0,Venue Category_Hockey Arena,Venue Category_Coffee Shop,Venue Category_Portuguese Restaurant,Venue Category_Financial or Legal Service,Venue Category_French Restaurant,Venue Category_Pizza Place,Venue Category_Electronics Store,Venue Category_Eastern European Restaurant,Venue Category_Dumpling Restaurant,Venue Category_Ethiopian Restaurant
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636,2.0,Venue Category_Coffee Shop,Venue Category_Pub,Venue Category_Park,Venue Category_Bakery,Venue Category_Breakfast Spot,Venue Category_Café,Venue Category_Theater,Venue Category_Shoe Store,Venue Category_Restaurant,Venue Category_Distribution Center
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,4.0,Venue Category_Furniture / Home Store,Venue Category_Clothing Store,Venue Category_Boutique,Venue Category_Event Space,Venue Category_Vietnamese Restaurant,Venue Category_Coffee Shop,Venue Category_Gift Shop,Venue Category_Donut Shop,Venue Category_Diner,Venue Category_Discount Store
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,2.0,Venue Category_Coffee Shop,Venue Category_Diner,Venue Category_Park,Venue Category_Burrito Place,Venue Category_Bank,Venue Category_Bar,Venue Category_Beer Bar,Venue Category_Japanese Restaurant,Venue Category_Sandwich Place,Venue Category_Café
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944,1.0,Venue Category_River,Venue Category_Smoke Shop,Venue Category_Pool,Venue Category_Electronics Store,Venue Category_Ethiopian Restaurant,Venue Category_Eastern European Restaurant,Venue Category_Dumpling Restaurant,Venue Category_Drugstore,Venue Category_Department Store,Venue Category_Donut Shop
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160,3.0,Venue Category_Coffee Shop,Venue Category_Japanese Restaurant,Venue Category_Sushi Restaurant,Venue Category_Gay Bar,Venue Category_Restaurant,Venue Category_Mediterranean Restaurant,Venue Category_Dance Studio,Venue Category_Café,Venue Category_Pub,Venue Category_Men's Store
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,3.0,Venue Category_Light Rail Station,Venue Category_Skate Park,Venue Category_Garden Center,Venue Category_Recording Studio,Venue Category_Burrito Place,Venue Category_Fast Food Restaurant,Venue Category_Auto Workshop,Venue Category_Spa,Venue Category_Farmers Market,Venue Category_Restaurant
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509,2.0,Venue Category_Baseball Field,Venue Category_Women's Store,Venue Category_Drugstore,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Dumpling Restaurant,Venue Category_Field


In [156]:
clusterMap = folium.Map(location=[latitude,longitude],zoom_start=10.5)
x= np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]
markers_colors = []

for lat, lon, poi, cluster in zip(df_merged['Latitude'], df_merged['Longitude'], df_merged['Neighborhood'], df_merged['ClusterLables']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    if cluster not in [1.0,2.0,3.0,4.0]:
        continue
        
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(clusterMap)
       

In [157]:
clusterMap