## Importing all the modules...

In [1]:
import numpy as np
import pandas as pd
import json
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors

## Importing raw postal code data.

In [2]:
k=pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',header=0)

## Cleaning.

In [3]:
code=pd.DataFrame(k[0])
code

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [4]:
code=code[code['Borough']!='Not assigned']
code=code.dropna()
code.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [5]:
code = code.sort_values(by=['Postal Code','Borough'])
code.reset_index(inplace=True)
code.drop('index',axis=1,inplace=True)
code.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


## Number of rows in the data-frame

In [6]:
code.shape[0]

103

## Importing geospatial data.

In [7]:
gdata = pd.read_csv('http://cocl.us/Geospatial_data')
gdata.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## Merged data

In [8]:
fdata=pd.merge(code,gdata,on='Postal Code')

In [9]:
fdata.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## Boroughs in Toronto only.

In [10]:
toro=fdata[fdata['Borough'].str.contains(pat='Toronto')]
toro.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


(39, 5)

## Coordinates of Toronto

In [11]:
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="ca_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.6534817, -79.3839347.


## Map of the boroughs

In [12]:
import folium

In [13]:
maptoro = folium.Map(location=[latitude,longitude],zoom_start=11)
for lat,lon,label in zip(fdata['Latitude'],fdata['Longitude'],fdata['Neighborhood']):
    label=folium.Popup(label,parse_html=True)
    folium.CircleMarker(
        [lat,lon],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color='yellow',
        fill_opacity=1.6,
        parse_html=False).add_to(maptoro)
maptoro

In [14]:
CLIENT_ID = '' 
CLIENT_SECRET = ''
VERSION = '20180605'
radius=500
LIMIT=300

## Get details of the venues

In [15]:
def getvenues(names, latitudes, longitudes, radius=500):
    vlist=[]
    for name,la,lo in zip(names,latitudes,longitudes):
        url='https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit{}'.format(CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            la, 
            lo, 
            radius, 
            LIMIT)
        res=requests.get(url).json()["response"]['groups'][0]['items']
        vlist.append([(
            name, 
            la, 
            lo, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in res])
    nvenues = pd.DataFrame([item for vlist in vlist for item in vlist])
    nvenues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    return (nvenues)

In [16]:
torrovenues = getvenues(names=toro['Neighborhood'],
                                   latitudes=toro['Latitude'],
                                   longitudes=toro['Longitude']
                                  )

In [17]:
k=torrovenues

In [34]:
torrovenues.shape

(861, 7)

In [18]:
torrovenues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,30,30,30,30,30,30
"Brockton, Parkdale Village, Exhibition Place",24,24,24,24,24,24
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",17,17,17,17,17,17
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",17,17,17,17,17,17
Central Bay Street,30,30,30,30,30,30
Christie,17,17,17,17,17,17
Church and Wellesley,30,30,30,30,30,30
"Commerce Court, Victoria Hotel",30,30,30,30,30,30
Davisville,30,30,30,30,30,30
Davisville North,8,8,8,8,8,8


## One-hot encoding

In [19]:
print('Numer of unique categories:- {}'.format(len(torrovenues['Venue Category'].unique())))

Numer of unique categories:- 193


In [39]:
# one hot encoding
torro_onehot = pd.get_dummies(torrovenues[['Venue Category']],prefix="", prefix_sep="")
#torro_onehot.columns=torro_onehot.columns.str.replace('Venue Category_','')
torro_onehot.drop(labels=['Neighborhood'],axis=1,inplace=True)
torro_onehot.insert(0,'Neighborhood',torrovenues[['Neighborhood']])

In [40]:
torro_onehot.head()

Unnamed: 0,Neighborhood,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [41]:
torrog=torro_onehot.groupby('Neighborhood').mean().reset_index()

In [62]:
torrog.head()

Unnamed: 0,Neighborhood,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,...,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.058824,0.058824,0.117647,0.176471,0.117647,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333


## Most common venues

In [43]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [44]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = torrog['Neighborhood']

for ind in np.arange(torrog.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(torrog.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Cocktail Bar,Coffee Shop,Beer Bar,Seafood Restaurant,Jazz Club,Farmers Market,Basketball Stadium,Museum,Breakfast Spot,Restaurant
1,"Brockton, Parkdale Village, Exhibition Place",Café,Performing Arts Venue,Coffee Shop,Breakfast Spot,Bakery,Office,Convenience Store,Pet Store,Climbing Gym,Restaurant
2,"Business reply mail Processing Centre, South C...",Light Rail Station,Yoga Studio,Auto Workshop,Brewery,Smoke Shop,Burrito Place,Restaurant,Farmers Market,Fast Food Restaurant,Skate Park
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Airport Lounge,Airport Terminal,Airport,Bar,Harbor / Marina,Plane,Rental Car Location,Sculpture Garden,Boutique
4,Central Bay Street,Coffee Shop,Café,Yoga Studio,Seafood Restaurant,Sandwich Place,Bubble Tea Shop,Ramen Restaurant,Poke Place,Chinese Restaurant,Modern European Restaurant


## Machine learning

In [45]:
from sklearn.cluster import KMeans

In [51]:
kclusters=5
torro_clust=torrog.drop('Neighborhood',axis=1)
kmeans=KMeans(n_clusters=kclusters,random_state=0)
kmeans.fit(torro_clust)
kmeans.labels_

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 1, 2, 0, 2,
       2, 2, 2, 2, 0, 4, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2], dtype=int32)

In [52]:
neighborhoods_venues_sorted.insert(0,'Labels',kmeans.labels_)

In [53]:
torrom=toro
torrom=torrom.join(neighborhoods_venues_sorted.set_index('Neighborhood'),on='Neighborhood')
torrom.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,M4E,East Toronto,The Beaches,43.676357,-79.293031,3,Trail,Health Food Store,Pub,Yoga Studio,Cuban Restaurant,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,2,Greek Restaurant,Italian Restaurant,Ice Cream Shop,Yoga Studio,Pizza Place,Brewery,Juice Bar,Bookstore,Restaurant,Spa
42,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572,2,Fast Food Restaurant,Sandwich Place,Park,Ice Cream Shop,Steakhouse,Liquor Store,Sushi Restaurant,Movie Theater,Brewery,Fish & Chips Shop
43,M4M,East Toronto,Studio District,43.659526,-79.340923,2,Café,Coffee Shop,Bakery,Bookstore,Seafood Restaurant,Sandwich Place,Brewery,Cheese Shop,Pet Store,Yoga Studio
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,1,Park,Swim School,Bus Line,Deli / Bodega,Eastern European Restaurant,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner


## Map after applying KMeans algorithm

In [55]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]
markers_colors = []
for lat, lon, poi, cluster in zip(torrom['Latitude'], torrom['Longitude'], torrom['Neighborhood'], torrom['Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## List of all clusters....

In [56]:
torrom.loc[torrom['Labels'] == 0, torrom.columns[[1] + list(range(5, torrom.shape[1]))]]

Unnamed: 0,Borough,Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
48,Central Toronto,0,Park,Restaurant,Trail,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner,Dessert Shop,Department Store
50,Downtown Toronto,0,Park,Playground,Trail,Cuban Restaurant,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner,Dessert Shop
64,Central Toronto,0,Jewelry Store,Trail,Sushi Restaurant,Park,Gas Station,Cuban Restaurant,Donut Shop,Dog Run,Distribution Center,Discount Store


In [57]:
torrom.loc[torrom['Labels'] == 1, torrom.columns[[1] + list(range(5, torrom.shape[1]))]]

Unnamed: 0,Borough,Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
44,Central Toronto,1,Park,Swim School,Bus Line,Deli / Bodega,Eastern European Restaurant,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner


In [58]:
torrom.loc[torrom['Labels'] == 2, torrom.columns[[1] + list(range(5, torrom.shape[1]))]]

Unnamed: 0,Borough,Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
41,East Toronto,2,Greek Restaurant,Italian Restaurant,Ice Cream Shop,Yoga Studio,Pizza Place,Brewery,Juice Bar,Bookstore,Restaurant,Spa
42,East Toronto,2,Fast Food Restaurant,Sandwich Place,Park,Ice Cream Shop,Steakhouse,Liquor Store,Sushi Restaurant,Movie Theater,Brewery,Fish & Chips Shop
43,East Toronto,2,Café,Coffee Shop,Bakery,Bookstore,Seafood Restaurant,Sandwich Place,Brewery,Cheese Shop,Pet Store,Yoga Studio
45,Central Toronto,2,Food & Drink Shop,Park,Pizza Place,Breakfast Spot,Gym / Fitness Center,Sandwich Place,Department Store,Hotel,Yoga Studio,Distribution Center
46,Central Toronto,2,Clothing Store,Coffee Shop,Sporting Goods Shop,Fast Food Restaurant,Ice Cream Shop,Diner,Mexican Restaurant,Park,Chinese Restaurant,Café
47,Central Toronto,2,Dessert Shop,Italian Restaurant,Sandwich Place,Café,Pizza Place,Coffee Shop,Sushi Restaurant,Gym,Indian Restaurant,Diner
49,Central Toronto,2,Coffee Shop,Pub,Fried Chicken Joint,Light Rail Station,Sushi Restaurant,Bagel Shop,Sports Bar,Bank,Liquor Store,Pizza Place
51,Downtown Toronto,2,Café,Restaurant,Italian Restaurant,Bakery,Coffee Shop,Taiwanese Restaurant,Caribbean Restaurant,Deli / Bodega,Bank,Playground
52,Downtown Toronto,2,Burger Joint,Restaurant,Ramen Restaurant,Café,Smoke Shop,Indian Restaurant,Ice Cream Shop,Juice Bar,Beer Bar,Breakfast Spot
53,Downtown Toronto,2,Coffee Shop,Park,Theater,Breakfast Spot,Bakery,Yoga Studio,Restaurant,Café,Pub,Chocolate Shop


In [59]:
torrom.loc[torrom['Labels'] == 3, torrom.columns[[1] + list(range(5, torrom.shape[1]))]]

Unnamed: 0,Borough,Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,East Toronto,3,Trail,Health Food Store,Pub,Yoga Studio,Cuban Restaurant,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner


In [60]:
torrom.loc[torrom['Labels'] == 4, torrom.columns[[1] + list(range(5, torrom.shape[1]))]]

Unnamed: 0,Borough,Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
63,Central Toronto,4,Garden,Home Service,Yoga Studio,Dance Studio,Eastern European Restaurant,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner


In [61]:
torrom.loc[torrom['Labels'] == 5, torrom.columns[[1] + list(range(5, torrom.shape[1]))]]

Unnamed: 0,Borough,Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
