In [1]:
!pip install folium

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/fd/a0/ccb3094026649cda4acd55bf2c3822bb8c277eb11446d13d384e5be35257/folium-0.10.1-py2.py3-none-any.whl (91kB)
[K     |████████████████████████████████| 92kB 11.8MB/s eta 0:00:01
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/81/6d/31c83485189a2521a75b4130f1fee5364f772a0375f81afff619004e5237/branca-0.4.0-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.4.0 folium-0.10.1


In [2]:
import pandas as pd
import numpy as np
import json
import requests
from pandas.io.html import read_html
from pandas.io.json import json_normalize

In [3]:
#Extracting the data from the wiki
page= 'https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=945633050.' #permanent link to a specific revision to prevent issues from changes in formating
table= read_html(page, attrs={'class':'wikitable'}) #the class is obtainable by inspecting the table on the wiki
table=table[0]
table.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [4]:
#processing the data: removing any entry without a borough assigned
table=table[table['Borough']!="Not assigned"]
table=table.reset_index(drop=True)
table.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor


In [5]:
#processing the data: setting any entry without a neighbourhood assigned to have it set to its borough
table['Neighbourhood']=table['Neighbourhood'].replace('Not assigned', table['Borough'])
table.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor


In [6]:
#processing the data:grouping neighbourhoods by postcode
table=table.groupby(['Postcode','Borough'])['Neighbourhood'].apply(','.join).reset_index()
table.head(20)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park"
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge"
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff,Cliffside West"


In [7]:
#getting the geographical coordinates from the csv file
! wget -q -O 'geodata.csv' 'http://cocl.us/Geospatial_data'
geodata=pd.read_csv('geodata.csv')
geodata.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [8]:
#fixing the postal code column to match the name in the other dataframe
geodata.rename(columns={'Postal Code':'Postcode'},inplace=True)
geodata.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [9]:
#merging the geographical coordinates into the previous dataframe
dataframe = pd.merge(table, geodata, on='Postcode')
dataframe

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848


In [10]:
#obtaining location data from Foursquare 
#the following cell contains client information for the Foursquare API and its contents are hidden. It sets the version to 2020/03/24 and a limit of 200 calls

In [11]:
{
    "tags": [
        "remove_input",
    ]
}
CLIENT_ID = 'ZP1KGGMHPVX411KN4GYL22Q3DPR42Y1SZ4003KBLADGMZ2BZ' # your Foursquare ID
CLIENT_SECRET = 'KJIJ22G0KRZVAXBN2WSYUATRERHY0NCOZ0ZYBECF3SLZKVTP' # your Foursquare Secret
VERSION = '20200324'
LIMIT = 200
print('Information set')

Information set


In [12]:
#we will use the average Neighbourhood coordinates to determine the latitude and Longitude in which to search for every Borough 
Borough_coordinates=dataframe.drop(['Postcode','Neighbourhood'], axis=1)
Borough_coordinates=Borough_coordinates.groupby(['Borough']).mean()
Borough_coordinates

Unnamed: 0_level_0,Latitude,Longitude
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1
Central Toronto,43.70198,-79.398954
Downtown Toronto,43.654597,-79.383972
East Toronto,43.669436,-79.324654
East York,43.700303,-79.335851
Etobicoke,43.660043,-79.542074
Mississauga,43.636966,-79.615819
North York,43.750727,-79.429338
Scarborough,43.766229,-79.249085
West Toronto,43.652653,-79.44929
York,43.690797,-79.472633


In [13]:
#For each borough we will run a query and format the data to a dataframe
#Central Toronto
latitude=Borough_coordinates.loc['Central Toronto','Latitude']
longitude=Borough_coordinates.loc['Central Toronto','Longitude']
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, LIMIT)
#getting the results to a json file
results = requests.get(url).json()
# assign relevant part of JSON to venues
venues = results['response']['venues']

# tranform venues into a dataframe
queryCT = json_normalize(venues)

#Get the relevant information and obtain the locations by distance to the coordinates, limiting to points in under 1000
queryCT.sort_values(by=['location.distance'], inplace=True)
queryCT.drop(queryCT[queryCT['location.distance'] > 1000].index, inplace=True)
relevant_columns=['name','location.distance','location.lat','location.lng','id','categories']
queryCT = queryCT.loc[:, relevant_columns]

#Filtering the categories
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return 0
    else:
        return categories_list[0]['name']

    
#Filtering the category for each row
queryCT['categories'] = queryCT.apply(get_category_type, axis=1)

#Cleaning column names by keeping only last term
queryCT.columns = [column.split('.')[-1] for column in queryCT.columns]

#Dropping any entry with category 'None' 
queryCT.drop(queryCT[queryCT.categories == 0].index, inplace=True)

queryCT.head()

Unnamed: 0,name,distance,lat,lng,id,categories
86,Optika,33,43.702283,-79.398928,4dcedc257d8b975f18d9eff1,Optical Shop
28,Threading Experts,58,43.70153,-79.399328,53902de5498e267601fa1c88,Check Cashing Service
75,Manor Road Pedestrian Bridge,74,43.702039,-79.399871,50e05e4ce4b0c48b61348cab,Bridge
119,TPH,80,43.70191,-79.39796,517ed6b2e4b04cd49103d80c,Office
58,Urban Health Group,89,43.70171,-79.397908,505dd541e4b0cc983457f5e5,Medical Center


In [14]:
#Downtown Toronto
latitude=Borough_coordinates.loc['Downtown Toronto','Latitude']
longitude=Borough_coordinates.loc['Downtown Toronto','Longitude']
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, LIMIT)

results = requests.get(url).json()

venues = results['response']['venues']

queryDT = json_normalize(venues)

queryDT.sort_values(by=['location.distance'], inplace=True)
queryDT.drop(queryDT[queryDT['location.distance'] > 1000].index, inplace=True)
relevant_columns=['name','location.distance','location.lat','location.lng','id','categories']
queryDT = queryDT.loc[:, relevant_columns]

def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return 0
    else:
        return categories_list[0]['name']

queryDT['categories'] = queryDT.apply(get_category_type, axis=1)

queryDT.columns = [column.split('.')[-1] for column in queryDT.columns]
 
queryDT.drop(queryDT[queryDT.categories == 0].index, inplace=True)

queryDT.head()

Unnamed: 0,name,distance,lat,lng,id,categories
3,The Secret Garden,33,43.654672,-79.384373,51ab95f2498ecb834c625c60,Garden
47,Skylounge @ One City Hall,44,43.654794,-79.384448,4bf062c624f020a11f0d684f,Lounge
13,Downtown Diversity Garden,45,43.65446,-79.38451,4fa27c83e4b08fa9e926deb4,Garden
33,Larry Sefton Park,45,43.654589,-79.383405,51803df8e4b0f7cc47bb1531,Plaza
43,Hester How Daycare,57,43.654111,-79.384225,53cd1b44498e310834ebaae5,Nursery School


In [15]:
#East Toronto
latitude=Borough_coordinates.loc['East Toronto','Latitude']
longitude=Borough_coordinates.loc['East Toronto','Longitude']
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, LIMIT)

results = requests.get(url).json()

venues = results['response']['venues']

queryET = json_normalize(venues)

queryET.sort_values(by=['location.distance'], inplace=True)
queryET.drop(queryET[queryET['location.distance'] > 1000].index, inplace=True)
relevant_columns=['name','location.distance','location.lat','location.lng','id','categories']
queryET = queryET.loc[:, relevant_columns]

def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return 0
    else:
        return categories_list[0]['name']

queryET['categories'] = queryET.apply(get_category_type, axis=1)

queryET.columns = [column.split('.')[-1] for column in queryET.columns]
 
queryET.drop(queryET[queryET.categories == 0].index, inplace=True)

queryET.head()

Unnamed: 0,name,distance,lat,lng,id,categories
114,Leonora's,61,43.669976,-79.324818,4dc831821fc72e8637734ee5,Building
109,Skellie's,87,43.668759,-79.324091,4b91aa27f964a52025ce33e3,Office
65,Uncomfortable Silence Recording Studios,149,43.670501,-79.32353,4cc4653e3d7fa1cddc97ab5f,General Entertainment
13,Booty Camp Fitness,178,43.66883,-79.3267,4c584ddbb1369521e9d0735a,Gym / Fitness Center
97,Buster's Hideout,192,43.668023,-79.326023,5157943ee4b065501599d99a,Other Nightlife


In [16]:
#East York
latitude=Borough_coordinates.loc['East York','Latitude']
longitude=Borough_coordinates.loc['East York','Longitude']
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, LIMIT)

results = requests.get(url).json()

venues = results['response']['venues']

queryEY = json_normalize(venues)

queryEY.sort_values(by=['location.distance'], inplace=True)
queryEY.drop(queryEY[queryEY['location.distance'] > 1000].index, inplace=True)
relevant_columns=['name','location.distance','location.lat','location.lng','id','categories']
queryEY = queryEY.loc[:, relevant_columns]

def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return 0
    else:
        return categories_list[0]['name']

queryEY['categories'] = queryEY.apply(get_category_type, axis=1)

queryEY.columns = [column.split('.')[-1] for column in queryEY.columns]
 
queryEY.drop(queryEY[queryEY.categories == 0].index, inplace=True)

queryEY.head()

Unnamed: 0,name,distance,lat,lng,id,categories
3,CIBC,135,43.701409,-79.336557,4cc5ff92c844721efd45f201,Bank
0,Elevated Wetlands Sculptures,203,43.701848,-79.337202,5287a3c3498ee5793a856cc0,Public Art
59,Shory Villa,264,43.698059,-79.336936,5153a1fce4b0bdbdf1a54bdd,Lounge
21,Marta's Pet Store,331,43.697324,-79.33576,50e4af7ee4b0869d101c3a92,Pet Store
24,Taylor Creek Park - West,348,43.70126,-79.331726,5743147d498e8ffce0a5dfb5,Park


In [17]:
#Etobicoke
latitude=Borough_coordinates.loc['Etobicoke','Latitude']
longitude=Borough_coordinates.loc['Etobicoke','Longitude']
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, LIMIT)

results = requests.get(url).json()

venues = results['response']['venues']

queryE = json_normalize(venues)

queryE.sort_values(by=['location.distance'], inplace=True)
queryE.drop(queryE[queryE['location.distance'] > 1000].index, inplace=True)
relevant_columns=['name','location.distance','location.lat','location.lng','id','categories']
queryE = queryE.loc[:, relevant_columns]

def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return 0
    else:
        return categories_list[0]['name']

queryE['categories'] = queryE.apply(get_category_type, axis=1)

queryE.columns = [column.split('.')[-1] for column in queryE.columns]
 
queryE.drop(queryE[queryE.categories == 0].index, inplace=True)

queryE.head()

Unnamed: 0,name,distance,lat,lng,id,categories
3,Rosethorn Junior School,21,43.65994,-79.541846,4eb1515bb8f74bbb23cb1fe5,School
1,Rosethorn Park,67,43.659923,-79.541247,4e99a4b4722e25a7f5e6bee1,Park
2,Softron Accounting & Tax Preparation,82,43.659504,-79.542773,4f352bb3e4b0993aed261836,Office
23,St. Gregory Catholic School,82,43.659922,-79.543084,4c1a6538b4e62d7f16bdd793,School
0,St. Gregory's Church,107,43.659968,-79.543409,4b61bad3f964a5202b1f2ae3,Church


In [18]:
#Mississauga
latitude=Borough_coordinates.loc['Mississauga','Latitude']
longitude=Borough_coordinates.loc['Mississauga','Longitude']
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, LIMIT)

results = requests.get(url).json()

venues = results['response']['venues']

queryM = json_normalize(venues)

queryM.sort_values(by=['location.distance'], inplace=True)
queryM.drop(queryM[queryM['location.distance'] > 1000].index, inplace=True)
relevant_columns=['name','location.distance','location.lat','location.lng','id','categories']
queryM = queryM.loc[:, relevant_columns]

def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return 0
    else:
        return categories_list[0]['name']

queryM['categories'] = queryM.apply(get_category_type, axis=1)

queryM.columns = [column.split('.')[-1] for column in queryM.columns]
 
queryM.drop(queryM[queryM.categories == 0].index, inplace=True)

queryM.head()

Unnamed: 0,name,distance,lat,lng,id,categories
0,Canada Post - Gateway,161,43.636985,-79.617824,4b0310f4f964a520654c22e3,Government Building
2,Canada Border Services,165,43.636814,-79.617867,50231791e4b00ac2d8261ce3,Office
23,Eggsmart,209,43.638361,-79.617566,554e32fc498e98c39019b30e,Breakfast Spot
87,Mississauga Transit Stop #2035,250,43.638748,-79.617708,4de94c9252b1741cdb195e54,Bus Stop
8,Wokker,258,43.634856,-79.617149,4bb4d98cd027ef3b84cf0c9d,Chinese Restaurant


In [19]:
#North York
latitude=Borough_coordinates.loc['North York','Latitude']
longitude=Borough_coordinates.loc['North York','Longitude']
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, LIMIT)

results = requests.get(url).json()

venues = results['response']['venues']

queryNY = json_normalize(venues)

queryNY.sort_values(by=['location.distance'], inplace=True)
queryNY.drop(queryNY[queryNY['location.distance'] > 1000].index, inplace=True)
relevant_columns=['name','location.distance','location.lat','location.lng','id','categories']
queryNY = queryNY.loc[:, relevant_columns]

def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return 0
    else:
        return categories_list[0]['name']

queryNY['categories'] = queryNY.apply(get_category_type, axis=1)

queryNY.columns = [column.split('.')[-1] for column in queryNY.columns]
 
queryNY.drop(queryNY[queryNY.categories == 0].index, inplace=True)

queryNY.head()

Unnamed: 0,name,distance,lat,lng,id,categories
12,"Zodiac - Camps, Swim & More",125,43.749651,-79.428889,595a27c0123a195de6b253c2,Recreation Center
1,Earl Bales Ski and Snowboard Centre,293,43.752631,-79.431865,4d9e237f7958f04df15426fa,Ski Chalet
24,North York Ski Centre,304,43.752711,-79.431953,4cc475e938aaa093e42b1362,Ski Area
97,Stonebrook Dental (Dr. Nathan Haas),367,43.751403,-79.433812,4c866223d92ea0932cbd6d72,Dentist's Office
73,Buding's Kitchen,499,43.749239,-79.435201,51b09ba7498ec43e8eb836ff,Gastropub


In [20]:
#Scarborough
latitude=Borough_coordinates.loc['Scarborough','Latitude']
longitude=Borough_coordinates.loc['Scarborough','Longitude']
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, LIMIT)

results = requests.get(url).json()

venues = results['response']['venues']

queryS = json_normalize(venues)

queryS.sort_values(by=['location.distance'], inplace=True)
queryS.drop(queryS[queryS['location.distance'] > 1000].index, inplace=True)
relevant_columns=['name','location.distance','location.lat','location.lng','id','categories']
queryS = queryS.loc[:, relevant_columns]

def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return 0
    else:
        return categories_list[0]['name']

queryS['categories'] = queryS.apply(get_category_type, axis=1)

queryS.columns = [column.split('.')[-1] for column in queryS.columns]
 
queryS.drop(queryS[queryS.categories == 0].index, inplace=True)

queryS.head()

Unnamed: 0,name,distance,lat,lng,id,categories
30,Telephone booth,307,43.767873,-79.252166,4be08aab4f15c9287b94cb0b,Electronics Store
99,Devils Playground,354,43.767892,-79.245331,5227ba8b11d21cef7b2f4250,Lounge
92,Green earth,396,43.76977,-79.249647,4cb4dec164998cfa39d309a2,Non-Profit
9,Hydro Corridor,467,43.766251,-79.243267,4f2300b8e4b04f6e67af7e84,Trail
114,But 'N' Ben,468,43.766636,-79.24329,4dc02bfe6a23e5a549e57e41,Butcher


In [21]:
#West Toronto
latitude=Borough_coordinates.loc['West Toronto','Latitude']
longitude=Borough_coordinates.loc['West Toronto','Longitude']
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, LIMIT)

results = requests.get(url).json()

venues = results['response']['venues']

queryWT = json_normalize(venues)

queryWT.sort_values(by=['location.distance'], inplace=True)
queryWT.drop(queryWT[queryWT['location.distance'] > 1000].index, inplace=True)
relevant_columns=['name','location.distance','location.lat','location.lng','id','categories']
queryWT = queryWT.loc[:, relevant_columns]

def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return 0
    else:
        return categories_list[0]['name']

queryWT['categories'] = queryWT.apply(get_category_type, axis=1)

queryWT.columns = [column.split('.')[-1] for column in queryWT.columns]
 
queryWT.drop(queryWT[queryWT.categories == 0].index, inplace=True)

queryWT.head()

Unnamed: 0,name,distance,lat,lng,id,categories
0,Bandit Brewery,13,43.652663,-79.449452,5723e997498e113c968a7411,Brewery
38,Tigon Auto Repair,26,43.652478,-79.449514,51954832498edb9cc161a3a5,Automotive Shop
31,BoardAgain Games,30,43.652475,-79.448998,56999c68498e49b6add47518,Gaming Cafe
81,Peggy Nash Campaign Office,35,43.652364,-79.44911,4dbdcfd36a23e294ba4ed57c,Voting Booth
4,Artistic Glass,38,43.652542,-79.44884,5b7ebe5a16ef67002c7d4845,Hardware Store


In [22]:
#York
latitude=Borough_coordinates.loc['York','Latitude']
longitude=Borough_coordinates.loc['York','Longitude']
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, LIMIT)

results = requests.get(url).json()

venues = results['response']['venues']

queryY = json_normalize(venues)

queryY.sort_values(by=['location.distance'], inplace=True)
queryY.drop(queryY[queryY['location.distance'] > 1000].index, inplace=True)
relevant_columns=['name','location.distance','location.lat','location.lng','id','categories']
queryY = queryY.loc[:, relevant_columns]

def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return 0
    else:
        return categories_list[0]['name']

queryY['categories'] = queryY.apply(get_category_type, axis=1)

queryY.columns = [column.split('.')[-1] for column in queryY.columns]
 
queryY.drop(queryY[queryY.categories == 0].index, inplace=True)

queryY.head()

Unnamed: 0,name,distance,lat,lng,id,categories
3,Z Bar & Grille,17,43.690673,-79.472764,5000f2b3e4b0ae2f16d5dad7,Bar
11,"Dr. Gayle Wagman, Dr. Michelle Crystal",33,43.691041,-79.472881,4f74c63be4b0bb6ea26d4822,Dentist's Office
4,2579 Eglinton Ave. West,41,43.690584,-79.473055,4e86249f82315e9e62ac7e7f,Building
0,Money Mart,54,43.690346,-79.472385,4b57464af964a520bd2e28e3,Bank
12,Oriental City Restaurant,56,43.690984,-79.471979,4bfd5768b68d0f4721a0e857,Chinese Restaurant


In [23]:
#As shown below the categories column presents several different results with low counts
queryY.groupby('categories')['name'].nunique()

categories
Auto Garage                                 1
Automotive Shop                             1
BBQ Joint                                   1
Bakery                                      1
Bank                                        4
Bar                                         2
Building                                    6
Caribbean Restaurant                        2
Casino                                      1
Chinese Restaurant                          1
Church                                      3
Coffee Shop                                 1
Convenience Store                           6
Cosmetics Shop                              1
Country Dance Club                          1
Courthouse                                  3
Coworking Space                             1
Dentist's Office                            2
Dessert Shop                                1
Discount Store                              1
Doctor's Office                             1
Elementary School      

In [24]:
#to improve analysis a group column will be created to provide more condensed data. Groups will be assigned with a crude text serch in tha categories columns
#if a category contains restaurant, bar, place or joint it will go to the Restaurant/Bar group, if it contains shop or store to the Store/Shop one, if none of those to Services

queryCT['group'] = pd.np.where(queryCT['categories'].str.contains("Restaurant"), 'Restaurant/Bar',
                    pd.np.where(queryCT['categories'].str.contains("Bar"), 'Restaurant/Bar',
                    pd.np.where(queryCT['categories'].str.contains("Joint"), 'Restaurant/Bar',
                    pd.np.where(queryCT['categories'].str.contains("Place"), 'Restaurant/Bar',
                    pd.np.where(queryCT['categories'].str.contains("Store"), 'Store/Shop',
                    pd.np.where(queryCT['categories'].str.contains("Shop"), 'Store/Shop',
                    "Service"))))))
queryDT['group'] = pd.np.where(queryDT['categories'].str.contains("Restaurant"), 'Restaurant/Bar',
                    pd.np.where(queryDT['categories'].str.contains("Bar"), 'Restaurant/Bar',
                    pd.np.where(queryDT['categories'].str.contains("Joint"), 'Restaurant/Bar',
                    pd.np.where(queryDT['categories'].str.contains("Place"), 'Restaurant/Bar',
                    pd.np.where(queryDT['categories'].str.contains("Store"), 'Store/Shop',
                    pd.np.where(queryDT['categories'].str.contains("Shop"), 'Store/Shop',
                    "Service"))))))
queryET['group'] = pd.np.where(queryET['categories'].str.contains("Restaurant"), 'Restaurant/Bar',
                    pd.np.where(queryET['categories'].str.contains("Bar"), 'Restaurant/Bar',
                    pd.np.where(queryET['categories'].str.contains("Joint"), 'Restaurant/Bar',
                    pd.np.where(queryET['categories'].str.contains("Place"), 'Restaurant/Bar',
                    pd.np.where(queryET['categories'].str.contains("Store"), 'Store/Shop',
                    pd.np.where(queryET['categories'].str.contains("Shop"), 'Store/Shop',
                    "Service"))))))
queryEY['group'] = pd.np.where(queryEY['categories'].str.contains("Restaurant"), 'Restaurant/Bar',
                    pd.np.where(queryEY['categories'].str.contains("Bar"), 'Restaurant/Bar',
                    pd.np.where(queryEY['categories'].str.contains("Joint"), 'Restaurant/Bar',
                    pd.np.where(queryEY['categories'].str.contains("Place"), 'Restaurant/Bar',
                    pd.np.where(queryEY['categories'].str.contains("Store"), 'Store/Shop',
                    pd.np.where(queryEY['categories'].str.contains("Shop"), 'Store/Shop',
                    "Service"))))))
queryE['group'] = pd.np.where(queryE['categories'].str.contains("Restaurant"), 'Restaurant/Bar',
                    pd.np.where(queryE['categories'].str.contains("Bar"), 'Restaurant/Bar',
                    pd.np.where(queryE['categories'].str.contains("Joint"), 'Restaurant/Bar',
                    pd.np.where(queryE['categories'].str.contains("Place"), 'Restaurant/Bar',
                    pd.np.where(queryE['categories'].str.contains("Store"), 'Store/Shop',
                    pd.np.where(queryE['categories'].str.contains("Shop"), 'Store/Shop',
                    "Service"))))))
queryM['group'] = pd.np.where(queryM['categories'].str.contains("Restaurant"), 'Restaurant/Bar',
                    pd.np.where(queryM['categories'].str.contains("Bar"), 'Restaurant/Bar',
                    pd.np.where(queryM['categories'].str.contains("Joint"), 'Restaurant/Bar',
                    pd.np.where(queryM['categories'].str.contains("Place"), 'Restaurant/Bar',
                    pd.np.where(queryM['categories'].str.contains("Store"), 'Store/Shop',
                    pd.np.where(queryM['categories'].str.contains("Shop"), 'Store/Shop',
                    "Service"))))))
queryNY['group'] = pd.np.where(queryNY['categories'].str.contains("Restaurant"), 'Restaurant/Bar',
                    pd.np.where(queryNY['categories'].str.contains("Bar"), 'Restaurant/Bar',
                    pd.np.where(queryNY['categories'].str.contains("Joint"), 'Restaurant/Bar',
                    pd.np.where(queryNY['categories'].str.contains("Place"), 'Restaurant/Bar',
                    pd.np.where(queryNY['categories'].str.contains("Store"), 'Store/Shop',
                    pd.np.where(queryNY['categories'].str.contains("Shop"), 'Store/Shop',
                    "Service"))))))
queryS['group'] = pd.np.where(queryS['categories'].str.contains("Restaurant"), 'Restaurant/Bar',
                    pd.np.where(queryS['categories'].str.contains("Bar"), 'Restaurant/Bar',
                    pd.np.where(queryS['categories'].str.contains("Joint"), 'Restaurant/Bar',
                    pd.np.where(queryS['categories'].str.contains("Place"), 'Restaurant/Bar',
                    pd.np.where(queryS['categories'].str.contains("Store"), 'Store/Shop',
                    pd.np.where(queryS['categories'].str.contains("Shop"), 'Store/Shop',
                    "Service"))))))
queryWT['group'] = pd.np.where(queryWT['categories'].str.contains("Restaurant"), 'Restaurant/Bar',
                    pd.np.where(queryWT['categories'].str.contains("Bar"), 'Restaurant/Bar',
                    pd.np.where(queryWT['categories'].str.contains("Joint"), 'Restaurant/Bar',
                    pd.np.where(queryWT['categories'].str.contains("Place"), 'Restaurant/Bar',
                    pd.np.where(queryWT['categories'].str.contains("Store"), 'Store/Shop',
                    pd.np.where(queryWT['categories'].str.contains("Shop"), 'Store/Shop',
                    "Service"))))))
queryY['group'] = pd.np.where(queryY['categories'].str.contains("Restaurant"), 'Restaurant/Bar',
                    pd.np.where(queryY['categories'].str.contains("Bar"), 'Restaurant/Bar',
                    pd.np.where(queryY['categories'].str.contains("Joint"), 'Restaurant/Bar',
                    pd.np.where(queryY['categories'].str.contains("Place"), 'Restaurant/Bar',
                    pd.np.where(queryY['categories'].str.contains("Store"), 'Store/Shop',
                    pd.np.where(queryY['categories'].str.contains("Shop"), 'Store/Shop',
                    "Service"))))))

In [25]:
#Now we can plot the different services on a map
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
#defined toronto geolocation as 43 and -79 based on the data
# create map

map_services = folium.Map(location=[43.7, -79.3], zoom_start=11)

#the folowing set of codes will be repeted for each borough

#For Central Toronto

#seting the color
color_list=[]
for group in queryCT['group']:
    if (group=='Restaurant/Bar'):
        i = 'green';
    elif (group=='Service'):
        i = 'blue';
    elif (group=='Store/Shop'):
        i = 'red';
    else:
        i='black';
    color_list.append(i)
#seting dummy numerical values for the groups
color_code=[]
for group in queryCT['group']:
    if (group=='Restaurant/Bar'):
        c = 2;
    elif (group=='Service'):
        c = 3;
    elif (group=='Store/Shop'):
        c = 4;
    else:
        c= 5;
    color_code.append(c)
#Placing the markers
for lat, lon, poi, group in zip(queryCT['lat'],queryCT['lng'], queryCT['name'],color_code):        
    label = folium.Popup(str(poi), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=3,
        popup=label,
        color=color_list[group],
        fill=True,
        fill_color=color_list[group],
        fill_opacity=0.7).add_to(map_services)    


In [26]:
#For Downtown Toronto

#seting the color
color_list=[]
for group in queryDT['group']:
    if (group=='Restaurant/Bar'):
        i = 'green';
    elif (group=='Service'):
        i = 'blue';
    elif (group=='Store/Shop'):
        i = 'red';
    else:
        i='black';
    color_list.append(i)
#seting dummy numerical values for the groups
color_code=[]
for group in queryDT['group']:
    if (group=='Restaurant/Bar'):
        c = 2;
    elif (group=='Service'):
        c = 3;
    elif (group=='Store/Shop'):
        c = 4;
    else:
        c= 5;
    color_code.append(c)
    
#Placing the markers
for lat, lon, poi, group in zip(queryDT['lat'],queryDT['lng'], queryDT['name'],color_code):        
    label = folium.Popup(str(poi), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=3,
        popup=label,
        color=color_list[group],
        fill=True,
        fill_color=color_list[group],
        fill_opacity=0.7).add_to(map_services)    


In [27]:
#For East Toronto

#seting the color
color_list=[]
for group in queryET['group']:
    if (group=='Restaurant/Bar'):
        i = 'green';
    elif (group=='Service'):
        i = 'blue';
    elif (group=='Store/Shop'):
        i = 'red';
    else:
        i='black';
    color_list.append(i)
#seting dummy numerical values for the groups
color_code=[]
for group in queryET['group']:
    if (group=='Restaurant/Bar'):
        c = 2;
    elif (group=='Service'):
        c = 3;
    elif (group=='Store/Shop'):
        c = 4;
    else:
        c= 5;
    color_code.append(c)
    
#Placing the markers
for lat, lon, poi, group in zip(queryET['lat'],queryET['lng'], queryET['name'],color_code):        
    label = folium.Popup(str(poi), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=3,
        popup=label,
        color=color_list[group],
        fill=True,
        fill_color=color_list[group],
        fill_opacity=0.7).add_to(map_services)    



In [28]:
#For East York

#seting the color
color_list=[]
for group in queryEY['group']:
    if (group=='Restaurant/Bar'):
        i = 'green';
    elif (group=='Service'):
        i = 'blue';
    elif (group=='Store/Shop'):
        i = 'red';
    else:
        i='black';
    color_list.append(i)
#seting dummy numerical values for the groups
color_code=[]
for group in queryEY['group']:
    if (group=='Restaurant/Bar'):
        c = 2;
    elif (group=='Service'):
        c = 3;
    elif (group=='Store/Shop'):
        c = 4;
    else:
        c= 5;
    color_code.append(c)
#Placing the markers
for lat, lon, poi, group in zip(queryEY['lat'],queryEY['lng'], queryEY['name'],color_code):        
    label = folium.Popup(str(poi), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=3,
        popup=label,
        color=color_list[group],
        fill=True,
        fill_color=color_list[group],
        fill_opacity=0.7).add_to(map_services)    



In [29]:
#For Etobicoke

#seting the color
color_list=[]
for group in queryE['group']:
    if (group=='Restaurant/Bar'):
        i = 'green';
    elif (group=='Service'):
        i = 'blue';
    elif (group=='Store/Shop'):
        i = 'red';
    else:
        i='black';
    color_list.append(i)
#seting dummy numerical values for the groups
color_code=[]
for group in queryE['group']:
    if (group=='Restaurant/Bar'):
        c = 2;
    elif (group=='Service'):
        c = 3;
    elif (group=='Store/Shop'):
        c = 4;
    else:
        c= 5;
    color_code.append(c)
#Placing the markers
for lat, lon, poi, group in zip(queryE['lat'],queryE['lng'], queryE['name'],color_code):        
    label = folium.Popup(str(poi), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=3,
        popup=label,
        color=color_list[group],
        fill=True,
        fill_color=color_list[group],
        fill_opacity=0.7).add_to(map_services)    



In [30]:
#For Mississauga

#seting the color
color_list=[]
for group in queryM['group']:
    if (group=='Restaurant/Bar'):
        i = 'green';
    elif (group=='Service'):
        i = 'blue';
    elif (group=='Store/Shop'):
        i = 'red';
    else:
        i='black';
    color_list.append(i)
#seting dummy numerical values for the groups
color_code=[]
for group in queryM['group']:
    if (group=='Restaurant/Bar'):
        c = 2;
    elif (group=='Service'):
        c = 3;
    elif (group=='Store/Shop'):
        c = 4;
    else:
        c= 5;
    color_code.append(c)
#Placing the markers
for lat, lon, poi, group in zip(queryM['lat'],queryM['lng'], queryM['name'],color_code):        
    label = folium.Popup(str(poi), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=3,
        popup=label,
        color=color_list[group],
        fill=True,
        fill_color=color_list[group],
        fill_opacity=0.7).add_to(map_services)    



In [31]:
#For North York

#seting the color
color_list=[]
for group in queryNY['group']:
    if (group=='Restaurant/Bar'):
        i = 'green';
    elif (group=='Service'):
        i = 'blue';
    elif (group=='Store/Shop'):
        i = 'red';
    else:
        i='black';
    color_list.append(i)
#seting dummy numerical values for the groups
color_code=[]
for group in queryNY['group']:
    if (group=='Restaurant/Bar'):
        c = 2;
    elif (group=='Service'):
        c = 3;
    elif (group=='Store/Shop'):
        c = 4;
    else:
        c= 5;
    color_code.append(c)
#Placing the markers
for lat, lon, poi, group in zip(queryNY['lat'],queryNY['lng'], queryNY['name'],color_code):        
    label = folium.Popup(str(poi), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=3,
        popup=label,
        color=color_list[group],
        fill=True,
        fill_color=color_list[group],
        fill_opacity=0.7).add_to(map_services)    



In [32]:
#For Scarborough

#seting the color
color_list=[]
for group in queryS['group']:
    if (group=='Restaurant/Bar'):
        i = 'green';
    elif (group=='Service'):
        i = 'blue';
    elif (group=='Store/Shop'):
        i = 'red';
    else:
        i='black';
    color_list.append(i)
#seting dummy numerical values for the groups
color_code=[]
for group in queryS['group']:
    if (group=='Restaurant/Bar'):
        c = 2;
    elif (group=='Service'):
        c = 3;
    elif (group=='Store/Shop'):
        c = 4;
    else:
        c= 5;
    color_code.append(c)
#Placing the markers
for lat, lon, poi, group in zip(queryS['lat'],queryS['lng'], queryS['name'],color_code):        
    label = folium.Popup(str(poi), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=3,
        popup=label,
        color=color_list[group],
        fill=True,
        fill_color=color_list[group],
        fill_opacity=0.7).add_to(map_services)    



In [33]:
#For West Toronto

#seting the color
color_list=[]
for group in queryWT['group']:
    if (group=='Restaurant/Bar'):
        i = 'green';
    elif (group=='Service'):
        i = 'blue';
    elif (group=='Store/Shop'):
        i = 'red';
    else:
        i='black';
    color_list.append(i)
#seting dummy numerical values for the groups
color_code=[]
for group in queryWT['group']:
    if (group=='Restaurant/Bar'):
        c = 2;
    elif (group=='Service'):
        c = 3;
    elif (group=='Store/Shop'):
        c = 4;
    else:
        c= 5;
    color_code.append(c)
#Placing the markers
for lat, lon, poi, group in zip(queryWT['lat'],queryWT['lng'], queryWT['name'],color_code):        
    label = folium.Popup(str(poi), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=3,
        popup=label,
        color=color_list[group],
        fill=True,
        fill_color=color_list[group],
        fill_opacity=0.7).add_to(map_services)    



In [34]:
#For York 

#seting the color
color_list=[]
for group in queryY['group']:
    if (group=='Restaurant/Bar'):
        i = 'green';
    elif (group=='Service'):
        i = 'blue';
    elif (group=='Store/Shop'):
        i = 'red';
    else:
        i='black';
    color_list.append(i)
#seting dummy numerical values for the groups
color_code=[]
for group in queryY['group']:
    if (group=='Restaurant/Bar'):
        c = 2;
    elif (group=='Service'):
        c = 3;
    elif (group=='Store/Shop'):
        c = 4;
    else:
        c= 5;
    color_code.append(c)
#Placing the markers
for lat, lon, poi, group in zip(queryY['lat'],queryY['lng'], queryY['name'],color_code):        
    label = folium.Popup(str(poi), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=3,
        popup=label,
        color=color_list[group],
        fill=True,
        fill_color=color_list[group],
        fill_opacity=0.7).add_to(map_services)    

In [35]:
#Printing the map
map_services

In [36]:
#As we can see the central part for each borough has distinct characteristics, as such we will use a logistic regression algorithm to try and predict what type of venue can be found in certain location
#A Multinomial Logistic Regression with a softmax function will be used
import pandas as pd
import numpy as np
import pylab
import scipy.optimize
import matplotlib.pyplot
from sklearn import preprocessing
from sklearn import linear_model
from sklearn import metrics
from sklearn.model_selection import train_test_split
%matplotlib inline
#For Central Toronto
#Setting dummy numerical values for the groups
groups_dummy=[]
for group in queryCT['group']:
    if (group=='Restaurant/Bar'):
        c = 1;
    elif (group=='Service'):
        c = 2;
    elif (group=='Store/Shop'):
        c = 3;
    else:
        c= 4;
    groups_dummy.append(c)
#Normalizing the data
x=queryCT[['lat','lng']].values.astype(float)
X=preprocessing.StandardScaler().fit(x).transform(x)
#Making a train/test split on the data
train_x, test_x, train_y, test_y = train_test_split(X,groups_dummy, train_size=0.7,random_state=3)
#Training a multinomial logistic regression model
multCT=linear_model.LogisticRegression(multi_class='multinomial', solver='newton-cg').fit(train_x, train_y)



In [37]:
#Downtown Toronto
#Setting dummy numerical values for the groups
groups_dummy=[]
for group in queryDT['group']:
    if (group=='Restaurant/Bar'):
        c = 1;
    elif (group=='Service'):
        c = 2;
    elif (group=='Store/Shop'):
        c = 3;
    else:
        c= 4;
    groups_dummy.append(c)
#Normalizing the data
x=queryDT[['lat','lng']].values.astype(float)
X=preprocessing.StandardScaler().fit(x).transform(x)
#Making a train/test split on the data
train_x, test_x, train_y, test_y = train_test_split(X,groups_dummy, train_size=0.7,random_state=3)
#Training a multinomial logistic regression model
multDT=linear_model.LogisticRegression(multi_class='multinomial', solver='newton-cg').fit(train_x, train_y)



In [38]:
#East Toronto
#Setting dummy numerical values for the groups
groups_dummy=[]
for group in queryET['group']:
    if (group=='Restaurant/Bar'):
        c = 1;
    elif (group=='Service'):
        c = 2;
    elif (group=='Store/Shop'):
        c = 3;
    else:
        c= 4;
    groups_dummy.append(c)
#Normalizing the data
x=queryET[['lat','lng']].values.astype(float)
X=preprocessing.StandardScaler().fit(x).transform(x)
#Making a train/test split on the data
train_x, test_x, train_y, test_y = train_test_split(X,groups_dummy, train_size=0.7,random_state=3)
#Training a multinomial logistic regression model
multET=linear_model.LogisticRegression(multi_class='multinomial', solver='newton-cg').fit(train_x, train_y)



In [39]:
#East York
#Setting dummy numerical values for the groups
groups_dummy=[]
for group in queryEY['group']:
    if (group=='Restaurant/Bar'):
        c = 1;
    elif (group=='Service'):
        c = 2;
    elif (group=='Store/Shop'):
        c = 3;
    else:
        c= 4;
    groups_dummy.append(c)
#Normalizing the data
x=queryEY[['lat','lng']].values.astype(float)
X=preprocessing.StandardScaler().fit(x).transform(x)
#Making a train/test split on the data
train_x, test_x, train_y, test_y = train_test_split(X,groups_dummy, train_size=0.7,random_state=3)
#Training a multinomial logistic regression model
multEY=linear_model.LogisticRegression(multi_class='multinomial', solver='newton-cg').fit(train_x, train_y)



In [40]:
#Etobicoke
#Setting dummy numerical values for the groups
groups_dummy=[]
for group in queryE['group']:
    if (group=='Restaurant/Bar'):
        c = 1;
    elif (group=='Service'):
        c = 2;
    elif (group=='Store/Shop'):
        c = 3;
    else:
        c= 4;
    groups_dummy.append(c)
#Normalizing the data
x=queryE[['lat','lng']].values.astype(float)
X=preprocessing.StandardScaler().fit(x).transform(x)
#Making a train/test split on the data
train_x, test_x, train_y, test_y = train_test_split(X,groups_dummy, train_size=0.7,random_state=3)
#Training a multinomial logistic regression model
multE=linear_model.LogisticRegression(multi_class='multinomial', solver='newton-cg').fit(train_x, train_y)



In [41]:
#Mississauga
#Setting dummy numerical values for the groups
groups_dummy=[]
for group in queryM['group']:
    if (group=='Restaurant/Bar'):
        c = 1;
    elif (group=='Service'):
        c = 2;
    elif (group=='Store/Shop'):
        c = 3;
    else:
        c= 4;
    groups_dummy.append(c)
#Normalizing the data
x=queryM[['lat','lng']].values.astype(float)
X=preprocessing.StandardScaler().fit(x).transform(x)
#Making a train/test split on the data
train_x, test_x, train_y, test_y = train_test_split(X,groups_dummy, train_size=0.7,random_state=3)
#Training a multinomial logistic regression model
multM=linear_model.LogisticRegression(multi_class='multinomial', solver='newton-cg').fit(train_x, train_y)



In [42]:
#North York
#Setting dummy numerical values for the groups
groups_dummy=[]
for group in queryNY['group']:
    if (group=='Restaurant/Bar'):
        c = 1;
    elif (group=='Service'):
        c = 2;
    elif (group=='Store/Shop'):
        c = 3;
    else:
        c= 4;
    groups_dummy.append(c)
#Normalizing the data
x=queryNY[['lat','lng']].values.astype(float)
X=preprocessing.StandardScaler().fit(x).transform(x)
#Making a train/test split on the data
train_x, test_x, train_y, test_y = train_test_split(X,groups_dummy, train_size=0.7,random_state=3)
#Training a multinomial logistic regression model
multNY=linear_model.LogisticRegression(multi_class='multinomial', solver='newton-cg').fit(train_x, train_y)



In [43]:
#Scarborough
#Setting dummy numerical values for the groups
groups_dummy=[]
for group in queryS['group']:
    if (group=='Restaurant/Bar'):
        c = 1;
    elif (group=='Service'):
        c = 2;
    elif (group=='Store/Shop'):
        c = 3;
    else:
        c= 4;
    groups_dummy.append(c)
#Normalizing the data
x=queryS[['lat','lng']].values.astype(float)
X=preprocessing.StandardScaler().fit(x).transform(x)
#Making a train/test split on the data
train_x, test_x, train_y, test_y = train_test_split(X,groups_dummy, train_size=0.7,random_state=3)
#Training a multinomial logistic regression model
multS=linear_model.LogisticRegression(multi_class='multinomial', solver='newton-cg').fit(train_x, train_y)



In [44]:
#West Toronto
#Setting dummy numerical values for the groups
groups_dummy=[]
for group in queryWT['group']:
    if (group=='Restaurant/Bar'):
        c = 1;
    elif (group=='Service'):
        c = 2;
    elif (group=='Store/Shop'):
        c = 3;
    else:
        c= 4;
    groups_dummy.append(c)
#Normalizing the data
x=queryWT[['lat','lng']].values.astype(float)
X=preprocessing.StandardScaler().fit(x).transform(x)
#Making a train/test split on the data
train_x, test_x, train_y, test_y = train_test_split(X,groups_dummy, train_size=0.7,random_state=3)
#Training a multinomial logistic regression model
multWT=linear_model.LogisticRegression(multi_class='multinomial', solver='newton-cg').fit(train_x, train_y)



In [45]:
#York#Setting dummy numerical values for the groups
groups_dummy=[]
for group in queryY['group']:
    if (group=='Restaurant/Bar'):
        c = 1;
    elif (group=='Service'):
        c = 2;
    elif (group=='Store/Shop'):
        c = 3;
    else:
        c= 4;
    groups_dummy.append(c)
#Normalizing the data
x=queryY[['lat','lng']].values.astype(float)
X=preprocessing.StandardScaler().fit(x).transform(x)
#Making a train/test split on the data
train_x, test_x, train_y, test_y = train_test_split(X,groups_dummy, train_size=0.7,random_state=3)
#Training a multinomial logistic regression model
multY=linear_model.LogisticRegression(multi_class='multinomial', solver='newton-cg').fit(train_x, train_y)



In [46]:
#For all boroughs
queryB=(((((((((queryCT.append(queryDT)).append(queryET)).append(queryEY)).append(queryE)).append(queryM)).append(queryNY)).append(queryS)).append(queryWT)).append(queryY))
#Setting dummy numerical values for the groups
groups_dummy=[]
for group in queryB['group']:
    if (group=='Restaurant/Bar'):
        c = 1;
    elif (group=='Service'):
        c = 2;
    elif (group=='Store/Shop'):
        c = 3;
    else:
        c= 4;
    groups_dummy.append(c)
#Normalizing the data
x=queryB[['lat','lng']].values.astype(float)
X=preprocessing.StandardScaler().fit(x).transform(x)
#Making a train/test split on the data
train_x, test_x, train_y, test_y = train_test_split(X,groups_dummy, train_size=0.7,random_state=3)
#Training a multinomial logistic regression model
multB=linear_model.LogisticRegression(multi_class='multinomial', solver='newton-cg').fit(train_x, train_y)



In [47]:
#Checking the accuracy for the training and testing sets for all boroughs
trainCT=metrics.accuracy_score(train_y, multCT.predict(train_x))
testCT=metrics.accuracy_score(test_y, multCT.predict(test_x))
trainDT=metrics.accuracy_score(train_y, multDT.predict(train_x))
testDT=metrics.accuracy_score(test_y, multDT.predict(test_x))
trainET=metrics.accuracy_score(train_y, multET.predict(train_x))
testET=metrics.accuracy_score(test_y, multET.predict(test_x))
trainEY=metrics.accuracy_score(train_y, multEY.predict(train_x))
testEY=metrics.accuracy_score(test_y, multEY.predict(test_x))
trainE=metrics.accuracy_score(train_y, multE.predict(train_x))
testE=metrics.accuracy_score(test_y, multE.predict(test_x))
trainM=metrics.accuracy_score(train_y, multM.predict(train_x))
testM=metrics.accuracy_score(test_y, multM.predict(test_x))
trainNY=metrics.accuracy_score(train_y, multNY.predict(train_x))
testNY=metrics.accuracy_score(test_y, multNY.predict(test_x))
trainS=metrics.accuracy_score(train_y, multS.predict(train_x))
testS=metrics.accuracy_score(test_y, multS.predict(test_x))
trainWT=metrics.accuracy_score(train_y, multWT.predict(train_x))
testWT=metrics.accuracy_score(test_y, multWT.predict(test_x))
trainY=metrics.accuracy_score(train_y, multY.predict(train_x))
testY=metrics.accuracy_score(test_y, multY.predict(test_x))
trainB=metrics.accuracy_score(train_y, multB.predict(train_x))
testB=metrics.accuracy_score(test_y, multB.predict(test_x))
data=[[trainCT,testCT],
     [trainDT,testDT],
     [trainET,testET],
     [trainEY,testEY],
     [trainE,testE],
     [trainM,testM],
     [trainNY,testNY],
     [trainS,testS],
     [trainWT,testWT],
     [trainY,testY],
     [trainB,testB]]
Accuracy=pd.DataFrame(data, index=['Central Toronto', 'Downtown Toronto','East Toronto','East York','Etobicoke','Mississauga','North York',
                                  'Scarborough','West Toronto','York','All Boroughs'],
                      columns=['Train Accuracy', 'Test Accuracy'])
Accuracy=Accuracy.round(decimals=4)
Accuracy

Unnamed: 0,Train Accuracy,Test Accuracy
Central Toronto,0.4747,0.4695
Downtown Toronto,0.6436,0.6703
East Toronto,0.4393,0.4731
East York,0.5929,0.5842
Etobicoke,0.6436,0.6703
Mississauga,0.5914,0.5842
North York,0.6436,0.6703
Scarborough,0.6436,0.6703
West Toronto,0.6436,0.6703
York,0.6436,0.6703
