## Importing the table from wikipedia link as pandas dataframe

In [3]:
import pandas as pd
import requests
request = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
dfs = pd.read_html(request.content)
df = dfs[0]

## Checking the imported dataframe

In [4]:
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


## Deleting all rows with "Not assigned" boroughs

In [5]:
df = df[df.Borough!="Not assigned"]
df = df.reset_index(drop=True)
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


## Making "Not assigned" neighborhoods same as their boroughs

In [6]:
for index, row in df.iterrows():
    if row["Neighborhood"] == "Not assigned":
        row["Neighborhood"] = row["Borough"]

## Final view of the dataframe after formatting

In [7]:
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


## Getting the co-ordinates from CSV file

In [8]:
df_1 = pd.read_csv("Geospatial_Coordinates.csv")
df_1

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


## Sorting the original dataframe

In [9]:
df = df.sort_values('Postal Code')
df = df.reset_index(drop=True)

## Dropping the column "Postal Code" from Co-ordinates dataframe


In [10]:
df_1 = df_1.drop(columns = ["Postal Code"])
df_1

Unnamed: 0,Latitude,Longitude
0,43.806686,-79.194353
1,43.784535,-79.160497
2,43.763573,-79.188711
3,43.770992,-79.216917
4,43.773136,-79.239476
...,...,...
98,43.706876,-79.518188
99,43.696319,-79.532242
100,43.688905,-79.554724
101,43.739416,-79.588437


## Joining the two dataframes

In [11]:
df_2 = pd.concat([df,df_1],axis=1)
df_2

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


## Creating a map of all neighborhoods in the dataframe

In [12]:
import folium
map_toronto = folium.Map(location=[43.6532, -79.3832], zoom_start=10.5)

for lat, lng, borough, neighborhood in zip(df_2['Latitude'], df_2['Longitude'], df_2['Borough'], df_2['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

## Importing other relevant and useful libraries for analysis

In [13]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import json
!pip install geopy
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.8/bin/python3.8 -m pip install --upgrade pip' command.[0m


## Checking how many boroughs and neighborhoods we will work with

In [14]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df_2['Borough'].unique()),
        df_2.shape[0]
    )
)

The dataframe has 10 boroughs and 103 neighborhoods.


## Establishing Foursquare API details

In [69]:
CLIENT_ID = '#'
CLIENT_SECRET = '#'
VERSION = '20180604'
LIMIT = 30

## Establishing search query

In [16]:
search_query = 'Indian Food'
radius = 40000
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, 43.6532, -79.3832, VERSION, search_query, radius, LIMIT)

## Creating clean pandas dataframe

In [65]:
results = requests.get(url).json()

In [66]:
venues = results['response']['venues']
dataframe = json_normalize(venues)
dataframe.head()

  dataframe = json_normalize(venues)


Unnamed: 0,id,name,categories,referralId,hasPerk,location.address,location.lat,location.lng,location.labeledLatLngs,location.distance,location.cc,location.city,location.state,location.country,location.formattedAddress,location.crossStreet,location.postalCode,location.neighborhood
0,58a5c2b94f417a549f23f181,Leela Indian Food Bar,"[{'id': '4bf58dd8d48988d10f941735', 'name': 'I...",v-1592639852,False,3108 Dundas St. West,43.665326,-79.473306,"[{'label': 'display', 'lat': 43.66532602974136...",7381,CA,Toronto,ON,Canada,"[3108 Dundas St. West, Toronto ON, Canada]",,,
1,4f8ee746e4b0603e8911247a,Indian Food,"[{'id': '4bf58dd8d48988d10f941735', 'name': 'I...",v-1592639852,False,,43.837654,-79.503838,"[{'label': 'display', 'lat': 43.83765429927359...",22709,CA,,,Canada,[Canada],,,
2,4d9a4ddde0a1721e89eab5bb,Peacock Express Indian Food,"[{'id': '4bf58dd8d48988d10f941735', 'name': 'I...",v-1592639852,False,5423 Yonge St.,43.776057,-79.415169,"[{'label': 'display', 'lat': 43.776057, 'lng':...",13916,CA,Toronto,ON,Canada,"[5423 Yonge St., Toronto ON, Canada]",,,
3,5156f7aae4b0e3e0a5c7819f,Markham Danforth West Indian Food Market,"[{'id': '4bf58dd8d48988d118951735', 'name': 'G...",v-1592639852,False,,43.767399,-79.228172,"[{'label': 'display', 'lat': 43.76739883422851...",17810,CA,,,Canada,[Canada],,,
4,4b9d3663f964a520f79936e3,Charlie's West Indian Food Mart,"[{'id': '4bf58dd8d48988d144941735', 'name': 'C...",v-1592639852,False,3057 Hurontario Street,43.581498,-79.617599,"[{'label': 'display', 'lat': 43.58149761049892...",20507,CA,Mississauga,ON,Canada,[3057 Hurontario Street (btw Dundas St E & Kir...,btw Dundas St E & Kirwin Ave,L5A 2G9,


In [67]:
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]

dataframe_filtered.head()

Unnamed: 0,name,categories,address,lat,lng,labeledLatLngs,distance,cc,city,state,country,formattedAddress,crossStreet,postalCode,neighborhood,id
0,Leela Indian Food Bar,Indian Restaurant,3108 Dundas St. West,43.665326,-79.473306,"[{'label': 'display', 'lat': 43.66532602974136...",7381,CA,Toronto,ON,Canada,"[3108 Dundas St. West, Toronto ON, Canada]",,,,58a5c2b94f417a549f23f181
1,Indian Food,Indian Restaurant,,43.837654,-79.503838,"[{'label': 'display', 'lat': 43.83765429927359...",22709,CA,,,Canada,[Canada],,,,4f8ee746e4b0603e8911247a
2,Peacock Express Indian Food,Indian Restaurant,5423 Yonge St.,43.776057,-79.415169,"[{'label': 'display', 'lat': 43.776057, 'lng':...",13916,CA,Toronto,ON,Canada,"[5423 Yonge St., Toronto ON, Canada]",,,,4d9a4ddde0a1721e89eab5bb
3,Markham Danforth West Indian Food Market,Grocery Store,,43.767399,-79.228172,"[{'label': 'display', 'lat': 43.76739883422851...",17810,CA,,,Canada,[Canada],,,,5156f7aae4b0e3e0a5c7819f
4,Charlie's West Indian Food Mart,Caribbean Restaurant,3057 Hurontario Street,43.581498,-79.617599,"[{'label': 'display', 'lat': 43.58149761049892...",20507,CA,Mississauga,ON,Canada,[3057 Hurontario Street (btw Dundas St E & Kir...,btw Dundas St E & Kirwin Ave,L5A 2G9,,4b9d3663f964a520f79936e3


In [68]:
dataframe_filtered = dataframe_filtered[dataframe_filtered['city'] == 'Toronto'].reset_index(drop=True)
dataframe_filtered.head()

Unnamed: 0,name,categories,address,lat,lng,labeledLatLngs,distance,cc,city,state,country,formattedAddress,crossStreet,postalCode,neighborhood,id
0,Leela Indian Food Bar,Indian Restaurant,3108 Dundas St. West,43.665326,-79.473306,"[{'label': 'display', 'lat': 43.66532602974136...",7381,CA,Toronto,ON,Canada,"[3108 Dundas St. West, Toronto ON, Canada]",,,,58a5c2b94f417a549f23f181
1,Peacock Express Indian Food,Indian Restaurant,5423 Yonge St.,43.776057,-79.415169,"[{'label': 'display', 'lat': 43.776057, 'lng':...",13916,CA,Toronto,ON,Canada,"[5423 Yonge St., Toronto ON, Canada]",,,,4d9a4ddde0a1721e89eab5bb
2,Rubini West Indian Food Market,Grocery Store,31 Tapscott Rd. Unit B2,43.807465,-79.22121,"[{'label': 'display', 'lat': 43.80746450749075...",21556,CA,Toronto,ON,Canada,"[31 Tapscott Rd. Unit B2, Toronto ON, Canada]",,,,4e35b8a8ae60d86c3ac2e8d4
3,Everest Indian Foods,Grocery Store,,43.640083,-79.473095,"[{'label': 'display', 'lat': 43.640083, 'lng':...",7386,CA,Toronto,ON,Canada,"[Toronto ON, Canada]",,,,4daf7eccf7b149e03f35f3d1
4,Indian Flavour,Indian Restaurant,123 Dundas St W,43.655649,-79.384119,"[{'label': 'display', 'lat': 43.65564910619165...",282,CA,Toronto,ON,Canada,"[123 Dundas St W (btw Elizabeth & Bay), Toront...",btw Elizabeth & Bay,,,4b2a634af964a52020a824e3


## Displaying all restaurants on a map

In [25]:
restaurants_map = folium.Map(location=[43.6532, -79.3832], zoom_start=11.4)

for lat, lng, label in zip(dataframe_filtered.lat, dataframe_filtered.lng, dataframe_filtered.categories):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='yellow',
        popup=label,
        fill = True,
        fill_color='red',
        fill_opacity = 1.0
    ).add_to(restaurants_map)

restaurants_map

We can clearly see that all Indian restaurants shown on the map above are part of the follwoing neighbourhoods:
1. North York
2. West Toronto
3. Downtown Toronto

Therefore, we will be exploring these neighbourhoods in detail

## Clustering the restaurants by nieghbourhood

In [27]:
from sklearn.preprocessing import StandardScaler
X = dataframe_filtered.values[:,3:5]
X = np.nan_to_num(X)
Clus_dataSet = StandardScaler().fit_transform(X)
Clus_dataSet

array([[-0.0629892 , -1.82517762],
       [ 2.51050099, -0.61966236],
       [ 3.24044052,  3.40222034],
       [-0.64966056, -1.8208036 ],
       [-0.28788986,  0.02417599],
       [-0.41801174,  0.09585832],
       [-0.2477082 ,  0.08220869],
       [-0.33955155,  0.21823326],
       [-0.30018754, -0.02818875],
       [-0.24912031, -0.36723333],
       [-0.22170217, -0.01554592],
       [-0.38272342,  0.0459523 ],
       [-0.44256845,  0.28596409],
       [-0.36935177,  0.08641195],
       [-0.27581299,  0.04009733],
       [-0.27387518,  0.0909453 ],
       [-0.38621627,  0.30078765],
       [-0.43749987, -0.03382279],
       [-0.40607243,  0.03757917]])

In [28]:
clusterNum = 3
k_means = KMeans(init = "k-means++", n_clusters = clusterNum, n_init = 12)
k_means.fit(X)
labels = k_means.labels_
print(labels)

[2 2 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [29]:
dataframe_filtered["Clus_km"] = labels
dataframe_filtered.head(5)

Unnamed: 0,name,categories,address,lat,lng,labeledLatLngs,distance,cc,city,state,country,formattedAddress,crossStreet,postalCode,neighborhood,id,Clus_km
0,Leela Indian Food Bar,Indian Restaurant,3108 Dundas St. West,43.665326,-79.473306,"[{'label': 'display', 'lat': 43.66532602974136...",7381,CA,Toronto,ON,Canada,"[3108 Dundas St. West, Toronto ON, Canada]",,,,58a5c2b94f417a549f23f181,2
1,Peacock Express Indian Food,Indian Restaurant,5423 Yonge St.,43.776057,-79.415169,"[{'label': 'display', 'lat': 43.776057, 'lng':...",13916,CA,Toronto,ON,Canada,"[5423 Yonge St., Toronto ON, Canada]",,,,4d9a4ddde0a1721e89eab5bb,2
2,Rubini West Indian Food Market,Grocery Store,31 Tapscott Rd. Unit B2,43.807465,-79.22121,"[{'label': 'display', 'lat': 43.80746450749075...",21556,CA,Toronto,ON,Canada,"[31 Tapscott Rd. Unit B2, Toronto ON, Canada]",,,,4e35b8a8ae60d86c3ac2e8d4,1
3,Everest Indian Foods,Grocery Store,,43.640083,-79.473095,"[{'label': 'display', 'lat': 43.640083, 'lng':...",7386,CA,Toronto,ON,Canada,"[Toronto ON, Canada]",,,,4daf7eccf7b149e03f35f3d1,2
4,Indian Flavour,Indian Restaurant,123 Dundas St W,43.655649,-79.384119,"[{'label': 'display', 'lat': 43.65564910619165...",282,CA,Toronto,ON,Canada,"[123 Dundas St W (btw Elizabeth & Bay), Toront...",btw Elizabeth & Bay,,,4b2a634af964a52020a824e3,0


## Displaying the map with the restaurants with their cluster numbers as popups

In [35]:
restaurants_map = folium.Map(location=[43.6532, -79.3832], zoom_start=11.4)

for lat, lng, cluster in zip(dataframe_filtered.lat, dataframe_filtered.lng, dataframe_filtered.Clus_km):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='yellow',
        popup=cluster,
        fill = True,
        fill_color='red',
        fill_opacity = 1.0
    ).add_to(restaurants_map)

restaurants_map

When clicked upon the above map, you can see teh cluster number that they belong to:

1. Cluster 0: Dowtown Toronto
2. Cluster 1: North York
3. Cluster 2: West Toronto

## Finding number of restaurants in each cluster

In [44]:
dataframe_filtered.groupby("Clus_km")["Clus_km"].count()

Clus_km
0    15
1     1
2     3
Name: Clus_km, dtype: int64

## Findings

The best option would be to locate in Cluster 2 (West Toronto) where there are quite some Indian restaurants but not as many as Cluster 0 (Toronto) therefore there would not be a lot of competition and the restaurant owner would be able to get many customers - a goal set out in the beginning itself. Furthermore, since there are 3 Indian Restaurants in Cluster 2 (West Toronto), the restaurant owner could predict that the people in West Toronto like Indian food and would in fact visit his restaurant.