*In this notebook, I attempted to explore and cluster neighborhoods in Toronto, using postal code data available on Wikipedia. I use Foursquare API calls to conduct this task.*

# **Importing Libraries:**


In [0]:
import numpy as np
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium 
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim

# **Extracting the data:**

In [0]:
req=requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup=BeautifulSoup(req.content,'lxml')
table=soup.find_all('table')[0]
df=pd.read_html(str(table))
neighborhood=pd.DataFrame(df[0])

In [38]:
neighborhood.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


# **Processing the cells**

In [39]:
nb = neighborhood[neighborhood.Borough != 'Not assigned'].reset_index(drop=True)
nb = nb.groupby(["Postal code", "Borough"], as_index=False).agg(lambda x: ", ".join(x))

for index, row in nb.iterrows():
    if row["Neighborhood"] == "Not assigned":
        row["Neighborhood"] = row["Borough"]
nb.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1B,Scarborough,Malvern / Rouge
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek
2,M1E,Scarborough,Guildwood / Morningside / West Hill
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


# **Shape of Dataframe**

In [0]:
nb.shape

(103, 3)

# **Getting the Coordinates**

In [40]:

coordinates = pd.read_csv('https://cocl.us/Geospatial_data')
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [41]:
coordinates.rename(columns={"Postal Code":"Postal code"},inplace=True)
coordinates

Unnamed: 0,Postal code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [43]:
nb = nb.merge(coordinates, on="Postal code", how="left")
nb.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,Malvern / Rouge,43.806686,-79.194353
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,43.784535,-79.160497
2,M1E,Scarborough,Guildwood / Morningside / West Hill,43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [48]:
address = 'Toronto'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(nb['Latitude'], nb['Longitude'], nb['Borough'], nb['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [53]:
boroughnames = list(nb.Borough.unique())

nb_toronto = []

for x in boroughnames:
    if "toronto" in x.lower():
        nb_toronto.append(x)
        
nb_toronto

['East Toronto', 'Central Toronto', 'Downtown Toronto', 'West Toronto']

In [55]:
nb = nb[nb['Borough'].isin(nb_toronto)].reset_index(drop=True)
print(nb.shape)
nb.head()

(39, 5)


Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,The Danforth West / Riverdale,43.679557,-79.352188
2,M4L,East Toronto,India Bazaar / The Beaches West,43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [57]:
address = 'Toronto'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(nb['Latitude'], nb['Longitude'], nb['Borough'], nb['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto


The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [59]:
CLIENT_ID = 'Q2Z321Z4DNADEJQRR0OUR3N00WCXAOFKHMK4TWXXX1UUNG0T' 
CLIENT_SECRET = '30XWKEDSORESQGJIOIZI1JNN45YWK4IN0FTK1BMZLYTJMQIF'
VERSION = '20180605'

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: Q2Z321Z4DNADEJQRR0OUR3N00WCXAOFKHMK4TWXXX1UUNG0T
CLIENT_SECRET:30XWKEDSORESQGJIOIZI1JNN45YWK4IN0FTK1BMZLYTJMQIF
