# Data Analysis of Neighbourhoods in Toronto

### Imports

In [30]:
!pip install pgeocode

import os

import folium
import geopy.geocoders
import numpy as np
import pandas as pd
import pgeocode
import requests
from bs4 import BeautifulSoup



### Web Scraping

In [31]:
# Download the Wikipedia page that lists the Canadian postal codes starting with "M".
html_doc = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
html_doc.raise_for_status()  # fail loudly instead of parsing an HTTP error page

soup = BeautifulSoup(html_doc.content, 'html.parser')
head_columns = ['PostalCode','Borough','Neighborhood']

canada_table = soup.find('table', attrs={'class':'wikitable sortable'})
if canada_table is None:
  raise ValueError("Could not find the 'wikitable sortable' table on the page")
# Search the table itself rather than `.tbody`, so the lookup does not
# depend on a <tbody> element being present in the markup.
canada_table_data = canada_table.find_all('tr')

# Collect one record per table row; the first three <td> cells map onto head_columns.
lst = []
for tr in canada_table_data:
  lst_temp = tr.find_all('td')
  if len(lst_temp) < len(head_columns):
    # Rows without enough <td> cells (e.g. a header row built from <th>) carry no data.
    continue
  lst.append({
    column: cell.text.replace('\n','')
    for column, cell in zip(head_columns, lst_temp)
  })

# Build the frame once from the collected records instead of appending
# row by row (DataFrame.append is quadratic and was removed in pandas 2.0).
df = pd.DataFrame(lst, columns=head_columns)

# Drop rows whose borough is 'Not assigned' and renumber the rows from 0.
df = df[df['Borough']!='Not assigned'].reset_index(drop=True)
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [32]:
# Number of distinct postal codes (missing values, if any, count as a value).
df['PostalCode'].nunique(dropna=False)

103

There are 103 unique postal codes, one for each of the 103 rows in the table, so no postal code is repeated.

In [33]:
# Rows whose neighborhood is still the 'Not assigned' placeholder.
df.loc[df['Neighborhood'].eq('Not assigned')]

Unnamed: 0,PostalCode,Borough,Neighborhood


The filter returns no rows, which means that every neighborhood has an assigned value.

In [34]:
# Dimensions of df as a (rows, columns) tuple.
df.shape

(103, 3)

### Adding coordinates (Latitude & Longitude)

In [35]:
# Offline geocoder for Canadian postal codes.
nomi = pgeocode.Nominatim('ca')

# Look up every postal code in one batched call. Passing a list returns a
# DataFrame with one row per queried code, in the order the codes were given.
coords = nomi.query_postal_code(df['PostalCode'].tolist())
assert len(coords) == len(df), 'pgeocode returned a different number of rows than postal codes queried'

# Attach the coordinates by position (.to_numpy() drops the lookup frame's
# index), so the result does not depend on df carrying a 0..n-1 index.
# assign() builds a new frame, which makes the cell safe to re-run.
df = df.assign(
  Latitude=coords['latitude'].to_numpy(),
  Longitude=coords['longitude'].to_numpy(),
)
df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7545,-79.3300
1,M4A,North York,Victoria Village,43.7276,-79.3148
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6555,-79.3626
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.7223,-79.4504
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6641,-79.3889
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.6518,-79.5076
99,M4Y,Downtown Toronto,Church and Wellesley,43.6656,-79.3830
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.7804,-79.2505
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.6325,-79.4939


### Cleaning & Visualizing Data

In [0]:
# Keep only the rows that belong to one of these four boroughs.
toronto_boroughs = ['Downtown Toronto', 'East Toronto', 'West Toronto', 'Central Toronto']
df = df[df['Borough'].isin(toronto_boroughs)]

Now we only have data for the boroughs with "Toronto" in their name.

In [37]:
# Renumber the remaining rows from 0 and discard the old index labels.
df = df.reset_index(drop=True)
df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6555,-79.3626
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6641,-79.3889
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.6572,-79.3783
3,M5C,Downtown Toronto,St. James Town,43.6513,-79.3756
4,M4E,East Toronto,The Beaches,43.6784,-79.2941
5,M5E,Downtown Toronto,Berczy Park,43.6456,-79.3754
6,M5G,Downtown Toronto,Central Bay Street,43.6564,-79.386
7,M6G,Downtown Toronto,Christie,43.6683,-79.4205
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.6496,-79.3833
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.6655,-79.4378


In [38]:
# Resolve the city name to coordinates; they are used to centre the map.
address = 'Toronto'
geolocator = geopy.geocoders.Nominatim(user_agent="ca_explorer")
location = geolocator.geocode(address)
if location is None:
  # geocode() returns None when the service finds no match; stop here with a
  # clear message instead of an AttributeError on the attribute access below.
  raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [39]:
# Base map centred on the coordinates geocoded for Toronto.
tor_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# Appearance shared by every neighbourhood marker.
marker_style = dict(
  radius=5,
  color='blue',
  fill=True,
  fill_color='#3186cc',
  fill_opacity=0.7,
  parse_html=False,
)

# One circle marker per row of df, with the neighbourhood name as its popup.
for lat, lng, hood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
  marker = folium.CircleMarker(
      [lat, lng],
      popup=folium.Popup(hood, parse_html=True),
      **marker_style
  )
  marker.add_to(tor_map)

tor_map

### Cluster Analysis

In [41]:
# Foursquare credentials are read from the environment so that no secret is
# stored in the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET
# before starting the kernel; a missing variable raises KeyError naming it.
CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID'] # your Foursquare ID
CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET'] # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

# Confirm the credentials were loaded without writing them into the cell
# output: only a short prefix of the ID is shown and the secret is hidden.
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID[:4] + '...')
print('CLIENT_SECRET:' + '<hidden>')

Your credentails:
CLIENT_ID: [redacted]
CLIENT_SECRET:[redacted]
