# Peer-graded Assignment: Segmenting and Clustering Neighborhoods in Toronto

### Pre-processing

In [2]:
# importing libraries
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests

In [3]:
# getting data from internet
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
raw_wikipedia_page= requests.get(url).text

# using beautiful soup to parse the HTML/XML codes.
soup = BeautifulSoup(raw_wikipedia_page,'xml')
#print(soup.prettify())

### Processing-part-1: extracting raw table (from webpage)

In [4]:
# extracting the raw table inside that webpage
table = soup.find('table')

Postcode      = []
Borough       = []
Neighbourhood = []

# print(table)

# extracting a clean form of the table
for tr_cell in table.find_all('tr'):
    
    counter = 1
    Postcode_var      = -1
    Borough_var       = -1
    Neighbourhood_var = -1
    
    for td_cell in tr_cell.find_all('td'):
        if counter == 1: 
            Postcode_var = td_cell.text
        if counter == 2: 
            Borough_var = td_cell.text
            tag_a_Borough = td_cell.find('a')
            
        if counter == 3: 
            Neighbourhood_var = str(td_cell.text).strip()
            tag_a_Neighbourhood = td_cell.find('a')
            
        counter +=1
        
    if (Postcode_var == 'Not assigned' or Borough_var == 'Not assigned' or Neighbourhood_var == 'Not assigned'): 
        continue
    try:
        if ((tag_a_Borough is None) or (tag_a_Neighbourhood is None)):
            continue
    except:
        pass
    if(Postcode_var == -1 or Borough_var == -1 or Neighbourhood_var == -1):
        continue
        
    Postcode.append(Postcode_var)
    Borough.append(Borough_var)
    Neighbourhood.append(Neighbourhood_var)
    

### Processing-part-2: integrating Postal codes with more than 1 neighbour

In [5]:

unique_p = set(Postcode)
print('num of unique Postal codes:', len(unique_p))
Postcode_u      = []
Borough_u       = []
Neighbourhood_u = []


for postcode_unique_element in unique_p:
    p_var = ''; b_var = ''; n_var = ''; 
    for postcode_idx, postcode_element in enumerate(Postcode):
        if postcode_unique_element == postcode_element:
            p_var = postcode_element;
            b_var = Borough[postcode_idx]
            if n_var == '': 
                n_var = Neighbourhood[postcode_idx]
            else:
                n_var = n_var + ', ' + Neighbourhood[postcode_idx]
    Postcode_u.append(p_var)
    Borough_u.append(b_var)
    Neighbourhood_u.append(n_var)

    

num of unique Postal codes: 84


### Post-processing: creating an appropriate Pandas Dataframe

In [6]:
toronto_dict = {'Postcode':Postcode_u, 'Borough':Borough_u, 'Neighbourhood':Neighbourhood_u}
df_toronto = pd.DataFrame.from_dict(toronto_dict)
df_toronto.to_csv('toronto_part1.csv')
df_toronto.head(14)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1X,Scarborough,Upper Rouge
1,M4G,East York,Leaside
2,M6B,North York,Glencairn
3,M4J,East York,East Toronto
4,M2K,North York,Bayview Village
5,M4L,East Toronto,India Bazaar
6,M5P,Central Toronto,Forest Hill North
7,M1S,Scarborough,Agincourt
8,M8W,Etobicoke,"Alderwood, Long Branch"
9,M2M,North York,"Newtonbrook, Willowdale"


In [7]:
df_toronto.shape

(84, 3)

In [8]:
df_location=pd.read_csv(r'C:\Users\Thor\Downloads\Geospatial_Coordinates.csv')
df_location.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [19]:
x=pd.merge(df_toronto,df_location,left_on='Postcode',right_on='Postal Code')



In [22]:
x.drop(['Postal Code'], axis=1)


KeyError: "['Postal Code'] not found in axis"

In [23]:
y.head(5)
y.to_csv('Toronto.csv')

In [24]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# !conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests

# !conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [30]:
df_toronto = pd.read_csv('Toronto.csv')
df_toronto.head(25)

Unnamed: 0.1,Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,0,M1X,Scarborough,Upper Rouge,43.836125,-79.205636
1,1,M4G,East York,Leaside,43.70906,-79.363452
2,2,M6B,North York,Glencairn,43.709577,-79.445073
3,3,M4J,East York,East Toronto,43.685347,-79.338106
4,4,M2K,North York,Bayview Village,43.786947,-79.385975
5,5,M4L,East Toronto,India Bazaar,43.668999,-79.315572
6,6,M5P,Central Toronto,Forest Hill North,43.696948,-79.411307
7,7,M1S,Scarborough,Agincourt,43.7942,-79.262029
8,8,M8W,Etobicoke,"Alderwood, Long Branch",43.602414,-79.543484
9,9,M2M,North York,"Newtonbrook, Willowdale",43.789053,-79.408493


In [26]:
# for the city Toronto, latitude and longtitude are manually extracted via google search
toronto_latitude = 43.6532; toronto_longitude = -79.3832
map_toronto = folium.Map(location = [toronto_latitude, toronto_longitude], zoom_start = 10.7)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    

In [28]:
Client_ID='KHMHG35U5UAHUMZ5P4BUQAQFVZB1HCIUEJUIVYPPZVE0VNCQ'
Client_Secret='TNUM2AR00R2FRYASG113SJCI1TAS3RGMIUOMXXC2GE4NZYJ1'
VERSION = '20180605'

In [77]:
scarborough_data = df_toronto[df_toronto['Borough'] =='Downtown Toronto'].reset_index(drop=True)
scarborough_data.head(7)

Unnamed: 0.1,Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,11,M5T,Downtown Toronto,"Chinatown, Grange Park, Kensington Market",43.653206,-79.400049
1,15,M5H,Downtown Toronto,"Adelaide, King",43.650571,-79.384568
2,24,M4X,Downtown Toronto,"Cabbagetown, St. James Town",43.667967,-79.367675
3,29,M5B,Downtown Toronto,Ryerson,43.657162,-79.378937
4,33,M5L,Downtown Toronto,Commerce Court,43.648198,-79.379817
5,37,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
6,45,M5S,Downtown Toronto,University of Toronto,43.662696,-79.400049


In [78]:
address_scar = 'Downtown Toronto'
latitude_scar = 43.773077
longitude_scar = -79.257774
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude_scar, longitude_scar))

The geograpical coordinate of Toronto are 43.773077, -79.257774.


In [79]:
map_scarb = folium.Map(location=[latitude_scar, longitude_scar], zoom_start=12)

# add markers to map
for lat, lng, label in zip(scarborough_data['Latitude'], scarborough_data['Longitude'], scarborough_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_scarb)  
    
map_scarb