# IBM Data Science Capstone Project : Analysing Toronto, Canada Neighbourhood Data

#### Author : Ajay Rabidas

In [173]:
import numpy as np
import pandas as pd

In [174]:
#print('Hello Capstone Project Course!')

### Installing html parser libraries

In [175]:
#!conda install -c anaconda lxml
#!conda install -c anaconda beautifulsoup4

In [176]:
from bs4 import BeautifulSoup
import requests

In [177]:
# Column labels for the frame that will hold the scraped postal-code rows.
columns = [
    'PostalCode',
    'Borough',
    'Neighborhood',
]

# Start from an empty frame that already carries those three labels.
neighborhoods = pd.DataFrame(data=None, columns=columns)

### Scraping the postal-code table from the Wikipedia page "List of postal codes of Canada: M"

In [178]:
# Download the Wikipedia page listing the "M" postal codes.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() reports HTTP errors here rather than as a confusing
# parsing failure further down.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
source = response.text

# Parse the HTML with the lxml backend and take the first table that carries
# the "wikitable" class.
soup = BeautifulSoup(source, 'lxml')
table = soup.find('table', class_='wikitable')
if table is None:
    # find() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on None in the extraction cell.
    raise ValueError("No table with class 'wikitable' was found on the page")

In [179]:
# Extract one record per <tr> of the scraped wiki table.
# The records are collected in a plain list and converted to a DataFrame once:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies the whole frame on every iteration. Rebuilding the frame from scratch
# also means re-running this cell no longer duplicates the rows.
records = []
for row in table.tbody.find_all('tr'):
    # Split the row's text on newlines and discard the first and last pieces,
    # leaving the cell values in column order.
    data = row.text.split('\n')[1:-1]
    records.append({'PostalCode': data[0],
                    'Borough': data[1],
                    'Neighborhood': data[2]})

# Rows keep their scrape order under a fresh 0..n-1 index, so the first <tr>
# of the table still sits at label 0.
neighborhoods = pd.DataFrame(records, columns=['PostalCode', 'Borough', 'Neighborhood'])

In [180]:
# Remove the row labelled 0 (presumably the table's header row; the preview
# below starts at label 1).
# Rebinding instead of inplace=True, together with errors='ignore', makes this
# cell safe to re-run: once label 0 is gone a second run is a no-op rather
# than a KeyError.
neighborhoods = neighborhoods.drop(index=0, errors='ignore')
neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront


In [181]:
# (rows, columns) of the scraped frame at this point in the notebook.
neighborhoods.shape

(288, 3)

### Data cleaning
##### 1. Dropping rows whose Borough is "Not assigned"
##### 2. Replacing a "Not assigned" Neighbourhood with the Borough of the same row

In [None]:
# Keep only the rows that have a real borough. The explicit .copy() makes
# neighDF an independent frame, so the column assignment below is neither
# ambiguous (SettingWithCopyWarning) nor silently lost.
neighDF = neighborhoods.loc[neighborhoods['Borough'] != 'Not assigned'].copy()

# Where the neighbourhood is "Not assigned", use the borough of the same row.
# The result is assigned back to the column: an in-place mask on the Series
# returned by neighDF['Neighborhood'] is not guaranteed to reach the frame
# (and does not under pandas Copy-on-Write).
neighDF['Neighborhood'] = neighDF['Neighborhood'].mask(
    neighDF['Neighborhood'] == 'Not assigned',
    neighDF['Borough'],
)

In [184]:
# Spot-check the cleaning step on the rows whose borough is "Queen's Park".
is_queens_park = neighDF['Borough'].eq("Queen's Park")
neighDF.loc[is_queens_park].head()

Unnamed: 0,PostalCode,Borough,Neighborhood
9,M7A,Queen's Park,Queen's Park


### Grouping rows based on PostalCode

In [185]:
#print(neighDF.shape, neighborhoods.shape)
# Collapse the rows of every (PostalCode, Borough) pair into a single row whose
# Neighborhood value is the comma-joined list of that pair's neighbourhoods.
names_by_code = neighDF.groupby(['PostalCode', 'Borough'])['Neighborhood']
neighborDF = names_by_code.agg(','.join).reset_index()
neighborDF

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park"
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge"
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff,Cliffside West"


### Adding Geospatial Data to Toronto dataframe

In [186]:
# Load the geospatial CSV and rename its "Postal Code" column to "PostalCode"
# so it can be used as the merge key with the neighbourhood frame.
geographicData = (
    pd.read_csv('http://cocl.us/Geospatial_data')
    .rename(columns={"Postal Code": "PostalCode"})
)
geographicData.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Merging Toronto neighborhood and geospatial data into a single consolidated dataframe

In [187]:
# Attach latitude/longitude to every postal code by joining on 'PostalCode'.
# NOTE(review): this is an outer join, so a postal code present in only one of
# the two frames is kept with NaN in the other frame's columns. Confirm that
# this is intended before relying on the coordinates downstream.
geoTorontoDF = neighborDF.merge(
    geographicData,
    how='outer',
    on='PostalCode',
)
geoTorontoDF.head(20)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848


## Exploring neighborhoods of Toronto on folium map

In [188]:
from geopy.geocoders import Nominatim
import folium

In [189]:
address = 'Toronto, Canada'

# Nominatim needs an identifying user agent for its requests.
geolocator = Nominatim(user_agent="ny_explorer")

# geopy's default timeout is one second, which the public Nominatim service
# frequently exceeds, so allow more time for the lookup.
location = geolocator.geocode(address, timeout=10)
if location is None:
    # geocode() returns None when the address cannot be resolved; stop with a
    # clear message instead of an AttributeError on the lines below.
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geographical coordinates of Toronto, Canada are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Totonto, Canada are 43.653963, -79.387207.


### Create a map of Toronto with neighborhoods superimposed on top.

In [190]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# geoTorontoDF comes from an outer merge, so a postal code found in only one
# input has missing coordinates. folium rejects NaN locations, so those rows
# are left off the map instead of aborting the whole cell.
plottable = geoTorontoDF.dropna(subset=['Latitude', 'Longitude'])
marker_fields = plottable[['Latitude', 'Longitude', 'Borough', 'Neighborhood']]

# add one circle marker per postal code, labelled "<neighbourhoods>, <borough>"
for lat, lng, borough, neighborhood in marker_fields.itertuples(index=False, name=None):
    # parse_html=True makes folium escape the label text (e.g. the apostrophe
    # in "Queen's Park") before embedding it in the popup.
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

map_toronto

### The map is sometimes not rendered when the notebook is viewed on GitHub.
#### To view the map, open the notebook at:
https://eu-gb.dataplatform.cloud.ibm.com/analytics/notebooks/v2/e47a975d-f54b-4ab6-8a8c-96ff6e1bec68/view?access_token=2486d1a86c3b1e1c29520cde7ac52f43d0229fb120a807baec03adc023362e78