### Peer-graded Assignment: Segmenting and Clustering Neighborhoods in Toronto Part 1

In [3]:
#install Beautiful Soup and requests for Web Scaping
!pip install BeautifulSoup4
!pip install requests



In [4]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

In [5]:
# Download the Toronto postal-code table from Wikipedia and clean it into `df`
# (one row per postal code with its borough and neighbourhood names).
source='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
df_Canada=pd.read_html(source)
# The postal-code table is the first table read_html finds on the page.
# Its header row has already become the column labels (the code below relies on
# the 'Postal Code' / 'Borough' / 'Neighborhood' names), so every row is data
# and nothing is dropped by position: unassigned rows are removed by the filter.
neighborhood=df_Canada[0]
# Discard postal codes that have no borough assigned.
neighborhood=neighborhood[neighborhood.Borough !='Not assigned']
# Collapse rows that share a postal code and borough into one row whose
# remaining text columns are comma-joined; sort=False keeps the page order.
merge_result = (
    neighborhood
    .set_index(['Postal Code','Borough'])
    .groupby(level=['Postal Code','Borough'], sort=False)
    .agg(','.join)
)
# Turn the group keys back into ordinary columns with a fresh 0..n-1 index.
serial_wise=merge_result.reset_index()
# Manual override of the neighbourhood name for postal code M7A. Selecting by
# postal code (instead of row position 4) keeps the override on the intended
# row even if the table gains, loses or reorders rows.
serial_wise.loc[serial_wise['Postal Code'] == 'M7A', 'Neighborhood']='Queen\'s Park'
# Keep a copy of the cleaned table on disk.
# NOTE(review): writing the legacy .xls format needs an Excel writer engine that
# recent pandas releases may no longer provide — confirm in the target environment.
serial_wise.to_excel('Canada_table.xls')
df=pd.DataFrame(serial_wise)
df.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,Queen's Park
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [6]:
# Show the size of the cleaned postal-code table as (rows, columns).
df.shape

(103, 3)

###  Peer-graded Assignment: Segmenting and Clustering Neighborhoods in Toronto Part 2

In [7]:
!pip3 install geocoder



In [8]:
# Read the CSV that maps each postal code to a latitude/longitude pair,
# then preview its first ten rows.
df_geo_coor = pd.read_csv(filepath_or_buffer="http://cocl.us/Geospatial_data")
df_geo_coor.head(n=10)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [9]:
# Attach coordinates to each postal code. The left join keeps every row of
# `df`, matching on the shared 'Postal Code' column.
df_toronto = df.merge(df_geo_coor, on='Postal Code', how='left')
df_toronto.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


### Peer-graded Assignment: Segmenting and Clustering Neighborhoods in Toronto Part 3

### The map of Toronto 

In [10]:
import folium 

In [11]:
# Look up the coordinates of Toronto with the Nominatim geocoding service.
address = "Toronto, ON"

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message here instead of an AttributeError on the attribute reads below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto city are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto city are 43.6534817, -79.3839347.


In [12]:
# Create a folium map centred on the `latitude`/`longitude` values set by the
# geocoding cell, with an initial zoom level of 9.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=9)
# Leaving the map as the last expression renders it as the cell output.
map_toronto

In [13]:
# Overwrite `latitude`/`longitude` with fixed values.
# NOTE(review): these repeat the coordinates printed by the geocoding cell;
# presumably a fallback so later cells do not need the live lookup — confirm.
latitude = 43.6534817
longitude = -79.3839347

### Boroughs whose name contains the word "Toronto"

In [29]:
# Build the postal-code table with coordinates. The left join keeps every row
# of `df` and matches on the shared 'Postal Code' column.
neighborhoods = df.merge(df_geo_coor, on='Postal Code', how='left')
neighborhoods.head(15)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [30]:
# Keep only rows whose borough name contains "Toronto"; rows with a missing
# borough count as non-matches (na=False). The index is renumbered from 0.
is_toronto_borough = neighborhoods['Borough'].str.contains('Toronto', na=False)
toronto_data = neighborhoods.loc[is_toronto_borough].reset_index(drop=True)
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [31]:
# Show the size of the Toronto-only table as (rows, columns).
toronto_data.shape

(39, 5)

### Visualizing all the Neighbourhoods of the above data frame using Folium

In [32]:
# `df3` is a second name for the same DataFrame object as `toronto_data`
# (plain assignment, not a copy), so changes made through either name affect both.
df3=toronto_data

In [35]:
# Draw a fresh map of Toronto with one circle marker per row of `df3`.
map_toronto = folium.Map(location=[43.651070,-79.347015],zoom_start=10)

# The loop names are deliberately different from `neighborhood`, the DataFrame
# built in the scraping cell, so running this cell does not overwrite it.
for lat, lng, borough_name, neighborhood_name in zip(
        df3['Latitude'], df3['Longitude'], df3['Borough'], df3['Neighborhood']):
    label = '{}, {}'.format(neighborhood_name, borough_name)
    # parse_html=True makes the popup treat the label as text rather than markup.
    popup = folium.Popup(label, parse_html=True)
    # parse_html is a Popup option, not a marker style option, so it is not
    # passed to CircleMarker.
    folium.CircleMarker(
        [lat, lng],
        radius=6,
        popup=popup,
        color='Purple',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.8,
    ).add_to(map_toronto)
# Leaving the map as the last expression renders it as the cell output.
map_toronto