# *Segmenting and Clustering Neighborhoods in the City of Toronto, Canada*

## First Part

#### Necessary libraries

In [1]:
from io import StringIO

import pandas as pd

In [2]:
import requests

#### Obtaining data from the Wikipedia page

In [3]:
# Download the Wikipedia page that lists the Canadian postal codes starting with "M".
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
# A timeout keeps the cell from hanging forever if the server does not answer.
wiki_url = requests.get(url, timeout=30)
# Fail here on an HTTP error instead of handing an error page to the HTML parser.
wiki_url.raise_for_status()
wiki_url

<Response [200]>

#### Creating pandas DataFrame

In [7]:
# Parse every HTML table on the downloaded page. The markup is wrapped in
# StringIO because passing a literal HTML string to read_html is deprecated
# in recent pandas releases.
wiki_tables = pd.read_html(StringIO(wiki_url.text))
# The postal-code table is the first table on the page.
wiki_data = wiki_tables[0]
wiki_data

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [14]:
# Keep only the rows whose Borough is something other than "Not assigned"
has_borough = wiki_data["Borough"].ne("Not assigned")
wiki_data = wiki_data[has_borough]

In [15]:
# Display the table after the "Not assigned" boroughs were filtered out
wiki_data

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [24]:
# True when every postal code appears exactly once in the table
wiki_data["Postal Code"].is_unique

True

In [25]:
# Show the (rows, columns) dimensions of the table
wiki_data.shape

(103, 3)

## Second Part

#### Getting coordinates

In [27]:
import geocoder

In [33]:
# The suggested use of geocoder does not work or takes too long, so I am using the CSV provided in the assignment

In [35]:
# Load the latitude/longitude of each postal code from the provided CSV
coordinates_url = "https://cocl.us/Geospatial_data"
coordinates = pd.read_csv(filepath_or_buffer=coordinates_url)
coordinates.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [50]:
# Compare the sorted postal codes of both tables: True only when they hold
# exactly the same codes (and therefore the same number of rows)
coordinate_codes = coordinates["Postal Code"].sort_values().to_list()
wiki_codes = wiki_data["Postal Code"].sort_values().to_list()
coordinate_codes == wiki_codes

True

In [51]:
# Both datasets appear to contain the same postal codes

In [63]:
# Attach Latitude/Longitude to each neighbourhood row by matching on Postal Code
coordinates_by_code = coordinates.set_index("Postal Code")
joined_dataframe = wiki_data.join(
    coordinates_by_code,
    on="Postal Code",
    how="inner",
)

In [64]:
# Order the rows by postal code and renumber the index from 0
joined_dataframe = (
    joined_dataframe
    .sort_values(by="Postal Code")
    .reset_index(drop=True)
)
joined_dataframe

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


## Third Part

In [67]:
from geopy.geocoders import Nominatim
import folium 

In [69]:
# Following the same method as the New York lab to get the coordinates of Toronto
address = 'Toronto, Ontario'
geolocator = Nominatim(user_agent="toronto_explorer")
# Use a longer timeout than geopy's short default to avoid spurious timeouts.
location = geolocator.geocode(address, timeout=10)
# geocode() returns None when nothing matches; stop with a clear message
# instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The coordinates of Toronto are 43.6534817, -79.3839347.


#### Creating a map of Toronto

In [73]:
# Create the map of Toronto, centred on the geocoded city coordinates
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# Add one circle marker per row of joined_dataframe.
# The loop variables are named lat/lng so they do not overwrite the
# city-centre `latitude`/`longitude` used above; reusing those names would
# re-centre the map on the last row whenever this cell is run again.
for lat, lng, borough, neighbourhood in zip(
    joined_dataframe['Latitude'],
    joined_dataframe['Longitude'],
    joined_dataframe['Borough'],
    joined_dataframe['Neighbourhood'],
):
    # Popup text shown when a marker is clicked: "<neighbourhood>, <borough>"
    label = folium.Popup('{}, {}'.format(neighbourhood, borough), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
    ).add_to(map_Toronto)

map_Toronto

#### Connecting to the Foursquare API (the content of the cell is hidden for security reasons)