# Segmenting and Clustering Neighborhoods in Toronto


## Part 1

### 1. Creating notebook and importing libraries

In [30]:
import pandas as pd
import numpy as np
import pandas_profiling as pp
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium

### 2. Collecting the data <br/>
### 2.a) "scraping" with pandas <br/>
The data to collect comes from this page:
https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M


In [3]:
# Parse every HTML table on the Wikipedia page and keep the first one,
# which is the postal-code table used in the rest of the notebook.
postal_codes_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
page_tables = pd.read_html(postal_codes_url)
toronto_from_html = page_tables[0]
toronto_from_html.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


### Exploring data with pandas_profiling

In [None]:
# Build a pandas_profiling report for the scraped table; leaving `profile` as the
# last expression makes the notebook render it as the cell output.
profile = pp.ProfileReport(toronto_from_html)
profile

### 2.b) Ignoring "Not assigned" Borough

In [22]:
# Keep only rows whose Borough is present and is not the "Not assigned" placeholder.
# A boolean mask is used instead of `df['Borough'].replace(..., inplace=True)`:
# that chained in-place call works on a temporary column object, so it is not
# guaranteed to update the DataFrame (it is a no-op under pandas Copy-on-Write).
# The cell is idempotent: re-running it yields the same frame.
borough = toronto_from_html["Borough"]
has_borough = borough.notna() & borough.ne("Not assigned")
toronto_from_html = toronto_from_html.loc[has_borough].copy()
toronto_from_html.head()

Unnamed: 0,Postal code,Neighborhood,Borough
0,M1B,Malvern / Rouge,Scarborough
1,M1C,Rouge Hill / Port Union / Highland Creek,Scarborough
2,M1E,Guildwood / Morningside / West Hill,Scarborough
3,M1G,Woburn,Scarborough
4,M1H,Cedarbrae,Scarborough


### 2.c) Combining neighborhoods that share the same postal code

In [15]:
# Collapse the table to one row per postal code (and its borough), joining the
# names of all neighborhoods that share that postal code with ", ".
# The grouping keys are the postal code and the borough; the neighborhood is the
# column being aggregated. Grouping on the neighborhood itself would never merge
# two different neighborhoods into one row.
# Missing neighborhood values are skipped so that the join cannot fail on NaN.
toronto_from_html = (
    toronto_from_html
    .groupby(["Postal code", "Borough"], as_index=False)
    .agg({"Neighborhood": lambda names: ", ".join(names.dropna().astype(str))})
)
toronto_from_html.head()

Unnamed: 0,Postal code,Neighborhood,Borough
0,M1B,Malvern / Rouge,Scarborough
1,M1C,Rouge Hill / Port Union / Highland Creek,Scarborough
2,M1E,Guildwood / Morningside / West Hill,Scarborough
3,M1G,Woburn,Scarborough
4,M1H,Cedarbrae,Scarborough


### 2.d) Assigning Borough to "Not assigned" Neighborhood

In [16]:
# Where a row has no usable neighborhood name (missing, blank, or the
# "Not assigned" placeholder), use the borough name instead.
# This is done with a vectorized mask rather than by assigning into the rows
# yielded by `iterrows()`: those rows are copies, so writing to them never
# changes the DataFrame.
neighborhood = toronto_from_html["Neighborhood"]
needs_borough_name = neighborhood.isna() | neighborhood.isin(["Not assigned", ""])
toronto_from_html = toronto_from_html.assign(
    Neighborhood=neighborhood.mask(needs_borough_name, toronto_from_html["Borough"])
)

toronto_from_html.head()

Unnamed: 0,Postal code,Neighborhood,Borough
0,M1B,Malvern / Rouge,Scarborough
1,M1C,Rouge Hill / Port Union / Highland Creek,Scarborough
2,M1E,Guildwood / Morningside / West Hill,Scarborough
3,M1G,Woburn,Scarborough
4,M1H,Cedarbrae,Scarborough


### 2.e) Printing the cleaned dataset shape

In [26]:
# Show (row count, column count) of the cleaned table as the cell output.
toronto_from_html.shape

(103, 3)

## Part 2

### 1. Getting the neighborhood longitude and latitude

In [24]:
# Load the latitude/longitude lookup table from the CSV file that sits next to
# the notebook.
coordinates_csv = "Geospatial_Coordinates.csv"
long_and_lat = pd.read_csv(coordinates_csv)
long_and_lat.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### 2. Merging the two datasets into one

In [27]:
# Align the key column's spelling with the scraped table ("Postal code") so the
# two frames can be merged on a single shared column name.
long_and_lat = long_and_lat.rename(columns={"Postal Code": "Postal code"})

In [29]:
# Attach coordinates to each postal-code row; the inner join keeps only postal
# codes present in both tables.
toronto_final_data = toronto_from_html.merge(
    long_and_lat,
    how="inner",
    on="Postal code",
)
toronto_final_data.head()

Unnamed: 0,Postal code,Neighborhood,Borough,Latitude,Longitude
0,M1B,Malvern / Rouge,Scarborough,43.806686,-79.194353
1,M1C,Rouge Hill / Port Union / Highland Creek,Scarborough,43.784535,-79.160497
2,M1E,Guildwood / Morningside / West Hill,Scarborough,43.763573,-79.188711
3,M1G,Woburn,Scarborough,43.770992,-79.216917
4,M1H,Cedarbrae,Scarborough,43.773136,-79.239476


### 3. Building a map

In [31]:
# Look up the coordinates of Toronto with the Nominatim geocoder; they are used
# as the centre of the map built in the next cell.
address = 'Toronto'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on `location.latitude`.
if location is None:
    raise ValueError('Nominatim returned no result for address {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [32]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Draw one blue circle marker per row of the merged table; clicking a marker
# shows a "<neighborhood>, <borough>" popup.
marker_columns = toronto_final_data[['Latitude', 'Longitude', 'Borough', 'Neighborhood']]
for lat, lng, borough, neighborhood in marker_columns.itertuples(index=False, name=None):
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    )
    marker.add_to(map_toronto)

map_toronto