# Capstone project. Segmenting and Clustering Neighborhoods in Toronto

In [52]:
import numpy as np
import pandas as pd
from geopy.geocoders import Nominatim 
import json 
import requests 
from pandas.io.json import json_normalize 
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    branca-0.4.1               |             py_0          26 KB  conda-forge
    ca-certificates-2020.4.5.2 |       hecda079_0         147 KB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    certifi-2020.4.5.2         |   py36h9f0ad1d_0         152 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    ------------------------------------------------------------
                       

### Create a dataframe from Wiki

In [119]:
link = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
Toronto_neigh_df = pd.read_html(link)[0]
Toronto_neigh_df = Toronto_neigh_df[Toronto_neigh_df['Borough']!= 'Not assigned']

### Create a function to check if Neighborhood is not 'Not assigned'

In [121]:
def check_neigh(data):
    if data['Neighborhood'] == 'Not assigned':
        data ['Neighborhood'] =  data['Borough']
    else:
        data ['Neighborhood'] =  data['Neighborhood']                                  
    return data

In [126]:
Toronto_neigh_df = Toronto_neigh_df.apply(check_neigh, axis=1)
Toronto_neigh_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [125]:
Toronto_postal_df.shape

(103, 3)

### Create a dataframe with coordinates

In [127]:
!wget -q -O 'Geospatial_Coordinates.csv' http://cocl.us/Geospatial_data
print('Data downloaded!')

Data downloaded!


In [128]:
coord_df = pd.read_csv('Geospatial_Coordinates.csv')
coord_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Create a function that marge boroughs and coordinates dataframes

In [129]:
def apply_coord(data, data_coord):
    coord = data_coord.reset_index()
    data = data.reset_index()
    return pd.merge(data, coord, how='left', left_on='Postal Code', right_on='Postal Code')
     
Toronto_neigh_df = apply_coord(Toronto_neigh_df, coord_df)

In [132]:
Toronto_neigh_df.head()

Unnamed: 0,index_x,Postal Code,Borough,Neighborhood,index_y,Latitude,Longitude
0,2,M3A,North York,Parkwoods,25,43.753259,-79.329656
1,3,M4A,North York,Victoria Village,34,43.725882,-79.315572
2,4,M5A,Downtown Toronto,"Regent Park, Harbourfront",53,43.65426,-79.360636
3,5,M6A,North York,"Lawrence Manor, Lawrence Heights",71,43.718518,-79.464763
4,6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",85,43.662301,-79.389494


### Create a function that get coordinates for particular borough

In [99]:
def get_coord (data, borough):
    address = 'Toronto'
    geolocator = Nominatim(user_agent="ny_explorer")
    location = geolocator.geocode(address)
    latitude = location.latitude
    longitude = location.longitude
    return [latitude, longitude]
    print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

### Create a function to create a map

In [133]:
# create map of New York using latitude and longitude values
def create_neigh_map (data, borough):
    map_borough = folium.Map(location=get_coord(data, borough), zoom_start=10)
    data_neigh  = data[data['Borough']==borough]

    # add markers to map
    for lat, lng, borough, neighborhood in zip(data_neigh['Latitude'], data_neigh['Longitude'], data_neigh['Borough'], data_neigh['Neighborhood']):
        label = '{}, {}'.format(neighborhood, borough)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False).add_to(map_borough)
    map_borough
    return map_borough

### Get a unique borough 

In [134]:
Toronto_neigh_df['Borough'].unique()

array(['North York', 'Downtown Toronto', 'Etobicoke', 'Scarborough',
       'East York', 'York', 'East Toronto', 'West Toronto',
       'Central Toronto', 'Mississauga'], dtype=object)

### Create maps for each borough

In [135]:
create_neigh_map(Toronto_neigh_df, 'North York')

In [136]:
create_neigh_map(Toronto_neigh_df, 'Downtown Toronto')

In [137]:
create_neigh_map(Toronto_neigh_df, 'Etobicoke')

In [141]:
create_neigh_map(Toronto_neigh_df, 'Scarborough')

In [142]:
create_neigh_map(Toronto_neigh_df, 'East York')

In [143]:
create_neigh_map(Toronto_neigh_df, 'York')

In [144]:
create_neigh_map(Toronto_neigh_df, 'East Toronto')

In [145]:
create_neigh_map(Toronto_neigh_df, 'West Toronto')

In [146]:
create_neigh_map(Toronto_neigh_df, 'Central Toronto')

In [147]:
create_neigh_map(Toronto_neigh_df, 'Mississauga')