# Explore clustered neighborhoods in Toronto

## Import the necessary libraries

In [2]:
import requests # library to handle HTTP requests
import pandas as pd # library for data analysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# function for transforming a JSON file into a pandas dataframe
from pandas.io.json import json_normalize

!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Solving environment: done

# All requested packages already installed.

Solving environment: done

# All requested packages already installed.

Folium installed
Libraries imported.


### Scrape the Wikipedia page and create the dataframe of Canadian postal codes

In [3]:
# Wikipedia page listing the Canadian postal codes that start with "M"
src = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# pd.read_html parses the page and returns a list of dataframes
# (author's note: the list held 3 objects when the notebook was written)
get_cont = pd.read_html(src)

# Only the first object is used
# (author's note: its shape was (288, 3) when the notebook was written)
can_pc = get_cont[0]

# Align the column names with the ones requested by the assignment.
# The rename happens in place, so get_cont[0] carries the new names as well.
can_pc.rename(
    columns=dict(Postcode="PostalCode", Neighbourhood="Neighborhood"),
    inplace=True,
)

# Preview the result to compare it with the assignment's expected output
can_pc.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


### Drop rows where "Borough" is "Not assigned"

In [4]:
# Index labels of every row whose "Borough" value is "Not assigned"
ix_nam_bor = can_pc.loc[can_pc["Borough"].eq("Not assigned")].index

# Remove those rows; can_pc itself is modified (in-place drop)
can_pc.drop(index=ix_nam_bor, inplace=True)

# (rows, columns) remaining after the drop
can_pc.shape

(211, 3)

### Copy "Borough" into "Neighborhood" if "Neighborhood" is "Not assigned"

In [5]:
# Index labels of the rows whose "Neighborhood" value is "Not assigned"
ix_nam_neigh = can_pc[can_pc["Neighborhood"]=="Not assigned"].index

# Use the borough name as the neighborhood for those rows.
# One label-aligned .loc assignment replaces the former row-by-row Python loop:
# the right-hand Series is matched to the target rows by index label.
can_pc.loc[ix_nam_neigh, "Neighborhood"] = can_pc.loc[ix_nam_neigh, "Borough"]

# Preview the result
can_pc.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


### Group "Neighborhood" values by "PostalCode" and "Borough"

In [6]:
# Produce one row per ("PostalCode", "Borough") pair; the "Neighborhood" values
# of each group are concatenated into a single comma-separated string
can_pc_grp = (
    can_pc
    .groupby(["PostalCode", "Borough"])["Neighborhood"]
    .apply(lambda names: ", ".join(names))
    .reset_index()
)

# Preview the grouped dataframe
can_pc_grp.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


### Print the number of rows of the dataframe

In [7]:
# Number of rows in the grouped dataframe
len(can_pc_grp)

103

### Read the CSV file that has the geographical coordinates of each postal code into a dataframe

In [12]:
# Location of the CSV file with the geographical coordinates of each postal code
url = "https://cocl.us/Geospatial_data/Geospatial_Coordinates.csv"

# Load the file and rename "Postal Code" to "PostalCode" in the same expression,
# so the column name matches what is asked in the assignment
geo_coord = pd.read_csv(url).rename(columns={"Postal Code": "PostalCode"})

# Preview the result to compare it with the assignment's expected output
geo_coord.head(12)

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


### Merge dataframes

In [18]:
# Attach the coordinates to the grouped postal codes, matching rows on "PostalCode".
# how="inner" is the default join type, spelled out here: only postal codes present
# in both dataframes end up in the result.
can_pc_grp_geo = can_pc_grp.merge(geo_coord, how="inner", on="PostalCode")

# Preview the merged dataframe
can_pc_grp_geo.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848
