# Part 1

### Import The Required Libraries

In [1]:
import requests
import lxml.html as lh
import pandas as pd
import numpy as np
from IPython.display import Image 
from IPython.core.display import HTML 
import matplotlib as mp # library for visualization
import matplotlib.cm as cm
import matplotlib.colors as colors
from geopy.geocoders import Nominatim 
from sklearn.cluster import KMeans 
import folium 

### Scrape The Website

In [2]:
# Wikipedia page listing the Canadian postal codes that start with "M"
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Let pandas fetch the page and parse its HTML tables; the result is kept in `page`
# (the following cells show it is a list holding 3 items)
page = pd.read_html(url)
  

### Verify the object type before indexing into it (skipping this check can cost hours of troubleshooting later)

In [3]:
# Confirm the container type that pd.read_html returned before indexing into it
type(page)

list

In [4]:
# Number of items in `page` (one per table that pd.read_html parsed)
len(page)

3

In [5]:
# Review the first parsed table. Only the leading rows are shown so the saved
# notebook does not carry the whole table in its output.
page[0].head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


In [6]:
# Work on an independent copy of the postal code table so that later edits to
# toronto_df can never leak back into the parsed tables held in `page`.
toronto_df = page[0].copy()

toronto_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [7]:
# Rebuild a clean 0..n-1 index. The result is assigned back instead of using
# inplace=True, so the cell gives the same outcome however often it is re-run.
toronto_df = toronto_df.reset_index(drop=True)

toronto_df.shape

(289, 3)

### Remove rows whose Borough is "Not assigned" and combine rows that share a postcode

In [8]:
# Keep only the rows whose Borough is something other than "Not assigned"
dropped_df = toronto_df.loc[lambda frame: frame['Borough'].ne('Not assigned')]

dropped_df.shape

(212, 3)

In [9]:
# Collapse to one row per (Postcode, Borough): the neighbourhood names that share
# a postcode are concatenated into a single comma-separated string.
toronto_final = (
    dropped_df
    .groupby(['Postcode', 'Borough'])['Neighbourhood']
    .apply(', '.join)
    .reset_index()
)
toronto_final.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### Replace a "Not assigned" Neighbourhood with the Borough name

In [10]:
# Where the Neighbourhood value is exactly 'Not assigned', substitute the Borough
# name; every other row keeps its Neighbourhood unchanged.
# NOTE: this runs after the comma-join above, so it only matches postcodes whose
# whole joined string equals 'Not assigned'.
toronto_final['Neighbourhood'] = toronto_final['Neighbourhood'].mask(
    toronto_final['Neighbourhood'].eq('Not assigned'),
    toronto_final['Borough'],
)
toronto_final.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### Check the final shape

In [11]:
# (rows, columns) of the cleaned Part 1 table
toronto_final.shape

(103, 3)

# Part 2
### Plan: merge the two tables on their postal code columns ("Postcode" and "Postal Code")

### Load the Geospatial Coordinates file

In [12]:
# Read the coordinates table from a CSV given by relative path
# (the file must be present in the notebook's working directory)
Geo_coord=pd.read_csv("Geospatial_Coordinates.csv")

In [13]:
# Preview the first rows of the coordinates table
Geo_coord.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Rename the final dataframe from Part 1

In [14]:
# Start Part 2 from a separate frame. A plain `toronto_part2 = toronto_final`
# would only create a second name for the same object, so every later in-place
# change would also alter the Part 1 result. reset_index() returns a new frame
# with a fresh 0..n-1 index, which gives both the copy and the clean index.
toronto_part2 = toronto_final.reset_index(drop=True)
print(toronto_part2.shape)

toronto_part2.head()

(103, 3)


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [15]:
# Preview again (same call as the last line of the previous cell)
toronto_part2.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [16]:
# Standardize the join key name in both tables before merging.
# rename() returns a new frame that is assigned back; avoiding inplace=True keeps
# any other variable that refers to the same data from being modified behind
# the reader's back.
Geo_coord = Geo_coord.rename(columns={'Postal Code': 'PostalCode'})
toronto_part2 = toronto_part2.rename(columns={'Postcode': 'PostalCode'})

### Merge the two files together

In [17]:
# Inner join on the shared key: only postal codes present in both tables are kept.
# `on=` is the direct spelling when the key column has the same name on both sides.
df_final_toronto = pd.merge(toronto_part2, Geo_coord, how='inner', on='PostalCode')

# Show a preview rather than the whole merged table
df_final_toronto.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


# Part 3 

### Get an idea of what you are working with before superimposing on a map

In [20]:
# Size summary of the merged table: distinct Borough values and number of rows.
# NOTE: the second figure is a row count; each row is one postal code whose
# neighbourhood names were joined into a single string earlier in the notebook.
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
    df_final_toronto['Borough'].nunique(dropna=False),
    len(df_final_toronto.index),
))

The dataframe has 11 boroughs and 103 neighborhoods.


In [None]:
### Keep only the locations whose Borough name contains "Toronto"

In [34]:
# Select the rows whose Borough name includes "Toronto". A substring match is
# used instead of equality so that every borough with "Toronto" in its name is kept.
Toronto_data = (
    df_final_toronto
    .loc[lambda frame: frame['Borough'].str.contains('Toronto')]
    .reset_index(drop=True)
)
Toronto_data.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [35]:
# Place name used to centre the map
address = 'Toronto, Canada'

# Nominatim needs a user_agent string identifying the calling application
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match; stop with a clear
# message here instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geographical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [36]:
# create map of Toronto centred on the geocoded latitude and longitude values
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add one circle marker per row of Toronto_data, labelled with its neighbourhood name(s)
for lat, lng, label in zip(Toronto_data['Latitude'], Toronto_data['Longitude'], Toronto_data['Neighbourhood']):
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        # parse_html belongs to Popup (it controls how the label text is handled);
        # CircleMarker has no such parameter, so it is only passed here.
        popup=folium.Popup(label, parse_html=True),
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_Toronto)

# last expression of the cell: renders the interactive map
map_Toronto

# Part 3 Complete