## QUESTION 1

In [168]:
from io import StringIO

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [169]:
# Download the Wikipedia page and load the first <table> on it into a pandas DataFrame.
res = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,  # fail instead of hanging forever if the site is unreachable
)
res.raise_for_status()  # stop on an HTTP error rather than parsing an error page

soup = BeautifulSoup(res.content, 'lxml')
table = soup.find_all('table')[0]

# read_html is given a file-like object; passing the raw HTML string directly
# is deprecated in recent pandas releases.
postal_table_raw = pd.read_html(StringIO(str(table)))[0]

# Flatten the table into an (n_cells, 1) column vector so that every cell can be
# visited with a single loop.
contents = postal_table_raw.to_numpy().reshape(-1, 1)

In [170]:
def parse_postal_cell(cell_text):
    """Split the text of one table cell into (postal code, borough, neighborhood).

    The first three characters are taken as the postal code. The text before the
    first '(' is the borough and the text between that '(' and the next ')' is the
    neighborhood list. When there are no parentheses the borough text doubles as
    the neighborhood. ' / ' separators are rewritten as ', ' and surrounding
    whitespace is removed.

    Example: 'M1BScarborough(Malvern / Rouge)' -> ('M1B', 'Scarborough', 'Malvern, Rouge')
    """
    postal_code = cell_text[:3]
    pieces = cell_text[3:].split('(')

    borough_text = pieces[0]
    if len(pieces) == 1:
        neighborhood_text = borough_text
    else:
        neighborhood_text = pieces[1].split(')')[0]

    def tidy(text):
        return text.replace(' / ', ', ').strip()

    return postal_code, tidy(borough_text), tidy(neighborhood_text)


# Lists that will hold the PostalCode, Borough and Neighborhood columns.
# They are turned into the actual dataframe afterwards.
code = []
borough = []
neighborhood = []

for content in contents:
    cell_text = content[0]
    # Skip cells that are not text (e.g. NaN padding) and cells that contain the
    # word 'assigned'.
    if not isinstance(cell_text, str) or 'assigned' in cell_text:
        continue

    cell_code, cell_borough, cell_neighborhood = parse_postal_cell(cell_text)
    code.append(cell_code)
    borough.append(cell_borough)
    neighborhood.append(cell_neighborhood)

In [171]:
# Build the dataframe in one step from the three parsed lists, order it by
# postal code and renumber the rows from 0.
df = (
    pd.DataFrame({'PostalCode': code, 'Borough': borough, 'Neighborhood': neighborhood})
    .sort_values(by='PostalCode')
    .reset_index(drop=True)
)
print(df.head())

  PostalCode      Borough                            Neighborhood
0        M1B  Scarborough                          Malvern, Rouge
1        M1C  Scarborough  Rouge Hill, Port Union, Highland Creek
2        M1E  Scarborough       Guildwood, Morningside, West Hill
3        M1G  Scarborough                                  Woburn
4        M1H  Scarborough                               Cedarbrae


In [172]:
# Dimensions of the cleaned table as (rows, columns).
df.shape

(103, 3)

## QUESTION 2

In [173]:
# Read the latitude and longitude values from the provided URL
df_ll = pd.read_csv('http://cocl.us/Geospatial_data').sort_values('Postal Code')

# Attach the coordinates by matching on the postal code itself. A column-wise
# concat pairs rows by index label, which sort_values does not renumber, so it
# would only line up if the CSV already happened to be in the same order as df.
coordinates = df_ll[['Postal Code', 'Latitude', 'Longitude']].rename(
    columns={'Postal Code': 'PostalCode'}
)
df = (
    df.drop(columns=['Latitude', 'Longitude'], errors='ignore')  # lets the cell be re-run safely
      .merge(coordinates, on='PostalCode', how='left', validate='many_to_one')
)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## QUESTION 3

In [174]:
#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [175]:
# create map of Toronto using latitude and longitude values

borough_array = df['Borough'].unique()
kclusters = len(borough_array)

# One evenly spaced rainbow colour (as a hex string) per borough.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]
borough_colors = dict(zip(borough_array, rainbow))

latitude, longitude = 43.6532, -79.3832
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add one marker per row to the map
# (the loop variables are deliberately not called `borough` / `neighborhood`,
# so they do not overwrite the lists of the same name built earlier in the notebook)
for lat, lng, borough_name, neighborhood_name in zip(
        df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = folium.Popup('{}, {}'.format(neighborhood_name, borough_name), parse_html=True)
    marker_color = borough_colors[borough_name]
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_toronto)

map_toronto

# Unfortunately, the folium map cannot be displayed on GitHub, so here is a summary of what it looks like. Every postal code (103 in total) is marked on the map, and the markers are grouped and colored by borough. The boroughs seem to have the same borders as on a real map of Toronto, so the folium map is accurate in that sense.