# Transform the data from the website into a DataFrame

In [37]:
import pandas as pd

# Collect the postal-code table from the website.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
order = ['Postal code', 'Borough', 'Neighborhood']

# pd.read_html returns one DataFrame per HTML table on the page. Select the
# table that carries the postal-code columns instead of appending every table
# together and deleting the leftover columns by position: DataFrame.append was
# removed in pandas 2.0, and a hard-coded range(0, 18) of column labels breaks
# as soon as the page layout changes.
tables = pd.read_html(url)
postal_table = next(
    (table for table in tables if set(order).issubset(table.columns)),
    None,
)
if postal_table is None:
    raise ValueError('No table with columns {} found at {}'.format(order, url))

df = (
    postal_table[order]  # keep only the needed columns, in this order
    # Drop rows whose borough is the 'Not assigned' placeholder.
    .loc[lambda frame: frame['Borough'] != 'Not assigned']
    # Drop rows with missing values.
    .dropna()
    # Combine rows that share a postal code into one row, with the
    # neighborhoods separated by a comma. sort=False keeps the page's row order.
    .groupby(['Postal code', 'Borough'], sort=False)['Neighborhood']
    .agg(lambda names: ', '.join(names.astype(str)))
    # Turn the group keys back into columns and restore a 0..n-1 index.
    .reset_index()
)

df.shape

(103, 3)

# Complete the DataFrame with latitude and longitude

In [56]:
# Load the geospatial lookup table (keyed by 'Postal Code') from the CSV.
csv = pd.read_csv('https://cocl.us/Geospatial_data')
df2 = pd.DataFrame(csv)

# Left-join the lookup onto the postal-code frame so every row of df is kept.
# The key columns differ only in capitalisation ('Postal code' vs 'Postal Code'),
# so both are named explicitly and both survive in the result.
df3 = df.merge(
    df2,
    how='left',
    left_on='Postal code',
    right_on='Postal Code',
)

In [62]:
# The merge kept both key columns; remove the right-hand duplicate 'Postal Code'.
# errors='ignore' makes this cell safe to re-run: on a second execution the
# column is already gone, and a plain drop would raise KeyError.
df3 = df3.drop(columns='Postal Code', errors='ignore')
df3

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Regent Park / Harbourfront,43.654260,-79.360636
3,M6A,North York,Lawrence Manor / Lawrence Heights,43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,Malvern / Rouge,43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,Parkview Hill / Woodbine Gardens,43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


# Visualization

In [65]:
import numpy as np # library to handle data in a vectorized manner

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

# `!` hands this line to the shell: install folium pinned to version 0.5.0 from
# the conda-forge channel; --yes answers the confirmation prompt automatically.
!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

# Reached only if the install and every import above completed without error.
print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2020.4.5.1         |   py36h9f0ad1d_0         151 KB  conda-forge
    ca-certificates-2020.4.5.1 |       hecc5488_0         146 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    branca-0.4.0               |             py_0          26 KB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    ------------------------------------------------------------
                       

In [71]:
# Create a map of Toronto centred on fixed latitude and longitude values.
map_toronto = folium.Map(location=[43.75, -79.25], zoom_start=10)

# df3 is the result of a left merge, so a postal code without a match in the
# geospatial table has NaN coordinates. Such rows cannot be placed on the map,
# so skip them instead of handing NaN to the marker constructor.
df3_located = df3.dropna(subset=['Latitude', 'Longitude'])

# Add one circle marker per row, labelled "<neighborhood>, <borough>".
for lat, lng, borough, neighborhood in zip(
        df3_located['Latitude'],
        df3_located['Longitude'],
        df3_located['Borough'],
        df3_located['Neighborhood']):
    # parse_html=True makes the popup treat the label as text, so characters
    # such as the apostrophe in "Queen's Park" are escaped rather than
    # interpreted as markup.
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    # parse_html is a Popup option, not a CircleMarker one, so it is no longer
    # passed to the marker.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

map_toronto