This notebook reads the list of Canadian postal codes from Wikipedia and a list of latitude/longitude values for each postal code from another site.
It then merges the two lists and explores the combined data to find something worth noting.

In [1]:
# !pip3 install geocoder
import pandas as pd
import numpy as np

# Fetch the postal-code table from Wikipedia. read_html returns a list of the
# tables on the page that match "Postal Code"; the first one is used.
df = pd.read_html(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    header=0,
    encoding='utf-8',
    match="Postal Code",
)[0]

# Inspect the rows whose borough is the placeholder value "Not assigned".
df.loc[df["Borough"].eq("Not assigned")]

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
7,M8A,Not assigned,Not assigned
10,M2B,Not assigned,Not assigned
15,M7B,Not assigned,Not assigned
...,...,...,...
174,M4Z,Not assigned,Not assigned
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned


In [2]:
# Clean the postal-code table in one chain: keep only rows with an assigned
# borough, renumber the index from 0, and rename the columns.
df = (
    df.loc[df["Borough"] != "Not assigned"]
    .reset_index(drop=True)
    .rename(columns={"Postal Code": "PostalCode", "Neighbourhood": "Neighborhood"})
)
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [3]:
# Sanity check: (rows, columns) of the postal-code table at this point.
df.shape

(103, 3)

In [4]:
# Load the latitude/longitude table and rename its key column to
# "PostalCode" so it matches the postal-code table.
df_latlng = pd.read_csv("http://cocl.us/Geospatial_data").rename(
    columns={"Postal Code": "PostalCode"}
)
df_latlng

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [5]:
# Attach the coordinates to each postal code. This is an inner join on
# "PostalCode", so only codes present in both tables are kept.
df = df.merge(df_latlng, on="PostalCode", how="inner")
df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


In [6]:
# check it to find something to note
# I want to try creating map of toronto using latitude and longitude values, so let's start from install & importing the folium package.

!pip3 install folium
import folium



In [7]:
# Base map centred on the coordinates of the first row of df.
map_toronto = folium.Map(location=[43.753259, -79.329656], zoom_start=10)

# Add one blue circle per postal code; the popup reads "<neighborhood>, <borough>".
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for lat, lng, borough, neighborhood in df[marker_columns].itertuples(index=False, name=None):
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

map_toronto

We can see that there are many postal code points around Toronto Union Station, so let's cluster the points by their location.

In [8]:
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import matplotlib.colors as colors
# Clustering input: just the two coordinate columns of each postal code.
df_cluster = df.loc[:, ["Latitude", "Longitude"]]
df_cluster

Unnamed: 0,Latitude,Longitude
0,43.753259,-79.329656
1,43.725882,-79.315572
2,43.654260,-79.360636
3,43.718518,-79.464763
4,43.662301,-79.389494
...,...,...
98,43.653654,-79.506944
99,43.665860,-79.383160
100,43.662744,-79.321558
101,43.636258,-79.498509


In [9]:
# Number of clusters: shared by the model and the colour palette so the two
# cannot drift apart.
n_clusters = 6

# Cluster the postal codes on their (latitude, longitude) coordinates.
# random_state fixes the initialisation; n_init is passed explicitly because
# its default value is not the same in every scikit-learn release.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0).fit(df_cluster)

# Store the labels as the first column of df. Dropping any previous copy first
# keeps the cell re-runnable: df.insert raises if the column already exists.
df = df.drop(columns=['Cluster Labels'], errors='ignore')
df.insert(0, 'Cluster Labels', kmeans.labels_)

# One evenly spaced rainbow colour per cluster, converted to hex strings.
colors_array = plt.cm.rainbow(np.linspace(0, 1, n_clusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Base map centred on the coordinates of the first row of df.
map_clusters = folium.Map(location=[43.753259, -79.329656], zoom_start=10)

# Add one circle per postal code, coloured by its cluster label.
for lat, lng, borough, neighborhood, cluster in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood'], df['Cluster Labels']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    # Labels run from 0 to n_clusters - 1, so they index the palette directly
    # (the previous `cluster-1` only worked because label 0 wrapped to index -1).
    cluster_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7,
        # NOTE(review): the popup above already sets parse_html=True; confirm
        # whether this extra keyword on CircleMarker has any effect.
        parse_html=False).add_to(map_clusters)

map_clusters