# Coursera Capstone project by Asset Alkhanov

## Part 1

Import all the necessary libraries, including the Wikipedia API client.

In [23]:
from io import StringIO

import pandas as pd
import numpy as np
import geocoder # import geocoder
import wikipedia
from wikitables import import_tables
import folium
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

In [2]:
#print(wikipedia.search("List of postal codes of Canada: M"))

In [3]:
#print(wikipedia.summary("List of postal codes of Canada: M"))

The cell below fetches the postal-code table from Wikipedia and converts it to a JSON string.

In [4]:
# Fetch every table from the "List of postal codes of Canada: M" article.
tables = import_tables('List of postal codes of Canada: M')

# Serialize the first table to JSON so the next cell can load it with pandas.
tables_json = tables[0].json()

Filter the table down to the rows that have an assigned borough and display the result

In [5]:
# Parse the JSON text into a DataFrame. It is wrapped in StringIO because
# passing a literal JSON string straight to read_json is deprecated.
df = pd.read_json(StringIO(tables_json))

# Keep only rows whose borough is assigned. This is done with a boolean mask
# rather than `df['Borough'].where(..., inplace=True)`: an in-place call on a
# column selection does not reliably write back to `df` (it is a no-op under
# pandas Copy-on-Write), which would leave the 'Not assigned' rows in place.
has_borough = df['Borough'] != 'Not assigned'
df = (
    df.loc[has_borough]
    .dropna(axis='index')      # drop any row that still has a missing value
    .reset_index(drop=True)    # renumber rows 0..n-1 after filtering
)
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,Business reply mail Processing Centre
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


Showing shape of the dataframe

In [6]:
# (number of rows, number of columns) of the filtered postal-code DataFrame
df.shape

(103, 3)

## Part 2

Reading CSV file with coordinates and postal codes

In [7]:
# Location of the CSV that maps each postal code to a latitude/longitude pair.
geospatial_csv_url = 'https://cocl.us/Geospatial_data'

# Load the coordinates table and display it.
coord_df = pd.read_csv(geospatial_csv_url)
coord_df

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


Merging two dataframes into one result dataframe

In [8]:
# Attach coordinates to each postal code: inner join of the cleaned
# postal-code table with the coordinates table on their shared key column.
result = df.merge(coord_df, on='Postal Code')
result

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,Business reply mail Processing Centre,43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


## Part 3

Select the rows whose Borough name contains "Toronto"

In [9]:
# Boolean mask: True where the borough name contains the literal text
# 'Toronto' (plain substring match, not a regular expression).
is_toronto_borough = result['Borough'].str.contains('Toronto', regex=False)

# Keep only the matching rows; the original row labels are preserved.
res = result.loc[is_toronto_borough]
res

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
25,M6G,Downtown Toronto,Christie,43.669542,-79.422564
30,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
31,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259


Visualizing the Neighbourhoods using Folium

In [11]:
# Base map on which the Toronto neighborhoods are drawn.
map_center = [43.651070, -79.347015]
tor_map = folium.Map(location=map_center, zoom_start=10)

# Add one blue circle per row of `res`; clicking it shows "<neighborhood>, <borough>".
marker_rows = zip(res['Latitude'], res['Longitude'], res['Borough'], res['Neighborhood'])
for lat, lng, bor, neigh in marker_rows:
    popup_text = '{}, {}'.format(neigh, bor)
    label = folium.Popup(popup_text, parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        # NOTE(review): parse_html is a Popup option; it is presumably ignored
        # by CircleMarker - confirm against the folium version in use.
        parse_html=False,
    )
    marker.add_to(tor_map)
tor_map

Use KMeans clustering to group the neighborhoods

In [19]:
# Number of clusters to form.
k = 5

# Feature matrix for clustering: the coordinates only. The two columns are
# selected by name instead of dropping the others with `drop([...], 1)`:
#   * the positional `axis` argument of DataFrame.drop is not accepted by
#     pandas 2.x, and
#   * on a re-run of this cell the label column added below would otherwise
#     leak into the features.
tor_cluster = res[['Latitude', 'Longitude']]

# Fit KMeans. random_state fixes the initialisation; n_init is set explicitly
# so the number of restarts does not depend on the installed scikit-learn
# version's default.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=0).fit(tor_cluster)

# Put the cluster labels in the first column. Any label column left over from
# an earlier run is dropped first, so re-running the cell does not raise
# "cannot insert ..., already exists". `drop` returns a new frame, so the
# insert never mutates the frame that `res` was sliced from.
res = res.drop(columns='Labels of the Clusters', errors='ignore')
res.insert(0, 'Labels of the Clusters', kmeans.labels_)
res

Unnamed: 0,Labels of the Clusters,Postal Code,Borough,Neighborhood,Latitude,Longitude
2,0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,0,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,0,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,0,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,1,M4E,East Toronto,The Beaches,43.676357,-79.293031
20,0,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
24,0,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
25,3,M6G,Downtown Toronto,Christie,43.669542,-79.422564
30,0,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
31,2,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259


In [26]:
# Base map for the clustered view.
clust_map = folium.Map(location=[43.651070, -79.347015], zoom_start=10)

# One hex colour per cluster, sampled evenly across the rainbow colormap.
# (The original built throw-away arrays only to obtain a length equal to k.)
color_arr = cm.rainbow(np.linspace(0, 1, k))
rainbow = [colors.rgb2hex(rgba) for rgba in color_arr]

# Draw one circle per row of `res`, coloured by its cluster label.
for lat, lon, clust in zip(res['Latitude'], res['Longitude'], res['Labels of the Clusters']):
    label = folium.Popup(' Cluster ' + str(clust), parse_html=True)
    # KMeans labels run from 0 to k-1, so they index the palette directly.
    # The previous `clust-1` only worked for cluster 0 through negative-index
    # wraparound and shifted every cluster onto its neighbour's colour.
    cluster_color = rainbow[clust]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7,
    ).add_to(clust_map)

clust_map