### All the parts of the assignment are implemented in this notebook

# Part 1

In [1]:
import numpy as np
import pandas as pd
import lxml
import folium

In [2]:
# Read every HTML table on the Wikipedia "postal codes of Canada: M" page
# and keep the first one as the working frame
wiki_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wiki_tables = pd.read_html(wiki_url)
toronto_df = wiki_tables[0]
toronto_df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [3]:
# Index labels of the rows whose borough is "Not assigned"
borough_missing = toronto_df["Borough"].eq("Not assigned")
Not_assigned = toronto_df.index[borough_missing.to_numpy()]

# Remove those rows from the frame in place, then preview the first ten rows
toronto_df.drop(index=Not_assigned, inplace=True)
toronto_df.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


In [4]:
# Wherever the neighbourhood is "Not assigned", fall back to the borough name.
# The right-hand side is aligned on the row index, so only the masked rows change.
neighbourhood_missing = toronto_df['Neighbourhood'] == 'Not assigned'
toronto_df.loc[neighbourhood_missing, 'Neighbourhood'] = toronto_df['Borough']
toronto_df.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


In [5]:
# Collapse rows that share a postal code and borough into a single row,
# joining their neighbourhood names with ", "
toronto_df = (
    toronto_df
    .groupby(['Postal Code', 'Borough'])['Neighbourhood']
    .agg(', '.join)
    .reset_index()
)
toronto_df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


In [6]:
# Dimensions of toronto_df as (rows, columns)
toronto_df.shape

(103, 3)

# Part 2

In [7]:
# Save the frame to disk; the row index is not written to the file
toronto_df.to_csv(path_or_buf='toronto_df_1.csv', index=False)

In [8]:
# Load the saved frame back, using the "Postal Code" column as the row index
# (the index therefore already carries the name "Postal Code")
toronto_df = pd.read_csv("toronto_df_1.csv", index_col="Postal Code")
toronto_df.head()

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Malvern, Rouge"
M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


In [9]:
# Download the latitude/longitude table and index it by postal code
toronto_geo = 'https://cocl.us/Geospatial_data'
geo_data = pd.read_csv(toronto_geo, index_col="Postal Code")
geo_data.head()

Unnamed: 0_level_0,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476


In [10]:
# Attach the coordinates to each postal code: both frames are indexed by
# "Postal Code", so a left join on the index keeps every row of toronto_df
toronto_neighbourhoods = toronto_df.join(geo_data, how='left')
toronto_neighbourhoods.head(50)

Unnamed: 0_level_0,Borough,Neighbourhood,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
M1G,Scarborough,Woburn,43.770992,-79.216917
M1H,Scarborough,Cedarbrae,43.773136,-79.239476
M1J,Scarborough,Scarborough Village,43.744734,-79.239476
M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [11]:
# Export to csv.
# "Postal Code" is the row index of this frame, so the index has to be written:
# with index=False the exported file would contain no postal codes at all.
toronto_neighbourhoods.to_csv("toronto_df_2.csv", index=True)

# Part 3

In [12]:
# Toronto latitude and longitude, used to centre the map
toronto_latitude = 43.6532
toronto_longitude = -79.3832
map_toronto = folium.Map(location=[toronto_latitude, toronto_longitude], zoom_start=10.7)

# Add one circle marker per row, with a "<neighbourhood>, <borough>" popup
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighbourhood']
for lat, lng, borough, neighbourhood in toronto_neighbourhoods[marker_columns].itertuples(index=False, name=None):
    popup_text = '{}, {}'.format(neighbourhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    )
    marker.add_to(map_toronto)

# This map will not be visible on GitHub; check the README file
map_toronto

In [13]:
# @hidden_cell
import os

# API credentials are read from the environment so that real secrets never
# have to be saved inside the notebook. When the variables are not set, the
# original '$$$$' placeholder is used, so existing behaviour is unchanged.
CLIENT_ID = os.environ.get('CLIENT_ID', '$$$$')
CLIENT_SECRET = os.environ.get('CLIENT_SECRET', '$$$$')
# NOTE(review): looks like a YYYYMMDD API version date; confirm against the API being called
VERSION = '20200726'

## Clustering the neighbourhoods whose borough name contains "Central" or "Downtown"

In [14]:
# Keep only the rows whose borough name contains the literal text
# "Downtown" or "Central" (plain substring match, not a regex)
borough_names = toronto_neighbourhoods['Borough']
in_downtown = borough_names.str.contains('Downtown', regex=False)
in_central = borough_names.str.contains('Central', regex=False)
toronto_data = toronto_neighbourhoods[in_downtown | in_central]
toronto_data

Unnamed: 0_level_0,Borough,Neighbourhood,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
M4P,Central Toronto,Davisville North,43.712751,-79.390197
M4R,Central Toronto,"North Toronto West, Lawrence Park",43.715383,-79.405678
M4S,Central Toronto,Davisville,43.704324,-79.38879
M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049
M4W,Downtown Toronto,Rosedale,43.679563,-79.377529
M4X,Downtown Toronto,"St. James Town, Cabbagetown",43.667967,-79.367675
M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316
M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636


In [15]:
# Rebuild the map at a closer zoom and plot only the rows in toronto_data
map_toronto = folium.Map(location=[toronto_latitude, toronto_longitude], zoom_start=13)

marker_fields = ('Latitude', 'Longitude', 'Borough', 'Neighbourhood')
for lat, lng, borough, neighbourhood in zip(*(toronto_data[field] for field in marker_fields)):
    # Popup text is "<neighbourhood>, <borough>"
    popup = folium.Popup('{}, {}'.format(neighbourhood, borough), parse_html=True)
    folium.CircleMarker(
        location=[lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

# Check the README for the map
map_toronto

In [16]:
#Using k means clustering
from sklearn.cluster import KMeans

k = 3

# Cluster on the coordinates only. Selecting the two columns by name (instead
# of dropping the text columns) keeps the feature matrix identical when this
# cell is re-run after 'Cluster Labels' has been added, and avoids the
# positional `axis` argument of DataFrame.drop, which pandas 2.0 no longer accepts.
toronto_clustering = toronto_data[['Latitude', 'Longitude']]

# n_init is pinned to 10 so the result does not depend on the scikit-learn
# version (the default changed from 10 to 'auto' in newer releases).
kmeans = KMeans(n_clusters=k, n_init=10, random_state=0).fit(toronto_clustering)

# Discard labels left over from a previous run so the insert below cannot fail
# with "already exists"; on a first run this is a no-op copy of the frame.
toronto_data = toronto_data.drop(columns='Cluster Labels', errors='ignore')
toronto_data.insert(0, 'Cluster Labels', kmeans.labels_)
toronto_data

Unnamed: 0_level_0,Cluster Labels,Borough,Neighbourhood,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
M4N,0,Central Toronto,Lawrence Park,43.72802,-79.38879
M4P,0,Central Toronto,Davisville North,43.712751,-79.390197
M4R,0,Central Toronto,"North Toronto West, Lawrence Park",43.715383,-79.405678
M4S,0,Central Toronto,Davisville,43.704324,-79.38879
M4T,2,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
M4V,2,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049
M4W,2,Downtown Toronto,Rosedale,43.679563,-79.377529
M4X,1,Downtown Toronto,"St. James Town, Cabbagetown",43.667967,-79.367675
M4Y,2,Downtown Toronto,Church and Wellesley,43.66586,-79.38316
M5A,1,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636


In [17]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[43.651070, -79.347015], zoom_start=10)

# set color scheme for the clusters: k hex colours evenly spaced along the
# rainbow colormap, one per cluster label
colors_array = cm.rainbow(np.linspace(0, 1, k))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add markers to the map
for lat, lon, cluster in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Cluster Labels']):
    # Cluster labels run from 0 to k-1, so they index the palette directly.
    # (Indexing with `cluster-1` made label 0 wrap around to the last colour.)
    cluster_colour = rainbow[int(cluster)]
    label = folium.Popup(' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_colour,
        fill=True,
        fill_color=cluster_colour,
        fill_opacity=0.7).add_to(map_clusters)

# Check the README for the map
map_clusters