My assignment "Segmenting and Clustering Neighborhoods in Toronto"
=================================================================

Part 1: Import and clean the dataset
------

In [1]:
#Import libraries
import pandas as pd
import numpy as np

In [2]:
# Load the Toronto postal-code table (postal code, borough, neighborhood)
# from the Excel workbook, addressed by a path relative to the notebook.
toronto_xlsx = 'DataToronto.xlsx'
df = pd.read_excel(toronto_xlsx)

# Preview the first rows of the raw table, 'Not assigned' entries included
df.head(11)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
7,M8A,Not assigned,Not assigned
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"


In [3]:
# Build a new dataframe that keeps only the rows with a known borough:
# every row whose Borough is the literal string 'Not assigned' is excluded.
has_borough = df['Borough'].ne('Not assigned')
df_filter = df.loc[has_borough]
df_filter.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [4]:
# Show the (rows, columns) size of the filtered dataframe
df_filter.shape

(103, 3)

Part 2: Obtain latitude and longitude
------

In [5]:
# Load the latitude/longitude of each postal code from the CSV file,
# addressed by a path relative to the notebook.
coordinates_csv = 'Geospatial_Coordinates.csv'
df_latlon = pd.read_csv(coordinates_csv)

# Preview the first rows of the coordinates table
df_latlon.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [6]:
# Show the (rows, columns) size of the coordinates dataframe
df_latlon.shape

(103, 3)

In [7]:
# Column layout of the combined dataset: the three descriptive columns of the
# postal-code table followed by the two coordinate columns.
column_names = [
    'Postal Code',
    'Borough',
    'Neighborhood',
    'Latitude',
    'Longitude',
]

# Start from an empty dataframe that already carries those columns
df_complete = pd.DataFrame(data=None, columns=column_names)
df_complete.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude


In [8]:
# Attach latitude and longitude to every row of df_filter.
#
# This is done with one vectorised join on 'Postal Code' rather than by growing
# df_complete one row at a time: DataFrame.append was deprecated in pandas 1.4
# and removed in pandas 2.0, and appending inside a loop re-copies the whole
# frame on every iteration.

# Pick the coordinate columns by name (not by position) and keep only the first
# entry per postal code, i.e. the same row a first-match lookup would return.
coordinates = (
    df_latlon[['Postal Code', 'Latitude', 'Longitude']]
    .drop_duplicates(subset='Postal Code', keep='first')
)

# Fail loudly, naming the offending codes, when a postal code has no
# coordinates, instead of an opaque positional-index error.
missing_codes = sorted(set(df_filter['Postal Code']) - set(coordinates['Postal Code']))
if missing_codes:
    raise ValueError(f"No coordinates found for postal codes: {missing_codes}")

# how='left' keeps the row order of df_filter, and merging on a column gives the
# result a fresh 0..n-1 index; column_names fixes the final column order.
df_complete = df_filter.merge(coordinates, on='Postal Code', how='left')[column_names]


In [9]:
#View new dataset
df_complete.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


Part 3: Show Boroughs of Toronto
------

In [10]:
# Build a new dataframe restricted to the four boroughs named '... Toronto'
toronto_boroughs = ['Central Toronto', 'Downtown Toronto', 'East Toronto', 'West Toronto']
in_toronto = df_complete['Borough'].isin(toronto_boroughs)
df_Toronto = df_complete.loc[in_toronto]
df_Toronto.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [11]:
# Draw the rows of df_Toronto on an interactive map
# -------------------------------------------------
# folium has to be available in the kernel environment, e.g.
#   conda install -c conda-forge folium=0.5.0 --yes
import folium  # plotting library

# Fixed map centre (latitude, longitude) and initial zoom level
map_center = [43.7001114, -79.4162979]
venues_map = folium.Map(location=map_center, zoom_start=15)

# One blue circle marker per row of df_Toronto; its popup shows the borough name
marker_rows = zip(df_Toronto['Latitude'], df_Toronto['Longitude'], df_Toronto['Borough'])
for latitude, longitude, borough in marker_rows:
    marker = folium.features.CircleMarker(
        [latitude, longitude],
        radius=5,
        color='blue',
        popup=borough,
        fill=True,
        fill_color='blue',
        fill_opacity=0.6,
    )
    marker.add_to(venues_map)

# display map
venues_map
