# Download Toronto Neighborhood Information from Wikipedia and format it accordingly

### Installing, importing and setting necessary parameters

In [1]:
# pip install arcgis
from arcgis.gis import GIS
from arcgis.geocoding import get_geocoders, batch_geocode

# Import data-handling packages
import pandas as pd
import numpy as np

  pd.datetime,


### Import data from Wikipedia and drop rows from the dataframe where 'Borough' is 'Not assigned'

In [2]:
# Load the first table from the Wikipedia page
# "List of postal codes of Canada: M"; header=0 uses the table's first row
# as the column names.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Treat 'Not assigned' cells as missing values, then drop every row that has
# no borough. np.nan is used because the np.NaN alias was removed in
# NumPy 2.0, and the steps are chained rather than mutating the frame in place.
df_CA = (
    pd.read_html(url, header=0)[0]
    .replace('Not assigned', np.nan)
    .dropna(subset=['Borough'])
)

### Find coordinates for postal codes and add onto current dataset

In [3]:
# Plain Python list of the values in the "Postal code" column
postal_codes = df_CA.loc[:, "Postal code"].to_list()

In [4]:
# Sign in to ArcGIS for geocoding. Credentials are read from the
# ARCGIS_USERNAME / ARCGIS_PASSWORD environment variables so real values never
# have to be saved in the notebook; the placeholder strings are used as a
# fallback when the variables are not set.
import os

gis = GIS(
    "https://www.arcgis.com",  # HTTPS so the login is not sent in clear text
    os.environ.get("ARCGIS_USERNAME", "User_Name"),
    os.environ.get("ARCGIS_PASSWORD", "Pass_Word"),
)

In [5]:
# Use the first of the GIS's configured geocoders and hand it to
# batch_geocode explicitly, so the lookup visibly runs against `gis`
# instead of leaving `geocoder` as an unused variable.
geocoder = get_geocoders(gis)[0]

# Geocode all postal codes in one batch request, restricted to Canada ("CA").
CA_geo = batch_geocode(postal_codes, source_country="CA", geocoder=geocoder)

### Use for loop to create dataframe that stores postal codes and coordinates

In [6]:
## Collect the matched address and coordinates from every geocoding result.
## Each result is indexed by 'address' and by 'location', whose 'y' value is
## stored as the latitude and whose 'x' value is stored as the longitude.
Postal_code = [match['address'] for match in CA_geo]
Lat = [match['location']['y'] for match in CA_geo]
Long = [match['location']['x'] for match in CA_geo]

In [7]:
# Wrap each collected list in its own single-column dataframe; all three
# frames get the same default 0..n-1 row index.
df_Postal_code = pd.DataFrame({'Postal code': Postal_code})
df_Lat = pd.DataFrame({'Latitude': Lat})
df_Long = pd.DataFrame({'Longitude': Long})

In [8]:
# Join the single-column frames side by side on their row index, so every row
# holds a postal code together with its corresponding latitude and longitude.
df_CA_coords = df_Lat.merge(df_Long, left_index=True, right_index=True)
df_CA_coords_code = df_Postal_code.merge(
    df_CA_coords,
    left_index=True,
    right_index=True,
)

In [9]:
# Combine the Wikipedia table (postal codes, boroughs, neighborhoods) with the
# geocoded coordinates (latitude and longitude). No join key is given, so
# pandas performs its default inner join on the columns the two frames share
# (presumably only 'Postal code' -- confirm against df_CA's columns).
df_CA_geo = df_CA.merge(df_CA_coords_code)

### Plot Toronto neighborhoods on a map and cluster boroughs

In [10]:
# Get coordinates (latitude, longitude) for Toronto
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
address = 'Toronto, CA'
geolocator = Nominatim(user_agent="CA_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match; stop here with a
# clear message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError("No geocoding result for address {!r}".format(address))

latitude = location.latitude
longitude = location.longitude

In [11]:
# Keep only the rows whose borough name contains 'Toronto', renumber the rows
# from 0, and preview the first few.
is_toronto = df_CA_geo['Borough'].str.contains('Toronto')
toronto_geo = df_CA_geo.loc[is_toronto].reset_index(drop=True)
toronto_geo.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,Regent Park / Harbourfront,43.650964,-79.353041
1,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.66179,-79.38939
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657491,-79.377529
3,M5C,Downtown Toronto,St. James Town,43.651734,-79.375554
4,M4E,East Toronto,The Beaches,43.678148,-79.295349


#### Create a map of Toronto with neighborhoods superimposed on top.

In [12]:
import folium # map rendering library

# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# add one circle marker per neighborhood, labelled "<neighborhood>, <borough>".
# The loop variables are named lat/lng so they do not overwrite the
# df_Lat/df_Long dataframes that the earlier merge cell depends on.
for lat, lng, borough, neighborhood in zip(
    toronto_geo['Latitude'],
    toronto_geo['Longitude'],
    toronto_geo['Borough'],
    toronto_geo['Neighborhood'],
):
    label = '{}, {}'.format(neighborhood, borough)
    popup = folium.Popup(label, parse_html=True)
    # parse_html is a folium.Popup option, not a CircleMarker one, so it is
    # only passed to the popup above.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)


# Map of Canada's postal codes for boroughs in Toronto
map_toronto