# Week 3 Assignment

## Scraping Data from Wikipedia

In [2]:
import pandas as pd

In [3]:
# Load the first HTML table from the Wikipedia page of Toronto ("M") postal codes.
# Cells containing the text 'Not assigned' are parsed as NaN so they can be handled below.
df_trt = pd.read_html(io="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M", na_values=['Not assigned'])[0]

# The neighbourhood will be the same as the borough if a cell has a borough but no assigned neighbourhood.
# Assign the result back explicitly: calling fillna(..., inplace=True) on a column selected from the
# frame is a chained in-place update, which does not modify df_trt under pandas Copy-on-Write.
df_trt['Neighbourhood'] = df_trt['Neighbourhood'].fillna(df_trt['Borough'])

# Delete dataframe rows where Borough is not assigned
df_trt = df_trt.dropna(subset=['Borough'])

df_trt.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
9,M9A,Queen's Park,Queen's Park
10,M1B,Scarborough,Rouge
11,M1B,Scarborough,Malvern
13,M3B,North York,Don Mills North


In [5]:
# Collapse rows that share the same Postcode and Borough into a single row whose
# Neighbourhood value is the comma-separated list of all neighbourhoods in that group.
postcode_groups = df_trt.groupby(['Postcode', 'Borough'], as_index=False)
df_trt_cob = postcode_groups.agg(lambda names: ', '.join(names))
df_trt_cob.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [6]:
# Sanity check: (rows, columns) of the combined frame, i.e. one row per Postcode/Borough group.
df_trt_cob.shape

(103, 3)

## Get the Latitude and Longitude

In [11]:
# Download the lookup table that maps each postal code to its latitude and longitude.
geo_csv_url = 'http://cocl.us/Geospatial_data'
lat_long = pd.read_csv(geo_csv_url)
lat_long.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [18]:
# Give the key column the same name ("Postcode") as in the Wikipedia-derived frame
# so that the two frames can be joined on it.
lat_long = lat_long.rename(columns={"Postal Code": "Postcode"})

In [22]:
# Attach coordinates to every postcode row.
# The join key and join type are spelled out rather than inferred from whichever
# column names the two frames happen to share, so an unexpected extra common column
# cannot silently change the join. Requires lat_long's key column to be named "Postcode".
df_new = pd.merge(df_trt_cob, lat_long, on='Postcode', how='inner')
df_new.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [21]:
# Sanity check: (rows, columns) of the merged frame.
df_new.shape

(103, 5)

## Explore and Cluster the Neighborhoods in Toronto

In [35]:
from geopy.geocoders import Nominatim

# Look up the coordinates of the city itself; lat/lon are used to centre the maps below.
address = 'Toronto'
geolocator = Nominatim(user_agent="trt_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match; fail with a clear message
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

lat = location.latitude
lon = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(lat, lon))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [9]:
# Install a pinned folium release from the conda-forge channel into the active conda
# environment (shell command; --yes skips the confirmation prompt), then import it.
# NOTE(review): this needs network access and a conda-based kernel; it must run before
# any cell that uses folium.
!conda install -c conda-forge folium=0.5.0 --yes 
import folium 
print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    altair-4.0.1               |             py_0         575 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.0 MB

The following NEW packages will be 

In [26]:
# create map of Toronto centred on the geocoded city coordinates (lat, lon)
map_toronto = folium.Map(location=[lat, lon], zoom_start=10)

# add one circle marker per postcode row.
# The loop variables are deliberately not named lat/lon: reusing those names would
# overwrite the city-centre coordinates, so any map created afterwards (or a re-run
# of this cell) would be centred on the last postcode instead of Toronto.
for row_lat, row_lon, borough, neighbourhood in zip(df_new['Latitude'], df_new['Longitude'], df_new['Borough'], df_new['Neighbourhood']):
    # popup text: "<neighbourhood(s)>, <borough>"
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [row_lat, row_lon],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)

# display map
map_toronto

In [45]:

from folium import plugins

# let's start again with a clean copy of the map of Toronto.
# Centre on the geocoding result directly: the lat/lon names may have been overwritten
# by a marker loop in an earlier cell.
map_toronto = folium.Map(location=[location.latitude, location.longitude], zoom_start=10)

# instantiate a marker cluster object for the postcodes in the dataframe
postcodes = plugins.MarkerCluster().add_to(map_toronto)

# loop through the dataframe and add each data point to the marker cluster.
# Each popup shows the marker's own coordinates; the previous code formatted
# location[1] (the city-wide geocoding result), so every popup showed the same pair.
for row_lat, row_lon, postcode in zip(df_new['Latitude'], df_new['Longitude'], df_new['Postcode']):
    label = 'lat_long: ({}, {})<br>Postcode: {}'.format(row_lat, row_lon, postcode)
    folium.Marker(
        location=[row_lat, row_lon],
        icon=None,
        popup=label
    ).add_to(postcodes)

# display map
map_toronto