# Battle of Toronto

## Import Libraries

In [1]:
import numpy as np 
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import matplotlib.cm as cm
import matplotlib.colors as colors
from bs4 import BeautifulSoup
print('BeautifulSoup imported')
from sklearn.cluster import KMeans
import json
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize
import folium
print('Folium imported')
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim 

BeautifulSoup imported
Folium imported
Collecting package metadata: done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    conda-4.6.4                |           py36_0         877 KB  conda-forge
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    geopy-1.18.1               |             py_0          51 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         961 KB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.49-py_0

The following packages will be UPDATED:

  conda                                        4.6.3-py36_0 --> 4.6.4-py36_0
  geopy              conda-forge/linux-64::geopy-1.11

## Get the Data from the URL

In [2]:
# Download the Wikipedia page listing Canadian postal codes starting with "M"
# and parse it with BeautifulSoup.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,  # do not hang the notebook forever on a stalled connection
)
# Stop here on an HTTP error instead of parsing an error page in later cells.
response.raise_for_status()
# NOTE: despite its name, `url` holds the page's HTML text (name kept for
# compatibility with the rest of the notebook).
url = response.text
soup = BeautifulSoup(url, 'lxml')

## Locate the table and collect its cells

In [3]:
# Take the first <table> element of the parsed page and collect all of its
# <td> cells into `ngh`.
table = soup.find('table')
if table is None:
    # soup.find returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the next line.
    raise ValueError('No <table> element found in the downloaded page')
ngh = table.find_all('td')

## Running Loop and Formatting table

In [4]:
# Split the flat list of <td> cells into three parallel columns. The cells are
# read in groups of three: postcode, borough, neighbourhood.
cell_text = [cell.text.strip() for cell in ngh]
postcode = cell_text[0::3]
borough = cell_text[1::3]
neighbourhood = cell_text[2::3]

# Build the frame from the three lists; `columns` pins the column order.
df1 = pd.DataFrame(
    {'Postcode': postcode, 'Borough': borough, 'Neighbourhood': neighbourhood},
    columns=['Postcode', 'Borough', 'Neighbourhood'],
)
df1.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


## Drop unassigned boroughs and group neighbourhoods by postcode

In [5]:
# Keep only rows that have a borough: drop missing values and the
# 'Not assigned' placeholder. Plain assignment is used instead of a chained
# `inplace=True` call on a column selection, which pandas deprecates and which
# has no effect on the frame under Copy-on-Write.
has_borough = df1['Borough'].notna() & (df1['Borough'] != 'Not assigned')
df1 = df1[has_borough]

# Combine the neighbourhoods that share a postcode and borough into one
# comma-separated string; reset_index restores Postcode/Borough as columns.
df1 = df1.groupby(['Postcode', 'Borough'])['Neighbourhood'].apply(', '.join).reset_index()

# A grouped row whose neighbourhood is still exactly 'Not assigned' is
# labelled "Queen's Park" (hard-coded value).
df1['Neighbourhood'] = df1['Neighbourhood'].replace('Not assigned', "Queen's Park")
df1

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


## Data subset

In [6]:
# Show the (rows, columns) dimensions of df1.
df1.shape

(103, 3)

# Battle of Toronto Part 2

## Import Location Data, Using the CSV

In [19]:
# Read the CSV served at this URL into df_loc (requires network access).
df_loc = pd.read_csv('http://cocl.us/Geospatial_data')


## Rename the headers to match the existing data frame (Postcode)

In [26]:
# Relabel df_loc's columns by position so the key column is named 'Postcode',
# then preview the first five rows.
# NOTE(review): assumes df_loc has exactly three columns in this order — confirm.
df_loc.columns = pd.Index(['Postcode', 'latitude', 'longitude'])
df_loc.head(n=5)

Unnamed: 0,Postcode,latitude,longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## Merge DF1 and Location Data frame using Postcode

In [27]:
# Inner join on 'Postcode': keeps only postcodes present in both df1 and df_loc.
df_mer = df1.merge(df_loc, how='inner', on='Postcode')

In [31]:
# Display the merged frame (all rows are shown because display.max_rows is None).
df_mer

Unnamed: 0,Postcode,Borough,Neighbourhood,latitude,longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


# Map of Toronto

In [33]:
# Look up the coordinates of Toronto; they are used to centre the map.
address = 'Toronto, Canada'
# Nominatim needs an application-specific user agent; geopy 2.x raises a
# configuration error when none is supplied.
geolocator = Nominatim(user_agent='battle_of_toronto_notebook')
location = geolocator.geocode(address)
if location is None:
    # geocode returns None when the service finds no match.
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of the City of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of the City of Toronto are 43.653963, -79.387207.


# Plot each postcode's latitude and longitude from the merged data frame

In [37]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per row of df_mer, with a "<neighbourhood>, <borough>"
# popup. The loop variables use *_name suffixes so they do not rebind the
# notebook-level `borough` / `neighbourhood` lists built while scraping.
for lat, lng, borough_name, neighbourhood_name in zip(
        df_mer['latitude'], df_mer['longitude'],
        df_mer['Borough'], df_mer['Neighbourhood']):
    popup = folium.Popup(
        '{}, {}'.format(neighbourhood_name, borough_name),
        parse_html=True,
    )
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

map_toronto

# Complete.