In [1]:
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import folium
import urllib.request, urllib.parse, urllib.error
from bs4 import BeautifulSoup
import ssl
import re

#### We're getting the 140 neighbourhoods...

In [3]:
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

try:
    url = 'https://en.wikipedia.org/wiki/List_of_neighbourhoods_in_Toronto'
    html = urllib.request.urlopen(url, context = ctx).read()
    soup = BeautifulSoup(html, 'html.parser')
except urllib.error.HTTPError as err:
    print('***********', err.info())
    quit()
except Exception as err:    
    print(err)
    quit()

    
#wikitable.sortable.jquery-tablesorter    
print('************Starting to scan page...*********************')

ran_list = []
toronto_data_list = []


code_table =  soup.find_all("table",{"class":"wikitable sortable"})[1]

code_table_body = code_table.tbody.find_all('tr')


for char in code_table_body:
    res = char.find_all('td')
    if len(res) <= 0:
        pass
    else:
        cdn = res[0].text.strip()
        ngh = res[1].text.strip()
        borough = res[2].text.strip()
        covngh = res[3].text.strip()
        
        row = {'CDN number' : cdn, 'City-designated neighbourhood': ngh, 
               'Former city/borough': borough, 'Neighbourhoods covered': covngh}
        toronto_data_list.append(row)

toronto_data_140 = pd.DataFrame.from_dict(toronto_data_list)
toronto_data_140.head(20)
toronto_data_140.to_csv('toronto_data_140.csv')


************Starting to scan page...*********************


#### Getting the postal code data...

In [5]:
address_list = []

toronto_data_init = pd.read_csv('toronto_data.csv')
toronto_data_init.drop('Unnamed: 0', axis = 1, inplace = True)

for postal, bor, ngh in zip(toronto_data_init['Postal_Code'], toronto_data_init['Borough'], toronto_data_init['Neighbourhood']):
    resu = ngh.split(',')
    for val in resu:
        val = val.replace(')', ' ')
        val = val.strip() 
        row = {'Postal_Code': postal, 'Borough': bor, 'Neighbourhood': val}
        address_list.append(row)
        
tor_data = pd.DataFrame(address_list)
tor_data.to_csv('toronto_data_4.csv')

In [15]:
geo_coords = pd.read_csv('Geospatial_Coordinates.csv')
toronto_data = pd.merge(tor_data, geo_coords, left_on = ['Postal_Code'], right_on = ['Postal Code'])
toronto_data.drop('Postal Code', axis = 1, inplace = True)

#### Showing both datasets 

In [16]:
toronto_data.head()

Unnamed: 0,Postal_Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,Malvern,43.806686,-79.194353
1,M1B,Scarborough,Rouge,43.806686,-79.194353
2,M1C,Scarborough,Rouge Hill,43.784535,-79.160497
3,M1C,Scarborough,Port Union,43.784535,-79.160497
4,M1C,Scarborough,Highland Creek,43.784535,-79.160497


In [8]:
toronto_data_140.head()

Unnamed: 0,CDN number,City-designated neighbourhood,Former city/borough,Neighbourhoods covered
0,129,Agincourt North,Scarborough,Agincourt and Brimwood
1,128,Agincourt South-Malvern West,Scarborough,Agincourt and Malvern
2,20,Alderwood,Etobicoke,Alderwood
3,95,Annex,Old City of Toronto,The Annex and Seaton Village
4,42,Banbury-Don Mills,North York,Don Mills


In [21]:
rare_addresses = [
    {'Location': 'Blake-Jones', 'Latitude': 43.671400, 'Longitude': -79.336530}
    
]

rare_addresses[0]['Location']

'Blake-Jones'

In [30]:
addresses = []

rare_addresses = [
    {'Location': 'Blake-Jones', 'Latitude': 43.671400, 'Longitude': -79.336530}
    
]



for i in range(toronto_data_140.shape[0]):
    try:
# first we try geocoder to find the location of each neighbourhood
        ngh, bor, covngh = toronto_data_140.iloc[i,1], toronto_data_140.iloc[i,2], toronto_data_140.iloc[i,3]
        address = '{}, Toronto, Canada'.format(ngh)
        geolocator = Nominatim(user_agent='Final_Project_TBotN')
        location = geolocator.geocode(address)
        latitude = location.latitude
        longitude = location.longitude
        
    except AttributeError as err:
# if geocoder cannot find the location, we use the table we already have to get the data
        print('Found attribute error')
        try:
            r = toronto_data.loc[(toronto_data['Borough'].str.contains(ngh)) | (toronto_data['Borough'].str.contains(covngh)) | (toronto_data['Borough'] == bor)].iloc[0,]
            lat, lng = r[3] ,r[4]
        except:
# if the location cannot be found in both tables, the data is entered manually.
# Luckily, there aren't too many of these
            print('Can\'t find location in both tables.')
            for j in range(len(rare_addresses)):
                place = rare_addresses[j]['Location'] == ngh
                lat, lng = rare_addresses[j]['Latitude'], rare_addresses[j]['Longitude']
        
    row = {'Borough': bor, 'Location': address, 'Latitude': latitude, 
               'Longitude': longitude, 'Neighbourhoods covered': covngh}
    addresses.append(row)

toronto_data = pd.DataFrame(addresses)
#toronto_data.head(20)

Done a row
Done a row
Done a row
Done a row
Found attribute error
Banbury-Don Mills
North York
Done a row
Done a row
Done a row
Done a row
Done a row
Found attribute error
Bedford Park-Nortown
North York
Done a row
Found attribute error
Beechborough-Greenbrook
York
Done a row
Done a row
Found attribute error
Birchcliffe-Cliffside
Scarborough
Done a row
Done a row
Found attribute error
Blake-Jones
Old City of Toronto
Done a row
Found attribute error
Briar Hill-Belgravia
York
Done a row
Found attribute error
Bridle Path-Sunnybrook-York Mills
North York
Done a row
Done a row
Found attribute error
Brookhaven-Amesbury
North York
Done a row
Done a row
Found attribute error
Caledonia-Fairbank
York
Done a row
Done a row
Done a row
Done a row
Found attribute error
Clairlea-Birchmount
Scarborough
Done a row
Done a row
Done a row
Done a row
Done a row
Done a row
Done a row
Done a row
Done a row
Found attribute error
Dovercourt-Wallace Emerson-Junction
Old City of Toronto
Done a row
Found attribut

Unnamed: 0,Borough,Location,Latitude,Longitude,Neighbourhoods covered
0,Scarborough,"Agincourt North, Toronto, Canada",43.808038,-79.266439,Agincourt and Brimwood
1,Scarborough,"Agincourt South-Malvern West, Toronto, Canada",43.781969,-79.257689,Agincourt and Malvern
2,Etobicoke,"Alderwood, Toronto, Canada",43.601717,-79.545232,Alderwood
3,Old City of Toronto,"Annex, Toronto, Canada",43.670338,-79.407117,The Annex and Seaton Village
4,North York,"Banbury-Don Mills, Toronto, Canada",43.670338,-79.407117,Don Mills
5,North York,"Bathurst Manor, Toronto, Canada",43.763893,-79.456367,Bathurst Manor
6,Old City of Toronto,"Bay Street Corridor, Toronto, Canada",43.668865,-79.389126,"Bay Street, Financial District"
7,North York,"Bayview Village, Toronto, Canada",43.769197,-79.376662,Bayview Village
8,North York,"Bayview Woods-Steeles, Toronto, Canada",43.798127,-79.382973,Bayview Woods
9,North York,"Bedford Park-Nortown, Toronto, Canada",43.798127,-79.382973,"Bedford Park, Ledbury Park, and Nortown"


In [33]:
toronto_data['Location'] = toronto_data['Location'].str.replace(', Toronto, Canada', '')

In [None]:
# Done so that data doesn't need to be downloaded repeatedly

#toronto_data = pd.read_csv('tor_neighbourhood_locations.csv')

In [35]:
toronto_data.head()

Unnamed: 0,Borough,Location,Latitude,Longitude,Neighbourhoods covered
0,Scarborough,Agincourt North,43.808038,-79.266439,Agincourt and Brimwood
1,Scarborough,Agincourt South-Malvern West,43.781969,-79.257689,Agincourt and Malvern
2,Etobicoke,Alderwood,43.601717,-79.545232,Alderwood
3,Old City of Toronto,Annex,43.670338,-79.407117,The Annex and Seaton Village
4,North York,Banbury-Don Mills,43.670338,-79.407117,Don Mills


Then we get the location of Toronto itself

In [37]:
toronto_address = 'Toronto, Canada'

geolocator = Nominatim(user_agent='explore_toronto')
location = geolocator.geocode(toronto_address)
t_latitude = location.latitude
t_longitude = location.longitude
print('The geograpical coordinate of Toronto, Canada are {}, {}.'.format(t_latitude, t_longitude))

The geograpical coordinate of Toronto, Canada are 43.6534817, -79.3839347.


In [40]:
toronto_map = folium.Map(location = [t_latitude, t_longitude], zoom_start = 10)

for lat, lng, neighbourhood, borough in zip(
    toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Location'], toronto_data['Borough']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='#ff1493',
        fill=True,
        fill_color='#ff69b4',
        fill_opacity=0.7,
        parse_html=False).add_to(toronto_map)  

### The 140 administrative neighbourhoods of Toronto

In [41]:
toronto_map

#### Moving on to the locations of child care centres...

In [43]:
child_care = pd.read_csv('Child care centres.csv')
child_care.drop('_id', axis = 1, inplace = True)
child_care

Unnamed: 0,LOC_ID,LOC_NAME,AUSPICE,ADDRESS,PCODE,ward,PHONE,bldg_type,BLDGNAME,IGSPACE,TGSPACE,PGSPACE,KGSPACE,SGSPACE,TOTSPACE,subsidy,run_date,geometry
0,1013,Lakeshore Community Child Care Centre,Non Profit Agency,101 SEVENTH ST,M8V 3B5,3,(416) 394-7601,Public Elementary School,Seventh Street Public School,0,20,32,52,60,164,Y,06AUG21,"{u'type': u'Point', u'coordinates': (-79.50419..."
1,1014,Alternative Primary School Parent Group,Non Profit Agency,1100 SPADINA RD,M5N 2M6,8,(416) 322-5385,Public Elementary School,North Preparatory Public School,0,0,12,26,45,83,Y,06AUG21,"{u'type': u'Point', u'coordinates': (-79.42037..."
2,1015,Cardinal Leger Child Care Centre (Scarborough),Non Profit Agency,600 MORRISH RD,M1C 4Y1,25,(416) 287-0578,Catholic Elementary School,Cardinal Leger Catholic School,0,10,16,26,50,102,Y,06AUG21,"{u'type': u'Point', u'coordinates': (-79.17546..."
3,1016,George Brown - Richmond Adelaide Childcare Centre,Non Profit Agency,130 ADELAIDE ST W,M5H 3P5,10,(416) 415-2453,Other,,10,15,40,0,0,65,Y,06AUG21,"{u'type': u'Point', u'coordinates': (-79.38381..."
4,1017,Woodland Nursery School (Warden Woods Communit...,Non Profit Agency,1 FIRVALLEY CRT,M1L 1N8,20,(416) 694-1138x157,High Rise Apartment,,0,10,16,0,0,26,Y,06AUG21,"{u'type': u'Point', u'coordinates': (-79.28102..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1022,14280,Mtt Davisville,Commercial Agency,588 MOUNT PLEASANT RD,M4S 2M8,12,(416) 722-8311,House,,0,10,13,0,0,23,N,06AUG21,"{u'type': u'Point', u'coordinates': (-79.38849..."
1023,14281,Free Spirit Montessori Downtown,Commercial Agency,500 QUEENS QUAY W,M5V 3K8,10,(416) 825-8429,Other,,0,0,15,0,0,15,N,06AUG21,"{u'type': u'Point', u'coordinates': (-79.39514..."
1024,14283,Alphabet Station,Commercial Agency,2206 QUEEN ST E,M4E 1E7,19,(416) 402-4702,Other,,10,20,24,0,0,54,N,06AUG21,"{u'type': u'Point', u'coordinates': (-79.28941..."
1025,14284,Learning Jungle Buttonwood,Commercial Agency,100 SIDNEY BELSEY CRES,M6M 5H6,5,(416) 241-3800,Other,,10,30,48,0,0,88,Y,06AUG21,"{u'type': u'Point', u'coordinates': (-79.50976..."


In [44]:
child_care.shape

(1027, 18)

We check the number of centres via shape of table. There are 1027 childcare centres in Toronto.

#### Then we read in data on family centres

In [47]:
com_prog = pd.read_csv('EarlyON Child and Family Centres.csv')
com_prog.drop('_id', axis = 1, inplace = True)
com_prog.head()

Unnamed: 0,loc_id,program,agency,address,phone,rundate,geometry
0,6197,Alexandra Park EarlyON Child and Family Centre,Alexandra Park Residents Association/Community...,105 Grange Crt,4166039603,06AUG21,"{u'type': u'Point', u'coordinates': (-79.39888..."
1,6199,Applegrove EarlyON Child and Family Centre,Applegrove Community Complex,60 Woodfield Rd,4164615392,06AUG21,"{u'type': u'Point', u'coordinates': (-79.32191..."
2,6200,Applegrove Connection EarlyON Child and Family...,Applegrove Community Complex,31 Eastwood Rd,4164615046,06AUG21,"{u'type': u'Point', u'coordinates': (-79.31814..."
3,6202,Birchmount Bluffs EarlyON Child and Family Centre,Birchmount Bluffs Neighbourhood Centre,93 Birchmount Rd,4163967599,06AUG21,"{u'type': u'Point', u'coordinates': (-79.26311..."
4,6209,St. Helen's EarlyON Child and Family Centre,College-Montrose Children's Place,66 Sheridan Ave,4168457223,06AUG21,"{u'type': u'Point', u'coordinates': (-79.43344..."


In [49]:
com_prog.shape

(262, 7)

We see that there are 262 family centres in Toronto 