In [94]:
# Dependencies and Setup

import requests
import pandas as pd
import json
from io import StringIO
from pprint import pprint

# Import API key

from api_keys import geoapify_key
from api_keys import google_key

# Extract Population Data for Toronto Neighbourhoods

### From Toronto Open Data

In [2]:
# Toronto Open Data is stored in a CKAN instance. It's APIs are documented here:
# https://docs.ckan.org/en/latest/api/
 
# To hit our API, you'll be making requests to:
base_url = "https://ckan0.cf.opendata.inter.prod-toronto.ca"
 
# Datasets are called "packages". Each package can contain many "resources"
# To retrieve the metadata for this package and its resources, use the package name in this page's URL:

url = base_url + "/api/3/action/package_show"
params = { "id": "neighbourhood-profiles"}
package = requests.get(url, params = params).json()

url = base_url + "/datastore/dump/" + package["result"]["resources"][0]['id']

resource_dump_data = requests.get(url).text

In [211]:
census_data = df = pd.read_csv(StringIO(resource_dump_data))

hood_name = census_data.columns[6:].tolist()
hood_num = census_data.iloc[0,6:]
population_2016 = census_data.iloc[2,6:]

population_data = pd.DataFrame(columns = ['Hood_ID', 'Population'])

population_data['Hood_ID'] = hood_num

population_data['Population'] = population_2016

population_data.reset_index(drop=True, inplace=True)

population_data

Unnamed: 0,Hood_ID,Population
0,129,29113
1,128,23757
2,20,12054
3,95,30526
4,42,27695
...,...,...
135,94,14349
136,100,11817
137,97,12528
138,27,27593


# Extract Longitude and Latitude for Toronto Neighbourhood

### Using Google API Unstructured Search

In [103]:
lon = []
lat = []
loc_data = []

for hood in hood_name:
    url = f'https://maps.googleapis.com/maps/api/geocode/json?key={google_key}={hood}%2C%20Toronto%2C%20Ontario%2C%20Canada'
    
    data = (requests.get(url).json())
    
    lon.append(data['results'][0]['geometry']['location']['lng'])
    lat.append(data['results'][0]['geometry']['location']['lat'])
    
    loc_data.append(data)


In [168]:
city_google = []

for row in loc_data:
    city_google.append(row['results'][0]['address_components'][1]['long_name'])
    
city_google

['Scarborough',
 'Scarborough',
 'Etobicoke',
 'Old Toronto',
 'North York',
 'North York',
 'Old Toronto',
 'North York',
 'North York',
 'Old Toronto',
 'York',
 'Scarborough',
 'Toronto',
 'Toronto',
 'Old Toronto',
 'York',
 'North York',
 'East York',
 'North York',
 'Old Toronto',
 'York',
 'Old Toronto',
 'Scarborough',
 'Yonge Street',
 'Scarborough',
 'North York',
 'Scarborough',
 'Toronto',
 'Toronto',
 'Toronto',
 'North York',
 'Scarborough',
 'Old Toronto',
 'North York',
 'Old Toronto',
 'Old Toronto',
 'Etobicoke',
 'Scarborough',
 'Toronto',
 'North York',
 'Etobicoke',
 'Etobicoke',
 'North York',
 'Old Toronto',
 'Old Toronto',
 'North York',
 'Old Toronto',
 'Scarborough',
 'North York',
 'Old Toronto',
 'Old Toronto',
 'Scarborough',
 'North York',
 'Etobicoke',
 'North York',
 'North York',
 'York',
 'Scarborough',
 'Etobicoke',
 'Old Toronto',
 'York',
 'Scarborough',
 'Old Toronto',
 'Etobicoke',
 'Etobicoke',
 'Old Toronto',
 'Scarborough',
 'North York',
 'Old

In [215]:
population_data['lon'] = lon
population_data['lat'] = lat

population_data['District'] = city_google

population_data.head

Unnamed: 0,Hood_ID,Population,lon,lat,District
0,129,29113,-79.262029,43.803660,Scarborough
1,128,23757,-79.266438,43.783507,Scarborough
2,20,12054,-79.544905,43.600797,Etobicoke
3,95,30526,-79.407585,43.669833,Old Toronto
4,42,27695,-79.343506,43.737257,North York
...,...,...,...,...,...
135,94,14349,-79.424260,43.678193,Old Toronto
136,100,11817,-79.398642,43.706431,Old Toronto
137,97,12528,-79.397993,43.687211,Old Toronto
138,27,27593,-79.477311,43.766562,North York


In [182]:
population_data.to_csv("../Resources/population_data.csv", index=False)

In [207]:
districts = population_data['District'].unique()

districts


array(['Scarborough', 'Etobicoke', 'Old Toronto', 'North York', 'York',
       'Toronto', 'East York', 'Yonge Street', 'Queens Quay West'],
      dtype=object)

In [208]:
d_lat = []
d_lon = []
d_loc_data =[]

for district in districts:
    url = f'https://maps.googleapis.com/maps/api/geocode/json?key={google_key}={district}%2C%20Toronto%2C%20Ontario%2C%20Canada'
    
    d_data = (requests.get(url).json())
    
    d_lon.append(d_data['results'][0]['geometry']['location']['lng'])
    d_lat.append(d_data['results'][0]['geometry']['location']['lat'])
    
    d_loc_data.append(d_data)


In [213]:
district_df = pd.DataFrame()

district_df['District'] = districts 
district_df['d_lon'] = d_lon
district_df['d_lat'] = d_lat

district_df


Unnamed: 0,District,d_lon,d_lat
0,Scarborough,-79.231752,43.776426
1,Etobicoke,-79.513198,43.620495
2,Old Toronto,-79.380323,43.64868
3,North York,-79.411079,43.761538
4,York,-79.450354,43.695679
5,Toronto,-79.383184,43.653226
6,East York,-79.341664,43.691201
7,Yonge Street,-79.44675,43.912011
8,Queens Quay West,-79.389518,43.638252


In [224]:
population_data = population_data.merge(district_df)
population_data['Population'] = population_data['Population'].str.replace(',', '').astype(int)

sum(population_data['Population'].values)

2731571

In [217]:
population_data.to_csv("../Resources/population_data.csv", index=False)