# Battle of the Neighborhood Code

### Load all the Data from all the various sources

Toronto neighborhoods by postal code

In [2]:
# Load the required libraries
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Found the table using beautifulsoup and used Pandas to read it in. 
res = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
soup = BeautifulSoup(res.content,'lxml')
table = soup.find_all('table')[0] 
df = pd.read_html(str(table))


# WRANGLE/Transform THE DATA
# Convert the list back into a dataframe
data = pd.DataFrame(df[0])

# Rename the columns as instructed
data = data.rename(columns={0:'PostalCode', 1:'Bourough', 2:'Neighborhood'})

# Get rid of the first row which contained the table headers from the webpage
data = data.iloc[1:]


# Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
data = data[~data['Bourough'].str.contains('Not assigned')]


# More than one neighborhood can exist in one postal code area. 
#For example, in the table on the Wikipedia page, you will notice 
#that M5A is listed twice and has two neighborhoods: Harbourfront 
#and Regent Park. These two rows will be combined into one row with 
#the neighborhoods separated with a comma
df2=data.groupby(['PostalCode', 'Bourough']).apply(lambda group: ', '.join(group['Neighborhood']))


# Convert the Series back into a DataFrame and put the 'Neighbourhood' column label back in
df2=df2.to_frame().reset_index()
df2 = df2.rename(columns={0:'Neighborhood'})

# If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.
df2.loc[df2.Neighborhood == 'Not assigned', 'Neighborhood' ] = df2.Bourough

# Display the DataFrame
df2.head()

Unnamed: 0,PostalCode,Bourough,Neighborhood
0,M1HScarborough(Cedarbrae),M2HNorth York(Hillcrest Village),M3HNorth York(Bathurst Manor / Wilson Heights ...
1,M1JScarborough(Scarborough Village),M2JNorth York(Fairview / Henry Farm / Oriole),M3JNorth York(Northwood Park / York University)
2,M1KScarborough(Kennedy Park / Ionview / East B...,M2KNorth York(Bayview Village),M3KNorth York(Downsview)East (CFB Toronto)
3,M1LScarborough(Golden Mile / Clairlea / Oakridge),M2LNorth York(York Mills / Silver Hills),M3LNorth York(Downsview)West
4,M1MScarborough(Cliffside / Cliffcrest / Scarbo...,M2MNorth York(Willowdale / Newtonbrook),M3MNorth York(Downsview)Central


### Toronto geospatial cooridinates with Toronto Postal Code Data loaded from csv file

In [3]:
# Load Toronto geospatial cooridinates
!wget -O to_geo_space.csv http://cocl.us/Geospatial_data

#Read into dataframe
gf = pd.read_csv('to_geo_space.csv')

#rename the coloumns so the match
gf = gf.rename(columns={'Postal Code':'PostalCode'})

#Merge the Toronto data with geo cooridinate data
gf_new = pd.merge(df2, gf, on='PostalCode', how='inner')

# display the new dataframe
gf_new.head()

--2021-05-29 18:19:16--  http://cocl.us/Geospatial_data
Resolving cocl.us (cocl.us)... 52.116.122.240, 52.116.127.25
Connecting to cocl.us (cocl.us)|52.116.122.240|:80... connected.
HTTP request sent, awaiting response... 308 Permanent Redirect
Location: https://cocl.us/Geospatial_data [following]
--2021-05-29 18:19:17--  https://cocl.us/Geospatial_data
Connecting to cocl.us (cocl.us)|52.116.122.240|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2021-05-29 18:19:18--  https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv
Resolving ibm.box.com (ibm.box.com)... 107.152.29.197
Connecting to ibm.box.com (ibm.box.com)|107.152.29.197|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2021-05-29 18:19:18--  https://ibm.box.com/public/static/9afz

Unnamed: 0,PostalCode,Bourough,Neighborhood,Latitude,Longitude


### Toronto neighborhoods populations broken down by postal code

In [4]:
# Load this data from Stats Canada
df_pop = pd.read_csv('https://www12.statcan.gc.ca/census-recensement/2016/dp-pd/hlt-fst/pd-pl/Tables/File.cfm?T=1201&SR=1&RPP=9999&PR=0&CMA=0&CSD=0&S=22&O=A&Lang=Eng&OFT=CSV',encoding = 'unicode_escape')
# Rename the columns appropiatley
df_pop = df_pop.rename(columns={'Geographic code':'PostalCode', 'Geographic name':'PostalCod2', 'Province or territory':'Province', 'Incompletely enumerated Indian reserves and Indian settlements, 2016':'Incomplete', 'Population, 2016':'Population_2016', 'Total private dwellings, 2016':'TotalPrivDwellings', 'Private dwellings occupied by usual residents, 2016':'PrivDwellingsOccupied'})
df_pop= df_pop.drop(columns=['PostalCod2', 'Province', 'Incomplete', 'TotalPrivDwellings', 'PrivDwellingsOccupied'])

# Get rid of the first row 
df_pop = df_pop.iloc[1:]
df_pop.head()

Unnamed: 0,PostalCode,Population_2016
1,A0A,46587.0
2,A0B,19792.0
3,A0C,12587.0
4,A0E,22294.0
5,A0G,35266.0


Combination of Toronto Neighbourhood populations data with Toronto Postal Code data from csv file

In [5]:
#Merge the Toronto Pop data with geo postalcode data
gf_new
gf_new = pd.merge(df_pop, gf_new, on='PostalCode', how='right')
# sort on population
gf_new = gf_new.sort_values(by=['Population_2016'], ascending=False)

# display the new dataframe
gf_new.head()

Unnamed: 0,PostalCode,Population_2016,Bourough,Neighborhood,Latitude,Longitude


Toronto Neighbourhood income after tax data with Toronto Postal Code data

In [6]:
# The code was removed by Watson Studio for sharing.

Unnamed: 0,PostalCode,Yearly Income After Tax
0,M1A,Null
1,M2A,Null
2,M3A,57698
3,M4A,49453
4,M5A,46938


Combination of Toronto Neighbourhood income data with Toronto Postal Code data

In [7]:

body = client_05ec939e3b734088aef83ea67860081b.get_object(Bucket='battleoftheneighborhood2-donotdelete-pr-z9ulrxywbbpfpe',Key='Income Data 1.csv')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

df_data_10 = pd.read_csv(body)
df_data_10.head()


Unnamed: 0,PostalCode,Income After Tax,Population,Bourough,Neighborhood,Latitude,Longitude
0,M2P,115237,7843,North York,York Mills West,43.752758,-79.400049
1,M5M,111821,25975,North York,"Bedford Park, Lawrence Manor East",43.733283,-79.41975
2,M4N,109841,15330,Central Toronto,Lawrence Park,43.72802,-79.38879
3,M5R,108271,26496,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678
4,M8X,97210,10787,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944


Therefore the median income after tax is 57,000 dollars per year

#### Foursquare Credentials 

In [8]:
CLIENT_ID = 'H4IENDZBKM12ZJN1B0TWFI3W2CZGV3YQV20FJKXPYVE4K5WC' 


CLIENT_SECRET = 'PZR23TJF5UJEUMBM3NYCAFR4K2JXPQN1WPNGN3LFQQU4GVMO' 


VERSION = '20210628'

In [9]:
#Let's explore neighborhoods in our dataframe.
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

LIMIT = 200 # limit of number of venues returned by Foursquare API

radius = 500 # define radius

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [10]:

body = client_05ec939e3b734088aef83ea67860081b.get_object(Bucket='battleoftheneighborhood2-donotdelete-pr-z9ulrxywbbpfpe',Key='Income Data 2.csv')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

df_data_8 = pd.read_csv(body)
df_data_8.head()


Unnamed: 0,PostalCode,Yearly Income After Tax,Population,Bourough,Neighborhood,Latitude,Longitude
0,M2P,115237,7843,North York,York Mills West,43.752758,-79.400049
1,M5M,111821,25975,North York,"Bedford Park, Lawrence Manor East",43.733283,-79.41975
2,M4N,109841,15330,Central Toronto,Lawrence Park,43.72802,-79.38879
3,M5R,108271,26496,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678
4,M8X,97210,10787,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944


#### Get all the Venues in Toronto

In [12]:

body = client_05ec939e3b734088aef83ea67860081b.get_object(Bucket='battleoftheneighborhood2-donotdelete-pr-z9ulrxywbbpfpe',Key='Neighborhood.csv')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

df_data_11 = pd.read_csv(body)
df_data_11.head()


Unnamed: 0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Unnamed: 6
0,"Adelaide, King, Richmond",100,100,100,100,100,100
1,Agincourt,4,4,4,4,4,4
2,"Agincourt North, L'Amoreaux East, Milliken, St...",2,2,2,2,2,2
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",10,10,10,10,10,10
4,"Alderwood, Long Branch",10,10,10,10,10,10
