# Best location for a company building installation

# Importing Packages

In [1]:
import wikipedia as wp
import requests
import pandas as pd
import numpy as np
import io

# Data Creation & Cleaning

In [2]:
# Fetch the Wikipedia page listing Toronto postal codes and parse its first HTML table.
html = wp.page("List_of_postal_codes_of_Canada:_M").html().encode('UTF-8')
df = pd.read_html(html)[0]
df.head(20)

# Flatten the table grid into one list of cell strings.
df_values = [cell for grid_row in df.values for cell in grid_row]

# Each cell is read as "<3-char postal code><borough>(<neighbourhoods>)".
# Only the four central boroughs below are kept; any other borough text
# (including unassigned cells) is dropped by the membership test.
kept_boroughs = ['Downtown Toronto', 'East Toronto', 'West Toronto', 'Central Toronto']
rows = []
for cell in df_values:
    paren_open = cell.find('(')
    paren_close = cell.find(')')
    borough = cell[3:paren_open]
    if borough not in kept_boroughs:
        continue
    rows.append([cell[0:3], borough, cell[paren_open + 1:paren_close]])

df_toronto = pd.DataFrame(data=rows, columns=['Postal Code', 'Borough', 'Neighbourhood'])
df_toronto.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M5A,Downtown Toronto,Regent Park / Harbourfront
1,M5B,Downtown Toronto,"Garden District, Ryerson"
2,M5C,Downtown Toronto,St. James Town
3,M4E,East Toronto,The Beaches
4,M5E,Downtown Toronto,Berczy Park


### data shape

In [3]:
# Display (rows, columns) of the filtered postal-code table.
df_toronto.shape

(36, 3)

# Adding Geolocation Attributes

In [4]:
# Download the postal-code -> latitude/longitude lookup table (CSV) and load it.
url = 'https://cocl.us/Geospatial_data'
# A timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as CSV.
response = requests.get(url, timeout=30)
response.raise_for_status()
lat_long = response.text
lat_long_df = pd.read_csv(io.StringIO(lat_long))
lat_long_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [5]:
# Attach Latitude/Longitude to every postal code via an inner join on 'Postal Code'.
df_toronto = df_toronto.merge(right=lat_long_df, how='inner', on='Postal Code')
df_toronto.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,M4E,East Toronto,The Beaches,43.676357,-79.293031
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


## Defining Foursquare Credentials and version

In [6]:
# Foursquare credentials are read from the environment so that secrets are
# never committed with the notebook. Set these variables before running:
#   FOURSQUARE_CLIENT_ID, FOURSQUARE_CLIENT_SECRET, FOURSQUARE_ACCESS_TOKEN
# NOTE(review): the keys previously hard-coded here were exposed in source
# control and should be revoked/rotated in the Foursquare developer console.
import os
import warnings

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret
ACCESS_TOKEN = os.environ.get('FOURSQUARE_ACCESS_TOKEN', '')  # your Foursquare Access Token
VERSION = '20180604'  # Foursquare API version date (YYYYMMDD)

# Warn early instead of failing later with an opaque API error.
_missing_credentials = [
    env_name
    for env_name, env_value in (
        ('FOURSQUARE_CLIENT_ID', CLIENT_ID),
        ('FOURSQUARE_CLIENT_SECRET', CLIENT_SECRET),
    )
    if not env_value
]
if _missing_credentials:
    warnings.warn(
        'Missing Foursquare credentials in environment: '
        + ', '.join(_missing_credentials)
    )

# Data Exploration

In [7]:
# for each neighborhood, getting the top 10 venues within a radius of 500 meters using Foursquare API

In [8]:
def getVenues(names, latitudes, longitudes, radius, venues_limit):
    """Collect nearby Foursquare venues for each neighbourhood.

    For every (name, latitude, longitude) triple, the Foursquare
    ``venues/explore`` endpoint is queried and one row per returned venue
    is recorded.

    Args:
        names: Iterable of neighbourhood names.
        latitudes: Iterable of neighbourhood latitudes, aligned with ``names``.
        longitudes: Iterable of neighbourhood longitudes, aligned with ``names``.
        radius: Search radius passed to the API as ``radius``.
        venues_limit: Maximum number of venues per neighbourhood (``limit``).

    Returns:
        A DataFrame with the columns 'Neighbourhood',
        'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        is empty (but still has these columns) when no venue is found.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    columns = ['Neighbourhood',
               'Neighbourhood Latitude',
               'Neighbourhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    venue_rows = []
    for neighbourhood_name, neighbourhood_latitude, neighbourhood_longitude in zip(names, latitudes, longitudes):
        # Let requests build and escape the query string instead of
        # formatting the URL by hand.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(neighbourhood_latitude, neighbourhood_longitude),
            'radius': radius,
            'limit': venues_limit,
        }
        response = requests.get(endpoint, params=params, timeout=30)
        # Surface quota/credential problems as a clear HTTP error rather
        # than a KeyError while digging through the JSON payload.
        response.raise_for_status()

        groups = response.json()['response'].get('groups') or []
        results = groups[0]['items'] if groups else []

        for v in results:
            venue = v['venue']
            # Some venues carry no category; record None instead of crashing.
            categories = venue.get('categories') or []
            category_name = categories[0]['name'] if categories else None
            venue_rows.append((
                neighbourhood_name,
                neighbourhood_latitude,
                neighbourhood_longitude,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category_name))

    # Passing the columns to the constructor keeps the schema valid even
    # when no rows were collected.
    venues_df = pd.DataFrame(venue_rows, columns=columns)

    return venues_df

In [9]:
# Query up to 10 venues within 500 m of every Toronto neighbourhood centre.
df_toronto_venues = getVenues(
    df_toronto['Neighbourhood'],
    df_toronto['Latitude'],
    df_toronto['Longitude'],
    radius=500,
    venues_limit=10,
)
df_toronto_venues.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Regent Park / Harbourfront,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,Regent Park / Harbourfront,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,Regent Park / Harbourfront,43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,Regent Park / Harbourfront,43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant
4,Regent Park / Harbourfront,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


In [10]:
#getting list of venues categories

In [25]:
# Distinct venue categories, in order of first appearance.
category_column = df_toronto_venues['Venue Category']
venues_categories = category_column.unique()
venues_categories

array(['Bakery', 'Coffee Shop', 'Distribution Center', 'Restaurant',
       'Spa', 'Park', 'Breakfast Spot', 'Gym / Fitness Center',
       'Historic Site', 'Chocolate Shop', 'Clothing Store',
       'Burrito Place', 'Comic Shop', 'Pizza Place', 'Café', 'Plaza',
       'Music Venue', 'Theater', 'Ramen Restaurant', 'Burger Joint',
       'Japanese Restaurant', 'Creperie', 'Middle Eastern Restaurant',
       'Food Truck', 'Cosmetics Shop', 'Hotel', 'Gym', 'Trail',
       'Health Food Store', 'Pub', 'Neighborhood', 'Liquor Store',
       'Vegetarian / Vegan Restaurant', 'Beer Bar', 'Museum',
       'Farmers Market', 'Cocktail Bar', 'Thai Restaurant',
       'Modern European Restaurant', 'Gastropub', 'Grocery Store',
       'Italian Restaurant', 'Candy Store', 'Concert Hall', 'Steakhouse',
       'Seafood Restaurant', 'Bar', 'Brewery', 'Bank', 'Supermarket',
       'Salad Place', 'Dessert Shop', 'Lake', 'Performing Arts Venue',
       'Sporting Goods Shop', 'Ice Cream Shop', 'Asian Restaur

In [23]:
# setting important venues categories that need to be present near the company

In [24]:
# Venue categories that should be present near the company building.
# Every entry ends with a comma: adjacent string literals without one are
# silently concatenated by Python into a single (never-matching) category.
important_venues = [
    'Coffee Shop',
    'Restaurant',
    'Park',
    'Breakfast Spot',
    'Gym / Fitness Center',
    'Burrito Place',
    'Pizza Place',
    'Café',
    'Ramen Restaurant',
    'Japanese Restaurant',
    'Creperie',
    'Middle Eastern Restaurant',
    'Food Truck',
    'Hotel',
    'Gym',
    'Health Food Store',
    'Vegetarian / Vegan Restaurant',
    'Beer Bar',
    'Cocktail Bar',
    'Thai Restaurant',
    'Modern European Restaurant',
    'Italian Restaurant',
    'Steakhouse',
    'Seafood Restaurant',
    'Bar',
    'Brewery',
    'Bank',
    'Supermarket',
    'Salad Place',
    'Asian Restaurant',
    'Korean Restaurant',
    'Cuban Restaurant',
    'Greek Restaurant',
    'Yoga Studio',
    'Fast Food Restaurant',
    'Sushi Restaurant',
    'Bus Line',
    'Garden',
    'Food & Drink Shop',
    'Sandwich Place',
    'Mexican Restaurant',
    'Chinese Restaurant',
    'Indian Restaurant',
    'Eastern European Restaurant',
    'French Restaurant',
    'College Gym',
    'Falafel Restaurant',
    'Vietnamese Restaurant',
    'American Restaurant',
    'Theme Restaurant',
    'Airport',
    'Airport Lounge',
    'Airport Food Court',
    'Airport Terminal',
    'Airport Gate',
]

In [None]:
# keep only important venues

In [31]:
# Keep only the venues whose category is on the important-venues list.
is_important = df_toronto_venues['Venue Category'].isin(important_venues)
df_toronto_important_venues = df_toronto_venues.loc[is_important]
df_toronto_important_venues.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
1,Regent Park / Harbourfront,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
3,Regent Park / Harbourfront,43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant
5,Regent Park / Harbourfront,43.65426,-79.360636,Corktown Common,43.655618,-79.356211,Park
6,Regent Park / Harbourfront,43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
7,Regent Park / Harbourfront,43.65426,-79.360636,The Extension Room,43.653313,-79.359725,Gym / Fitness Center


In [32]:
#One hot encoding to transform categories to binary columns

In [64]:
# One indicator column per venue category (empty prefix keeps the bare category name).
category_indicators = pd.get_dummies(
    df_toronto_important_venues[['Venue Category']],
    prefix="",
    prefix_sep="",
)
# Put the neighbourhood name in front so the indicators can be grouped by it.
category_indicators.insert(
    loc=0,
    column='Neighbourhood',
    value=df_toronto_important_venues['Neighbourhood'].values,
)
# Sum the indicators per neighbourhood: one row per neighbourhood, one count per category.
per_neighbourhood_counts = category_indicators.groupby('Neighbourhood').sum()
df_toronto_venues_transformed = per_neighbourhood_counts.reset_index()

df_toronto_venues_transformed.head()

Unnamed: 0,Neighbourhood,Airport Food Court,Airport Gate,Airport Lounge,Airport Terminal,American Restaurant,Asian Restaurant,Bank,Bar,Beer Bar,...,Salad Place,Sandwich Place,Seafood Restaurant,Steakhouse,Supermarket,Sushi Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoga Studio
0,Berczy Park,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,1,1,0,0
1,Brockton / Parkdale Village / Exhibition Place,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,CN Tower / King and Spadina / Railway Lands / ...,1,1,2,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Central Bay Street,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Christie,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [65]:
#get the mean of values as a score of venues presence

In [66]:
# Score = row-wise mean of the per-category venue counts.
# numeric_only=True excludes the string 'Neighbourhood' column; without it
# pandas >= 2.0 raises TypeError, while older versions dropped it silently.
df_toronto_venues_transformed['Score'] = df_toronto_venues_transformed.mean(axis=1, numeric_only=True)
# Keep only the score and the airport-terminal count used for the final selection.
df_toronto_venues_transformed = df_toronto_venues_transformed[['Neighbourhood', 'Score', 'Airport Terminal']]
df_toronto_venues_transformed

Unnamed: 0,Neighbourhood,Score,Airport Terminal
0,Berczy Park,0.113208,0
1,Brockton / Parkdale Village / Exhibition Place,0.132075,0
2,CN Tower / King and Spadina / Railway Lands / ...,0.113208,1
3,Central Bay Street,0.150943,0
4,Christie,0.113208,0
5,Church and Wellesley,0.113208,0
6,Commerce Court / Victoria Hotel,0.132075,0
7,Davisville,0.150943,0
8,Davisville North,0.150943,0
9,Dufferin / Dovercourt Village,0.113208,0


In [80]:
# IPython magic selecting matplotlib's Qt backend (only valid inside IPython/Jupyter).
%matplotlib qt 
import seaborn as sns
import matplotlib.pyplot as plt
# Plot each neighbourhood's Score as a line over the neighbourhood names.
sns.lineplot(x='Neighbourhood',y='Score',data=df_toronto_venues_transformed)
# Neighbourhood names are long: shrink the tick labels and rotate them vertically.
plt.xticks(fontsize=8,rotation=90)
plt.grid()
plt.show()

In [49]:
# keep the neighborhood with highest score with the presence of an airport terminal

In [55]:
# Restrict to neighbourhoods with at least one airport terminal venue.
has_airport_terminal = df_toronto_venues_transformed["Airport Terminal"] != 0
df_toronto_venues_transformed = df_toronto_venues_transformed[has_airport_terminal]

# Among those, take the first neighbourhood holding the highest score.
max_score = df_toronto_venues_transformed['Score'].max()
top_scoring = df_toronto_venues_transformed['Score'] == max_score
best_neighbourhood = df_toronto_venues_transformed.loc[top_scoring, 'Neighbourhood'].values[0]
best_neighbourhood

'CN Tower / King and Spadina / Railway Lands / Harbourfront West / Bathurst Quay / South Niagara / Island airport'

# ******* The best neighbourhood for the company building is: 'CN Tower / King and Spadina / Railway Lands / Harbourfront West / Bathurst Quay / South Niagara / Island airport' *******