In [1]:
# POI lookup notebook (originally titled "Google Places API"; the code below queries the Yelp API)

import pandas as pd
import requests
import os

In [None]:
# Retrieving the API key from the 'api_key_places' environment variable (None when it is not set)
key = os.getenv('api_key_places')

In [2]:
# Alternative key source (Sam's setup): load variables from a local .env file and
# read the key from YELP_API_KEY2.
# NOTE(review): the original note said "comment out" - presumably other users are
# expected to comment this cell out; confirm.
from dotenv import load_dotenv
load_dotenv()
# Fall back to 'api_key_places' so that running this cell without YELP_API_KEY2
# defined does not reset `key` to None.
key = os.getenv('YELP_API_KEY2') or os.getenv('api_key_places')

In [4]:
# Read the Yelp hotels CSV into a dataframe and preview the first rows

hotels_csv = 'yelp_hotels.csv'
df_hotels = pd.read_csv(hotels_csv)
df_hotels.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,transactions,...,coordinates.latitude,coordinates.longitude,location.address1,location.address2,location.address3,location.city,location.zip_code,location.country,location.state,location.display_address
0,yeesT9W64tpVq5Gcae18Sg,delta-hotels-by-marriott-toronto-toronto-2,Delta Hotels by Marriott Toronto,https://s3-media2.fl.yelpcdn.com/bphoto/TRWtlS...,False,https://www.yelp.com/biz/delta-hotels-by-marri...,161,hotels,4.0,[],...,43.643069,-79.383865,75 Lower Simcoe Street,,,Toronto,M5J 3A6,CA,ON,"['75 Lower Simcoe Street', 'Toronto, ON M5J 3A..."
1,kKnGY0Xj7dG4ubjrbygynw,residence-inn-by-marriott-toronto-downtown-tor...,Residence Inn by Marriott Toronto Downtown,https://s3-media4.fl.yelpcdn.com/bphoto/PFaLls...,False,https://www.yelp.com/biz/residence-inn-by-marr...,95,hotels,4.0,[],...,43.64479,-79.39053,255 Wellington Street W,,,Toronto,M5V 3P9,CA,ON,"['255 Wellington Street W', 'Toronto, ON M5V 3..."
2,XHrPQKUTJXJGPH9etKUYUw,courtyard-by-marriott-toronto-downtown-toronto,Courtyard by Marriott Toronto Downtown,https://s3-media2.fl.yelpcdn.com/bphoto/d6S6ga...,False,https://www.yelp.com/biz/courtyard-by-marriott...,152,hotels,3.0,[],...,43.66241,-79.383149,475 Yonge Street,,,Toronto,M4Y 1X7,CA,ON,"['475 Yonge Street', 'Toronto, ON M4Y 1X7', 'C..."
3,zYRBRfYuq_6x-wNKa8NqrA,le-germain-hotel-toronto,Le Germain Hotel,https://s3-media3.fl.yelpcdn.com/bphoto/m1977c...,False,https://www.yelp.com/biz/le-germain-hotel-toro...,55,hotels,4.5,[],...,43.645751,-79.391025,30 Mercer Street,,,Toronto,M5V 1H3,CA,ON,"['30 Mercer Street', 'Toronto, ON M5V 1H3', 'C..."
4,PCkJAv7WQJ08C1RA6ilqWA,1-hotel-toronto-toronto,1 Hotel Toronto,https://s3-media4.fl.yelpcdn.com/bphoto/abOBDo...,False,https://www.yelp.com/biz/1-hotel-toronto-toron...,5,hotels,5.0,[],...,43.642826,-79.401439,550 Wellington Street W,,,Toronto,M5V 2V4,CA,ON,"['550 Wellington Street W', 'Toronto, ON M5V 2..."


In [5]:
# Listing the column names available in df_hotels (the next cell picks which ones to keep)

df_hotels.columns

Index(['id', 'alias', 'name', 'image_url', 'is_closed', 'url', 'review_count',
       'categories', 'rating', 'transactions', 'price', 'phone',
       'display_phone', 'distance', 'coordinates.latitude',
       'coordinates.longitude', 'location.address1', 'location.address2',
       'location.address3', 'location.city', 'location.zip_code',
       'location.country', 'location.state', 'location.display_address'],
      dtype='object')

In [6]:
# Keep only the hotel columns used by the rest of the notebook

hotel_columns_to_keep = [
    'id', 'name', 'review_count', 'categories', 'rating', 'price', 'distance',
    'coordinates.latitude', 'coordinates.longitude',
    'location.address1', 'location.city', 'location.zip_code',
    'location.country', 'location.state', 'location.display_address',
]
df_hotels = df_hotels[hotel_columns_to_keep]

In [7]:
# Give every hotel column a 'hotel_' prefixed name.
# The names are assigned by position, so their order must match the column selection above.

hotel_column_names = [
    'hotel_id', 'hotel_name', 'hotel_review_count', 'hotel_categories',
    'hotel_rating', 'hotel_price', 'hotel_distance',
    'hotel_latitude', 'hotel_longitude',
    'hotel_address1', 'hotel_city', 'hotel_zip_code',
    'hotel_country', 'hotel_state', 'hotel_display_address',
]
df_hotels.columns = hotel_column_names

In [80]:
# Defining the function to find all POIs of a specific category within a set distance of each hotel, using the Yelp API again

def find_POI_yelp(hotel_ids,latitudes,longitudes,radius,limit,categories,api_key=None,timeout=10):
    """Search Yelp around each hotel and return one row per hotel/POI match.

    Parameters
    ----------
    hotel_ids, latitudes, longitudes : iterables
        Parallel sequences describing the hotels; they are consumed with
        ``zip``, so iteration stops at the shortest one.
    radius : number
        Sent to Yelp as the ``radius`` search parameter (the calling cell
        documents it as metres).
    limit : int
        Sent to Yelp as the ``limit`` search parameter.
    categories : str
        Sent to Yelp as both the ``term`` and the ``categories`` parameter.
    api_key : str, optional
        Yelp API key. When omitted, the notebook-level ``key`` variable is used.
    timeout : number, optional
        Seconds to wait for each HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame
        ``hotel_id`` followed by the flattened fields of each business
        (nested fields become dotted column names). A hotel whose search
        returns no businesses is kept as a single row holding only its
        ``hotel_id``. With no hotels at all, an empty frame with just the
        ``hotel_id`` column is returned.

    Raises
    ------
    ValueError
        If no API key is available.
    requests.HTTPError
        If Yelp answers with an error status.
    """
    token = key if api_key is None else api_key
    if not token:
        raise ValueError('No Yelp API key available: pass api_key or set the notebook-level `key`.')

    # The endpoint and headers are the same for every hotel, so build them once.
    url = 'https://api.yelp.com/v3/businesses/search'
    headers = {"accept": "application/json","Authorization": 'bearer ' + token}

    records = []
    for hotel_id,lat,lon in zip(hotel_ids,latitudes,longitudes):
        params = {
            'limit': limit,
            'latitude': lat,
            'longitude': lon,
            'term': categories,
            'categories': categories,
            'radius': radius
        }

        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        # Fail with the HTTP error itself instead of a KeyError on 'businesses'.
        response.raise_for_status()
        businesses = response.json()['businesses']

        if not businesses:
            # Keep the hotel visible in the output even when nothing was found nearby.
            records.append({'hotel_id': hotel_id})
            continue

        # One flat record per hotel/POI match. Hotels and POIs can each repeat,
        # but every pairing is unique.
        records.extend({'hotel_id': hotel_id, **business} for business in businesses)

    if not records:
        return pd.DataFrame(columns=['hotel_id'])

    # json_normalize flattens nested dicts (coordinates, location) into dotted columns.
    return pd.json_normalize(records)


In [88]:
# Query Yelp for the POIs surrounding the first `n` hotels of df_hotels

n = 1 # how many hotels (taken from the top of df_hotels) to query
r = 150 # search radius in metres
limit = 20 # cap on the number of POIs returned per hotel
categories = 'restaurants' # POI description

df = df_hotels.head(n)

df_POI = find_POI_yelp(
    hotel_ids=df['hotel_id'],
    latitudes=df['hotel_latitude'],
    longitudes=df['hotel_longitude'],
    radius=r,
    limit=limit,
    categories=categories,
)

In [89]:
# Checking the dimensions (rows, columns) of df_POI
df_POI.shape

(15, 25)

In [90]:
# Previewing the first rows of df_POI
df_POI.head()

Unnamed: 0,hotel_id,id,alias,name,image_url,is_closed,url,review_count,categories,rating,...,coordinates.latitude,coordinates.longitude,location.address1,location.address2,location.address3,location.city,location.zip_code,location.country,location.state,location.display_address
0,yeesT9W64tpVq5Gcae18Sg,9eu2lQMROTKdrPyGkwZLYw,soco-kitchen-bar-toronto,SOCO Kitchen + Bar,https://s3-media3.fl.yelpcdn.com/bphoto/85--in...,False,https://www.yelp.com/biz/soco-kitchen-bar-toro...,236,"[{'alias': 'bars', 'title': 'Bars'}, {'alias':...",3.5,...,43.642653,-79.383847,75 Lower Simcoe Street,,,Toronto,M5J 3A6,CA,ON,"[75 Lower Simcoe Street, Toronto, ON M5J 3A6, ..."
1,yeesT9W64tpVq5Gcae18Sg,x0MaTw6He1w_1nddeSJU2Q,the-roof-at-soco-toronto,The Roof at SOCO,https://s3-media2.fl.yelpcdn.com/bphoto/gF-WNQ...,False,https://www.yelp.com/biz/the-roof-at-soco-toro...,17,"[{'alias': 'newcanadian', 'title': 'Canadian (...",4.0,...,43.64303,-79.38379,75 Lower Simcoe Street,Fourth Floor,,Toronto,M5J 3A6,CA,ON,"[75 Lower Simcoe Street, Fourth Floor, Toronto..."
2,yeesT9W64tpVq5Gcae18Sg,D1lAVtlav4atQTJnIvtcpw,taverna-mercatto-toronto,Taverna Mercatto,https://s3-media2.fl.yelpcdn.com/bphoto/I8Guuj...,False,https://www.yelp.com/biz/taverna-mercatto-toro...,229,"[{'alias': 'italian', 'title': 'Italian'}]",3.5,...,43.642672,-79.38276,120 Bremner Boulevard,,,Toronto,M5J 3A6,CA,ON,"[120 Bremner Boulevard, Toronto, ON M5J 3A6, C..."
3,yeesT9W64tpVq5Gcae18Sg,9-Lipa9HiHL_EAfeDfPq1g,busters-sea-cove-toronto-3,Buster's Sea Cove,https://s3-media3.fl.yelpcdn.com/bphoto/4jrvkf...,False,https://www.yelp.com/biz/busters-sea-cove-toro...,33,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",3.5,...,43.642566,-79.383051,120 Bremner Boulevard,,,Toronto,M5J 0A1,CA,ON,"[120 Bremner Boulevard, Toronto, ON M5J 0A1, C..."
4,yeesT9W64tpVq5Gcae18Sg,qAbBXmszDBJ4iZ8jdqeamQ,aroma-espresso-bar-toronto-47,Aroma Espresso Bar,https://s3-media2.fl.yelpcdn.com/bphoto/ZQEt3h...,False,https://www.yelp.com/biz/aroma-espresso-bar-to...,39,"[{'alias': 'cafes', 'title': 'Cafes'}]",3.5,...,43.642386,-79.383637,137 Bremner Boulevard,,,Toronto,M5J 3A7,CA,ON,"[137 Bremner Boulevard, Toronto, ON M5J 3A7, C..."


In [91]:
# Listing the column names available in df_POI (the next cell picks which ones to keep)

df_POI.columns

Index(['hotel_id', 'id', 'alias', 'name', 'image_url', 'is_closed', 'url',
       'review_count', 'categories', 'rating', 'transactions', 'price',
       'phone', 'display_phone', 'distance', 'coordinates.latitude',
       'coordinates.longitude', 'location.address1', 'location.address2',
       'location.address3', 'location.city', 'location.zip_code',
       'location.country', 'location.state', 'location.display_address'],
      dtype='object')

In [92]:
# Keep only the POI columns used by the rest of the notebook

poi_columns_to_keep = [
    'hotel_id', 'id', 'name',
    'review_count', 'categories', 'rating', 'transactions', 'price',
    'distance',
    'coordinates.latitude', 'coordinates.longitude',
    'location.address1', 'location.city', 'location.zip_code',
    'location.country', 'location.state', 'location.display_address',
]
df_POI = df_POI[poi_columns_to_keep]

In [93]:
# Give every POI column a 'poi_' prefixed name (hotel_id keeps its name).
# The names are assigned by position, so their order must match the column selection above.

poi_column_names = [
    'hotel_id', 'poi_id', 'poi_name',
    'poi_review_count', 'poi_categories', 'poi_rating', 'poi_transactions', 'poi_price',
    'poi_distance',
    'poi_latitude', 'poi_longitude',
    'poi_address1', 'poi_city', 'poi_zip_code',
    'poi_country', 'poi_state', 'poi_display_address',
]
df_POI.columns = poi_column_names

In [102]:
# Creating a new poi_price_int column holding the length of the dollar-sign price string
# (e.g. '$$' -> 2); POIs without a price stay missing.
# df_POI was produced by selecting a subset of columns, so writing a column into it in place
# raises SettingWithCopyWarning; .assign builds a new dataframe instead.

df_POI = df_POI.assign(poi_price_int=df_POI['poi_price'].str.len())

In [104]:
# Previewing df_POI after renaming the columns and adding the numeric price column
df_POI.head()

Unnamed: 0,hotel_id,poi_id,poi_name,poi_review_count,poi_categories,poi_rating,poi_transactions,poi_price,poi_distance,poi_latitude,poi_longitude,poi_address1,poi_city,poi_zip_code,poi_country,poi_state,poi_display_address,poi_price2,poi_price_int
0,yeesT9W64tpVq5Gcae18Sg,9eu2lQMROTKdrPyGkwZLYw,SOCO Kitchen + Bar,236,"[{'alias': 'bars', 'title': 'Bars'}, {'alias':...",3.5,[],$$,46.260363,43.642653,-79.383847,75 Lower Simcoe Street,Toronto,M5J 3A6,CA,ON,"[75 Lower Simcoe Street, Toronto, ON M5J 3A6, ...",2.0,2.0
1,yeesT9W64tpVq5Gcae18Sg,x0MaTw6He1w_1nddeSJU2Q,The Roof at SOCO,17,"[{'alias': 'newcanadian', 'title': 'Canadian (...",4.0,[],,11.736958,43.64303,-79.38379,75 Lower Simcoe Street,Toronto,M5J 3A6,CA,ON,"[75 Lower Simcoe Street, Fourth Floor, Toronto...",,
2,yeesT9W64tpVq5Gcae18Sg,D1lAVtlav4atQTJnIvtcpw,Taverna Mercatto,229,"[{'alias': 'italian', 'title': 'Italian'}]",3.5,[],$$,99.247399,43.642672,-79.38276,120 Bremner Boulevard,Toronto,M5J 3A6,CA,ON,"[120 Bremner Boulevard, Toronto, ON M5J 3A6, C...",2.0,2.0
3,yeesT9W64tpVq5Gcae18Sg,9-Lipa9HiHL_EAfeDfPq1g,Buster's Sea Cove,33,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",3.5,[],$$,86.123705,43.642566,-79.383051,120 Bremner Boulevard,Toronto,M5J 0A1,CA,ON,"[120 Bremner Boulevard, Toronto, ON M5J 0A1, C...",2.0,2.0
4,yeesT9W64tpVq5Gcae18Sg,qAbBXmszDBJ4iZ8jdqeamQ,Aroma Espresso Bar,39,"[{'alias': 'cafes', 'title': 'Cafes'}]",3.5,[],$$,78.124941,43.642386,-79.383637,137 Bremner Boulevard,Toronto,M5J 3A7,CA,ON,"[137 Bremner Boulevard, Toronto, ON M5J 3A7, C...",2.0,2.0


In [125]:
# Summarising df_POI per hotel: mean price level, mean rating and mean review count of the
# nearby POIs, plus the number of POIs (counted as non-missing ratings) near each hotel.
#
# The price source is poi_price_int, the numeric column created above; the previous
# 'poi_price2' column is not created anywhere in this notebook. The output name
# 'mean_poi_price2' is kept so the saved CSV keeps the same columns.
# Named aggregation ties each output name to its source column and function, instead of
# relabelling the pivot's columns by position.

pivot_poi = (
    df_POI.groupby('hotel_id')
    .agg(
        mean_poi_price2=('poi_price_int', 'mean'),
        mean_poi_rating=('poi_rating', 'mean'),
        mean_poi_review_count=('poi_review_count', 'mean'),
        count_poi=('poi_rating', 'count'),
    )
    .reset_index()
)

pivot_poi.head()

Unnamed: 0,hotel_id,mean_poi_price2,mean_poi_rating,mean_poi_review_count,count_poi
0,yeesT9W64tpVq5Gcae18Sg,1.8,2.766667,58.133333,15


In [128]:
# Left-joining the per-hotel POI summary (pivot_poi) onto df_hotels by hotel_id,
# so hotels without a POI summary are kept with missing values

df_merge = pd.merge(df_hotels, pivot_poi, how='left', on='hotel_id')
df_merge.head()

Unnamed: 0,hotel_id,hotel_name,hotel_review_count,hotel_categories,hotel_rating,hotel_price,hotel_distance,hotel_latitude,hotel_longitude,hotel_address1,...,hotel_address3,hotel_city,hotel_zip_code,hotel_country,hotel_state,hotel_display_address,mean_poi_price2,mean_poi_rating,mean_poi_review_count,count_poi
0,yeesT9W64tpVq5Gcae18Sg,Delta Hotels by Marriott Toronto,161,hotels,4.0,$$,3623.82022,43.643069,-79.383865,75 Lower Simcoe Street,...,,Toronto,M5J 3A6,CA,ON,"['75 Lower Simcoe Street', 'Toronto, ON M5J 3A...",1.8,2.766667,58.133333,15.0
1,kKnGY0Xj7dG4ubjrbygynw,Residence Inn by Marriott Toronto Downtown,95,hotels,4.0,$$,3320.732218,43.64479,-79.39053,255 Wellington Street W,...,,Toronto,M5V 3P9,CA,ON,"['255 Wellington Street W', 'Toronto, ON M5V 3...",,,,
2,XHrPQKUTJXJGPH9etKUYUw,Courtyard by Marriott Toronto Downtown,152,hotels,3.0,$$,1718.07143,43.66241,-79.383149,475 Yonge Street,...,,Toronto,M4Y 1X7,CA,ON,"['475 Yonge Street', 'Toronto, ON M4Y 1X7', 'C...",,,,
3,zYRBRfYuq_6x-wNKa8NqrA,Le Germain Hotel,55,hotels,4.5,$$$,3202.097393,43.645751,-79.391025,30 Mercer Street,...,,Toronto,M5V 1H3,CA,ON,"['30 Mercer Street', 'Toronto, ON M5V 1H3', 'C...",,,,
4,PCkJAv7WQJ08C1RA6ilqWA,1 Hotel Toronto,5,hotels,5.0,,3517.449544,43.642826,-79.401439,550 Wellington Street W,...,,Toronto,M5V 2V4,CA,ON,"['550 Wellington Street W', 'Toronto, ON M5V 2...",,,,


In [129]:
# Saving the merged hotel/POI table as a CSV file, leaving out the dataframe index

output_csv = 'hotel_and_restaurants.csv'
df_merge.to_csv(output_csv, index=False)