In [55]:
import os
from dotenv import load_dotenv
# Load variables from a .env file into the process environment.
load_dotenv()

# Google Maps Platform key, read from the environment (None when the variable is unset).
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')

In [56]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import googlemaps
import time

In [57]:
# Shared Google Maps client; the Places requests later in this notebook go through it.
gmaps = googlemaps.Client(key=GOOGLE_API_KEY)

In [25]:
def get_place_data(keyword, coords, radius=6000, page_delay=2, client=None):
    """Collect Nearby Search results for one keyword, following pagination.

    Parameters
    ----------
    keyword : str
        Search term passed to the Nearby Search ``keyword`` parameter.
    coords : str or tuple
        Centre of the search, passed through unchanged as ``location``.
    radius : int, optional
        Search radius in metres, passed to the API. Defaults to 6000, the
        value that used to be hard-coded.
    page_delay : float, optional
        Seconds to wait before requesting the next page. Defaults to 2.
        The wait only happens when another page is actually requested.
    client : object, optional
        Any object exposing ``places_nearby(location=, radius=, keyword=,
        page_token=)``. Defaults to the notebook-level ``gmaps`` client.

    Returns
    -------
    list of dict
        One dict per place with the keys ``place_id``, ``name``,
        ``avg_rating``, ``user_ratings_total``, ``lat``, ``lng``, ``types``
        (comma-joined string) and ``business_status``. Optional fields that
        are absent from a result are stored as ``np.nan``.
    """
    if client is None:
        # Fall back to the notebook-level client so existing calls keep working.
        client = gmaps

    data = []
    page_token = None
    page = 1
    while True:
        print(f"Searching: {keyword} | page {page}")
        results = client.places_nearby(
            location=coords,
            radius=radius,
            keyword=keyword,
            page_token=page_token,
        )

        for result in results.get('results', []):
            location = result['geometry']['location']
            data.append({
                'place_id': result['place_id'],
                'name': result['name'],

                'avg_rating': result.get('rating', np.nan),
                'user_ratings_total': result.get('user_ratings_total', np.nan),

                'lat': location['lat'],
                'lng': location['lng'],

                'types': ",".join(result.get('types', [])),

                'business_status': result.get('business_status', np.nan),
            })

        page_token = results.get('next_page_token')
        if not page_token:
            # Last page: stop immediately instead of sleeping for nothing.
            break

        # A freshly issued next_page_token needs a short moment before the
        # API accepts it, so pause before asking for the following page.
        time.sleep(page_delay)
        page += 1
    return data

In [21]:
# Centre point of the search, written as a "lat,lng" string.
# Nearby Search reference:
# https://developers.google.com/maps/documentation/places/web-service/search-nearby
victoria_bc_canada_coords = "48.407326,-123.329773"

# Search terms; each one is run as its own Nearby Search keyword query.
# Supported place types, for reference:
# https://developers.google.com/maps/documentation/places/web-service/supported_types
searches = [
    "hotels",
    "lodging",
    "bed and breakfast",
    "Places to stay",
    "hostels",
    "motels",
    "resorts",
    "campgrounds",
    "rv parks",
]


In [26]:
# Run every search term against the same centre point and pool the rows.
all_data = []
for keyword in searches:
    all_data += get_place_data(keyword, victoria_bc_canada_coords)

Searching: hotels | page 1
Searching: hotels | page 2
Searching: hotels | page 3
Searching: lodging | page 1
Searching: lodging | page 2
Searching: lodging | page 3
Searching: bed and breakfast | page 1
Searching: bed and breakfast | page 2
Searching: bed and breakfast | page 3
Searching: Places to stay | page 1
Searching: Places to stay | page 2
Searching: Places to stay | page 3
Searching: hostels | page 1
Searching: motels | page 1
Searching: motels | page 2
Searching: motels | page 3
Searching: resorts | page 1
Searching: resorts | page 2
Searching: resorts | page 3
Searching: campgrounds | page 1
Searching: rv parks | page 1


In [33]:
# Build one frame from the pooled rows of every search.
df = pd.DataFrame(all_data)
print("Scraped: ", df.shape)

# The searches overlap, so the same place comes back several times.
# De-duplicate on place_id rather than name: place_id is the identifier the
# later cells match on, whereas two distinct places can share a name and
# would otherwise be collapsed into one row.
df = df.drop_duplicates(subset=['place_id'], keep='first')
print("Unique:", df.shape)

Scraped:  (343, 9)
Unique: (97, 9)


In [34]:
# Request Place Details (summary, reviews, price level) for each place in df.
place_details = []
for pid in df['place_id']:
    place_details.append(
        gmaps.place(
            place_id=pid,
            fields=['editorial_summary', 'reviews', 'place_id', 'price_level'],
        )
    )

In [35]:
# Place Details response reference:
# https://developers.google.com/maps/documentation/places/web-service/details

# Collect each detail field into a place_id -> value lookup, then attach the
# three columns in one pass. This avoids rebuilding a boolean mask over df for
# every place, and guarantees that all three columns exist afterwards even
# when no place supplied that field.
price_levels = {}
editorial_summaries = {}
review_blobs = {}

for place_detail in place_details:
    result = place_detail['result']
    place_id = result['place_id']

    # get the price level (NaN when the place has none)
    price_levels[place_id] = result.get('price_level', np.nan)

    # get the editorial summary; the summary object may be missing or may
    # omit 'overview', in which case the value is NaN instead of a KeyError
    summary = result.get('editorial_summary') or {}
    editorial_summaries[place_id] = summary.get('overview', np.nan)

    # get the reviews: every review text followed by a blank-line separator
    if 'reviews' in result:
        review_blobs[place_id] = ''.join(
            review.get('text', '') + '\n\n\n' for review in result['reviews']
        )

# Series.map with a dict leaves NaN for place_ids that have no entry.
df['price_level'] = df['place_id'].map(price_levels)
df['editorial_summary'] = df['place_id'].map(editorial_summaries)
df['reviews'] = df['place_id'].map(review_blobs)


In [36]:
# Preview the first rows of the dataframe (plain-text rendering via print)
print(df.head())

                      place_id                              name  avg_rating  \
0  ChIJq9Z_wrBzj1QRTxNDocvoQk8  The Vic, Ascend Hotel Collection         4.4   
1  ChIJ4Rxmw5p0j1QRS4ikaNyNDds             Bedford Regency Hotel         3.6   
2  ChIJiYEIrJF0j1QRzZmiWZXTSRA   Victoria Marriott Inner Harbour         4.4   
3  ChIJ5f5ruXhzj1QRnCT8nCWObSI                         Metro Inn         3.2   
4  ChIJTXYT-Z5zj1QRJiu1ds-766c    Red Lion Inn & Suites Victoria         3.9   

   user_ratings_total        lat         lng  \
0                  78  48.442703 -123.386117   
1                 369  48.425546 -123.367807   
2                1282  48.421478 -123.364647   
3                 106  48.441108 -123.370038   
4                1182  48.451352 -123.375029   

                                     types business_status  price_level  \
0  lodging,point_of_interest,establishment     OPERATIONAL          NaN   
1  lodging,point_of_interest,establishment     OPERATIONAL          NaN   
2  lo

In [38]:
# Share of missing values per column, as a fraction between 0 and 1
# (null count divided by the number of rows).
n_rows = df.shape[0]
missing_fraction = df.isnull().sum() / n_rows
print(missing_fraction)

place_id              0.000000
name                  0.000000
avg_rating            0.000000
user_ratings_total    0.000000
lat                   0.000000
lng                   0.000000
types                 0.000000
business_status       0.000000
price_level           0.989691
editorial_summary     0.298969
reviews               0.103093
dtype: float64


In [39]:
# Save the dataframe to "hotels.csv" (path relative to the working directory),
# leaving out the index column
df.to_csv("hotels.csv", index=False)