In [3]:
import os
from dotenv import load_dotenv
# Let python-dotenv populate the environment, then read the Google Maps API
# key from it (the value is None when the variable is not set).
load_dotenv()
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import googlemaps
import time

In [4]:
# Client library reference:
# https://googlemaps.github.io/google-maps-services-python/docs/index.html
# Shared Google Maps client, authenticated with GOOGLE_API_KEY; the search and
# place-details cells below issue their requests through it.
gmaps = googlemaps.Client(key=GOOGLE_API_KEY)

In [5]:
def get_place_data(keyword, coords, radius=6000, page_delay=2, client=None):
    """Collect every page of nearby-search results for one keyword.

    Args:
        keyword: Search term forwarded as the ``keyword`` argument.
        coords: Search centre, forwarded unchanged as ``location``.
        radius: Search radius forwarded as ``radius`` (metres according to the
            Places API documentation). Defaults to 6000.
        page_delay: Seconds to wait before requesting the next page. Google
            documents a short delay before a ``next_page_token`` becomes
            valid, so keep this above zero for real requests.
        client: Object exposing ``places_nearby``. Defaults to the
            notebook-level ``gmaps`` client.

    Returns:
        A list with one dict per place, holding ``place_id``, ``name``,
        ``avg_rating``, ``lat``, ``lng``, ``types`` (comma-joined),
        ``business_status`` and ``user_ratings_total``. Optional fields that
        are absent from a result are stored as ``np.nan``.
    """
    if client is None:
        client = gmaps

    data = []
    page_token = None
    page = 1
    while True:
        print(f"Searching: {keyword} | page {page}")
        results = client.places_nearby(
            location=coords,
            radius=radius,
            keyword=keyword,
            page_token=page_token,
        )

        for result in results.get('results', []):
            location = result['geometry']['location']
            data.append({
                'place_id': result['place_id'],
                'name': result['name'],
                'avg_rating': result.get('rating', np.nan),
                'lat': location['lat'],
                'lng': location['lng'],
                'types': ",".join(result.get('types', [])),
                'business_status': result.get('business_status', np.nan),
                'user_ratings_total': result.get('user_ratings_total', np.nan),
            })

        page_token = results.get('next_page_token')
        if not page_token:
            # Last page reached: return immediately instead of sleeping first.
            break

        # Give the token time to become valid before asking for the next page.
        time.sleep(page_delay)
        page += 1
    return data

In [6]:
# Specify the centre of the search as a "latitude,longitude" string.
# (The search radius is set inside get_place_data.)

# https://developers.google.com/maps/documentation/places/web-service/search-nearby

victoria_bc_canada_coords = "48.407326,-123.329773"

# Search terms: each entry is sent as the `keyword` of its own nearby search.
# The supported place types listed below can serve as inspiration:

# https://developers.google.com/maps/documentation/places/web-service/supported_types

searches = [
    "parks",
    "best breakfast",
    "best brunch",
    "best lunch",
    "best dinner",
    "tourist attractions",
    "view point scenic",
    "historical sites",
    "beaches",
    "things to do",
    "clothing stores",
    "fashion shopping",
]

In [7]:
# Run every configured search around the chosen centre and pool the rows
# from all of them into one flat list.
all_data = []
for search_term in searches:
    all_data += get_place_data(search_term, victoria_bc_canada_coords)

Searching: parks | page 1
Searching: parks | page 2
Searching: parks | page 3
Searching: best breakfast | page 1
Searching: best breakfast | page 2
Searching: best breakfast | page 3
Searching: best brunch | page 1
Searching: best brunch | page 2
Searching: best brunch | page 3
Searching: best lunch | page 1
Searching: best lunch | page 2
Searching: best lunch | page 3
Searching: best dinner | page 1
Searching: best dinner | page 2
Searching: best dinner | page 3
Searching: tourist attractions | page 1
Searching: tourist attractions | page 2
Searching: tourist attractions | page 3
Searching: view point scenic | page 1
Searching: view point scenic | page 2
Searching: historical sites | page 1
Searching: historical sites | page 2
Searching: beaches | page 1
Searching: beaches | page 2
Searching: things to do | page 1
Searching: things to do | page 2
Searching: clothing stores | page 1
Searching: clothing stores | page 2
Searching: clothing stores | page 3
Searching: fashion shopping | pa

In [8]:
# Load every scraped row into a dataframe and report its size.
df = pd.DataFrame(all_data)
print("Scraped: ", df.shape)

# The same place can come back from several searches; keep only the first row
# seen for each name.
# NOTE(review): deduplicating on 'name' also collapses distinct places that
# share a name — confirm this is intended rather than keying on 'place_id'.
df = df.drop_duplicates(subset='name')
print("Unique:", df.shape)

Scraped:  (611, 8)
Unique: (390, 8)


In [9]:
# Fetch the Place Details response for every remaining location, one request
# per place_id, asking only for the fields used by the enrichment step.
detail_fields = ['editorial_summary', 'reviews', 'place_id', 'price_level']
place_details = []
for current_place_id in df['place_id']:
    place_details.append(gmaps.place(place_id=current_place_id, fields=detail_fields))

In [10]:
# https://developers.google.com/maps/documentation/places/web-service/details

# Parse each Place Details response once into plain values keyed by place_id.
# Every lookup falls back to NaN (or an empty text) instead of raising, e.g.
# when an editorial_summary is present but carries no 'overview'.
details_by_place_id = {}
for place_detail in place_details:
    detail = place_detail.get('result', {})
    place_id = detail.get('place_id')
    if place_id is None:
        # Nothing to match this response against; skip it.
        continue

    if 'reviews' in detail:
        # Concatenate the review texts, each followed by three newlines.
        reviews_blob = ''.join(
            review.get('text', '') + '\n\n\n' for review in detail['reviews']
        )
    else:
        reviews_blob = np.nan

    details_by_place_id[place_id] = {
        'price_level': detail.get('price_level', np.nan),
        'editorial_summary': (detail.get('editorial_summary') or {}).get('overview', np.nan),
        'reviews': reviews_blob,
    }

# Build each column in a single pass over df. This avoids one full-column scan
# per place, gives every column a consistent dtype from the start, and always
# creates all three columns (rows without details get NaN).
for column in ('price_level', 'editorial_summary', 'reviews'):
    df[column] = [
        details_by_place_id.get(pid, {}).get(column, np.nan)
        for pid in df['place_id']
    ]


In [11]:
# Print the first rows of the dataframe as a quick sanity check
print(df.head())

                      place_id                             name  avg_rating  \
0  ChIJgXufFGp0j1QR73Bk2bSuNOk            Oak Bay Park Services         2.8   
1  ChIJueKTCmtzj1QRxJZTfhNWlsY  Esquimalt Gorge Park & Pavilion         4.7   
2  ChIJs2EiwWhzj1QRrbXfYuTM0xE                    Anderson Park         3.5   
3  ChIJ_duB57V2j1QR6B8HPH5RqZc          Phyllis Park View Point         4.6   
4  ChIJOSvdK45zj1QRjSxErs44NH4                Peacock Hill Park         4.8   

         lat         lng                                              types  \
0  48.429269 -123.319069  park,tourist_attraction,point_of_interest,esta...   
1  48.446345 -123.404949  park,tourist_attraction,point_of_interest,esta...   
2  48.438636 -123.395190               park,point_of_interest,establishment   
3  48.459801 -123.275502               park,point_of_interest,establishment   
4  48.450314 -123.353779               park,point_of_interest,establishment   

  business_status  user_ratings_total  price_level

In [12]:
# Save the dataframe to a CSV file in the current working directory;
# index=False leaves the dataframe index out of the file.
df.to_csv("locations.csv", index=False)