In [9]:
import os
import json
import requests
import pandas as pd
import time


In [None]:
# Read the Google Places API key from the GOOGLE_API environment variable
# (os.environ[...] raises KeyError if the variable is not set)
google_place = os.environ["GOOGLE_API"]

In [11]:
# Load the SkyTrain station records from the CSV file into a DataFrame;
# the Google Places request cell reads its Name, Latitude and Longitude columns
SkyTrainDF = pd.read_csv('../API_Data/skytrain_stations.csv')

In [25]:
# Display the loaded station table as a sanity check
SkyTrainDF

Unnamed: 0,Name,Rating,Number of Ratings,Place ID,Latitude,Longitude
0,Broadway-City Hall Station,4.1,90,ChIJQx4neN1zhlQRdHFbwLLpxeM,49.262997,-123.114541
1,Vancouver City Centre,4.3,146,ChIJ04sXE39xhlQR1Woi1gmeQXY,49.282469,-123.118613
2,King Edward Station @ Platform 1,4.0,36,ChIJifoZXPJzhlQRtHmyjDhlNAc,49.249187,-123.115857
3,Marine Drive Station,4.2,91,ChIJ9_pZvfZ0hlQRtF0YmDWM4lI,49.20967,-123.11702
4,Granville,4.0,89,ChIJFxdoy3hxhlQRXYS-3DIa4Sk,49.283292,-123.116134
5,Olympic Village Station,4.1,54,ChIJlfAnGNxzhlQR-C1WKlNCF0g,49.26654,-123.11562
6,Burrard,4.2,181,ChIJWWb7iIFxhlQRBL4QZPdZs4I,49.285553,-123.120204
7,Sea Island Centre,3.5,13,ChIJfdGuxM10hlQRc5O28XA1Muk,49.193046,-123.15789
8,Stadium-Chinatown,3.6,158,ChIJOd5wkHtxhlQRZ-hsRW79874,49.279642,-123.109815
9,Templeton,4.1,38,ChIJJ7z-3MR0hlQRdu2zx6WWbTc,49.196481,-123.146402


## Google Places API Request
Due to limitations placed by Google, at most 3 pages of 20 results each (60 results) can be retrieved for each SkyTrain station. The total number of restaurants retrieved from Google Places ends up at 709.

Some information that was collected from Yelp could not be collected from Google Places in the same way. URL, Category, and distance from the station will remain absent from the data collected via Google Places.

In [51]:
# Google Places Text Search endpoint (the API key itself comes from `google_place`)
url = 'https://maps.googleapis.com/maps/api/place/textsearch/json'

# Seconds to wait before a freshly issued next_page_token is used
PAGE_TOKEN_DELAY = 3
# Seconds after which a hanging HTTP request is abandoned
REQUEST_TIMEOUT = 10
# Column order of the resulting DataFrame (kept even when nothing is returned)
VENUE_COLUMNS = [
    'Venue Name', 'Address', 'Average Rating', 'Review Count',
    'SkyTrain Station', 'Latitude', 'Longitude', 'ID', 'Source',
]


def _fetch_page(request_params):
    """Request a single page of Text Search results.

    Parameters
    ----------
    request_params : dict
        Query-string parameters for the Text Search endpoint.

    Returns
    -------
    dict or None
        The decoded JSON body, or None when the request failed. Failures are
        reported with print() so the remaining stations are still processed.
    """
    try:
        response = requests.get(url, params=request_params, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as error:
        print('Request failed:', error)
        return None

    # Check the HTTP status before decoding: error pages may not be JSON
    if response.status_code != 200:
        print('Request failed with status code:', response.status_code)
        return None

    try:
        page = response.json()
    except ValueError:
        print('Request failed: response body is not valid JSON')
        return None

    # Google reports key/quota/argument problems in the body with HTTP 200
    api_status = page.get('status', 'OK')
    if api_status not in ('OK', 'ZERO_RESULTS'):
        print('Request failed with API status:', api_status,
              page.get('error_message', ''))
        return None

    return page


def _venue_records(results, station_name):
    """Flatten raw Text Search results into one record per venue.

    Parameters
    ----------
    results : list of dict
        The 'results' list of a Text Search response.
    station_name : str
        Name of the SkyTrain station the search was issued for.

    Returns
    -------
    list of dict
        Records keyed by the names in VENUE_COLUMNS.
    """
    records = []
    for result in results:
        location = result['geometry']['location']
        records.append({
            'Venue Name': result['name'],
            'Address': result['formatted_address'],
            # Places without any reviews carry neither a rating nor a count
            'Average Rating': result.get('rating', 'N/A'),
            'Review Count': result.get('user_ratings_total', 0),
            'SkyTrain Station': station_name,
            'Latitude': location['lat'],
            'Longitude': location['lng'],
            'ID': result['place_id'],
            'Source': 'Google'
        })
    return records


# Collected venue records for all stations
venue_details = []

# Query the restaurants around every station, following pagination
for _, station in SkyTrainDF.iterrows():
    station_name = str(station['Name'])

    params = {
        'query': f'Restaurants near SkyTrain {station_name} Vancouver',
        'type': 'restaurant',
        'radius': 500,  # Radius in meters
        # Must be a "lat,lng" string: requests expands a dict value into
        # repeated keys (location=lat&location=lng), dropping the coordinates
        'location': ','.join([str(station['Latitude']), str(station['Longitude'])]),
        'key': google_place
    }

    data = _fetch_page(params)
    while data is not None:
        venue_details.extend(_venue_records(data.get('results', []), station_name))

        # Stop once Google no longer hands out a token for a further page
        next_page_token = data.get('next_page_token')
        if not next_page_token:
            break
        params['pagetoken'] = next_page_token

        # allow the next page token to become valid
        time.sleep(PAGE_TOKEN_DELAY)
        data = _fetch_page(params)

# Create a DataFrame from the venue details list
google_venues = pd.DataFrame(venue_details, columns=VENUE_COLUMNS)

# the DataFrame
#google_rest

## Results
Due to the way that Google Places prioritizes results, a high average rating seems to seriously skew what is returned from the API request. As a result, well-regarded restaurants show up several times in the data, limiting its overall depth.

Because the API offers no option to disregard rating when ranking results, this issue cannot reasonably be resolved in the GET request itself.

In [61]:
# Save the collected venues to CSV; index=False leaves the row index out of the file
google_venues.to_csv('../API_Data/google_venues.csv', index=False)

In [62]:
# Summarize row count, column dtypes and non-null counts of the collected venues
google_venues.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1907 entries, 0 to 1906
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Venue Name        1907 non-null   object 
 1   Address           1907 non-null   object 
 2   Average Rating    1907 non-null   float64
 3   Review Count      1907 non-null   int64  
 4   SkyTrain Station  1907 non-null   object 
 5   Latitude          1907 non-null   float64
 6   Longitude         1907 non-null   float64
 7   ID                1907 non-null   object 
 8   Source            1907 non-null   object 
dtypes: float64(3), int64(1), object(5)
memory usage: 134.2+ KB


In [64]:
# Distinct venue names: a name returned for several stations (or shared by
# several venues) appears only once in this array
uniques = google_venues['Venue Name'].unique()
uniques

array(['Rogue Kitchen & Wetbar', 'Kamei on Broadway', 'Marulilu Café',
       'Chambar Restaurant', 'Saku Broadway', "Salmon n' Bannock",
       'Nightingale', 'Cactus Club Cafe Broadway + Ash',
       'Suika Japanese Restaurant', 'A.BENTO',
       'Shizenya Restaurant on Broadway', 'Dynasty Seafood Restaurant',
       'Stages Restaurant & Lounge', 'A&W Canada', 'Peaceful Restaurant',
       'Chipotle Mexican Grill', 'Kim Son Vietnamese Food', 'Yolks',
       'Thai Away Home at City Square', 'Glowbal', 'Pho Extreme Xe Lua',
       "McDonald's", 'White Spot Dunsmuir', 'Shanghai Lu Restaurant',
       'Sauzzy Thai', 'Cibo Trattoria', 'Basil Box', 'Sushi Dragon',
       'Tractor Everyday Healthy Foods - Ash & Broadway',
       'Heritage Asian Eatery', 'Eh! Restaurant',
       'Cilantro Indian Cuisine',
       'Johnny’s on Oak Restaurant Vancouver (Best Neighbours Restaurant)',
       'P2B Restaurant + Bar', 'Falafel King Mediterranean Food',
       'Diva at the Met', 'Italian Kitchen', 'P