In [1]:
# IMPORT PACKAGES

# Import general packages
import pandas as pd
from parsel import Selector
import time

# Import Selenium packages
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By

# Import Beautiful Soup packages
from bs4 import BeautifulSoup

In [2]:
# IMPORT VENUES IN AMS

# Import excel file with venues in Amsterdam (from Jeroen)
venues_ams = pd.read_excel (r'/Users/LizzyDRB/Documents/MScDS/GoogleReviews/amsterdam_venues_place_id.xlsx')

# Preview of data
venues_ams

Unnamed: 0,name,street,housenumber,city,postcode,lat,lon,RD_x,RD_y,tile_code,place_id
0,De Kletskop,Zeedijk,10,Amsterdam,1012AX,52.375980,4.900340,121846.813111,487678.771043,2436_9753,ChIJ1zQxxrkJxkcRG90MVXyWSIc
1,Haven van Texel,,,,,52.375789,4.900279,121842.516201,487657.492050,2436_9753,ChIJFS4HyLkJxkcRoMP5yz4NcJ4
2,Kam Yin,Warmoesstraat,6,Amsterdam,,52.376236,4.899454,121786.693292,487707.593209,2435_9754,ChIJC6hwM7gJxkcRj8RMeZoWSTA
3,De Ooievaar,Sint Olofspoort,1,Amsterdam,1012AJ,52.376284,4.899940,121819.816378,487712.766897,2436_9754,ChIJWwxw8NEJxkcREUXIGY2SHVg
4,Bitterzoet,Spuistraat,2,Amsterdam,1012TS,52.377357,4.894142,121425.813798,487834.767054,2428_9756,ChIJsY8odsgJxkcRX1QkUoMSOKk
...,...,...,...,...,...,...,...,...,...,...,...
2968,,,,,,52.410843,4.921504,123312.910259,491548.242693,2466_9830,
2969,,,,,,52.412294,4.922164,123358.846813,491709.431973,2467_9834,
2970,,,,,,52.371458,4.930970,123929.340496,487162.055915,2478_9743,
2971,Czaar,,,,,52.371840,4.931803,123986.342783,487204.201669,2479_9744,ChIJk67ICQwJxkcR4vod07nA2qA


In [3]:
# DROP NAN

# Change place_id's into strings
for row in range(venues_ams.shape[0]):
    venues_ams['place_id'][row] = str(venues_ams['place_id'][row])
    
# Drop rows where place_id is nan
for row in range(venues_ams.shape[0]):
    if venues_ams['place_id'][row] == 'nan':
        venues_ams = venues_ams.drop(row)
        
# Reset the index to range from 0, 1965        
venues_ams = venues_ams.reset_index()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  venues_ams['place_id'][row] = str(venues_ams['place_id'][row])


In [4]:
# FUNCTIONS

# Function to get review overview from scraper
# Returns dataframe with Name, Rate, Time, Text
def review_overview(result_set, name):
    rev_dict = {'Name': name,
                'Review Rate': [],
                'Review Time': [],
                'Review Text' : []}
    for result in result_set:
        review_rate = result.find('span', class_='ODSEW-ShBeI-H1e3jb')["aria-label"]
        review_time = result.find('span',class_='ODSEW-ShBeI-RgZmSc-date').text
        review_text = result.find('span',class_='ODSEW-ShBeI-text').text
        rev_dict['Review Rate'].append(review_rate)
        rev_dict['Review Time'].append(review_time)
        rev_dict['Review Text'].append(review_text)   
    return(pd.DataFrame(rev_dict))

In [5]:
# Count rows = 1965
print('Shape of venues_ams is', venues_ams.shape)

venues_ams

Shape of venues_ams is (1965, 12)


Unnamed: 0,index,name,street,housenumber,city,postcode,lat,lon,RD_x,RD_y,tile_code,place_id
0,0,De Kletskop,Zeedijk,10,Amsterdam,1012AX,52.375980,4.900340,121846.813111,487678.771043,2436_9753,ChIJ1zQxxrkJxkcRG90MVXyWSIc
1,1,Haven van Texel,,,,,52.375789,4.900279,121842.516201,487657.492050,2436_9753,ChIJFS4HyLkJxkcRoMP5yz4NcJ4
2,2,Kam Yin,Warmoesstraat,6,Amsterdam,,52.376236,4.899454,121786.693292,487707.593209,2435_9754,ChIJC6hwM7gJxkcRj8RMeZoWSTA
3,3,De Ooievaar,Sint Olofspoort,1,Amsterdam,1012AJ,52.376284,4.899940,121819.816378,487712.766897,2436_9754,ChIJWwxw8NEJxkcREUXIGY2SHVg
4,4,Bitterzoet,Spuistraat,2,Amsterdam,1012TS,52.377357,4.894142,121425.813798,487834.767054,2428_9756,ChIJsY8odsgJxkcRX1QkUoMSOKk
...,...,...,...,...,...,...,...,...,...,...,...,...
1960,2961,Pension Homeland,,,,,52.372880,4.916515,122945.963716,487326.545156,2458_9746,ChIJtdZbkaYJxkcRc9Qcc-bXUGc
1961,2962,Frank's Smoke House,Oostenburgervoorstraat,1,,,52.368902,4.921978,123315.154310,486881.482173,2466_9737,ChIJvy3dlqAJxkcRbg-tib1GMBU
1962,2963,Caf√© Daan & Daan,Kattenburgerplein,39,Amsterdam,1018KK,52.370886,4.916357,122933.817222,487104.766978,2458_9742,ChIJq5WAS6EJxkcReT5wzYdIZm4
1963,2971,Czaar,,,,,52.371840,4.931803,123986.342783,487204.201669,2479_9744,ChIJk67ICQwJxkcR4vod07nA2qA


In [6]:
# INITIATE ALL VARIABLES

# Create df containing all Google Maps links
cols = ['google_link']
url_list = []
 
for row in range(venues_ams.shape[0]):
    p = venues_ams['place_id'][row]
    url = 'https://www.google.com/maps/place/?q=place_id:'+p
    url_list.append([url])
 
google_links = pd.DataFrame(url_list, columns=cols)

# Append df to venues_ams
venues_ams = venues_ams.join(google_links)

In [7]:
# NEW VARIABLES FOR SAVING REVIEWS
# Create new dataframe that will contain info of the venuews
cols2 = ['name', 'place_id', 'url']
total_reviews = pd.DataFrame(columns=cols) # final df for all reviews
running_reviews2 = pd.DataFrame(columns=cols2) # for testing

In [8]:
for index, cols in venues_ams.iterrows():
    print(cols[1], cols[12])

De Kletskop https://www.google.com/maps/place/?q=place_id:ChIJ1zQxxrkJxkcRG90MVXyWSIc
Haven van Texel https://www.google.com/maps/place/?q=place_id:ChIJFS4HyLkJxkcRoMP5yz4NcJ4
Kam Yin https://www.google.com/maps/place/?q=place_id:ChIJC6hwM7gJxkcRj8RMeZoWSTA
De Ooievaar https://www.google.com/maps/place/?q=place_id:ChIJWwxw8NEJxkcREUXIGY2SHVg
Bitterzoet https://www.google.com/maps/place/?q=place_id:ChIJsY8odsgJxkcRX1QkUoMSOKk
New Dutch https://www.google.com/maps/place/?q=place_id:ChIJs4EH2rwJxkcRIUllSD3jWA8
Jennifer https://www.google.com/maps/place/?q=place_id:ChIJVXealLU_xkcRja_At0z9AGY
Sluyswacht https://www.google.com/maps/place/?q=place_id:ChIJzccHg74JxkcRufHICkn86wo
Cafe de Zon https://www.google.com/maps/place/?q=place_id:ChIJdZ5ys88JxkcRJl584hErf4Y
Nossa Senhora https://www.google.com/maps/place/?q=place_id:ChIJVXealLU_xkcRja_At0z9AGY
Maria https://www.google.com/maps/place/?q=place_id:ChIJVXealLU_xkcRja_At0z9AGY
Oost-West https://www.google.com/maps/place/?q=place_id:ChIJE9k0Z

Grey Area Coffeeshop https://www.google.com/maps/place/?q=place_id:ChIJcwAOLMQJxkcRMhBjTnvRGy4
Grill Burger https://www.google.com/maps/place/?q=place_id:ChIJVzdjrMcJxkcRKHJVwuANd1I
Frens Haringhandel https://www.google.com/maps/place/?q=place_id:ChIJIQ-8oOoJxkcRjgLvr60bjnA
Cafe Het Paleis https://www.google.com/maps/place/?q=place_id:ChIJ1aDIqsYJxkcRwW0zWDQi5O0
Tisfris https://www.google.com/maps/place/?q=place_id:ChIJ6yq_j74JxkcRaHZl1QpM4cI
Eetcafe van Beeren https://www.google.com/maps/place/?q=place_id:ChIJfa7F_bsJxkcRkTrq3fNR68Q
Me Naam Naan https://www.google.com/maps/place/?q=place_id:ChIJ59iXV7kJxkcRo4v4NDsj2GQ
Amstelhaven https://www.google.com/maps/place/?q=place_id:ChIJGYfrDZoJxkcR7b7PdqcF6FQ
Starbucks https://www.google.com/maps/place/?q=place_id:ChIJVXealLU_xkcRja_At0z9AGY
Mr. Crab Seafood Bistro https://www.google.com/maps/place/?q=place_id:ChIJ3cqn8MYJxkcRGjY2YYWXN4Q
Gollem https://www.google.com/maps/place/?q=place_id:ChIJVXealLU_xkcRja_At0z9AGY
Lellebel https://www.goo

Barra https://www.google.com/maps/place/?q=place_id:ChIJVXealLU_xkcRja_At0z9AGY
Coffeeshop Club Media https://www.google.com/maps/place/?q=place_id:ChIJMQaer_IJxkcRjDvF1a_sOE4
Rotisserie Amsterdam https://www.google.com/maps/place/?q=place_id:ChIJvww2eHXixUcRrz7dypxonLE
Het IJsboefje https://www.google.com/maps/place/?q=place_id:ChIJu4MOdJ0JxkcR9Oiba_J1IWw
Coffeeshop Ibiza https://www.google.com/maps/place/?q=place_id:ChIJ-bLLsJEJxkcRS6TH8T9IhQQ
Marmaris https://www.google.com/maps/place/?q=place_id:ChIJIxWW8b-9vxQRvrLu0Q5zjt8
Restaurant Stedelijk https://www.google.com/maps/place/?q=place_id:ChIJ86pqYOUJxkcR_AFqfNWpVfY
De kleine Valk https://www.google.com/maps/place/?q=place_id:ChIJxfr9Ru0JxkcR7OjcrL8q49M
Oud-Zuid https://www.google.com/maps/place/?q=place_id:ChIJrfbTX_cJxkcRlFVF-X9Nz5k
Caf√© Flinck https://www.google.com/maps/place/?q=place_id:ChIJzQdlRo0JxkcRblJR5Vhquqg
Blauw https://www.google.com/maps/place/?q=place_id:ChIJLUFCYATixUcR6YBOTuasB5Y
Chocolate Bar https://www.google.

Vinnies https://www.google.com/maps/place/?q=place_id:ChIJVXealLU_xkcRja_At0z9AGY
Lucca Due https://www.google.com/maps/place/?q=place_id:ChIJReMZzM4JxkcRSisrzhJsWY0
nan https://www.google.com/maps/place/?q=place_id:ChIJVXealLU_xkcRja_At0z9AGY
YamYam https://www.google.com/maps/place/?q=place_id:ChIJrRVrrtkJxkcRFJ8Dx9HggkQ
Mitsos https://www.google.com/maps/place/?q=place_id:ChIJEQGgF9kJxkcR9r1pIdgMc8A
Bagels & Beans Amsterdam, Willem de Zwijgerlaan https://www.google.com/maps/place/?q=place_id:ChIJVzbyMXHixUcR8Iu0SVrV-jE
Jun https://www.google.com/maps/place/?q=place_id:ChIJVXealLU_xkcRja_At0z9AGY
Graceland Bar-B-Q https://www.google.com/maps/place/?q=place_id:ChIJEauWrXfixUcRsaJ-ARKu5bk
Cuddle https://www.google.com/maps/place/?q=place_id:ChIJh5ZQo8cJxkcRF3QZkk5-L7I
La Bettola https://www.google.com/maps/place/?q=place_id:ChIJ11HCSsYJxkcRJ6CixcnJtnY
De Spaanse Ruiter https://www.google.com/maps/place/?q=place_id:ChIJVXealLU_xkcRja_At0z9AGY
Angus Steak House https://www.google.com/map

Tempo Doeloe https://www.google.com/maps/place/?q=place_id:ChIJQcaVSJQJxkcRTGExotj3gA4
Vishuisje Herengracht https://www.google.com/maps/place/?q=place_id:ChIJvRORo5UJxkcRJ8TSYxnJPzg
Vapiano https://www.google.com/maps/place/?q=place_id:ChIJl2HJg5UJxkcR3t2tEWSKQOw
Boerejongens https://www.google.com/maps/place/?q=place_id:ChIJVXealLU_xkcRja_At0z9AGY
La Casona https://www.google.com/maps/place/?q=place_id:ChIJ-3LFrZUJxkcRlmFtjg721CA
Litedark https://www.google.com/maps/place/?q=place_id:ChIJNYhvppUJxkcRL2kmL89xoh4
XXX by Charles & Charley https://www.google.com/maps/place/?q=place_id:ChIJVXealLU_xkcRja_At0z9AGY
Alfonso https://www.google.com/maps/place/?q=place_id:ChIJF83lrpUJxkcRRltWeMDc91s
De Bajes https://www.google.com/maps/place/?q=place_id:ChIJ9wQloZUJxkcRiWx8G53vXjA
Ali https://www.google.com/maps/place/?q=place_id:ChIJfTkup5UJxkcRufG0kkZZBlw
Het Karbeel https://www.google.com/maps/place/?q=place_id:ChIJkwcxQ7gJxkcRTMOG1SIYNBQ
China Si Chuan Restaurant https://www.google.com/maps

In [9]:
# START THE CHROME DRIVER

# Open Chromedriver
chromedrive_path = '/Users/LizzyDRB/Documents/MScDS/GoogleReviews/chromedriver' # use the path to the driver you downloaded from previous steps
driver = webdriver.Chrome(chromedrive_path)

# Open Google Maps
url='https://www.google.com/maps/place/'
driver.get(url)

# Accept cookies
wait = WebDriverWait(driver, 10)

button_cookies = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="yDmH0d"]/c-wiz/div/div/div/div[2]/div[1]/div[4]/form/div/div/button/span')))
driver.execute_script("arguments[0].click()", button_cookies)

  driver = webdriver.Chrome(chromedrive_path)


In [None]:
# Loop over all Google Maps venue links
for rows, cols in venues_ams.iterrows():
    if cols[1] == "Kam Yin":
        continue
    
    url=cols[12]
    driver.get(url)
    
    # Find name of venue
    driver.implicitly_wait(5)
    find_name = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]/div[1]/div[1]/h1/span[1]').text

    if find_name == "Amsterdam":
        continue
    
    # Go to All Reviews page
    wait = WebDriverWait(driver, 20)

    button_reviews = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]/div[1]/div[2]/div/div[1]/span[1]/span/span[1]/span[2]/span[1]/button')))
    driver.execute_script("arguments[0].click()", button_reviews)

    # Parse Reviews
    page_content = driver.page_source
    
    # Parse content from HTML page
    response = Selector(page_content)

    # Iterate over reviews
    results = []

    for el in response.xpath('//div/div[@data-review-id]/div[contains(@class, "content")]'):
        results.append({
            #'title': el.xpath('.//div[contains(@class, "title")]/span/text()').extract_first(''),
            'rating': el.xpath('.//span[contains(@aria-label, "stars")]/@aria-label').extract_first('').replace('stars' ,'').strip(),
            'body': el.xpath('.//span[contains(@class, "text")]/text()').extract_first(''),
        })

    #Find the total number of reviews
    driver.implicitly_wait(5)
    total_number_of_reviews = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[2]/div[2]/div/div[2]/div[2]').text.split(" ")[0]


    if '.' in total_number_of_reviews:
        total_number_of_reviews = int(total_number_of_reviews.replace('.',''))
    else:
        int(total_number_of_reviews)

    #Find scroll layout
    scrollable_div = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[2]')

    #Scroll as many times as necessary to load all reviews
    for i in range(0,(round(int(total_number_of_reviews)/10 - 1))):
        driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', scrollable_div)
        time.sleep(10)

    # Parse Reviews with Beautiful Soup
    response = BeautifulSoup(driver.page_source, 'html.parser')
    reviews = response.find_all('div', class_='ODSEW-ShBeI NIyLF-haAclf gm2-body-2')

    
    # Put reviews in dataframe
    df_reviews = review_overview(reviews, cols[1])

    running_reviews2 = running_reviews2.append(df_reviews)

  find_name = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]/div[1]/div[1]/h1/span[1]').text
  total_number_of_reviews = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[2]/div[2]/div/div[2]/div[2]').text.split(" ")[0]
  scrollable_div = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[2]')


In [144]:
running_reviews

Unnamed: 0,google_link,Name,Review Rate,Review Time,Review Text
0,,De Kletskop,5 stars,4 months ago,Great 'bruin café' in Amsterdam that I discove...
1,,De Kletskop,5 stars,2 years ago,"Súper cool people, the bartender was super nic..."
2,,De Kletskop,5 stars,a month ago,Friendly staff.
3,,De Kletskop,5 stars,2 years ago,Had a great evening in this cafe in the centre...
4,,De Kletskop,5 stars,6 years ago,Cosy local pub near the central station and Am...
...,...,...,...,...,...
15,,Little Thai Prince,5 stars,5 months ago,Amazing food and service ! Really nice people ...
16,,Little Thai Prince,5 stars,8 months ago,"Excellent service, very customer friendly.\n\n..."
17,,Little Thai Prince,3 stars,a month ago,Decent Thai food.
18,,Little Thai Prince,4 stars,9 months ago,"Very good Thai food, lovely people!"
