# Setting up the Chromedriver

In [None]:
# Install the Python packages imported by the scraping script below.
!pip install scrapy
!pip install selenium
!pip install selenium-stealth
# Refresh the apt package index, then install the chromium-chromedriver package.
!apt-get update
!apt install chromium-chromedriver
# Copy the chromedriver binary into /usr/bin (presumably so it can be found
# by its bare name `chromedriver` -- confirm for the target environment).
!cp /usr/lib/chromium-browser/chromedriver /usr/bin

import sys

# Put the chromedriver location at the front of Python's module search path.
# NOTE(review): sys.path only influences `import` resolution, not how
# executables are located -- confirm whether this entry is actually needed.
sys.path[:0] = ['/usr/lib/chromium-browser/chromedriver']

# Python Selenium Script

In [19]:
import csv
import os
import time

from scrapy import Selector
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium_stealth import stealth


# Column names of the output CSV, in the order they are written.
FIELD_NAMES = [
    'userName',
    'address',
    'rating',
    'dateOfReview',
    'reviewDesc',
]

def writeCSV(data, fieldName, FILE_NAME):
    """Append one row to a CSV file, writing the header first when needed.

    Args:
        data: Mapping of column name to value for the row to append.
        fieldName: Ordered column names, used for the header and row layout.
        FILE_NAME: Path of the CSV file; it is created if it does not exist.

    Raises:
        ValueError: If ``data`` contains a key that is not in ``fieldName``
            (raised by ``csv.DictWriter``).
    """
    # A header is needed for a brand-new file and also for a file that exists
    # but is still empty (e.g. created by an earlier, interrupted run).
    needsHeader = not os.path.isfile(FILE_NAME) or os.path.getsize(FILE_NAME) == 0
    # newline='' hands line-ending control to the csv module, so the chosen
    # lineterminator is written as-is on every platform.
    with open(FILE_NAME, 'a', encoding='utf-8', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldName, lineterminator='\n')
        if needsHeader:
            writer.writeheader()
        writer.writerow(data)

# Build the Chrome configuration used for scraping.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')               # run without a visible browser window
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
# Drop the "enable-automation" switch and the automation extension.
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option('useAutomationExtension', False)

# Pass the configuration through `options=`: the `chrome_options=` keyword is
# deprecated and, like the positional driver path, no longer accepted by
# current Selenium releases. The driver binary is looked up by its default
# name, which matches the 'chromedriver' value that was passed before.
driver = webdriver.Chrome(options=chrome_options)

# Settings handed to selenium-stealth for this driver session.
stealth_options = {
    "languages": ["en-US", "en"],
    "vendor": "Google Inc.",
    "platform": "Win32",
    "webgl_vendor": "Intel Inc.",
    "renderer": "Intel Iris OpenGL Engine",
    "fix_hairline": True,
}

# Apply the stealth settings to the already-created driver.
stealth(driver, **stealth_options)

# Open the page whose URL is typed in by the user.
driver.get(input("Enter the URL: "))

# Best-effort wait (up to 5 seconds) for the first review item to become
# visible. Only the wait's own timeout is tolerated; any other failure
# (including Ctrl-C) propagates instead of being silently swallowed.
try:
    WebDriverWait(driver, 5).until(
        EC.visibility_of_element_located(
            (By.XPATH, "//section[@aria-label='Recommended Reviews']//ul[contains(@class, 'undefined list')]/li")
        )
    )
except TimeoutException:
    print("Timed out waiting for the reviews to appear; continuing anyway.")

# Derive the output file name from the page's first <h1>.
# find_element(By.XPATH, ...) replaces find_element_by_xpath, which current
# Selenium releases no longer provide.
page_title = driver.find_element(By.XPATH, "//h1").get_attribute('innerHTML')
# Strip surrounding whitespace *before* turning spaces into underscores, so
# padding around the title does not end up as leading/trailing underscores.
FILE_NAME = page_title.strip().replace(" ", "_").lower() + ".csv"

# XPath of one review entry inside the "Recommended Reviews" section.
REVIEW_ITEM_XPATH = "//section[@aria-label='Recommended Reviews']//ul[contains(@class, 'undefined list')]/li"
# XPath of the "next page" chevron; it is used both to detect the link in the
# parsed HTML and to locate the live element to click.
NEXT_PAGE_XPATH = "//a/span[contains(@class, 'chevron-right')]"

# Walk through the result pages: parse the current page source, append every
# review to the CSV file, then follow the "next page" link until there is none.
try:
    while True:
        respObj = Selector(text=driver.page_source)

        for review in respObj.xpath(REVIEW_ITEM_XPATH):
            userName = review.xpath("normalize-space(.//a[contains(@href, 'user_details')]/text())").get()
            address = review.xpath("normalize-space(.//a[contains(@href, 'user_details')]/parent::span/following-sibling::div/span/text())").get()
            # Default to an empty string so a review without a rating node
            # does not raise AttributeError on the .replace() call below.
            rating = review.xpath(".//div[contains(@aria-label, 'star rating')]/@aria-label").get(default="")
            dateOfReview = review.xpath("normalize-space(.//div[contains(@aria-label, 'star rating')]/parent::span/parent::div/following-sibling::div/span/text())").get()
            reviewDesc = review.xpath(".//span[contains(@class, ' raw')]/text()").getall()

            data = {
                FIELD_NAMES[0]: userName,
                FIELD_NAMES[1]: address,
                # Keep only the value part of the aria-label by removing "star rating".
                FIELD_NAMES[2]: rating.replace("star rating", "").strip(),
                FIELD_NAMES[3]: dateOfReview,
                # The review body can span several text nodes; join them into one string.
                FIELD_NAMES[4]: " ".join(revDesc.strip() for revDesc in reviewDesc),
            }
            writeCSV(data, FIELD_NAMES, FILE_NAME)

        nextPage = respObj.xpath(NEXT_PAGE_XPATH)
        if not nextPage:
            break

        print(nextPage)
        # find_element(By.XPATH, ...) replaces find_element_by_xpath, which
        # current Selenium releases no longer provide.
        nextBtnElem = driver.find_element(By.XPATH, NEXT_PAGE_XPATH)
        # The click is dispatched through JavaScript rather than a native click.
        driver.execute_script("arguments[0].click()", nextBtnElem)
        # Fixed pause before the page source is read again on the next iteration.
        time.sleep(2)
finally:
    # Always shut the browser down, even when scraping fails part-way, so the
    # headless Chrome process is not left running.
    driver.quit()






Enter the URL: https://www.yelp.com/biz/knead-some-love-ny-weehawken-3
