In [1]:
import csv
import os
import time
from enum import Enum

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.remote.webelement import WebElement

In [None]:
# Notebook magic: installs the selenium package for the running kernel.
# Run this cell before the import cell if selenium is not installed yet.
%pip install selenium

In [2]:
class BrowserOption(Enum):
    """Enumerate the web browsers a caller can choose between."""

    # Numeric values are arbitrary identifiers; only member identity matters.
    EDGE = 1
    CHROME = 2
    FIREFOX = 3
    SAFARI = 4



class FileHandler():
    """Helpers for persisting crawled review data to a CSV file."""

    @staticmethod
    def is_file_empty(file_name: str) -> bool:
        """Return True if the file has a size of zero bytes.

        Args:
            - file_name: path of the file to check

        Raises:
            OSError: if the file cannot be stat'ed (e.g. it does not exist).
        """
        return os.stat(file_name).st_size == 0

    @staticmethod
    def write_to_csv(reviews: "list[WebElement]", points: "list[WebElement]", file_name: str) -> None:
        """Append review/point pairs to a CSV file, adding a header if it is empty.

        The file is opened in append mode, so it is created when missing and
        existing rows are preserved. Reviews and points are paired positionally
        with ``zip``; surplus items in the longer list are ignored.

        Args:
            - reviews: elements whose ``.text`` holds the review content
            - points: elements whose ``.text`` holds the matching score
            - file_name: path of the CSV file to append to
        """
        # The annotations above are quoted so that defining this class does not
        # require selenium's WebElement to be resolvable at definition time.
        header = ['Review', 'Point']

        # The context manager guarantees the handle is flushed and closed even
        # if reading an element's text raises midway through.
        with open(file_name, 'a', encoding='UTF8', newline='') as file:
            writer = csv.writer(file)

            # Nothing has been written through this handle yet, so the on-disk
            # size still tells us whether the header row is needed.
            if FileHandler.is_file_empty(file_name):
                writer.writerow(header)
            writer.writerows(
                [review.text, point.text] for review, point in zip(reviews, points)
            )



class FoodyCrawler():
    """Crawl restaurant reviews and their scores with a Selenium driver.

    URLs are read from a text file (one per line), each restaurant's comment
    page is loaded and fully expanded, and the collected review/point pairs
    are appended to a CSV file.
    """

    @staticmethod
    def _disable_images(options):
        """Set the image-disabling flags on Chromium-style options and return them."""
        options.add_argument("--blink-settings=imagesEnabled=false")
        # NOTE(review): value 2 is presumably the "block" setting for this pref - confirm.
        options.add_experimental_option("prefs", {"profile.managed_default_content_settings.images": 2})
        return options

    @staticmethod
    def get_driver(browser_option: BrowserOption = BrowserOption.CHROME):
        """Return a driver that depends on the BrowserOption enum.

        Edge and Chrome are started with image loading disabled; Firefox and
        Safari are started with default settings. Any value other than EDGE,
        FIREFOX or SAFARI falls back to Chrome.

        Args:
            - browser_option: the option of browser's driver
        """
        if browser_option == BrowserOption.EDGE:
            options = FoodyCrawler._disable_images(webdriver.EdgeOptions())
            return webdriver.ChromiumEdge(options=options)
        if browser_option == BrowserOption.FIREFOX:
            return webdriver.Firefox()
        if browser_option == BrowserOption.SAFARI:
            return webdriver.Safari()
        options = FoodyCrawler._disable_images(webdriver.ChromeOptions())
        return webdriver.Chrome(options=options)

    def __init__(self, browser_option: BrowserOption, url_file: str, file_name_to_save: str) -> None:
        """Create a new instance of FoodyCrawler.

        A browser driver is started immediately.

        Args:
            - browser_option: the option of browser's driver
            - url_file: a file that stores a list of url linked to restaurants
            - file_name_to_save: file's name to save data crawled from links in url_file
        """
        self.driver = FoodyCrawler.get_driver(browser_option)
        self.url_file = url_file
        self.file_name_to_save = file_name_to_save

    def crawl(self, url: str) -> tuple[list[WebElement], list[WebElement]]:
        """Crawl data from a single url.

        Loads the restaurant's '/binh-luan' page, repeatedly clicks the
        "Xem thêm bình luận" link until it can no longer be found or used,
        then collects the review and point elements.

        Args:
            - url: a url linked to a restaurant; surrounding whitespace and a
              trailing '/' are ignored

        Returns:
            A ``(reviews, points)`` tuple of element lists. The elements stay
            bound to the current page, so read them before navigating away.
        """
        # URLs read from a file still carry their line terminator; strip it so
        # it does not end up in the middle of the address.
        url = url.strip().rstrip('/') + '/binh-luan'
        self.driver.get(url)
        self.driver.maximize_window()
        self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        while True:
            try:
                view_review_button = self.driver.find_element(By.PARTIAL_LINK_TEXT, "Xem thêm bình luận")
                # Click through JavaScript rather than WebElement.click().
                self.driver.execute_script("arguments[0].click()", view_review_button)
                time.sleep(1)  # give the next batch of comments time to load
                self.driver.execute_script("arguments[0].scrollIntoView();", view_review_button)
            except WebDriverException:
                # The link is gone (or became unusable): everything is loaded.
                # Only Selenium errors end the loop, so Ctrl-C and programming
                # errors are no longer silently swallowed.
                break

        review_selector = "div.review-des > div.rd-des > span"
        point_selector = "div.review-user > div > div.review-points > span"

        reviews = self.driver.find_elements(By.CSS_SELECTOR, review_selector)
        points = self.driver.find_elements(By.CSS_SELECTOR, point_selector)
        return reviews, points

    def start_crawling(self) -> None:
        """Crawl every url listed in ``url_file`` and append the results to the CSV.

        Blank lines in the url file are skipped. The driver is always shut
        down when this method returns, including when an error interrupts the
        crawl, so the instance cannot be reused afterwards.
        """
        try:
            # Read everything up front so the file is not held open while crawling.
            with open(self.url_file, 'r') as file:
                url_list = [line.strip() for line in file]

            for url in url_list:
                if not url:
                    continue
                print(url)
                reviews, points = self.crawl(url)
                FileHandler.write_to_csv(reviews, points, self.file_name_to_save)
                # Open a blank tab, close the crawled one and switch to the
                # remaining tab so each restaurant starts in a fresh tab.
                self.driver.execute_script("window.open('');")
                self.driver.close()
                self.driver.switch_to.window(self.driver.window_handles[0])
        finally:
            self.driver.quit()


In [None]:
# Crawl every restaurant listed in restaurants.txt with Edge and append the
# collected reviews to data_1.csv.
foody = FoodyCrawler(
    browser_option=BrowserOption.EDGE,
    url_file='restaurants.txt',
    file_name_to_save='data_1.csv',
)
foody.start_crawling()