https://javvycoffee.com/products/javvy-coffee-concentrate

In [6]:
#Import libraries
import re
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.edge.options import Options
from selenium.webdriver.edge.service import Service
from webdriver_manager.microsoft import EdgeChromiumDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException

# Rating text is expected to look like "Rated <number> out of ..."; compiled
# once here so the pattern is not rebuilt for every review.
_RATING_RE = re.compile(r"Rated\s+(\d+(?:\.\d+)?)\s+out of")

# CSS selector of the element that wraps the whole review list.
_REVIEWS_ROOT = ".oke-w-reviews-main.oke-w-reviews-main--nav-divider"

# Column order of the resulting DataFrame / CSV files.
_REVIEW_COLUMNS = [
    "Reviewer Name",
    "Review Title",
    "Review Body",
    "Review Date",
    "Review Rating",
]


def _text_or_blank(container, css_selector):
    """Return the stripped text of the first match under *container*, or ""."""
    # Local import keeps this block self-contained; the module is already
    # loaded by the file-level selenium imports, so this is only a name lookup.
    from selenium.common.exceptions import WebDriverException

    try:
        return container.find_element(By.CSS_SELECTOR, css_selector).text.strip()
    except WebDriverException:
        # Best effort: a missing (or no longer attached) element yields a
        # blank field instead of aborting the scrape. Unlike a bare
        # ``except:``, Ctrl-C and SystemExit still propagate.
        return ""


def _extract_review(container):
    """Build one row dict (keyed by ``_REVIEW_COLUMNS``) from a review element."""
    rating_text = _text_or_blank(container, ".oke-stars .oke-a11yText")
    rating_match = _RATING_RE.search(rating_text)

    return {
        "Reviewer Name": _text_or_blank(container, ".oke-w-reviewer-name"),
        "Review Title": _text_or_blank(
            container, ".oke-reviewContent-title.oke-title"
        ),
        "Review Body": _text_or_blank(
            container, ".oke-reviewContent-body.oke-bodyText p"
        ),
        "Review Date": _text_or_blank(container, ".oke-w-reviewMinimal-date"),
        # Fall back to the raw text when it is not in the expected format.
        "Review Rating": rating_match.group(1) if rating_match else rating_text,
    }


def _click_show_more(driver, wait, max_clicks):
    """Click the "Show More" button until it disappears or *max_clicks* is hit."""
    click_count = 0

    while click_count < max_clicks:
        try:
            # Wait until the button is visible and clickable
            show_more_button = wait.until(
                EC.element_to_be_clickable((By.CLASS_NAME, "oke-showMore-button"))
            )

            # Ensure the button is in view before clicking
            driver.execute_script(
                "arguments[0].scrollIntoView({block: 'center'});", show_more_button
            )
            time.sleep(1)

            # Click through JavaScript rather than WebElement.click()
            print("Clicking 'Show More' button...")
            driver.execute_script("arguments[0].click();", show_more_button)

            # Give the newly requested reviews time to load
            time.sleep(3)
            click_count += 1

        except (NoSuchElementException, TimeoutException):
            print("No more 'Show More' button found. All reviews loaded.")
            break

    return click_count


def scrape_reviews(
    url="https://javvycoffee.com/products/coffee-concentrate",
    max_clicks=970,
):
    """Scrape product reviews with Edge and save them as CSV.

    Opens *url* in a (non-headless) Edge window, repeatedly clicks the
    "Show More" button to load further reviews, extracts reviewer name,
    title, body, date and rating from every list item in the reviews
    container, drops rows where any of those fields is empty, prints the
    resulting DataFrame and writes it to two CSV files in the current
    directory (default encoding and ``utf-8-sig``).

    Args:
        url: Product page to scrape.
        max_clicks: Upper bound on the number of "Show More" clicks.

    Raises:
        selenium.common.exceptions.TimeoutException: If the reviews
            container does not appear within 30 seconds.
    """
    # Configure Edge (headless mode is left off to ease debugging)
    edge_options = Options()
    edge_options.add_argument("--start-maximized")

    service = Service(EdgeChromiumDriverManager().install())
    driver = webdriver.Edge(service=service, options=edge_options)

    try:
        # Open the product page
        driver.get(url)

        # Wait for the reviews container to load
        wait = WebDriverWait(driver, 30)
        wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, _REVIEWS_ROOT))
        )

        # Expand the review list as far as allowed
        _click_show_more(driver, wait, max_clicks)

        # Extract all reviews
        review_containers = driver.find_elements(
            By.CSS_SELECTOR, _REVIEWS_ROOT + " ul li"
        )
        data = [_extract_review(container) for container in review_containers]

        # Explicit columns keep the CSV header present even with zero rows.
        df = pd.DataFrame(data, columns=_REVIEW_COLUMNS)

        # Treat blank strings as missing and drop every incomplete row.
        df = df.replace("", pd.NA).dropna(how="any")

        # Print and save the DataFrame
        print(df)
        df.to_csv("javvy_reviews_970_not-encoding.csv", index=False)
        df.to_csv("javvy_reviews_970_encoding.csv", index=False, encoding="utf-8-sig")

    finally:
        # Always close the browser, even if scraping failed part-way.
        driver.quit()

# Run the scraper only when this file is executed directly, not when imported.
if __name__ == "__main__":
    scrape_reviews()


Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking 'Show More' button...
Clicking