# **Import Libraries**

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from time import sleep
from bs4 import BeautifulSoup
import pandas as pd
from selenium.common.exceptions import NoSuchElementException, TimeoutException

In [2]:
def get_product_urls(url_main, total_pages):
    """
    Collect product URLs from a category listing using Selenium.

    The number of listing pages to visit is supplied by the caller rather
    than detected from the site.

    Args:
        url_main: Category URL. Anything from the first '?' onwards is
            dropped and replaced with ``?ob=5&page=<n>``.
        total_pages: Number of listing pages to visit, starting at page 1.

    Returns:
        list: The ``href`` of the first ``<a>`` inside every matching product
        card, in page order, excluding promo click-tracking links.
    """
    promo_marker = "https://ta.tokopedia.com/promo/v1/clicks/"
    listing_url = url_main.partition('?')[0]  # category URL without its query string
    collected = []

    # Start Chrome; chromedriver must be resolvable (e.g. on PATH).
    browser = webdriver.Chrome()

    try:
        for page in range(1, total_pages + 1):
            page_url = f"{listing_url}?ob=5&page={page}"
            print(f"Processing page {page}/{total_pages}: {page_url}")

            browser.get(page_url)
            sleep(3)

            # Scroll down in small steps, pausing after each one, before
            # the page source is captured.
            for _ in range(20):
                browser.execute_script("window.scrollBy(0, 250)")
                sleep(1)

            # Parse the page as it looks once scrolling has finished.
            parsed = BeautifulSoup(browser.page_source, "html.parser")

            # Product cards are located by their CSS class string.
            for card in parsed.find_all('div', {"class": "css-bk6tzz e1nlzfl2"}):
                link = card.find('a')
                if not link or not link.get('href'):
                    continue

                href = link['href']
                if promo_marker in href:
                    print(f"Skipping promo URL: {href}")
                    continue

                collected.append(href)
                print(f"URL added: {href}")
    finally:
        # Always close the browser, even when a page fails midway.
        browser.quit()

    print(f"Total product URLs collected: {len(collected)}")
    return collected

In [3]:
def scrape_reviews_and_ratings(product_urls):
    """
    Collect review text and rating labels from each product page.

    For every URL the page is opened in Chrome, scrolled step by step, parsed
    with BeautifulSoup, and then advanced through the review pagination until
    the "next" button is disabled or cannot be found.

    Args:
        product_urls: Iterable of product page URLs to visit.

    Returns:
        pandas.DataFrame: Columns ``Review`` and ``Rating``, one row per
        review container found. A missing review text or rating is stored as
        the string ``'None'``.
    """
    next_button_xpath = './/button[contains(@class, "css-16uzo3v-unf-pagination-item") and @aria-label="Laman berikutnya"]'
    review_texts = []
    rating_labels = []

    browser = webdriver.Chrome()

    try:
        for product_url in product_urls:
            print(f"Processing: {product_url}")
            browser.get(product_url)

            keep_paging = True
            while keep_paging:
                # Scroll in small steps, pausing after each, before reading
                # the page source.
                for _ in range(20):
                    browser.execute_script("window.scrollBy(0, 250)")
                    sleep(1)

                parsed = BeautifulSoup(browser.page_source, "html.parser")

                # One container per review: pull its text and rating label.
                for card in parsed.find_all('div', {"class": "css-1k41fl7"}):
                    text_node = card.find('span', {"data-testid": "lblItemUlasan"})
                    if text_node:
                        review_texts.append(text_node.get_text())
                    else:
                        review_texts.append('None')

                    rating_node = card.find('div', {"class": "rating"})
                    if rating_node:
                        rating_labels.append(rating_node.get('aria-label'))
                    else:
                        rating_labels.append('None')

                # Move to the next review page, or stop when there is none.
                try:
                    pager = browser.find_element(By.CLASS_NAME, "css-1xqkwi8")
                    next_button = pager.find_element(By.XPATH, next_button_xpath)
                    if next_button.get_attribute("disabled"):
                        print("No more pages to navigate for this product.")
                        keep_paging = False
                    else:
                        # Click via JavaScript after centring the button.
                        browser.execute_script("arguments[0].scrollIntoView({block: 'center'});", next_button)
                        browser.execute_script("arguments[0].click();", next_button)
                        sleep(2)
                except (NoSuchElementException, TimeoutException):
                    print("No 'Next' button found. Moving to next product.")
                    keep_paging = False

    finally:
        # Close the browser whether or not scraping finished cleanly.
        browser.quit()

    print(f"Scraped {len(review_texts)} reviews and {len(rating_labels)} ratings.")
    return pd.DataFrame({'Review': review_texts, 'Rating': rating_labels})

In [4]:
def main(url_main, total_pages=100):
    """
    Collect product URLs for one or more category pages, then scrape their reviews.

    Args:
        url_main: A category URL, or an iterable of category URLs. A bare
            string is treated as a single URL rather than iterated character
            by character.
        total_pages: Number of listing pages to crawl per category URL.
            Defaults to 100, the value previously hard-coded here.

    Returns:
        Whatever ``scrape_reviews_and_ratings`` returns for the combined list
        of product URLs gathered from every category.
    """
    if isinstance(url_main, str):
        url_main = [url_main]

    # Accumulate across categories. Reassigning inside the loop would keep
    # only the last category's URLs and leave the name undefined when
    # url_main is empty.
    product_urls = []
    for url in url_main:
        product_urls.extend(get_product_urls(url, total_pages))

    # Scrape reviews and ratings for every collected product.
    return scrape_reviews_and_ratings(product_urls)

In [None]:
# Category listing URL(s) to crawl. get_product_urls drops everything from the
# first '?' onwards and rebuilds the query as ?ob=5&page=<n>, so the query
# string written here is not what gets requested.
urlss = ['https://www.tokopedia.com/p/fashion-pria/sepatu-pria/sneakers-pria?ob=5&page=1']

# Run the full crawl. main asks for 100 listing pages per category, and each
# listing page waits 3 s plus 20 one-second scroll pauses before parsing, so
# this cell runs for a long time.
data = main(urlss)

# Optionally, save the data to a CSV file
data.to_csv("reviews.csv", index=False)

Processing page 1/100: https://www.tokopedia.com/p/fashion-pria/sepatu-pria/sneakers-pria?ob=5&page=1
Skipping promo URL: https://ta.tokopedia.com/promo/v1/clicks/8a-xgVY2gmUEHsU7oAjFH_1Nbm-xgVY789CBUsthbm-Orfua9fBjUstiHmUDUSz2Q31i6sJRHpeFopyaopKDUMoFyaUEH_nFHmFircYpq9z2Qfdi6sJDUMVDgaUEUMoxPcuSQR-N9f-N9RCaQfzOyReibm-XP3Oig9-wQfgwy3zpUsthoZFiQSuWyMua9fVjrOYag9Ji6sJObm-sy9zwq3zpUs2QH_rO6ZFh6AnFbAJdoAuPbm-X9foxQMz2gcV7guYxgIHi6sJ7HmFiy3-wPcupPmUEUjdibm-FQRo79fVDgaUEUMzBgiUDUSgBrSo2Qfdi6i-fHiUDUMVj9RosQR-BUstpHsHX6_1aHsjOH_rfHprdbm-pHOYDQfri6i-B812kgJxGgBBXZSgjH7NDZ325q1ONoI2o8jOE_92o8jOE_92o8jOE_92oH1B2PfBgHO-N3Ao6QVByZM2xe7jfZ32oP1NEe9268jVE_S2C8jNEe9268jVE_9B-r7BWPchB3czfyfOZgMHa_SgsQuu2_fB-P7B2PfBiQ_BO3_-uq1Y2Z_VoqBBR_92oo1Y_HA-6zcrp_Vzz81NJe9zgqj1a_MjFHB2k3_-6zchk_jP-H1N1Z3BHe72fyfODQMV9o3gqzOgR3A-Dq7BkQfBoe7BpZ37N83V9gICiQABRyf7Nqfz9_sCyHMh0Z325q1OAZ9o-Q_BNyuPjrc-D692xzpBR3A-Dq7BkQfBoe7BpZ3NcHu2yZsuyHO-t3sooq1Y2ZMgoucrF_BydPOxEeMgozsBM_1ydPOxEeMgoe7BpZ3N6qMUpZMhyHj2Nysoj8B2_Z_g-qjON_

KeyboardInterrupt: 

In [11]:
# Preview the last 10 rows of the scraped data.
# NOTE(review): the saved output shows an "Unnamed: 0" column that the
# DataFrame built by scrape_reviews_and_ratings does not contain; presumably
# `data` was reloaded from a CSV in a cell not shown here -- confirm.
data.tail(10)

Unnamed: 0,Review,Rating
44,Modelnya keren! Bahan nya nyaman dipakai pas d...,bintang 5
45,menurut saya pribadi sih produk sangat oke kua...,bintang 4
46,packingnya baik walaupun ya lakbannya gak nutu...,bintang 4
47,"baru pertama kali nyoba brand lokal, dan menco...",bintang 5
48,"Barang sudah sampai, Salut Box nya nyampe sini...",bintang 5
49,Mantap min barang sesuai ekspektasi💯 pokoknya ...,bintang 5
50,"mantap model nya cakep lah, untuk kenyamananny...",bintang 5
51,enak dipakai tapi ya masih ada kurangnya dibag...,bintang 5
52,"barang bagus sesuai,empuk pas dipake,packaging...",bintang 5
53,"satu kata, mantabb\n\nbahan oke\nkenyamanan ok...",bintang 5
