In [2]:
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import time
from bs4 import BeautifulSoup
import pandas as pd
import re
import requests
# import dot

# Konfigurasi akun untuk login (isi dengan akun Anda)
SHOPEE_LOGIN = {
    "phone": "6285133796123",  # Nomor telepon atau email
    "password": "@Zulfajri@123"  # Password akun
}

def extract_ids_from_url(url):
    """Ekstrak shop_id dan item_id dari URL produk Shopee."""
    match = re.search(r'i\.(\d+)\.(\d+)', url)
    if match:
        return match.group(1), match.group(2)
    return None, None

def login_to_shopee(driver):
    """Login ke Shopee menggunakan akun yang telah dikonfigurasi."""
    try:
        print("Melakukan login ke Shopee...")
        
        # Jika tidak ada kredensial login, skip login
        if not SHOPEE_LOGIN["phone"] or not SHOPEE_LOGIN["password"]:
            print("Kredensial login tidak tersedia. Silakan isi SHOPEE_LOGIN di kode.")
            print("Mencoba melanjutkan tanpa login...")
            return False
        
        # Tunggu halaman login dimuat
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.NAME, "loginKey"))
        )
        
        # Isi form login
        phone_input = driver.find_element(By.NAME, "loginKey")
        password_input = driver.find_element(By.NAME, "password")
        
        phone_input.clear()
        phone_input.send_keys(SHOPEE_LOGIN["phone"])
        time.sleep(1)
        
        password_input.clear()
        password_input.send_keys(SHOPEE_LOGIN["password"])
        time.sleep(1)
        
        # Klik tombol login
        login_button = driver.find_element(By.XPATH, "//button[contains(@class, 'btn-solid-primary') or contains(text(), 'Log in')]")
        login_button.click()
        
        # Tunggu proses login selesai
        print("Menunggu proses login...")
        time.sleep(5)
        
        # Cek apakah login berhasil
        if "buyer/login" not in driver.current_url:
            print("Login berhasil!")
            return True
        else:
            print("Login gagal atau memerlukan verifikasi tambahan")
            return False
            
    except Exception as e:
        print(f"Error saat login: {e}")
        return False

def scrape_reviews_with_selenium(product_url):
    """Scrape review data menggunakan Selenium dengan login session."""
    
    # Setup Chrome options
    chrome_options = Options()
    # chrome_options.add_argument("--headless")  # Hapus baris ini jika ingin melihat browser
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
    
    # Tambahkan argumen untuk menjaga session
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option('useAutomationExtension', False)
    
    driver = webdriver.Chrome(options=chrome_options)
    driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
    
    reviews_data = []
    
    try:
        # Langsung buka halaman produk tanpa login terlebih dahulu
        print(f"Membuka halaman produk: {product_url}")
        driver.get(product_url)
        driver.maximize_window()
        time.sleep(5)
        
        # Cek apakah di-redirect ke halaman login
        if "buyer/login" in driver.current_url:
            print("Terdeteksi redirect ke halaman login. Mencoba login...")
            login_success = login_to_shopee(driver)
            
            if login_success:
                print("Login berhasil, membuka kembali halaman produk...")
                driver.get(product_url)
                time.sleep(5)
                
                # Cek lagi apakah masih di halaman login
                if "buyer/login" in driver.current_url:
                    print("Masih di halaman login setelah login. Menggunakan fallback API...")
                    shop_id, item_id = extract_ids_from_url(product_url)
                    if shop_id and item_id:
                        return fetch_reviews_api(shop_id, item_id)
                    return []
            else:
                print("Login gagal. Menggunakan fallback API...")
                shop_id, item_id = extract_ids_from_url(product_url)
                if shop_id and item_id:
                    return fetch_reviews_api(shop_id, item_id)
                return []
        else:
            print("Berhasil mengakses halaman produk tanpa perlu login")
        
        # Scroll ke bagian review
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight/2);")
        time.sleep(3)
        
        # Cari dan klik tab review/ulasan
        try:
            # Coba berbagai selector untuk tab review
            review_tab_selectors = [
                "//div[contains(text(), 'Ulasan')]",
                "//div[contains(text(), 'Penilaian Produk')]",
                "//div[contains(text(), 'Review')]", 
                "//span[contains(text(), 'Ulasan')]",
                "//span[contains(text(), 'Review')]",
                "//a[contains(@href, 'ratings')]"
            ]
            
            tab_found = False
            for selector in review_tab_selectors:
                try:
                    review_tab = WebDriverWait(driver, 5).until(
                        EC.element_to_be_clickable((By.XPATH, selector))
                    )
                    review_tab.click()
                    time.sleep(3)
                    tab_found = True
                    print("Tab review ditemukan dan diklik")
                    break
                except:
                    continue
                    
            if not tab_found:
                print("Tab review tidak ditemukan, melanjutkan scraping...")
        except:
            print("Tab review tidak ditemukan, melanjutkan scraping...")
        
        # Scroll untuk memuat lebih banyak review
        for i in range(5):  # Scroll 5 kali untuk memuat lebih banyak review
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
        
        # Parse HTML dengan BeautifulSoup
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        
        # Debug: simpan HTML untuk analisis
        with open('debug_page.html', 'w', encoding='utf-8') as f:
            f.write(soup.prettify())
        print("HTML halaman disimpan ke debug_page.html untuk analisis")
        
        # Cari elemen review dengan berbagai selector yang mungkin
        review_selectors = [
            'div[data-testid*="review"]',
            '.shopee-product-rating',
            '.product-rating-overview__content',
            '.shopee-product-comment-list',
            'div[class*="review"]',
            'div[class*="review-content-wrapper"]',
            'div[class*="rating"]',
            '.shopee-product-rating__content',
            '.item-rating-overview',
            '[data-testid="lblPDPDetailProductRatingNumber"]'
        ]
        
        reviews_found = False
        review_elements = []
        
        for selector in review_selectors:
            review_elements = soup.select(selector)
            if review_elements:
                print(f"Ditemukan {len(review_elements)} elemen review dengan selector: {selector}")
                reviews_found = True
                break
        
        if not reviews_found:
            print("Tidak ditemukan elemen review dengan selector yang ada")
            print("Mencoba fallback dengan API Shopee...")
            # Fallback: gunakan API Shopee
            shop_id, item_id = extract_ids_from_url(product_url)
            if shop_id and item_id:
                return fetch_reviews_api(shop_id, item_id)
        
        # Extract review data dari HTML
        for i, element in enumerate(review_elements[:5]):  # Ambil maksimal 10 review pertama
            try:
                print(f"Elements: {element}")
                
                # Cari komentar
                comment_elem = element.find(string=re.compile(r'.{10,}'))  # Teks dengan minimal 10 karakter
                comment = comment_elem.strip() if comment_elem else "No comment"
                
                reviews_data.append({
                    'comment': comment
                })
                
                # Print review yang ditemukan
                print(f"Comment: {comment}")
                print("-" * 80)
                
            except Exception as e:
                print(f"Error parsing review {i+1}: {e}")
                continue
                
    except Exception as e:
        print(f"Error during scraping: {e}")
    
    finally:
        driver.quit()
    
    return reviews_data

def fetch_reviews_api(shop_id, item_id, limit=20, max_reviews=50):
    """Fallback: Ambil review menggunakan API Shopee."""
    ratings_url = (
        'https://partner.shopeemobile.com/api/v2/product/get_comment?'
        'itemid={item_id}&limit={limit}&offset={offset}&shopid={shop_id}&page_size=10'
    )
    offset = 0
    all_ratings = []
    
    print("Menggunakan API Shopee untuk mengambil review...")
    
    while len(all_ratings) < max_reviews:
        url = ratings_url.format(shop_id=shop_id, item_id=item_id, limit=limit, offset=offset)
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            data = response.json()
        except Exception as e:
            print(f"Error fetching data dari API: {e}")
            break

        ratings = data.get('data', {}).get('ratings', [])
        if not ratings:
            break

        for rating in ratings:
            review_data = {
                'comment': rating.get('comment', '')
            }
            all_ratings.append(review_data)
            
            # Print review yang ditemukan
            print(f"Comment: {review_data['comment']}")
            print('-' * 80)

            if len(all_ratings) >= max_reviews:
                break

        if len(ratings) < limit:
            break
        offset += limit
    
    return all_ratings

def save_reviews_to_csv(reviews_data, filename='shopee_reviews.csv'):
    """Simpan data review ke file CSV."""
    if reviews_data:
        df = pd.DataFrame(reviews_data)
        df.to_csv(filename, index=False, encoding='utf-8-sig')
        print(f"Berhasil menyimpan {len(reviews_data)} review ke {filename}")
    else:
        print("Tidak ada data review untuk disimpan.")

def main():
    # URL produk dari test_scraping.py
    product_url = "https://shopee.co.id/SKINTIFIC-5X-Ceramide-Barrier-Moisturizer-Gel-30g-Cream-pencerah-Wajah-Calming-Whitening-Brightening-Dark-Spot-Pelembab-Wajah-Pemutih-Wajah-Facial-Moisturizer-Day-Cream-Night-Cream-Skincare-BPOM-Niacinamide-with-Centella-Acid-Sooth-Sunburn-i.380266264.22324918043?is_from_login=true"
    
    # product_url = "https://reviews.femaledaily.com/products/moisturizer/gel/skintific/5x-ceramide-barrier-repair-moisture-gel-moisturizer"
    
    print("Memulai scraping review dari:", product_url)
    print("Login akan dilakukan hanya jika diperlukan (redirect ke halaman login)")
    
    # Scrape reviews
    reviews = scrape_reviews_with_selenium(product_url)
    
    if reviews:
        print(f"\nBerhasil mengambil {len(reviews)} review")
        # Simpan ke CSV
        save_reviews_to_csv(reviews)
    else:
        print("Tidak berhasil mengambil review")

# Jalankan fungsi main
main()

Memulai scraping review dari: https://shopee.co.id/SKINTIFIC-5X-Ceramide-Barrier-Moisturizer-Gel-30g-Cream-pencerah-Wajah-Calming-Whitening-Brightening-Dark-Spot-Pelembab-Wajah-Pemutih-Wajah-Facial-Moisturizer-Day-Cream-Night-Cream-Skincare-BPOM-Niacinamide-with-Centella-Acid-Sooth-Sunburn-i.380266264.22324918043?is_from_login=true
Login akan dilakukan hanya jika diperlukan (redirect ke halaman login)
Membuka halaman produk: https://shopee.co.id/SKINTIFIC-5X-Ceramide-Barrier-Moisturizer-Gel-30g-Cream-pencerah-Wajah-Calming-Whitening-Brightening-Dark-Spot-Pelembab-Wajah-Pemutih-Wajah-Facial-Moisturizer-Day-Cream-Night-Cream-Skincare-BPOM-Niacinamide-with-Centella-Acid-Sooth-Sunburn-i.380266264.22324918043?is_from_login=true
Terdeteksi redirect ke halaman login. Mencoba login...
Melakukan login ke Shopee...
Menunggu proses login...
Login berhasil!
Login berhasil, membuka kembali halaman produk...
Masih di halaman login setelah login. Menggunakan fallback API...
Menggunakan API Shopee unt

In [13]:
main('mug')