In [13]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager

# Products to scrape. Each entry is a dict with the page "url", the "source"
# marketplace and the "product_name" label that is written to the dataset.
# Names are kept exactly as recorded (some end with a space) because they are
# emitted verbatim in the progress output and in the CSV rows.
product_urls = [
    {"url": page_url, "source": "Amazon", "product_name": title}
    for page_url, title in (
        ("https://www.amazon.in/dp/B0DRW3K6FY?psc=1", "U.S. Polo Assn. Men's Sneaker"),
        ("https://www.amazon.in/dp/B0DJRKQNJ6?psc=1", "Kaaytee Premium Neck Pillow "),
        ("https://www.amazon.in/gp/product/B08L5WHFT9/", "Apple iPhone 12 (64GB) - Black"),
        ("https://www.amazon.in/gp/product/B09CGKT1XF/", "Jabra Elite 3 Bluetooth"),
        ("https://www.amazon.in/dp/B0CDYYY7PM?psc=1", "GreenFinity Basil/Sabja Seeds"),
        ("https://www.amazon.in/dp/B0DGX5X7NZ?psc=1", "HP 15 AI Laptop"),
        ("https://www.amazon.in/gp/product/B09TVVGXWS/", "Oneplus Bullets Z2 Bluetooth Wireless in Ear Earphones with Mic"),
        ("https://www.amazon.in/dp/B0DZ754B4S?psc=1", "Portable Air Conditioners "),
        ("https://www.amazon.in/dp/B0DH1ZJRHQ?psc=1", "PAGALY Ultra HD Projector"),
        ("https://www.amazon.in/dp/B0DXZXR5VB?psc=1", "PASLDA Wireless Carplay Adapter"),
        ("https://www.amazon.in/dp/B0BQ1MD93L?psc=1", "Classmate Short Notebook"),
        ("https://www.amazon.in/dp/B0C4Y1VTNM?psc=1", "Apsara A4 Size Long Notebook "),
        ("https://www.amazon.in/dp/B0DNWLHHHP?psc=1", "Baybee Run Battery Operated Jeep"),
        ("https://www.amazon.in/dp/B0D81KTFY5?psc=1", "Giordano Fashionista Collection Analog Bracelet Wrist Watch"),
        ("https://www.amazon.in/dp/B0CQYG9MBS?psc=1", "Blue Heaven Cookie & Souffle Matte Lipstick"),
        ("https://www.amazon.in/dp/B0CXJ4LRB2?psc=1", "Pinkmint Men Cotton Soild Spread Collar Long Sleeve Shirt "),
        ("https://www.amazon.in/dp/B0DHRR8DW6?psc=1", "SUPER GOLDEN Office Table "),
        ("https://www.amazon.in/gp/product/B07GNDDNMW/", "SIHOO High Back Home Office Chair"),
        ("https://www.amazon.in/dp/B0DYPCPQ1G?psc=1", "MiNi CoOlEr"),
        ("https://www.amazon.in/dp/B0DWN3Y394?psc=1", "Baybee 2 in 1 Foldable Kids Playpen for Babies"),
    )
]


# Build the Chrome options: each switch below is passed to the browser in
# this order, ending with the custom User-Agent string.
options = Options()
for _chrome_flag in (
    "--headless",
    "--disable-gpu",
    "--no-sandbox",
    "--log-level=3",
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
):
    options.add_argument(_chrome_flag)

# Create the shared WebDriver; webdriver_manager's install() result is handed
# to the Selenium Service used to launch Chrome.
_chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=_chrome_service, options=options)

# Function to extract Amazon reviews
def extract_amazon_reviews(url, max_pages=3):
    """Collect review texts from a product page, following "next page" links.

    Uses the module-level ``driver`` to load ``url``, reads the text of every
    element matching ``.review-text-content span`` and then clicks the
    ``li.a-last a`` pagination link, for at most ``max_pages`` pages.

    Args:
        url: Address of the product page to load.
        max_pages: Upper bound on the number of review pages to read.

    Returns:
        A list of non-empty, whitespace-stripped review strings in the order
        they were found. Any error while loading or scraping is printed and
        the reviews gathered up to that point are returned, so one failing
        product does not abort a batch run.
    """
    # Imported locally so this function stays self-contained; selenium is
    # already a dependency of this file.
    from selenium.common.exceptions import NoSuchElementException

    review_locator = (By.CSS_SELECTOR, ".review-text-content span")
    reviews = []

    try:
        # Navigation is inside the try so a failed page load is reported the
        # same way as a failed scrape instead of escaping to the caller.
        driver.get(url)
        time.sleep(3)

        for page_index in range(max_pages):  # Loop for multiple pages
            WebDriverWait(driver, 5).until(
                EC.presence_of_element_located(review_locator)
            )

            for element in driver.find_elements(*review_locator):
                text = element.text.strip()
                if text:  # skip spans that render no text
                    reviews.append(text)

            # Nothing more will be read after the last allowed page, so do
            # not click through (and wait) for a page that is never scraped.
            if page_index == max_pages - 1:
                break

            # Click "Next Page" if exists
            try:
                next_page = driver.find_element(By.CSS_SELECTOR, "li.a-last a")
            except NoSuchElementException:
                break  # No more pages
            driver.execute_script("arguments[0].click();", next_page)
            time.sleep(3)  # Wait for page load

    except Exception as e:
        print(f"Error extracting Amazon reviews: {e}")

    return reviews


# Store extracted data: one row per review with its product name and source
review_records = []

# Loop through product URLs
try:
    for product in product_urls:
        url = product["url"]
        source = product["source"]
        product_name = product["product_name"]

        print(f"Scraping reviews for: {product_name} from {source}")

        # Only amazon.in pages have an extractor; other URLs contribute no rows.
        if "amazon.in" in url:
            reviews = extract_amazon_reviews(url)
        else:
            reviews = []

        for review in reviews:
            review_records.append({
                "product_name": product_name,
                "reviews": review,
                "source": source
            })
finally:
    # Close WebDriver even when scraping is interrupted or raises, so the
    # browser process is not left running.
    driver.quit()

# Convert to Pandas DataFrame; explicit columns keep the CSV header present
# even when no reviews were collected.
df_reviews = pd.DataFrame(review_records, columns=["product_name", "reviews", "source"])

# Save to CSV with UTF-8 encoding
df_reviews.to_csv("Reviews_Dataset(20_Products).csv", index=False, encoding="utf-8")

print(" Reviews dataset saved to 'Reviews_Dataset(20_Products).csv'")


Scraping reviews for: U.S. Polo Assn. Men's Sneaker from Amazon
Scraping reviews for: Kaaytee Premium Neck Pillow  from Amazon
Scraping reviews for: Apple iPhone 12 (64GB) - Black from Amazon
Scraping reviews for: Jabra Elite 3 Bluetooth from Amazon
Scraping reviews for: GreenFinity Basil/Sabja Seeds from Amazon
Scraping reviews for: HP 15 AI Laptop from Amazon
Scraping reviews for: Oneplus Bullets Z2 Bluetooth Wireless in Ear Earphones with Mic from Amazon
Scraping reviews for: Portable Air Conditioners  from Amazon
Scraping reviews for: PAGALY Ultra HD Projector from Amazon
Scraping reviews for: PASLDA Wireless Carplay Adapter from Amazon
Scraping reviews for: Classmate Short Notebook from Amazon
Scraping reviews for: Apsara A4 Size Long Notebook  from Amazon
Scraping reviews for: Baybee Run Battery Operated Jeep from Amazon
Scraping reviews for: Giordano Fashionista Collection Analog Bracelet Wrist Watch from Amazon
Scraping reviews for: Blue Heaven Cookie & Souffle Matte Lipstick f