In [None]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, StaleElementReferenceException
import time
import re

# Define the path to the chromedriver
# NOTE(review): this is a machine-specific absolute Windows path; it has to be
# edited before the script can run on any other machine.
chrome_driver_path = r"C:\Users\New\Downloads\chromedriver-win64\chromedriver-win64\chromedriver.exe"
# Service object passed to webdriver.Chrome(service=...) in main().
service = Service(executable_path=chrome_driver_path)

def amazon_login(driver, email, password):
    """Submit the Amazon.in sign-in form with the given credentials.

    Opens the sign-in page, types the email, clicks "continue", types the
    password and clicks the submit button.

    Args:
        driver: Selenium WebDriver instance used to drive the browser.
        email: Value typed into the ``ap_email`` field.
        password: Value typed into the ``ap_password`` field.

    Raises:
        SystemExit: With exit status 1 if any step fails. The error is
            printed and the driver is quit before exiting.
    """
    try:
        # Open the Amazon login page
        driver.get("https://www.amazon.in/ap/signin")

        # Enter email
        email_input = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "ap_email")))
        email_input.send_keys(email)
        driver.find_element(By.ID, "continue").click()

        # Enter password
        password_input = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "ap_password")))
        password_input.send_keys(password)
        driver.find_element(By.ID, "signInSubmit").click()

        # NOTE(review): this only means the form was submitted without an
        # exception; nothing here checks that Amazon accepted the
        # credentials (wrong password, CAPTCHA or OTP pages are not detected).
        print("Login successful.")
    except Exception as e:
        print("Login failed:", e)
        driver.quit()
        # `exit()` is an interactive helper injected by the `site` module: it
        # is not guaranteed to exist and it exits with status 0. Raise
        # SystemExit directly with a non-zero status instead.
        raise SystemExit(1)

# Helper: read a single field out of one review element
def _review_field(review, selector, attribute=None):
    """Return one field of a review element, or "N/A" if it cannot be read.

    Args:
        review: Selenium element for a single review container.
        selector: CSS selector, relative to ``review``, of the wanted node.
        attribute: If given, return this attribute of the node; otherwise
            return the node's text.
    """
    try:
        node = review.find_element(By.CSS_SELECTOR, selector)
        if attribute is None:
            return node.text
        return node.get_attribute(attribute)
    except Exception:
        # Best effort: a missing or unreadable field must not abort the whole
        # scrape. `Exception` (not a bare except) so that Ctrl+C and
        # SystemExit still propagate.
        return "N/A"


# Function to scrape reviews of a specific product
def scrape_product_reviews(driver, product_url):
    """Collect all reviews of a product by walking the paginated review list.

    Loads ``product_url``, follows the "see all reviews" footer link, then
    reads every review on each page and clicks "next" until no clickable
    next-page link is found within 10 seconds.

    Args:
        driver: Selenium WebDriver instance used to drive the browser.
        product_url: URL of the product page.

    Returns:
        A list of dicts with the keys "Reviewer Name", "Review Date",
        "Review Text", "Review Rating" and "Review Image". A field that
        could not be read is "N/A". If an error interrupts the scrape, it is
        printed and the reviews gathered so far are returned.
    """
    driver.get(product_url)
    time.sleep(3)  # Allow the page to load

    reviews_data = []
    try:
        reviews_tab = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "//a[@data-hook='see-all-reviews-link-foot']"))
        )
        reviews_tab.click()
        time.sleep(3)

        while True:
            for review in driver.find_elements(By.XPATH, "//div[@data-hook='review']"):
                reviews_data.append({
                    "Reviewer Name": _review_field(review, "span.a-profile-name"),
                    "Review Date": _review_field(review, "span.review-date"),
                    "Review Text": _review_field(review, "span.review-text-content span"),
                    "Review Rating": _review_field(review, "i.review-rating span"),
                    "Review Image": _review_field(review, "img.review-image-tile", attribute="src"),
                })

            try:
                next_button = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.CSS_SELECTOR, "li.a-last a"))
                )
                next_button.click()
            except Exception:
                # No clickable "next" link (or the click failed): treat this
                # as the last page. KeyboardInterrupt is deliberately not
                # caught so the user can abort a long scrape.
                break
            time.sleep(3)  # Allow the next page to load

    except Exception as e:
        print("Error while scraping reviews:", e)

    return reviews_data

# Helper: derive the output file name from the product URL
def _reviews_filename(product_url):
    """Build the Excel file name for the reviews of ``product_url``.

    Uses the first path segment of the URL (the product slug on a typical
    Amazon product URL), strips characters that are illegal in Windows file
    names, and falls back to "product" when no usable segment exists.
    """
    from urllib.parse import urlparse

    segments = [part for part in urlparse(product_url).path.split('/') if part]
    slug = re.sub(r'[\\/:*?"<>|]', '', segments[0]) if segments else ''
    return f"{slug or 'product'}_reviews.xlsx"


# Main function to scrape reviews for a specific product
def main():
    """Prompt for credentials and a product URL, scrape reviews, save to Excel.

    The reviews are written to ``<product-slug>_reviews.xlsx`` in the current
    working directory. The browser is always closed, even when scraping or
    the export fails.
    """
    import getpass

    # Ask for credentials before launching the browser, and read the password
    # without echoing it to the terminal or notebook output.
    email = input("Enter your Amazon email: ")
    password = getpass.getpass("Enter your Amazon password: ")

    driver = webdriver.Chrome(service=service)

    # Log in to Amazon (on failure this quits the driver and exits itself)
    amazon_login(driver, email, password)

    try:
        product_url = input("Enter the URL of the product to scrape reviews: ").strip()

        reviews = scrape_product_reviews(driver, product_url)

        df = pd.DataFrame(reviews)

        # Save to Excel file with a safe product name
        output_filename = _reviews_filename(product_url)
        df.to_excel(output_filename, index=False)

        print(f"Reviews saved to {output_filename}")
    finally:
        # Release the browser even if scraping or the export raised.
        driver.quit()

if __name__ == "__main__":
    main()


Enter your Amazon email:  <redacted>
Enter your Amazon password:  <redacted>


Login successful.


Enter the URL of the product to scrape reviews:  https://www.amazon.in/ASIAN-Mens-Wonder-13-1Green-Sneaker/dp/B01MRN1BY4/ref=sr_1_1_sspa?crid=Q4RS2XP085XG&dib=eyJ2IjoiMSJ9.FXI1Ik-PWneazYS0StAbHiAEqyXSfaVqzw0nJ96mgJVdLqwz2riBLzz9jhzNSvfl4ozFvUwJA8KEIJb0PoHcOaouQQ5JMU36RXSc9R0v9mkXTVHVUfhWCuNC6bMj7IwJGlNlER6P211xQrFkJHmymgDSUOI01CGz0kzQUPNS8uUNtcTU-Ovkezu9RrVY5Cf0I2xl9FcthbLG34DI0JqmJEHwRHXakZ4acbdvy1C4UowaC1RzBZa6CZRKH2bZf-7Xf8j9mH1LsaX7flKmIuHSFDl72TeacQpJeQODWgLkes8.KYu7idrdqqwIglCc3PrOXf5T6zuR7vNFTthrHp6p6UQ&dib_tag=se&keywords=ASIAN+Men%27s+Wonder-13+Sports+Running+Shoes&nsdOptOutParam=true&qid=1735409942&sprefix=asian+men%27s+wonder-13+sports+running+shoes%2Caps%2C223&sr=8-1-spons&sp_csd=d2lkZ2V0TmFtZT1zcF9hdGY&psc=1


In [None]:
https://www.amazon.in/ASIAN-Mens-Wonder-13-1Green-Sneaker/dp/B01MRN1BY4/ref=sr_1_1_sspa?crid=Q4RS2XP085XG&dib=eyJ2IjoiMSJ9.FXI1Ik-PWneazYS0StAbHiAEqyXSfaVqzw0nJ96mgJVdLqwz2riBLzz9jhzNSvfl4ozFvUwJA8KEIJb0PoHcOaouQQ5JMU36RXSc9R0v9mkXTVHVUfhWCuNC6bMj7IwJGlNlER6P211xQrFkJHmymgDSUOI01CGz0kzQUPNS8uUNtcTU-Ovkezu9RrVY5Cf0I2xl9FcthbLG34DI0JqmJEHwRHXakZ4acbdvy1C4UowaC1RzBZa6CZRKH2bZf-7Xf8j9mH1LsaX7flKmIuHSFDl72TeacQpJeQODWgLkes8.KYu7idrdqqwIglCc3PrOXf5T6zuR7vNFTthrHp6p6UQ&dib_tag=se&keywords=ASIAN+Men%27s+Wonder-13+Sports+Running+Shoes&nsdOptOutParam=true&qid=1735409942&sprefix=asian+men%27s+wonder-13+sports+running+shoes%2Caps%2C223&sr=8-1-spons&sp_csd=d2lkZ2V0TmFtZT1zcF9hdGY&psc=1