In [8]:
import time
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys

In [9]:
# Product title lookup
def get_title(soup):
    """Return the product title found in ``soup``.

    Looks up the ``<span id="productTitle">`` element and returns its text
    with surrounding whitespace removed.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The stripped title text, or ``""`` when the lookup raises
        ``AttributeError`` (for example because ``find`` returned ``None``).
    """
    try:
        title_tag = soup.find("span", attrs={"id": "productTitle"})
        return title_tag.text.strip()
    except AttributeError:
        # No matching element (or no usable text on it): report an empty title.
        return ""

# Function to extract Product Price using Selenium
def get_price_selenium(driver):
    """Return the price shown on the page currently loaded in ``driver``.

    The first ``<span class="a-offscreen">`` element is located via XPath.
    Its visible text is used when present; otherwise the element's
    ``textContent`` attribute is read instead.

    Args:
        driver: Selenium WebDriver (or compatible object) positioned on a
            product page.

    Returns:
        The stripped price string, or ``""`` if the element cannot be found
        or read.
    """
    try:
        price_element = driver.find_element(By.XPATH, "//span[@class='a-offscreen']")
        price = price_element.text.strip()
        if not price:
            # WebElement.text only returns text that is rendered visibly; the
            # class name suggests this span is positioned off-screen, in which
            # case .text is empty even though the DOM node holds the price.
            price = (price_element.get_attribute("textContent") or "").strip()
    except Exception:
        # Best-effort scrape: any lookup/driver error yields an empty price,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        price = ""
    return price

# Function to extract Product Rating
def get_rating(soup):
    """Return the product rating text found in ``soup``.

    Two lookups are tried in order and the first one that yields a string
    wins:

    1. ``<i class="a-icon a-icon-star a-star-4-5">``
    2. ``<span class="a-icon-alt">``

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The stripped rating string, or ``""`` when neither lookup produces
        one.
    """
    candidates = (
        ("i", {'class': 'a-icon a-icon-star a-star-4-5'}),
        ("span", {'class': 'a-icon-alt'}),
    )
    for tag_name, attrs in candidates:
        try:
            return soup.find(tag_name, attrs=attrs).string.strip()
        except AttributeError:
            # Element missing, or its .string is None: try the next selector.
            # Only AttributeError is handled (matching the other extractors),
            # so unrelated errors are not silently hidden.
            continue
    return ""

# User review count lookup
def get_review_count(soup):
    """Return the review-count text found in ``soup``.

    Reads the ``.string`` of the ``<span id="acrCustomerReviewText">``
    element and strips surrounding whitespace.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The stripped review-count string, or ``""`` when the lookup raises
        ``AttributeError`` (element missing or ``.string`` is ``None``).
    """
    try:
        count_tag = soup.find("span", attrs={"id": "acrCustomerReviewText"})
        return count_tag.string.strip()
    except AttributeError:
        return ""

# Availability status lookup
def get_availability(soup):
    """Return the availability text found in ``soup``.

    Finds the ``<div id="availability">`` container, then the first
    ``<span>`` inside it, and returns that span's ``.string`` stripped of
    surrounding whitespace.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The stripped availability string, or ``"Not Available"`` when any
        step of the lookup raises ``AttributeError``.
    """
    try:
        container = soup.find("div", attrs={"id": "availability"})
        status_span = container.find("span")
        return status_span.string.strip()
    except AttributeError:
        return "Not Available"

In [None]:
if __name__ == '__main__':
    # Set up a headless Selenium Chrome WebDriver.
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    # Chrome's --window-size switch takes "width,height" (comma-separated).
    chrome_options.add_argument("--window-size=1920,1080")

    # Path to the chromedriver binary. When left blank, no path is passed so
    # Selenium resolves the driver on its own instead of trying to launch ' '.
    chrome_driver_path = ' '
    driver_service = Service(chrome_driver_path) if chrome_driver_path.strip() else Service()
    driver = webdriver.Chrome(service=driver_service, options=chrome_options)

    # The webpage URL (search results page)
    URL = "https://www.amazon.com/s?k=playstation+4&ref=nb_sb_noss_2"

    # One list per output column; rows are appended in lock-step below.
    d = {"title": [], "price": [], "rating": [], "reviews": [], "availability": []}

    try:
        # Open the search page and give it a moment to render.
        driver.get(URL)
        time.sleep(2)

        # Fetch product links as a list of WebElements.
        links = driver.find_elements(By.CSS_SELECTOR, "a.a-link-normal.s-no-outline")

        # Keep only anchors that actually carry an href (get_attribute may
        # return None) and drop repeated URLs while preserving page order.
        hrefs = (link.get_attribute("href") for link in links)
        links_list = list(dict.fromkeys(href for href in hrefs if href))

        # Visit each product page and extract its details.
        for link in links_list:
            driver.get(link)
            time.sleep(2)  # Wait for page to load

            # Parse the page source after JavaScript has rendered.
            page_source = driver.page_source
            soup = BeautifulSoup(page_source, "html.parser")

            d['title'].append(get_title(soup))
            d['price'].append(get_price_selenium(driver))
            d['rating'].append(get_rating(soup))
            d['reviews'].append(get_review_count(soup))
            d['availability'].append(get_availability(soup))
    finally:
        # Always shut the browser down, even if a page load raised.
        driver.quit()

    # Convert data to DataFrame and save to CSV.
    amazon_df = pd.DataFrame.from_dict(d)
    # Assign the result back rather than calling replace(..., inplace=True) on
    # the column selection: under pandas Copy-on-Write that in-place call acts
    # on a temporary and leaves the frame unchanged.
    amazon_df['title'] = amazon_df['title'].replace('', np.nan)
    # Rows without a title are treated as failed scrapes and dropped.
    amazon_df = amazon_df.dropna(subset=['title'])
    amazon_df.to_csv("amazon_data_selenium.csv", header=True, index=False)