## Imports

In [None]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager


## Getting the links for each laptop

In [16]:
# Collect the product-page URL of every laptop on the listing page.
# The page is scrolled to the bottom repeatedly; each pass re-reads all product
# tiles, and the loop stops once a scroll no longer increases the page height.

# Seconds to wait after each scroll before reading the page again
SCROLL_PAUSE_TIME = 2
# A set de-duplicates links, since every pass re-reads tiles already seen
product_links = set()

# Set up Selenium WebDriver
driver = webdriver.Chrome()

# try/finally guarantees the browser is closed even if navigation, a script
# call, or an element lookup raises part-way through the crawl.
try:
    driver.get("https://www.jarir.com/sa-en/computers-peripherals/laptops.html")
    last_height = driver.execute_script("return document.body.scrollHeight")

    while True:
        # Scroll down to the bottom of the page
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(SCROLL_PAUSE_TIME)  # Wait for page to load

        # Extract product links from every tile currently in the DOM
        products = driver.find_elements(By.CSS_SELECTOR, "a.product-tile__link")
        for product in products:
            link = product.get_attribute("href")
            if link:  # skip anchors without an href
                product_links.add(link)

        # Compare the new scroll height with the previous one; an unchanged
        # height is taken to mean that no further products were loaded.
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
finally:
    # Close the driver
    driver.quit()


In [17]:
# Convert the set of product links to a single-column DataFrame.
# The links are sorted first: iteration order of a set is arbitrary, so
# without sorting the row order (and the saved CSV) would differ between runs.
product_links_df = pd.DataFrame(sorted(product_links), columns=["Product Links"])

In [18]:
# Write the links table to disk; index=False keeps the row index out of the file
product_links_df.to_csv(path_or_buf="product_links.csv", index=False)

In [19]:
# Print confirmation, reporting the number of unique links held in product_links
print(f"Saved {len(product_links)} product links to 'product_links.csv'")

Saved 205 product links to 'product_links.csv'


## Using the links to get the product info

In [None]:
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# Visit every saved product link, read the title, price and specification
# table from the page, and write one row per product to 'product_details.csv'.

# XPath of the "Show more" toggle inside the specification card. It is used
# both to find the toggle and to wait for it to disappear after the click.
SHOW_MORE_XPATH = (
    '//div[@data-card="specification"]'
    '//a[contains(@class, "card__show")]/span[text()="Show more"]'
)

# Load the CSV file containing the product links (done before the browser is
# started so a missing or invalid file does not leave a browser running)
product_links_df = pd.read_csv("product_links.csv")
product_links = product_links_df["Product Links"].tolist()

# One dict per product; the union of all keys becomes the DataFrame columns
product_data = []

# Set once the consent overlay has been dismissed so later pages skip the
# 10-second wait for it.
# NOTE(review): assumes the consent choice persists for the browser session — confirm.
cookies_accepted = False

# Set up Selenium WebDriver
driver = webdriver.Chrome()

# try/finally guarantees the browser is closed even if a page load raises
try:
    wait = WebDriverWait(driver, 10)

    # Iterate through each product link
    for link in product_links:
        driver.get(link)
        time.sleep(2)  # Wait for the page to load

        # Handle cookie consent overlay (only until it has been accepted once)
        if not cookies_accepted:
            try:
                # Wait for the "Accept all" button to be visible and clickable
                accept_button = wait.until(
                    EC.element_to_be_clickable((By.CSS_SELECTOR, ".cmpboxbtnyes"))
                )
                # Click the "Accept all" button to close the cookie consent overlay
                accept_button.click()
                cookies_accepted = True
                time.sleep(2)  # Wait for the overlay to disappear
            except Exception as e:
                print(f"No cookie overlay found for {link}: {e}")

        # Try to locate and click the "Show more" button inside the specification div
        try:
            # presence_of_all_elements_located never yields an empty list: when
            # nothing matches, the wait times out. Map that timeout to "no
            # button" so it is reported as such rather than as a click error.
            try:
                show_more_buttons = wait.until(
                    EC.presence_of_all_elements_located((By.XPATH, SHOW_MORE_XPATH))
                )
            except TimeoutException:
                show_more_buttons = []

            if show_more_buttons:
                # Click the first "Show more" button found in the specification div
                show_more_buttons[0].click()
                print("Clicked the 'Show more' button in specification section")
                # Wait until the "Show more" label is no longer visible
                wait.until(
                    EC.invisibility_of_element_located((By.XPATH, SHOW_MORE_XPATH))
                )
                print("Specification content loaded")
            else:
                print(f"No 'Show more' button found in specification section for {link}")
        except Exception as e:
            # Best effort: a failed expand must not stop the scrape of this page
            print(f"Error clicking the 'Show more' button for {link}: {e}")

        # Extract the product title
        try:
            product_title = driver.find_element(By.CSS_SELECTOR, "h2.product-title__title").text.strip()
        except Exception as e:
            product_title = "Not available"
            print(f"Error extracting product title for {link}: {e}")

        # Extract the product price
        try:
            product_price = driver.find_element(By.CSS_SELECTOR, ".price_alignment .price__currency + span").text.strip()
        except Exception as e:
            product_price = "Not available"
            print(f"Error extracting product price for {link}: {e}")

        # Scroll to the bottom of the page after clicking the "Show more" button
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)  # Wait for the page to fully load and new content to appear

        # Find the rows of the product specification table
        rows = driver.find_elements(By.CSS_SELECTOR, "table.table--info.table--bordered-bottom .table__row")

        # Dictionary to hold the details for this product
        product_info = {
            "Product Title": product_title,
            "Product Price": product_price
        }

        for row in rows:
            # Each row contributes one column: header cell text -> data cell text
            try:
                column_name = row.find_element(By.CSS_SELECTOR, "th.table__item").text.strip()
                value = row.find_element(By.CSS_SELECTOR, "td.table__item").text.strip()
                product_info[column_name] = value
            except Exception as e:
                print(f"Skipping a row due to missing data: {e}")

        # Append the product info to the product_data list
        product_data.append(product_info)
finally:
    # Close the driver
    driver.quit()

# Convert the list of dictionaries to a DataFrame
product_details_df = pd.DataFrame(product_data)

# Save the DataFrame to a CSV file
product_details_df.to_csv("product_details.csv", index=False)

# Print confirmation
print("Saved product details to 'product_details.csv'")


In [None]:
# Display the scraped product table (a bare last expression is rendered by the notebook)
product_details_df

In [None]:
# List the column labels of the scraped table
product_details_df.columns

In [None]:
# (number of rows, number of columns) of the scraped table
product_details_df.shape