## imports

In [37]:
# Selenium modules for controlling Chrome browser
from selenium.webdriver import Chrome  # For initializing and controlling the Chrome browser
from selenium import webdriver  # Provides access to the webdriver, allowing interaction with web browsers
from selenium.webdriver.chrome.options import Options  # For configuring Chrome browser options (e.g., headless mode)
from selenium.webdriver.chrome.service import Service  # For managing the ChromeDriver service (e.g., starting, stopping)

# Selenium modules for interacting with web elements
from selenium.webdriver.common.by import By  # For locating elements on a webpage (e.g., By.ID, By.XPATH)
from selenium.webdriver.support.ui import Select  # For interacting with <select> HTML elements (dropdowns)
from selenium.webdriver.support.ui import WebDriverWait  # For implementing explicit waits until a condition is met
from selenium.webdriver.support import expected_conditions as EC  # For defining conditions to wait for (e.g., element visibility)

# Other useful libraries
from fake_useragent import UserAgent  # For generating random user agents to mimic different browsers
import time  # For adding delays (e.g., time.sleep) during the script execution
import requests  # For making HTTP requests to interact with websites directly without using a browser
from bs4 import BeautifulSoup  # For parsing and extracting data from HTML content
import pandas as pd 

## browser Automation with Selenium

In [None]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from fake_useragent import UserAgent
import time

# Set up Chrome options
options = Options()
ua = UserAgent()
userAgent = ua.random

# Set random user-agent and disable notifications
options.add_argument(f'user-agent={userAgent}')
options.add_argument("--disable-notifications")  # Disable notification popups
options.add_argument('--blink-settings=imagesEnabled=false')  # Disable images to save bandwidth
# options.add_argument("--headless")

# Initialize WebDriver with options
driver = webdriver.Chrome(options=options)

# Open the Jarir website
driver.get("https://www.jarir.com/computers-tablets.html")

# List to store the laptop details
laptop_data = []

try:
    # Wait for the page to load and the specific element to appear
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="viewport"]/div[2]/div[2]'))
    )

    # Scroll the page to load more products
    def scroll_page():
        # Scroll down the page by a small amount, simulating user scrolling
        driver.execute_script("window.scrollBy(0, 1000);")
        time.sleep(3)  # Wait for new products to load

    # Scroll and scrape multiple times
    for _ in range(5):  # Adjust the range to scroll more times, or use a while loop for infinite scroll
        scroll_page()

        # Locate the starting point (specific element) where we want to begin scraping
        start_element = driver.find_element(By.XPATH, '//*[@id="viewport"]/div[2]/div[2]')

        # Find all product elements within this container
        product_elements = start_element.find_elements(By.CLASS_NAME, 'product-title__title')

        # Loop through multiple products and extract data
        for product_name in product_elements:
            try:
                # Extracting product price (adjust XPath based on your actual page structure)
                product_price = product_name.find_element(By.XPATH, '../../..//span[contains(@class,"ar-number")]')

                # Find the product-title__info container that holds the specifications
                product_info_container = product_name.find_element(By.XPATH, '../../..//p[contains(@class,"product-title__info")]')

                # Find all the <span> elements inside the product-title__info container
                info_boxes = product_info_container.find_elements(By.CLASS_NAME, 'product-title__info--box')

                # Assigning each product detail to a variable, with 'null' if not found
                details = {
                    'product_name': product_name.text.strip(),
                    'screen_size': 'null',
                    'processor': 'null',
                    'ram': 'null',
                    'storage': 'null',
                    'os': 'null',
                    'price': product_price.text.strip()
                }

                # Loop through the info_boxes and assign values to the details dictionary
                if len(info_boxes) > 0:
                    details['screen_size'] = info_boxes[0].text.strip()
                if len(info_boxes) > 1:
                    details['processor'] = info_boxes[1].text.strip()
                if len(info_boxes) > 2:
                    details['ram'] = info_boxes[2].text.strip()
                if len(info_boxes) > 3:
                    details['storage'] = info_boxes[3].text.strip()
                if len(info_boxes) > 4:
                    details['os'] = info_boxes[4].text.strip()

                # Append the details to the laptop_data list
                laptop_data.append(details)

            except Exception as e:
                print(f"Error while processing product '{product_name.text}':", e)

    # Create a DataFrame from the collected laptop data
    df = pd.DataFrame(laptop_data)


except Exception as e:
    print("Error occurred while scraping:", e)

finally:
    # Close the driver after scraping
    driver.quit()


Error while processing product 'MSI Titan 18HX Gaming Laptop': Message: no such element: Unable to locate element: {"method":"xpath","selector":"../../..//span[contains(@class,"ar-number")]"}
  (Session info: chrome=131.0.6778.69); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
	GetHandleVerifier [0x00007FF6CDCE2775+28773]
	(No symbol) [0x00007FF6CDC4AFB0]
	(No symbol) [0x00007FF6CDAE552A]
	(No symbol) [0x00007FF6CDB38EAE]
	(No symbol) [0x00007FF6CDB3919C]
	(No symbol) [0x00007FF6CDB2C2FC]
	(No symbol) [0x00007FF6CDB5EFDF]
	(No symbol) [0x00007FF6CDB2C1C6]
	(No symbol) [0x00007FF6CDB5F1B0]
	(No symbol) [0x00007FF6CDB7F1A4]
	(No symbol) [0x00007FF6CDB5ED43]
	(No symbol) [0x00007FF6CDB2A548]
	(No symbol) [0x00007FF6CDB2B6B1]
	GetHandleVerifier [0x00007FF6CE00F45D+3358029]
	GetHandleVerifier [0x00007FF6CE02430D+3443709]
	GetHandleVerifier [0x00007FF6CE0183FD+3394797]
	GetHandleVer

In [None]:
# Print the DataFrame to see the results
df

In [None]:
df.shape

In [None]:
# Optionally, save the DataFrame to a CSV file
df.to_csv('laptop_data.csv', index=False)