# Web Scraping with Selenium

## Libraries

In [15]:
import pandas as pd
import re

In [16]:
# import module
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

## Launch an instance of Google Chrome

In [17]:
# Chrome options object; left at its defaults here, it is the place to add
# browser settings before the driver is started
options = webdriver.ChromeOptions()

# Service object created with no arguments
service = Service()

# Start a Chrome WebDriver session using the service and options created above
driver = webdriver.Chrome(options=options, service=service)


In [18]:
# Address of the site whose catalogue is scraped
url = "http://books.toscrape.com/"

# Load that address in the running Chrome WebDriver session
driver.get(url)

# Book Titles

In [19]:
# Build the list of book titles in one pass: each product card on the page is
# an "article.product_pod" element, and the full title is read from the
# "title" attribute of the link inside the card's <h3>.
titleBooks = [
    card.find_element(By.CSS_SELECTOR, "h3 > a").get_attribute("title")
    for card in driver.find_elements(By.CSS_SELECTOR, "article.product_pod")
]

# Click on the link

In [21]:
# Collect the stock quantity of every product listed on the current page.
#
# The detail-page URLs are read up front and each page is then opened
# directly, instead of clicking a title and navigating back: element
# references belong to the document they were found in, so reusing them after
# the browser has changed page can raise StaleElementReferenceException.
from selenium.common.exceptions import NoSuchElementException

# Initializes an empty list to store the stock quantities of each product.
stockList = []

# Pattern for the availability text, e.g. "In stock (22 available)";
# compiled once because it is applied to every product.
stock_pattern = re.compile(r'In stock \((\d+) available\)')

# Remember the listing page so the browser can be returned to it afterwards
listing_url = driver.current_url

# Locate the product elements on the page
product_elements = driver.find_elements(By.CSS_SELECTOR, "article.product_pod")

# Read every detail-page link while the listing page is still loaded
product_urls = [
    product.find_element(By.CSS_SELECTOR, "h3 > a").get_attribute("href")
    for product in product_elements
]

# Visit each product detail page and extract the stock quantity
for product_url in product_urls:
    driver.get(product_url)

    try:
        stock_text = driver.find_element(By.CLASS_NAME, 'instock').text
    except NoSuchElementException:
        stock_text = ''  # No availability element on this page

    # Set to 0 if stock info is unavailable or cannot be parsed; exactly one
    # value is appended per product so the list stays aligned with the titles
    stock_match = stock_pattern.search(stock_text)
    qtStock = int(stock_match.group(1)) if stock_match else 0
    stockList.append(qtStock)

# Leave the browser on the listing page, as the click-and-back flow did
if product_urls:
    driver.get(listing_url)

In [22]:
# Quit the WebDriver session now that all page data has been collected;
# the remaining cells only use the Python lists gathered above
driver.quit()

# DataFrame

In [23]:
# Assemble the scraped results into a two-column table:
#   'Title' - book titles taken from the 'titleBooks' list
#   'Stock' - stock quantities taken from the 'stockList' list
df_books = pd.DataFrame(data={'Title': titleBooks, 'Stock': stockList})

# Show the resulting table
print(df_books)

                                                Title  Stock
0                                A Light in the Attic     22
1                                  Tipping the Velvet     20
2                                          Soumission     20
3                                       Sharp Objects     20
4               Sapiens: A Brief History of Humankind     20
5                                     The Requiem Red     19
6   The Dirty Little Secrets of Getting Your Dream...     19
7   The Coming Woman: A Novel Based on the Life of...     19
8   The Boys in the Boat: Nine Americans and Their...     19
9                                     The Black Maria     19
10     Starving Hearts (Triangular Trade Trilogy, #1)     19
11                              Shakespeare's Sonnets     19
12                                        Set Me Free     19
13  Scott Pilgrim's Precious Little Life (Scott Pi...     19
14                          Rip it Up and Start Again     19
15  Our Band Could Be Yo

In [24]:
# Write the table to 'books_stock.csv' without the DataFrame index
df_books.to_csv(path_or_buf='books_stock.csv', index=False)