In [38]:
import requests
from bs4 import BeautifulSoup
import urllib3
import pandas as pd
# Silence urllib3's InsecureRequestWarning: the requests in this notebook are
# sent with verify=False, i.e. without TLS certificate verification.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [39]:
# Lists to store scraped data.
# Every list is (re)created here, so re-running this cell starts from scratch.

all_products = []
all_prices = []
all_descriptions = []
all_categories = []
all_tags = []
all_stock_status = []
all_related_products = []

# -------- Step 1: Scrape product names and prices from all 48 pages --------
for page in range(1, 49):
    if page == 1:
        url = "https://scrapeme.live/shop/"   # first page has a different URL
    else:
        url = f"https://scrapeme.live/shop/page/{page}/"

    # verify=False disables SSL verification (expired certificate).
    # The timeout (seconds) keeps a stalled connection from hanging the cell,
    # and raise_for_status() stops on an HTTP error instead of parsing an
    # error page as if it were a product listing.
    response = requests.get(url, verify=False, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Walk the product titles and take each price from the title's own list
    # item, so that all_products[i] and all_prices[i] always describe the same
    # product. A page-wide search for price spans also returns amounts outside
    # the product list (presumably the source of the "£0.00" values that the
    # later clean-up step strips out), which shifts the two lists apart.
    for product in soup.find_all("h2", class_="woocommerce-loop-product__title"):
        card = product.find_parent("li")
        if card is not None:
            price = card.find("span", class_="woocommerce-Price-amount amount")
        else:
            # Fallback when the title is not inside a list item.
            # assumes the price follows its title in document order — TODO confirm
            price = product.find_next("span", class_="woocommerce-Price-amount amount")

        all_products.append(product.text.strip())
        # "" keeps the lists the same length when a product shows no price.
        all_prices.append(price.text.strip() if price is not None else "")


In [40]:
# Scrape extra details for each product.
# The detail lists are reset first so that re-running this cell does not append
# a second copy of every value on top of the previous run.
all_descriptions = []
all_categories = []
all_tags = []
all_stock_status = []
all_related_products = []


def _first_text(soup, name, class_):
    """Return the stripped text of the first `name` tag with CSS class `class_`.

    Returns "" when the page has no such tag, so the caller can always append
    exactly one value per product.
    """
    tag = soup.find(name, class_=class_)
    return tag.text.strip() if tag is not None else ""


for product_name in all_products:
    # build product page URL
    # NOTE(review): the display name is used as the URL slug — confirm this
    # resolves for names that contain spaces or punctuation.
    url = f'https://scrapeme.live/shop/{product_name}/'
    # The timeout (seconds) prevents a stalled request from hanging the loop;
    # raise_for_status() fails loudly on an HTTP error instead of silently
    # contributing no values for this product.
    response = requests.get(url, verify=False, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Exactly one entry per product and per column keeps every list aligned
    # with all_products, even when a page lacks one of the elements.
    all_descriptions.append(
        _first_text(soup, 'div', 'woocommerce-product-details__short-description')
    )
    all_categories.append(_first_text(soup, 'span', 'posted_in'))
    all_tags.append(_first_text(soup, 'span', 'tagged_as'))
    # Matching on the single class "stock" covers every stock paragraph, not
    # only those whose class attribute is exactly "stock in-stock".
    all_stock_status.append(_first_text(soup, 'p', 'stock'))

    # Related product names stay in one flat list; a later cell groups them.
    for related in soup.find_all('h2', class_='woocommerce-loop-product__title'):
        all_related_products.append(related.text.strip())



In [46]:
# Join the flat list of related-product names into comma-separated strings,
# three consecutive names per string (a shorter final group is kept as is).
# NOTE(review): this presumes every product page contributed exactly three
# related names — verify against the scraping step.
grouped_related = [
    ", ".join(chunk)
    for chunk in (
        all_related_products[start:start + 3]
        for start in range(0, len(all_related_products), 3)
    )
]


In [47]:

# 1) Clean prices: drop every "£0.00" entry, then keep at most as many prices
#    as there are products so the two lists can share one table.
clean_prices = list(
    filter(lambda price: price != "£0.00", all_prices)
)[:len(all_products)]


In [48]:
# Column name -> list of values for that column, in output column order.
data = dict(
    [
        ("Product", all_products),
        ("Price", clean_prices),
        ("Description", all_descriptions),
        ("Category", all_categories),
        ("Tags", all_tags),
        ("Stock Status", all_stock_status),
        ("Related Products", grouped_related),
    ]
)



In [49]:
# Print the length of each column list, one per line, in column order.
# All seven numbers must match for the lists to form a rectangular table.
print(
    *map(
        len,
        (
            all_products,
            clean_prices,
            all_descriptions,
            all_categories,
            all_tags,
            all_stock_status,
            grouped_related,
        ),
    ),
    sep="\n",
)

755
755
755
755
755
755
755


In [50]:
# Build the table from the column dictionary, then write it to a CSV file
# without the DataFrame's index column.
df = pd.DataFrame(data=data)
df.to_csv(path_or_buf="scraped_products.csv", index=False)