In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def _text_after_last_colon(elem):
    """Return the stripped text following the last ':' in *elem*, or None if *elem* is missing."""
    if elem is None:
        return None
    return elem.get_text(strip=True).split(":")[-1].strip()


def get_product_details(url, timeout=30):
    """Download a product page and pull out its product code, EAN and brand.

    Parameters
    ----------
    url : str
        Address of the product detail page to fetch.
    timeout : float, optional
        Seconds passed to ``requests.get`` as its ``timeout`` (default 30).
        Without it a stalled connection would block the scrape indefinitely.

    Returns
    -------
    dict
        Keys ``'product_code'``, ``'ean'`` and ``'brand'``. A value is ``None``
        when the matching element is not found in the parsed page.

    Notes
    -----
    The HTTP status code is not inspected: whatever body comes back is parsed.
    Exceptions raised by ``requests.get`` (connection failure, timeout) are
    not caught here and propagate to the caller.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Product code and EAN: only the part after the last colon of the element text is kept.
    product_code = _text_after_last_colon(soup.select_one('.key-information__sku'))
    ean = _text_after_last_colon(soup.select_one('.key-information__ean'))

    # Brand is the <span> immediately following the label whose text contains "Brand:".
    # ':-soup-contains' is the supported spelling; the bare ':contains' alias is
    # deprecated in Soup Sieve and emits a FutureWarning.
    brand_elem = soup.select_one(
        '.key-information__item span.key-information__label:-soup-contains("Brand:") + span'
    )
    brand = brand_elem.get_text(strip=True) if brand_elem is not None else None

    return {
        'product_code': product_code,
        'ean': ean,
        'brand': brand
    }

# Listing pages to crawl (the ?p= query parameter takes values 1 through 8).
start_urls = [f"https://www.techbuyer.com/uk/computing/desktop-pc-s?p={i}" for i in range(1,9)]  # Adjust the range as needed

# Seconds to wait for each listing page; without a timeout a stalled
# connection would hang the whole run.
LISTING_TIMEOUT = 30

# Initialize empty lists to store data (one list per output column,
# reset here so re-running the cell does not duplicate rows).
names = []
image_links = []
product_codes = []
eans = []
brands = []

for url in start_urls:
    response = requests.get(url, timeout=LISTING_TIMEOUT)
    soup = BeautifulSoup(response.text, 'html.parser')
    products = soup.select('ol.products__items > li.products__item')

    for product in products:
        name_elem = product.select_one('strong.products__item-name > a')
        name = name_elem.get_text(strip=True) if name_elem else None

        # Tag.get returns None when the attribute is absent, so a missing
        # href/src yields None instead of raising KeyError.
        view_product_link = product.find('a', title='View Product')
        link = view_product_link.get('href') if view_product_link else None

        # Extract image link from src attribute
        image_elem = product.select_one('.products__image-photo')
        image_link = image_elem.get('src') if image_elem else None

        # Only visit the detail page when the listing exposes a link to it;
        # otherwise record the detail fields as missing.
        if link:
            product_details = get_product_details(link)
        else:
            product_details = {'product_code': None, 'ean': None, 'brand': None}

        # Append data to lists
        names.append(name)
        image_links.append(image_link)
        product_codes.append(product_details['product_code'])
        eans.append(product_details['ean'])
        brands.append(product_details['brand'])

# Create DataFrame (one row per product, in crawl order)
data = {
    'name': names,
    'image_link': image_links,
    'product_code': product_codes,
    'ean': eans,
    'brand': brands
}

df = pd.DataFrame(data)




In [2]:
# Save DataFrame as Excel file
# Raw string so the backslashes in the Windows path are taken literally;
# "\S" and "\D" in a normal string are invalid escape sequences and trigger
# a warning on current Python versions. The resulting path is unchanged.
excel_file_path = r"C:\SOBIN_CHACKO\Desktop.xlsx"
df.to_excel(excel_file_path, index=False)