# In [4]:  (notebook cell prompt kept from the original paste)
import csv
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

def fetch_url(url, headers, *extra, timeout=30):
    """Download *url* and return the response body as text.

    Two positional call shapes are accepted, because both are used by the
    callers in this file:

    * ``fetch_url(url, headers)`` -- one-off request via ``requests.get``.
    * ``fetch_url(url, session, headers)`` -- request issued through
      ``session`` (any object with a requests-style ``get`` method).

    Args:
        url: Address to fetch.
        headers: Mapping of HTTP request headers. In the three-argument
            form this slot holds the session and the headers follow it.
        *extra: At most one value: the headers of the three-argument form.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout unless one is passed, so a stalled
            connection would otherwise block the crawl indefinitely.

    Returns:
        The decoded response body (``response.text``).

    Raises:
        TypeError: If more than three positional arguments are given.
        requests.HTTPError: If the server answers with a 4xx/5xx status
            (raised by ``raise_for_status``).
    """
    if len(extra) > 1:
        raise TypeError(
            "fetch_url() takes (url, headers) or (url, session, headers)"
        )

    if extra:
        # Three-argument form: the second positional is really the session.
        session, headers = headers, extra[0]
    else:
        session = None

    # The requests module and a Session expose the same get() signature.
    client = requests if session is None else session
    response = client.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.text

def parse_html(html):
    """Parse an HTML string into a BeautifulSoup tree.

    Args:
        html: Markup to parse.

    Returns:
        A ``BeautifulSoup`` object built with the ``html.parser`` backend.
    """
    document = BeautifulSoup(html, features='html.parser')
    return document

def get_brands(soup):
    """Collect the brand page URLs linked from a parsed page.

    Args:
        soup: Parsed page; every ``.item a`` anchor is treated as a brand
            link.

    Returns:
        A list of absolute URLs, one per matching anchor that carries an
        ``href`` attribute, in document order.
    """
    anchors = soup.select('.item a')
    # urljoin copes with root-relative, relative and already-absolute hrefs,
    # whereas plain concatenation is only right for root-relative ones.
    # Anchors without an href are skipped instead of raising KeyError.
    return [urljoin(base_url, a['href']) for a in anchors if a.has_attr('href')]

def get_models(brand_url, session, headers):
    """Fetch a brand page and collect the model page URLs it links to.

    Args:
        brand_url: URL of the brand page.
        session: Passed through to ``fetch_url`` together with ``headers``.
        headers: HTTP request headers for the fetch.

    Returns:
        A list of absolute URLs taken from the anchors matching
        ``a.accordion-toggle, .in td a`` that carry an ``href``.
    """
    html = fetch_url(brand_url, session, headers)
    soup = parse_html(html)
    anchors = soup.select('a.accordion-toggle, .in td a')
    # urljoin handles relative and absolute hrefs alike; anchors without an
    # href are skipped instead of raising KeyError.
    return [urljoin(base_url, a['href']) for a in anchors if a.has_attr('href')]

def get_car_names(model_url, session, headers):
    """Fetch a model page and collect the car page URLs it links to.

    Args:
        model_url: URL of the model page.
        session: Passed through to ``fetch_url`` together with ``headers``.
        headers: HTTP request headers for the fetch.

    Returns:
        A list of absolute URLs taken from the anchors matching
        ``[data-title="Name"] a`` that carry an ``href``.
    """
    html = fetch_url(model_url, session, headers)
    soup = parse_html(html)
    anchors = soup.select('[data-title="Name"] a')
    # urljoin handles relative and absolute hrefs alike; anchors without an
    # href are skipped instead of raising KeyError.
    return [urljoin(base_url, a['href']) for a in anchors if a.has_attr('href')]

def get_groups_link(carname_url, session, headers):
    """Fetch a car page and return the URL behind its third nav tab.

    Args:
        carname_url: URL of the car page.
        session: Passed through to ``fetch_url`` together with ``headers``.
        headers: HTTP request headers for the fetch.

    Returns:
        The absolute URL of the anchor matching
        ``.nav-tabs li:nth-of-type(3) a``, or ``None`` when the page has no
        such anchor or the anchor has no ``href``.
    """
    html = fetch_url(carname_url, session, headers)
    soup = parse_html(html)
    tab_anchor = soup.select_one('.nav-tabs li:nth-of-type(3) a')
    if tab_anchor is None or not tab_anchor.has_attr('href'):
        return None
    # urljoin handles relative and absolute hrefs alike.
    return urljoin(base_url, tab_anchor['href'])

def get_group_details(groups_link_url, session, headers):
    """Fetch a groups page and return its group names and group links.

    Args:
        groups_link_url: URL of the groups page.
        session: Passed through to ``fetch_url`` together with ``headers``.
        headers: HTTP request headers for the fetch.

    Returns:
        A ``(groupnames, group_links)`` tuple: the stripped text of every
        cell matching ``tr.treegrid-collapsed:nth-of-type(n+2) td``, and the
        absolute URL of every ``td a`` anchor that carries an ``href``.
    """
    html = fetch_url(groups_link_url, session, headers)
    soup = parse_html(html)
    groupnames = [
        td.get_text(strip=True)
        for td in soup.select('tr.treegrid-collapsed:nth-of-type(n+2) td')
    ]
    # NOTE(review): names and links come from two independent selectors and
    # the caller pairs them by position with zip(); confirm against the live
    # page that both lists really line up one-to-one.
    # urljoin handles relative and absolute hrefs alike; anchors without an
    # href are skipped instead of raising KeyError.
    group_links = [
        urljoin(base_url, a['href'])
        for a in soup.select('td a')
        if a.has_attr('href')
    ]
    return groupnames, group_links

def _cell_text(row, selector):
    """Return the stripped text of the first match of *selector* in *row*, or '' if none."""
    cell = row.select_one(selector)
    return cell.get_text(strip=True) if cell is not None else ''


def get_parts(group_link_url, session, headers):
    """Fetch a group page and extract one record per part row.

    Args:
        group_link_url: URL of the group page.
        session: Passed through to ``fetch_url`` together with ``headers``.
        headers: HTTP request headers for the fetch.

    Returns:
        A list of dicts with the keys ``part_number``, ``part_name``,
        ``part_image_code`` and ``part_amount_image``, one per
        ``tr.part-search-tr`` row. A field whose cell is absent from the
        row is returned as an empty string instead of aborting the crawl
        with AttributeError.
    """
    html = fetch_url(group_link_url, session, headers)
    soup = parse_html(html)
    return [
        {
            'part_number': _cell_text(row, 'a'),
            'part_name': _cell_text(row, 'td:nth-of-type(2)'),
            'part_image_code': _cell_text(row, 'td.codeonimage'),
            'part_amount_image': _cell_text(row, 'td:nth-of-type(4)'),
        }
        for row in soup.select('tr.part-search-tr')
    ]

# Site root; every scraped href is resolved against it.
base_url = "https://partsouq.com"
start_url = f"{base_url}/"

# Browser-like headers sent with every request.
#
# Accept-Encoding is deliberately NOT set here: requests advertises only the
# encodings it is able to decode (gzip/deflate, plus br/zstd when the optional
# decoder packages are installed). Forcing "br, zstd" invites the server to
# send a body that cannot be decompressed into usable HTML.
#
# NOTE(review): a run of this script got "403 Forbidden" on the start URL;
# headers alone may not be enough to satisfy the site -- confirm before
# relying on this crawl.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'Accept-Language': 'en-US,en;q=0.9,fr;q=0.8',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
}


# Crawl driver: start page -> brands -> models -> cars -> groups -> parts,
# writing one CSV row per part to parts_data.csv.
html = fetch_url(start_url, headers)
soup = parse_html(html)
brands = get_brands(soup)

# One Session is shared by all helper calls (the original referenced
# `session` without ever creating it); the context manager closes it, and the
# CSV file, when the crawl ends or fails.
with open('parts_data.csv', mode='w', newline='', encoding='utf-8') as file, \
        requests.Session() as session:
    writer = csv.writer(file)
    writer.writerow(['Brand', 'Model', 'Car Name', 'Group Name', 'Part Number', 'Part Name', 'Part Image Code', 'Part Amount Image'])

    # Each get_* helper downloads its own page, so the pages are not
    # pre-fetched here: doing so doubled the number of requests and the
    # results were never used.
    for brand in brands:
        models = get_models(brand, session, headers)

        for model in models:
            carnames = get_car_names(model, session, headers)

            for carname in carnames:
                groups_link = get_groups_link(carname, session, headers)

                if groups_link:
                    groupnames, group_links = get_group_details(groups_link, session, headers)

                    for groupname, group_link in zip(groupnames, group_links):
                        parts = get_parts(group_link, session, headers)

                        for part in parts:
                            writer.writerow([brand, model, carname, groupname, part['part_number'], part['part_name'], part['part_image_code'], part['part_amount_image']])

                        time.sleep(1)  # Be polite and avoid overwhelming the server
                time.sleep(1)
            time.sleep(1)
        time.sleep(1)


# Output of the last run:
# HTTPError: 403 Client Error: Forbidden for url: https://partsouq.com/