In [None]:
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Root of the SHL website; product links found in the catalog are appended to it.
base_url = "https://www.shl.com"
# Catalog listing page to scrape. NOTE(review): `start=372` presumably selects
# a pagination offset -- confirm against the site before changing it.
catalog_url = base_url + "/products/product-catalog/?start=372&type=1&type=1"

def extract_field(soup, heading_text):
    """Return the text of the ``<p>`` that follows a matching ``<h4>`` heading.

    Looks up the first ``<h4>`` in ``soup`` whose string contains
    ``heading_text`` (compared case-insensitively) and returns the stripped
    text of its next sibling ``<p>``.

    Args:
        soup: Parsed document (or tag) exposing a BeautifulSoup-style ``find``.
        heading_text: Text to search for inside the heading.

    Returns:
        The stripped paragraph text, or ``''`` when there is no matching
        heading or no ``<p>`` sibling after it.
    """
    def heading_matches(text):
        # A falsy string (None or '') never matches.
        return text and heading_text.lower() in text.lower()

    heading = soup.find('h4', string=heading_matches)
    if not heading:
        return ''

    paragraph = heading.find_next_sibling('p')
    if not paragraph:
        return ''

    return paragraph.text.strip()

# Scrape one page of the SHL product catalog: read every product row from the
# listing table, pull extra fields from each product's detail page, and write
# the combined records to "shl_full_catalog.csv".

# Seconds to wait for any single HTTP request; without a timeout, requests can
# block indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30

# CSV column name -> heading text searched for on the product detail page.
DETAIL_FIELDS = {
    'Description': 'Description',
    'Languages': 'Languages',
    'Test Duration': 'Assessment length',
    'Job Levels': 'Job levels',  # column was previously misspelled "Job Lelevels"
}

response = requests.get(catalog_url, timeout=REQUEST_TIMEOUT)
# Fail loudly on an HTTP error instead of parsing an error page into an empty CSV.
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
rows = soup.find_all('tr')

data = []

for row in rows:
    # Only rows that carry a title cell describe a product; skip the rest
    # (presumably table header rows).
    title_cell = row.find('td', class_='custom__table-heading__title')
    if not title_cell:
        continue

    product = {}

    link_tag = title_cell.find('a')
    product['Assessment Name'] = link_tag.text.strip() if link_tag else ''
    relative_link = link_tag['href'].strip() if link_tag and link_tag.get('href') else ''
    # urljoin copes with root-relative, relative and absolute hrefs alike.
    # A row without a link gets an empty URL rather than the site home page.
    full_link = urljoin(base_url, relative_link) if relative_link else ''
    product['URL'] = full_link

    # The first two "general" cells are yes/no indicators: a cell is 'Yes'
    # when it contains the "-yes" circle marker.
    general_cells = row.find_all('td', class_='custom__table-heading__general')
    for i, cell in enumerate(general_cells[:2], start=1):
        yes_span = cell.find('span', class_='catalogue__circle -yes')
        product[f'Column_{i}'] = 'Yes' if yes_span else 'No'

    key_cell = row.find('td', class_='product-catalogue__keys')
    key_spans = key_cell.find_all('span', class_='product-catalogue__key') if key_cell else []
    product['Keys'] = ', '.join(span.text.strip() for span in key_spans)

    # Default every detail column to '' so that a missing link or a failed
    # request still produces a complete row.
    details = dict.fromkeys(DETAIL_FIELDS, '')
    if full_link:
        try:
            detail_response = requests.get(full_link, timeout=REQUEST_TIMEOUT)
            detail_response.raise_for_status()
        except requests.RequestException as e:
            # Best effort: report the failure and keep the blank defaults.
            print(f"Error fetching details from {full_link}: {e}")
        else:
            detail_soup = BeautifulSoup(detail_response.text, 'html.parser')
            for column, heading in DETAIL_FIELDS.items():
                details[column] = extract_field(detail_soup, heading)
        # Pause between detail requests to throttle the load on the server.
        time.sleep(0.5)
    product.update(details)

    data.append(product)

df = pd.DataFrame(data)
df.to_csv("shl_full_catalog.csv", index=False)
print("file saved")


file saved
