In [None]:
import requests, pandas as pd, time
from bs4 import BeautifulSoup
from urllib.parse import urljoin


In [4]:
!pip install tqdm



In [10]:
# Site root; relative links scraped from the catalog are resolved against it.
base_url = "https://www.shl.com"

# Catalog listing URL template: the "{}" placeholder receives the `start`
# offset through str.format().
catalog_base = base_url + "/products/product-catalog/?start={}&type=1"

# HTTP headers sent with every request made by this notebook.
headers = {
    "User-Agent": "Mozilla/5.0",
}

In [None]:
def _text_after_heading(soup, label):
    """
    Returns the stripped text of the first <p> that follows the first <h4>
    whose string contains `label`, or None if the heading or the paragraph
    is missing.
    """
    heading = soup.find('h4', string=lambda t: t and label in t)
    if heading is None:
        return None
    paragraph = heading.find_next('p')
    if paragraph is None:
        return None
    return paragraph.text.strip()


def _parse_duration(duration_text):
    """
    Converts the "Assessment length" paragraph text into a duration value.

    - No '=' in the text: the text is returned unchanged.
    - '=' followed by nothing: "N/A" (there is no value to report).
    - First token after '=' is an integer: that integer is returned.
    - Otherwise: the stripped text between the first and second '='.
    """
    parts = duration_text.split('=')
    if len(parts) < 2:
        return duration_text
    value = parts[1].strip()
    tokens = value.split()
    if not tokens:
        # Previously this case raised IndexError that was silently swallowed.
        return "N/A"
    try:
        return int(tokens[0])
    except ValueError:
        return value


def extract_description_duration_joblevel(detail_url):
    """
    Extracts description, duration, and job levels from an SHL assessment detail page.

    The page is fetched with the notebook-level `headers` and a 15 second
    timeout. Each field is read from the first <p> following the <h4> whose
    text contains 'Description', 'Assessment length' or 'Job levels'.

    Parameters:
        detail_url: absolute URL of the assessment detail page.

    Returns (description, duration, job_levels):
        description, job_levels: str, or "N/A" when not found.
        duration: int when the text after '=' starts with an integer,
            otherwise a str (raw text, or "N/A" when not found).

    Never raises: any failure (network error, HTTP error status, parsing
    problem) is printed and reported as ("N/A", "N/A", "N/A").
    """
    not_available = ("N/A", "N/A", "N/A")
    try:
        res = requests.get(detail_url, headers=headers, timeout=15)
        # Treat HTTP 4xx/5xx as a failed fetch instead of parsing the error page.
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "html.parser")

        # Pages without any catalogue row carry none of the fields.
        if soup.find('div', class_='product-catalogue-training-calendar__row') is None:
            return not_available

        # Best effort per field: a failure on one heading must not discard the
        # others, but it is reported rather than silently ignored.
        texts = {}
        for label in ('Description', 'Assessment length', 'Job levels'):
            try:
                texts[label] = _text_after_heading(soup, label)
            except Exception as e:
                print(f" Warning: could not read '{label}' from {detail_url}: {e}")
                texts[label] = None

        description = "N/A" if texts['Description'] is None else texts['Description']
        job_levels = "N/A" if texts['Job levels'] is None else texts['Job levels']
        duration = "N/A"
        if texts['Assessment length'] is not None:
            duration = _parse_duration(texts['Assessment length'])

        return description, duration, job_levels

    except Exception as e:
        print(f" Error fetching {detail_url}: {e}")
        return not_available


In [None]:
from tqdm import tqdm

# Pagination of the catalog listing: `start` offsets 0, 12, ..., 372.
PAGE_SIZE = 12
CATALOG_END = 384
# Pause after each detail-page request, in seconds.
REQUEST_DELAY_S = 1.0
# Checkpoint file rewritten after every catalog page.
PROGRESS_CSV = "shl_progress.csv"


def _circle_flag(td):
    """Returns "Yes" if `td` holds a catalogue circle span carrying the "-yes" class, else "No"."""
    if td is None:
        return "No"
    span = td.find("span", class_="catalogue__circle")
    return "Yes" if span and "-yes" in span.get("class", []) else "No"


all_data = []

for page_start in range(0, CATALOG_END, PAGE_SIZE):
    page_url = catalog_base.format(page_start)
    print(f"\n 🌍 Scraping page: {page_url}")

    # A single failed listing page should not abort the whole scrape.
    try:
        res = requests.get(page_url, headers=headers, timeout=15)
        res.raise_for_status()
    except requests.RequestException as e:
        print(f" Error fetching {page_url}: {e}")
        continue
    soup = BeautifulSoup(res.text, "html.parser")

    rows = soup.find_all("tr", attrs={"data-entity-id": True})
    print(f"   Found {len(rows)} assessments on this page")

    for tr in tqdm(rows, desc=f"Page {page_start // PAGE_SIZE + 1} Progress", unit="test", colour="green"):
        name_td = tr.find("td", class_="custom__table-heading__title")
        a_tag = name_td.find("a") if name_td else None
        name = a_tag.get_text(strip=True) if a_tag else ""
        rel_url = a_tag["href"] if a_tag and a_tag.has_attr("href") else ""

        # First "general" cell = remote testing, second = adaptive testing.
        # Rows with fewer cells are reported as "No" instead of raising IndexError.
        general_tds = tr.find_all("td", class_="custom__table-heading__general")
        remote_testing = _circle_flag(general_tds[0] if len(general_tds) > 0 else None)
        adaptive_testing = _circle_flag(general_tds[1] if len(general_tds) > 1 else None)

        # Match on the distinguishing class only; an exact multi-class string
        # match breaks as soon as the class order or spacing changes.
        test_td = tr.find("td", class_="product-catalogue__keys")
        test_spans = test_td.find_all("span", class_="product-catalogue__key") if test_td else []
        test_type = ", ".join(s.get_text(strip=True) for s in test_spans) if test_spans else "N/A"

        if rel_url:
            full_url = urljoin(base_url, rel_url)
            description, duration, job_levels = extract_description_duration_joblevel(full_url)
            time.sleep(REQUEST_DELAY_S)
        else:
            # Without a link urljoin would return the site root, and the home
            # page would be scraped as if it were a detail page.
            full_url = ""
            description, duration, job_levels = "N/A", "N/A", "N/A"

        all_data.append({
            "assessment_name": name,
            "url": full_url,
            "remote_testing": remote_testing,
            "adaptive_testing": adaptive_testing,
            "test_type": test_type,
            "description": description,
            "duration": duration,
            "job_levels": job_levels
        })

    # Checkpoint once per page: rewriting the whole CSV after every record is
    # quadratic in the number of records and floods the progress bar output.
    df_temp = pd.DataFrame(all_data)
    df_temp.to_csv(PROGRESS_CSV, index=False, encoding="utf-8-sig")
    print(f"💾 Saved progress: {len(all_data)} records total so far")




üåç Scraping page: https://www.shl.com/products/product-catalog/?start=0&type=1
   Found 12 assessments on this page


Page 1 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:36<00:00,  3.06s/test]


üíæ Saved progress: 12 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=12&type=1
   Found 12 assessments on this page


Page 2 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:35<00:00,  3.00s/test]


üíæ Saved progress: 24 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=24&type=1
   Found 12 assessments on this page


Page 3 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:36<00:00,  3.01s/test]


üíæ Saved progress: 36 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=36&type=1
   Found 12 assessments on this page


Page 4 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:36<00:00,  3.04s/test]


üíæ Saved progress: 48 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=48&type=1
   Found 12 assessments on this page


Page 5 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:36<00:00,  3.06s/test]


üíæ Saved progress: 60 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=60&type=1
   Found 12 assessments on this page


Page 6 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:36<00:00,  3.01s/test]


üíæ Saved progress: 72 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=72&type=1
   Found 12 assessments on this page


Page 7 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:36<00:00,  3.04s/test]


üíæ Saved progress: 84 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=84&type=1
   Found 12 assessments on this page


Page 8 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:36<00:00,  3.05s/test]


üíæ Saved progress: 96 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=96&type=1
   Found 12 assessments on this page


Page 9 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:36<00:00,  3.02s/test]


üíæ Saved progress: 108 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=108&type=1
   Found 12 assessments on this page


Page 10 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:36<00:00,  3.04s/test]


üíæ Saved progress: 120 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=120&type=1
   Found 12 assessments on this page


Page 11 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:37<00:00,  3.10s/test]


üíæ Saved progress: 132 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=132&type=1
   Found 12 assessments on this page


Page 12 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:35<00:00,  2.97s/test]


üíæ Saved progress: 144 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=144&type=1
   Found 12 assessments on this page


Page 13 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:36<00:00,  3.03s/test]


üíæ Saved progress: 156 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=156&type=1
   Found 12 assessments on this page


Page 14 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:36<00:00,  3.02s/test]


üíæ Saved progress: 168 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=168&type=1
   Found 12 assessments on this page


Page 15 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:37<00:00,  3.10s/test]


üíæ Saved progress: 180 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=180&type=1
   Found 12 assessments on this page


Page 16 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:35<00:00,  3.00s/test]


üíæ Saved progress: 192 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=192&type=1
   Found 12 assessments on this page


Page 17 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:36<00:00,  3.07s/test]


üíæ Saved progress: 204 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=204&type=1
   Found 12 assessments on this page


Page 18 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:37<00:00,  3.17s/test]


üíæ Saved progress: 216 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=216&type=1
   Found 12 assessments on this page


Page 19 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:36<00:00,  3.04s/test]


üíæ Saved progress: 228 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=228&type=1
   Found 12 assessments on this page


Page 20 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:35<00:00,  2.96s/test]


üíæ Saved progress: 240 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=240&type=1
   Found 12 assessments on this page


Page 21 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:36<00:00,  3.01s/test]


üíæ Saved progress: 252 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=252&type=1
   Found 12 assessments on this page


Page 22 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:35<00:00,  2.95s/test]


üíæ Saved progress: 264 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=264&type=1
   Found 12 assessments on this page


Page 23 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:35<00:00,  2.98s/test]


üíæ Saved progress: 276 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=276&type=1
   Found 12 assessments on this page


Page 24 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:34<00:00,  2.92s/test]


üíæ Saved progress: 288 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=288&type=1
   Found 12 assessments on this page


Page 25 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:34<00:00,  2.90s/test]


üíæ Saved progress: 300 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=300&type=1
   Found 12 assessments on this page


Page 26 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:36<00:00,  3.05s/test]


üíæ Saved progress: 312 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=312&type=1
   Found 12 assessments on this page


Page 27 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:35<00:00,  2.99s/test]


üíæ Saved progress: 324 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=324&type=1
   Found 12 assessments on this page


Page 28 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:35<00:00,  2.98s/test]


üíæ Saved progress: 336 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=336&type=1
   Found 12 assessments on this page


Page 29 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [01:02<00:00,  5.20s/test]


üíæ Saved progress: 348 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=348&type=1
   Found 12 assessments on this page


Page 30 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:36<00:00,  3.03s/test]


üíæ Saved progress: 360 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=360&type=1
   Found 12 assessments on this page


Page 31 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 12/12 [00:35<00:00,  2.99s/test]


üíæ Saved progress: 372 records total so far

üåç Scraping page: https://www.shl.com/products/product-catalog/?start=372&type=1
   Found 5 assessments on this page


Page 32 Progress: 100%|[32m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 5/5 [00:14<00:00,  2.87s/test]

üíæ Saved progress: 377 records total so far



