In [5]:
import traceback
from asyncio import sleep
from dataclasses import dataclass

import requests_async as r
from bs4 import BeautifulSoup
import re

In [6]:
@dataclass
class CpuSpecs:
    clock: float
    cores: int
    threads: int | None
    multithread_score: int
    singlethread_score: int

In [7]:
async def get_cpu_specs(cpu_id: int) -> CpuSpecs:
    res = await r.get(
        "https://www.cpubenchmark.net/cpu.php?id=" + str(cpu_id),
        headers={
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0",
        }
    )

    if res.status_code != 200:
        print("Request for " + res.url + " returned status code: " + res.status_code)
        raise Exception()


    soup = BeautifulSoup(res.content, "html.parser")
    desc_body = soup.find("div", class_="desc-body")

    cores_threads = desc_body.find(lambda tag: tag.name == "p" and "cores:" in tag.text.lower()).text

    if "total" in cores_threads.lower() or "perform" in cores_threads.lower() or "efficien" in cores_threads.lower():
        clockspeed = desc_body.find(lambda tag: tag.name == "p" and "GHz" in tag.text).text
        clockspeed = re.search(r'[\d\.]+[\s\w]*(Base)', clockspeed).group()

        cores = re.search(r'(\d+)\s+(Cores)', cores_threads).group()
        threads = re.search(r'(\d+)\s+(Threads)', cores_threads)
        threads = threads.group() if threads else None
    else:
        clockspeed = desc_body.find(lambda tag: tag.name == "p" and "Clockspeed" in tag.text).text

        cores = re.search(r'(Cores:\s*)(\d+)', cores_threads).group()
        threads = re.search(r'(Threads:\s*)(\d+)', cores_threads)
        threads = threads.group() if threads else None

    right_desc = soup.find("div", class_="right-desc")
    multithread = int(right_desc.find(lambda tag: tag.name == "div" and "multithread rating" in tag.text.lower()).find_next_sibling("div").text)
    singlethread = int(right_desc.find(lambda tag: tag.name == "div" and "single thread rating" in tag.text.lower()).find_next_sibling("div").text)

    clockspeed = float(re.search(r'[\d\.]+', clockspeed).group())
    cores = int(re.search(r'\d+', cores).group())
    threads = int(re.search(r'\d+', threads).group()) if threads else None

    specs = CpuSpecs(
        cores=cores,
        threads=threads,
        clock=clockspeed,
        multithread_score=multithread,
        singlethread_score=singlethread,
    )

    return specs

specs = await get_cpu_specs(2657)

In [None]:
res = await r.get(
    "https://www.cpubenchmark.net/cpu_list.php",
    headers={
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0",
    }
)

soup = BeautifulSoup(res.content, "html.parser")

cpu_list = soup.find_all("tr", id=re.compile(r'^cpu\d+$'))


def parse_id(tag):
    cpu = tag.find("a").get("href")
    cpu = re.search(r'id=\d+', cpu).group()
    cpu = int(re.search(r'\d+', cpu).group())
    return cpu

cpu_list = map(parse_id, cpu_list)
for cpu in cpu_list:
    try:
        specs = await get_cpu_specs(cpu)
    except Exception as e:
        print("Failed to parse specs for cpu with id: " + str(cpu))
        print(traceback.format_exc())

    await sleep(0.125)


Failed to parse specs for cpu with id: 7132
Traceback (most recent call last):
  File "/tmp/ipykernel_1229088/3215590539.py", line 22, in <module>
    specs = await get_cpu_specs(cpu)
            ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_1229088/1960977931.py", line 27, in get_cpu_specs
    clockspeed = desc_body.find(lambda tag: tag.name == "p" and "Clockspeed" in tag.text).text
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'NoneType' object has no attribute 'text'

Failed to parse specs for cpu with id: 7133
Traceback (most recent call last):
  File "/tmp/ipykernel_1229088/3215590539.py", line 22, in <module>
    specs = await get_cpu_specs(cpu)
            ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_1229088/1960977931.py", line 27, in get_cpu_specs
    clockspeed = desc_body.find(lambda tag: tag.name == "p" and "Clockspeed" in tag.text).text
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^