In [1]:
import asyncio
import functools
import hashlib
import random
from enum import Enum, StrEnum
from typing import List, Dict, Any, Optional

import aiometer
import httpx
from pydantic import BaseModel
from rich import print as pprint
from selectolax.parser import HTMLParser
from tqdm.asyncio import trange, tqdm

In [25]:
class Labels(StrEnum):
    """String enum for the label stored in ``NovelData.lable``.

    ``parsePage`` fills that field from the text of a label badge on each
    search result, so the member values are the literal strings that text
    is compared against.
    """

    origin = "Original"
    fan_fiction = "Fan Fiction"
    # Fallback used by parsePage when no label text is available.
    unknown = "Unknown"


class NovelData(BaseModel):
    """One fiction entry as scraped from a search-results page by ``parsePage``.

    Numeric fields use -1 as the "value missing" sentinel chosen by
    ``parsePage``.
    """

    title: str
    fiction_id: str  # taken from a path segment of `link`
    link: str  # absolute URL of the fiction page
    tags: List[str]
    lable: Labels  # [sic] field name is part of the schema; holds the label badge value
    following_count: int
    rating: float  # read from the `title` attribute of the `.star` element
    page_count: int
    view_count: int
    chapters_count: int
    last_update: int  # read from the `unixtime` attribute of the entry's <time> element
    description: str
    description_hash: int  # integer hash of `description`


In [26]:
# Module-level async HTTP client that later cells pass to the fetch helpers.
# It is not opened with `async with`, so none of the cells shown here close it.
client = httpx.AsyncClient()

In [27]:
async def createTaskByPageNr(client: httpx.AsyncClient, pageNr: int):
    """Fetch one page of the Royal Road fiction search, ordered by release date.

    Args:
        client: Async HTTP client used to send the GET request.
        pageNr: Value placed in the ``page`` query parameter.

    Returns:
        Whatever ``client.get`` returns for the search URL. The status code
        is not inspected here.
    """
    return await client.get(
        f"https://www.royalroad.com/fictions/search?page={pageNr}&orderBy=release_date"
    )

In [28]:
def _node_text(parent, selector: str) -> str:
    """Return the raw text of the first node under ``parent`` matching ``selector``, or "" if absent."""
    node = parent.css_first(selector) if parent is not None else None
    return node.text() if node is not None else ""


def _stat_container(novel, icon_selector: str):
    """Return the parent element of the icon matching ``icon_selector``, or None if the icon is missing."""
    icon = novel.css_first(icon_selector)
    return icon.parent if icon is not None else None


def _parse_count(text: str) -> int:
    """Parse a counter such as "1,234 Followers" into an int; return -1 when it holds no digits."""
    # Keeping only ASCII digits drops the thousands separator and the unit
    # word regardless of whether the unit is singular or plural.
    digits = "".join(ch for ch in text if ch in "0123456789")
    return int(digits) if digits else -1


def _stable_hash(text: str) -> int:
    """Return a signed 64-bit hash of ``text`` that is identical in every Python process."""
    digest = hashlib.sha256(text.encode("utf-8")).digest()
    return int.from_bytes(digest[:8], "big", signed=True)


async def parsePage(response: httpx.Response, debug = False) -> list[NovelData]:
    """Extract every fiction entry from one search-results page.

    Args:
        response: HTTP response of a search-results request.
        debug: Currently unused; kept so existing callers keep working.

    Returns:
        One ``NovelData`` per ``div.row.fiction-list-item`` element, in
        document order. An empty list is returned when the status code is
        not 200. Elements missing from an entry are replaced by fallbacks
        ("No Title", "No Link", "No Description", ``Labels.unknown``, or -1)
        instead of raising.
    """
    if response.status_code != 200:
        return []

    tree = HTMLParser(response.text)
    novels = []

    for novel in tree.css("div.row.fiction-list-item"):
        # css_first() yields None when nothing matches, so every lookup is
        # checked before it is dereferenced.
        title_link = novel.css_first("h2.fiction-title a")
        title = (title_link.text() if title_link is not None else "") or "No Title"

        href = title_link.attributes.get("href") if title_link is not None else None
        if href:
            url = "https://www.royalroad.com" + href
            # The id is the second-to-last path segment of the fiction URL.
            fiction_id = url.split("/")[-2] or "-1"
        else:
            url = "No Link"
            fiction_id = "-1"

        tags = [tag.text() for tag in novel.css("span.tags a")]

        label_text = _node_text(novel, "span.label.label-default.label-sm.bg-blue-hoki").strip()
        try:
            lable = Labels(label_text)
        except ValueError:
            # Empty or unrecognised badge text.
            lable = Labels.unknown

        star = novel.css_first(".star")
        try:
            rating = float(star.attributes.get("title")) if star is not None else -1.0
        except (TypeError, ValueError):
            rating = -1.0

        following_count = _parse_count(_node_text(_stat_container(novel, "i.fa.fa-users"), "span"))
        page_count = _parse_count(_node_text(_stat_container(novel, "i.fa.fa-book"), "span"))
        view_count = _parse_count(_node_text(_stat_container(novel, "i.fa.fa-eye"), "span"))
        chapter_count = _parse_count(_node_text(_stat_container(novel, "i.fa.fa-list"), "span"))

        calendar_box = _stat_container(novel, "i.fa.fa-calendar")
        time_node = calendar_box.css_first("time") if calendar_box is not None else None
        try:
            last_update = int(time_node.attributes.get("unixtime")) if time_node is not None else -1
        except (TypeError, ValueError):
            last_update = -1

        description = _node_text(novel, "div.margin-top-10.col-xs-12").strip() or "No Description"

        novels.append(NovelData(
            title=title,
            fiction_id=fiction_id,
            link=url,
            tags=tags,
            lable=lable,
            following_count=following_count,
            rating=rating,
            page_count=page_count,
            view_count=view_count,
            chapters_count=chapter_count,
            last_update=last_update,
            description=description,
            # The built-in hash() of a str is salted per process, so it
            # cannot be compared across kernel restarts; use a digest.
            description_hash=_stable_hash(description),
        ))

    return novels

In [31]:
async def getNovelsByPageNr(client: httpx.AsyncClient, pageNr: int):
    """Download search page ``pageNr`` with ``client`` and return the novels parsed from it."""
    return await parsePage(await createTaskByPageNr(client, pageNr))

In [30]:
testResponse = await createTaskByPageNr(client, 1)
pprint((await parsePage(testResponse))[0])

In [31]:
res = list()

async with httpx.AsyncClient() as client:
    tasks = [createTaskByPageNr(client, i) for i in range(1, 3)]
    res = await tqdm.gather(*tasks)

  async def try_connect(remote_host: str, event: Event) -> None:
100%|██████████| 2/2 [00:00<00:00,  8.16it/s]


In [1]:
pageNrs = list(range(1, 30))

async with aiometer.amap(
    functools.partial(getNovelsByPageNr, client),
    tqdm(pageNrs),
    max_at_once=5,
    max_per_second=2,
) as res:
    async for r in res:
        print(r)



NameError: name 'aiometer' is not defined

In [34]:
# Preview only the beginning of the first response body; the full HTML is too
# long to display usefully. `res` is expected to be the gathered response list.
res[0].text[:500]

