In [1]:
import re

from top_bgco import RawEntry, get_url_for_chapter, DB, build_raw_data
from typing import Dict, List

# Init the DB: `db` is the handle the cells below use to store each year's
# raw data (via db.add_from_raw_data).
db = DB("lite.db")

In [None]:
#
# 2022
#
# YouTube URLs of the 2022 videos, handed to build_raw_data as `videos_data`.
data_2022 = [
    "https://www.youtube.com/watch?v=7Y8NuZxZCxM",
    "https://www.youtube.com/watch?v=oHCpZ3KXvSs",
    "https://www.youtube.com/watch?v=BVdTCNJytgk",
    "https://www.youtube.com/watch?v=K3k0Ig901wc",
    "https://www.youtube.com/watch?v=WcQDObGcON8",
    "https://www.youtube.com/watch?v=4dWCVA3bZ1k",
    "https://www.youtube.com/watch?v=yC4Zo-JSjLk",
    "https://www.youtube.com/watch?v=CW2SD-FROqU",
    "https://www.youtube.com/watch?v=0ouNMQi4wp8",
    "https://www.youtube.com/watch?v=McwVKKoV2ws",
]


def parse_2022(chapter: Dict, video_id: str, year: int) -> RawEntry:
    """Turn one chapter of a 2022 top 100 video into a RawEntry.

    The chapter title is expected to look like
    "<position> <Devon|Alex|Meg> - <game>"; the chapter's "time" is used to
    build the URL stored on the entry.
    """
    title_pattern = r"(\d*)\ (Devon|Alex|Meg)\ ?-\ (.*)"
    # With several groups, findall yields one tuple of group strings per match.
    # Only the first match is used; a title that does not match raises IndexError.
    rank, person, game_title = re.findall(title_pattern, chapter["title"])[0]
    chapter_url = get_url_for_chapter(video_id, chapter["time"])
    return RawEntry(year, person, game_title, int(rank), chapter_url)


def before_return_hook_2022(entries: List[RawEntry]) -> List[RawEntry]:
    """Apply the hand-made corrections needed by the 2022 entries.

    Every entry is first corrected if it is the known wrong one, then dropped
    if it is the known entry to skip. The entry whose chapter is missing is
    appended at the end of the returned list.
    """
    # The chapter for this one is missing altogether, so it is added by hand.
    missing_entries = [
        RawEntry(
            2022,
            "Meg",
            "The Guild of Merchant Explorers",
            75,
            "https://www.youtube.com/watch?v=BVdTCNJytgk&t=2240",
        ),
    ]

    # Here they only chatted; the game is repeated later on in the video.
    entries_to_drop = [
        RawEntry(
            2022,
            "Devon",
            "Vindication",
            50,
            "https://www.youtube.com/watch?v=4dWCVA3bZ1k&t=65",
        ),
    ]

    # This entry has the wrong name: its `name` is overwritten with "Devon".
    entry_with_wrong_name = RawEntry(
        2022,
        "Alex",
        "Welcome To...",
        92,
        "https://www.youtube.com/watch?v=7Y8NuZxZCxM&t=3356",
    )

    kept_entries = []
    for entry in entries:
        # Correct first, then decide whether the (possibly corrected) entry stays.
        if entry == entry_with_wrong_name:
            entry.name = "Devon"
        if entry not in entries_to_drop:
            kept_entries.append(entry)

    return kept_entries + missing_entries

# Build the 2022 raw data with the 2022 parser and correction hook
raw_data_2022 = build_raw_data(
    videos_data=data_2022,
    year=2022,
    parser=parse_2022,
    before_return_hook=before_return_hook_2022,
)
# Store it in the db
db.add_from_raw_data(raw_data_2022)

In [None]:
#
# 2020
#
# Videos of the 2020 list, handed to build_raw_data as `videos_data`.
# An item is either a bare URL or a (url, int, int) tuple.
# NOTE(review): the two ints look like the start/end of the chapter range to
# keep for that video — confirm against build_raw_data.
data_2020 = [
    "https://www.youtube.com/watch?v=lBZ4myP_-Is",
    "https://www.youtube.com/watch?v=oSycUrG3hOI",
    "https://www.youtube.com/watch?v=ZIm1SGQCbr8",
    ("https://www.youtube.com/watch?v=tbZKxpK9s3s", 2, -1),
    "https://www.youtube.com/watch?v=Cf7n1zsrWJI",
]

def parse_2020(chapter: Dict, video_id: str, year: int) -> RawEntry:
    """Turn one chapter of a 2020 top 50 video into a RawEntry.

    The chapter title is expected to look like "<position> - <game>"; every
    entry is attributed to "Alex".
    """
    # findall returns one (position, game) tuple per match; the first one is
    # used and a title that does not match raises IndexError.
    found = re.findall(r"(\d*)\ -\ (.*)", chapter["title"])
    rank, game_title = found[0]
    chapter_url = get_url_for_chapter(video_id, chapter["time"])
    return RawEntry(year, "Alex", game_title, int(rank), chapter_url)

def before_return_hook_2020(entries: List[RawEntry]) -> List[RawEntry]:
    """Return the 2020 entries unchanged: no manual correction is applied."""
    return entries

# Build the 2020 raw data with the 2020 parser and (no-op) hook
raw_data_2020 = build_raw_data(
    videos_data=data_2020,
    year=2020,
    parser=parse_2020,
    before_return_hook=before_return_hook_2020,
)
# Store it in the db
db.add_from_raw_data(raw_data_2020)

In [6]:
#
# 2021
#
# Videos of the 2021 list as (url, int, int) tuples.
# parse_2021 reads the second item as the index of the chapter that holds the
# first position announced in the video title ("From <a> to <b>").
# NOTE(review): the third item is presumably the end of the chapter range used
# by build_raw_data — confirm.
data_2021 = [
    ('https://www.youtube.com/watch?v=QnQQ2igVvmg', 4, -1),
    ('https://www.youtube.com/watch?v=L_5kNoP8fAk', 3, -1),
    ('https://www.youtube.com/watch?v=fj7zIwzLM1Y', 3, -1),
    ('https://www.youtube.com/watch?v=05vYqiT3d2A', 3, -1),
    ('https://www.youtube.com/watch?v=qXWeuzhDy0A', 2, -1),
    ('https://www.youtube.com/watch?v=bgyYefZ3GhE', 4, -1),
    ('https://www.youtube.com/watch?v=BC7TLbG5fvc', 3, -1),
    ('https://www.youtube.com/watch?v=B4rXsEP8VDQ', 3, -1),
    ('https://www.youtube.com/watch?v=nttm4rAZPes', 2, -1),
    ('https://www.youtube.com/watch?v=4tQNDSqc2R4', 2, -1)
]

from functools import lru_cache
import requests

@lru_cache(maxsize=None)
def get_video_data(video_id: str) -> tuple:
    """Fetch the snippet and the chapters of a video from yt.lemnoslife.com.

    Results are memoised per ``video_id``, so parsing every chapter of the same
    video performs the two HTTP requests only once.

    Args:
        video_id: YouTube id of the video.

    Returns:
        A ``(snippet_response, chapters_response)`` pair with the decoded JSON
        body of each request.

    Raises:
        requests.RequestException: On timeout, connection failure or an HTTP
            error status. The exception propagates, so lru_cache does not
            memoise the failed lookup and a later call retries it.
    """
    base_url = "https://yt.lemnoslife.com"

    def fetch_json(path: str, part: str):
        # The timeout keeps an unresponsive server from hanging the kernel;
        # raise_for_status keeps HTTP error payloads from being cached as data.
        response = requests.get(
            f"{base_url}/{path}",
            params={"part": part, "id": video_id},
            timeout=30,
        )
        response.raise_for_status()
        return response.json()

    resp_data = fetch_json("noKey/videos", "snippet")
    resp_chapters = fetch_json("videos", "chapters")
    return resp_data, resp_chapters


def parse_2021(chapter: Dict, video_id: str, year: int) -> RawEntry:
    """Turn one chapter of a 2021 top 100 video into a RawEntry.

    The 2021 chapter titles do not carry the position, so it is derived: the
    video title contains "From <a> to <b>", the chapter at index ``start_from``
    (second item of the video's ``data_2021`` entry) gets position ``<a>``, and
    each following chapter counts down by one.

    Args:
        chapter: Chapter being parsed; its "title" and "time" keys are read.
        video_id: YouTube id of the video the chapter belongs to.
        year: Year stored on the returned entry.

    Returns:
        A RawEntry attributed to "Alex" whose game is the chapter title.

    Raises:
        IndexError: If the video title contains no "From <a> to <b>" range.
        ValueError: If the chapter is not among the video's chapters, or the
            video is not listed in ``data_2021``.
    """
    video_data, video_chapters = get_video_data(video_id)
    video_title = video_data["items"][0]["snippet"]["title"]

    # Find the starting point: the first number of the range in the video title.
    range_match = re.search(r"From\ (\d*)\ to\ (\d*)", video_title)
    if range_match is None:
        # Same exception type as before, with an actionable message.
        raise IndexError(f"no 'From X to Y' range in video title {video_title!r}")
    first_position = range_match.group(1)

    # Find the index of this chapter (matched by title) in the video's chapters.
    for index, candidate in enumerate(video_chapters["items"][0]["chapters"]["chapters"]):
        if candidate["title"] == chapter["title"]:
            break
    else:
        # Defaulting to index 0 here would silently produce a wrong position.
        raise ValueError(
            f"chapter {chapter['title']!r} not found in the chapters of video {video_id}"
        )

    # Find the index of the first ranked chapter for this video in data_2021.
    video_url = f"https://www.youtube.com/watch?v={video_id}"
    for item in data_2021:
        # A bare URL is shorthand for (url, 1, -1).
        item_url, start_from = (item, 1) if isinstance(item, str) else (item[0], item[1])
        if item_url == video_url:
            break
    else:
        raise ValueError(f"video {video_id} is not listed in data_2021")

    # Calculate the position: count down from the first position.
    offset = index - start_from
    position = int(first_position) - offset

    url = get_url_for_chapter(video_id, chapter["time"])
    return RawEntry(year, "Alex", chapter['title'], position, url)

def before_return_hook_2021(entries: List[RawEntry]) -> List[RawEntry]:
    """Return the 2021 entries unchanged: no manual correction is applied."""
    return entries

# Build the 2021 raw data with the 2021 parser and (no-op) hook
raw_data_2021 = build_raw_data(
    videos_data=data_2021,
    year=2021,
    parser=parse_2021,
    before_return_hook=before_return_hook_2021,
)
# Store it in the db
db.add_from_raw_data(raw_data_2021)

Should call this function only ONCE for QnQQ2igVvmg!
Should call this function only ONCE for L_5kNoP8fAk!
Should call this function only ONCE for fj7zIwzLM1Y!
Should call this function only ONCE for 05vYqiT3d2A!
Should call this function only ONCE for qXWeuzhDy0A!
Should call this function only ONCE for bgyYefZ3GhE!
Should call this function only ONCE for BC7TLbG5fvc!
Should call this function only ONCE for B4rXsEP8VDQ!
Should call this function only ONCE for nttm4rAZPes!
Should call this function only ONCE for 4tQNDSqc2R4!
