In [None]:
import sys
from pathlib import Path
from collections import defaultdict
from pprint import pprint
import json
from tinydb import Query, TinyDB

sys.path.append(str(Path.cwd().parent))
from pokeca_rec.src.pokecabook_crawler import crawl_gym_decks
from pokeca_rec.src.deck_categorizer import DeckCategorizer
from pokeca_rec.src.official_crawler import crawl_result_pages

In [None]:
# Categorizer shared by the database-building cells in this notebook. It is
# called as deck_categorizer(deck); those cells store "Others" as the category
# whenever the call returns a falsy value.
deck_categorizer = DeckCategorizer(category_db_path="../db/deck_category.json")

### Get decks from gym

In [None]:
# Crawl gym decks with the project crawler, using the card database at
# ../db/ptcg_card.db and both progress-bar flags switched on.
# NOTE(review): page_start=1 / num_pages=1 presumably limits the crawl to the
# first listing page only -- confirm against crawl_gym_decks.
# The result is consumed later in this notebook as a nested mapping:
# {date: {<key>: [deck, ...]}}.
gym_decks = crawl_gym_decks(
     page_start=1,
     num_pages=1,
     progress_bar_lv1=True,
     progress_bar_lv2=True,
     card_db="../db/ptcg_card.db"
)

In [None]:
# Rebuild the gym deck database from scratch out of the crawl result in
# `gym_decks`, which is walked as {date: {<key>: [deck, ...]}}.
db_file = "../db/gym_deck_db.json"

# Categorize every deck up front so the database file is written in a single
# batch rather than once per deck. A falsy category is stored as "Others".
gym_documents = [
    {"date": date, "category": deck_categorizer(deck) or "Others", "deck": deck}
    for date, decks in gym_decks.items()
    for deck_list in decks.values()
    for deck in deck_list
]

# The context manager closes the underlying storage file when the block ends,
# so the handle is not left open when `DB` is rebound later in the notebook.
with TinyDB(db_file, indent=4, ensure_ascii=False) as DB:
    DB.drop_tables()  # discard documents left over from a previous run
    DB.insert_multiple(gym_documents)

### Get decks from city league

In [None]:
import re

def datetime_transform(input_str: str) -> str:
    """Convert a Japanese-style date string to ``YYYY.MM.DD``.

    The first occurrence of ``<year>年<month>月<day>日`` in ``input_str`` is
    extracted; any surrounding text (such as a weekday suffix like ``(土)``)
    is discarded. Month and day may be written with one or two digits and
    are zero-padded in the result.

    Args:
        input_str: Text containing a date, e.g. ``"2024年04月06日(土)"``.

    Returns:
        The date formatted like ``"2024.04.06"``, or ``input_str`` unchanged
        when no date in that format is found.
    """
    # Month and day accept 1-2 digits so that unpadded dates ("2024年4月6日")
    # are normalized as well instead of falling through unchanged.
    match = re.search(r"(\d{4})年(\d{1,2})月(\d{1,2})日", input_str)
    if match is None:
        return input_str

    year, month, day = match.groups()
    return f"{year}.{month.zfill(2)}.{day.zfill(2)}"

In [None]:
# Crawl official result pages for the "City" league, using the card database
# at ../db/ptcg_card.db.
# NOTE(review): the three page arguments presumably restrict the crawl to one
# result page and one deck page, starting from page 1 -- confirm against
# crawl_result_pages.
# The result is consumed later in this notebook as {date_string: [deck, ...]},
# where each deck is indexed by "pokemons", "tools", "supporters", "stadiums"
# and "energies".
city_decks = crawl_result_pages(
    league="City",
    result_page_limit=1,
    deck_page_limit=1,
    start_page_num=1,
    card_db="../db/ptcg_card.db"
)

In [None]:
# Rebuild the city-league deck database from scratch out of the crawl result
# in `city_decks`, which is walked as {date_string: [deck, ...]}.
db_file = "../db/city_deck_db.json"

# Build all documents first so the database file is written in a single batch
# rather than once per deck.
city_documents = []
for date, decks in city_decks.items():
    date = datetime_transform(date)  # normalize the crawled date string
    for deck in decks:
        # NOTE(review): only these five sections are copied; any other keys
        # present on a crawled deck are dropped -- confirm this is intended.
        deck_sections = {
            section: deck[section]
            for section in ("pokemons", "tools", "supporters", "stadiums", "energies")
        }
        city_documents.append(
            {
                "date": date,
                # A falsy category from the categorizer is stored as "Others".
                "category": deck_categorizer(deck_sections) or "Others",
                "deck": deck_sections,
            }
        )

# The context manager closes the underlying storage file when the block ends.
with TinyDB(db_file, indent=4, ensure_ascii=False) as DB:
    DB.drop_tables()  # discard documents left over from a previous run
    DB.insert_multiple(city_documents)
