In [None]:
from parse_deck import parse_event_to_deck

In [None]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options

import time
import re
from tqdm import tqdm

chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--start-maximized")

In [None]:
def wait_loading_circle(timeout: int = 20):
    WebDriverWait(driver, 20).until(EC.invisibility_of_element_located((By.XPATH, "//div[@class='sk-circle-container']")))
    

In [None]:
# parse CL event links from official website
url = "https://players.pokemon-card.com/event/result/list"
driver = webdriver.Chrome(options=chrome_options)  # options=chrome_options
driver.implicitly_wait(10) # seconds
driver.get(url)

page_limit = 5
event_limit = 20
cl_events = []

# decks = {
#     deck category 1: [
#         {
#             deck_link: "",
#             deck_code: "",
#             pokemons: {},
#             tools: {},
#             supporters: {},
#             stages: {},
#             energies: {},
#             rank: 1,
#             num_people: 32,
#             date: datetime
#         }, ...
#     ],
#     deck category 2:...
# }
all_categories = ["ルギアVSTAR", "ミュウVMAX", "ジュラルドンVMAX", "ゾロア", "others"]
decks = {}

page_cnt = 0
event_cnt = 0
while 1:
    events = driver.find_elements(By.CLASS_NAME, "eventListItem")
    for event in tqdm(events):
        title = event.find_element(By.CLASS_NAME, "title")
        if "シティリーグ" in title.text:
            num_people_str = event.find_element(By.CLASS_NAME, "capacity").text
            num_people = re.findall(r'\d+', num_people_str)
            num_people = int(num_people[0]) if len(num_people) == 1 else None
            event_link = event.get_attribute("href")
            parse_event_to_deck(event_link, num_people, decks, all_categories)
            event_cnt += 1
    page_cnt += 1

    if page_cnt >= page_limit or event_cnt >= event_limit:
        break

    # nevigate to the next page
    driver.find_element(By.CLASS_NAME, "btn.next").click()
    wait_loading_circle()

driver.close()

In [None]:
# len(decks)

In [None]:
# deck_name = []
# for d in decks["others"]:
#     lead = list(d["pokemons"].keys())[0]
#     if lead not in deck_name:
#         deck_name.append(lead)
        
# print(deck_name)

In [None]:
# for d in decks["others"]:
#     lead = list(d["pokemons"].keys())[0]
#     if lead == "ゾロア":
#         print(d)
#         print(d["deck_link"])
#         print("\n---\n")

In [29]:
import pandas as pd
from collections import OrderedDict

target_category = "ゾロア"
df_list = []

for card_type in ["pokemons", "tools", "supporters", "stages", "energies"]:
    # df init
    df = pd.DataFrame()
    for _, deck in enumerate(decks[target_category]):
        deck_code = deck["deck_code"]  # row id
        pokecard = OrderedDict()
        pokecard["date"] = deck["date"]
        pokecard["num_people"] = deck["num_people"]
        pokecard["rank"] = deck["rank"]
        pokecard.update(deck[card_type])

        if _ == 0:
            df = pd.DataFrame(pokecard, index=[deck_code])
        else:
            df.loc[deck_code] = pokecard
    df = df.fillna(0)

    # select cols for analysis
    col_list = list(df)
    col_list.remove("date")
    col_list.remove("num_people")
    col_list.remove("rank")

    # calculate
    num_decks = df.shape[0]
    num_used = df[col_list].sum(axis='rows', numeric_only=True)
    num_picked = df[col_list].astype(bool).sum(axis='rows')
    
    df.loc["avg_num_used"] = {}
    df.loc["pick_rate"] = {}
    avg_num_used = num_used / num_picked
    pick_rate = num_picked / num_decks
    for col in col_list:
        df.loc["avg_num_used", col] = avg_num_used[col]
        df.loc["pick_rate", col] = pick_rate[col]

    # reorder rows in df
    num_rows = df.shape[0]
    target_rows = [num_rows-1, num_rows-2]
    idx = target_rows + [i for i in range(len(df)) if i not in target_rows]
    df = df.iloc[idx]
    
    df_list.append(df)

In [30]:
df_list[0]

Unnamed: 0,date,num_people,rank,ゾロア,ゾロアーク,チラーミィ,チラチーノ,マナフィ,バサギリ,ライチュウ,テールナー,グラエナ,タルップル,ヤドラン,ヒスイ ゾロアーク,かがやくジラーチ
pick_rate,,,,1.0,1.0,0.75,0.75,1.0,0.875,1.0,0.75,1.0,1.0,1.0,0.375,0.5
avg_num_used,,,,4.0,4.0,3.333333,2.833333,1.25,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
LnQLQn-FLn7fe-nLnnNn,2022年11月23日(水),48.0,5.0,4.0,4.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
yyyM2M-Zgfdkb-SME22M,2022年11月20日(日),32.0,5.0,4.0,4.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2MyMyy-c1qVpR-Sy3MEM,2022年11月20日(日),48.0,2.0,4.0,4.0,4.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
RMXyM3-PaMcu0-yMypM2,2022年11月20日(日),48.0,5.0,4.0,4.0,4.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0
88xxc8-UAuRpB-8Yxc4Y,2022年11月20日(日),76.0,5.0,4.0,4.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0
XRMpyp-lHu00T-MMyMRy,2022年11月20日(日),32.0,1.0,4.0,4.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0
VVFkkV-HetqHG-1dvF5k,2022年11月20日(日),34.0,5.0,4.0,4.0,3.0,3.0,2.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0
kkkkdw-bWIMXK-VVvFVw,2022年11月20日(日),32.0,5.0,4.0,4.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0


In [31]:
df_list[1]

Unnamed: 0,date,num_people,rank,ハイパーボール,レベルボール,しんかのおこう,ふうせん,ふつうのつりざお,レスキューキャリー,やまびこホーン,ロストスイーパー
pick_rate,,,,0.875,1.0,1.0,0.25,1.0,1.0,0.25,0.125
avg_num_used,,,,2.571429,3.75,3.5,1.0,2.0,1.5,1.0,1.0
LnQLQn-FLn7fe-nLnnNn,2022年11月23日(水),48.0,5.0,3.0,4.0,3.0,1.0,2.0,1.0,1.0,1.0
yyyM2M-Zgfdkb-SME22M,2022年11月20日(日),32.0,5.0,1.0,4.0,4.0,0.0,2.0,2.0,0.0,0.0
2MyMyy-c1qVpR-Sy3MEM,2022年11月20日(日),48.0,2.0,0.0,4.0,3.0,0.0,1.0,2.0,0.0,0.0
RMXyM3-PaMcu0-yMypM2,2022年11月20日(日),48.0,5.0,1.0,4.0,4.0,0.0,2.0,2.0,0.0,0.0
88xxc8-UAuRpB-8Yxc4Y,2022年11月20日(日),76.0,5.0,4.0,3.0,4.0,0.0,2.0,1.0,0.0,0.0
XRMpyp-lHu00T-MMyMRy,2022年11月20日(日),32.0,1.0,3.0,4.0,3.0,1.0,2.0,1.0,1.0,0.0
VVFkkV-HetqHG-1dvF5k,2022年11月20日(日),34.0,5.0,2.0,4.0,3.0,0.0,2.0,2.0,0.0,0.0
kkkkdw-bWIMXK-VVvFVw,2022年11月20日(日),32.0,5.0,4.0,3.0,4.0,0.0,3.0,1.0,0.0,0.0


In [32]:
df_list[2]

Unnamed: 0,date,num_people,rank,セレナ,博士の研究,マリィ,ボスの指令
pick_rate,,,,1.0,0.875,0.75,0.75
avg_num_used,,,,3.625,3.142857,1.666667,1.166667
LnQLQn-FLn7fe-nLnnNn,2022年11月23日(水),48.0,5.0,4.0,2.0,2.0,1.0
yyyM2M-Zgfdkb-SME22M,2022年11月20日(日),32.0,5.0,4.0,3.0,2.0,1.0
2MyMyy-c1qVpR-Sy3MEM,2022年11月20日(日),48.0,2.0,4.0,3.0,2.0,1.0
RMXyM3-PaMcu0-yMypM2,2022年11月20日(日),48.0,5.0,4.0,3.0,0.0,1.0
88xxc8-UAuRpB-8Yxc4Y,2022年11月20日(日),76.0,5.0,4.0,4.0,1.0,0.0
XRMpyp-lHu00T-MMyMRy,2022年11月20日(日),32.0,1.0,4.0,3.0,1.0,1.0
VVFkkV-HetqHG-1dvF5k,2022年11月20日(日),34.0,5.0,3.0,4.0,0.0,0.0
kkkkdw-bWIMXK-VVvFVw,2022年11月20日(日),32.0,5.0,2.0,0.0,2.0,2.0


In [33]:
df_list[3]

Unnamed: 0,date,num_people,rank,頂への雪道
pick_rate,,,,0.75
avg_num_used,,,,1.666667
LnQLQn-FLn7fe-nLnnNn,2022年11月23日(水),48.0,5.0,2.0
yyyM2M-Zgfdkb-SME22M,2022年11月20日(日),32.0,5.0,3.0
2MyMyy-c1qVpR-Sy3MEM,2022年11月20日(日),48.0,2.0,1.0
RMXyM3-PaMcu0-yMypM2,2022年11月20日(日),48.0,5.0,0.0
88xxc8-UAuRpB-8Yxc4Y,2022年11月20日(日),76.0,5.0,1.0
XRMpyp-lHu00T-MMyMRy,2022年11月20日(日),32.0,1.0,2.0
VVFkkV-HetqHG-1dvF5k,2022年11月20日(日),34.0,5.0,0.0
kkkkdw-bWIMXK-VVvFVw,2022年11月20日(日),32.0,5.0,1.0


In [34]:
df_list[4]

Unnamed: 0,date,num_people,rank,ダブルターボエネルギー,キャプチャーエネルギー,ツインエネルギー,ギフトエネルギー
pick_rate,,,,0.875,1.0,1.0,0.125
avg_num_used,,,,2.0,3.125,4.0,1.0
LnQLQn-FLn7fe-nLnnNn,2022年11月23日(水),48.0,5.0,2.0,3.0,4.0,1.0
yyyM2M-Zgfdkb-SME22M,2022年11月20日(日),32.0,5.0,3.0,3.0,4.0,0.0
2MyMyy-c1qVpR-Sy3MEM,2022年11月20日(日),48.0,2.0,1.0,2.0,4.0,0.0
RMXyM3-PaMcu0-yMypM2,2022年11月20日(日),48.0,5.0,1.0,4.0,4.0,0.0
88xxc8-UAuRpB-8Yxc4Y,2022年11月20日(日),76.0,5.0,1.0,4.0,4.0,0.0
XRMpyp-lHu00T-MMyMRy,2022年11月20日(日),32.0,1.0,4.0,2.0,4.0,0.0
VVFkkV-HetqHG-1dvF5k,2022年11月20日(日),34.0,5.0,2.0,4.0,4.0,0.0
kkkkdw-bWIMXK-VVvFVw,2022年11月20日(日),32.0,5.0,0.0,3.0,4.0,0.0
