In [23]:
import time
import re

import pandas as pd
from collections import OrderedDict

import json
import os

# Lift every pandas display limit so frames are rendered without truncation.
for display_option in ("display.max_columns", "display.max_rows", "display.max_colwidth"):
    pd.set_option(display_option, None)

In [24]:
from deck_crawler.parse_deck import parse_events_from_official
from deck_crawler.parse_deck import reassign_category

In [25]:
# flags and parameters
# The event parse step runs when either flag is True. DEBUG_PARSE additionally
# makes the notebook start from an empty deck dict and use deck_db/test.json
# instead of deck_db/store.json as the store file.
RUN_PARSE = False
DEBUG_PARSE = False

# Page limits handed to parse_events_from_official as result_page_limit,
# event_page_limit and deck_page_limit respectively.
NUM_RESULT_PAGE = 30
NUM_EVENT_PAGE = 1000
NUM_DECK_PAGE = 2

# Analysis period; also embedded in the Excel report file name.
# Stored deck dates look like "2023年01月04日(水)".
# NOTE(review): END_DATE's month is not zero-padded, unlike START_DATE and the
# stored deck dates -- confirm the period filter copes with that.
START_DATE = "2022年12月02日"  # inclusive
END_DATE = "2023年1月04日"  # inclusive

# Key into the deck dict selecting the category to analyse; also used as the
# Excel output sub-folder and file-name prefix.
TARGET_CATEGORY = "LTB_カイリュー"

In [26]:
# create some folders
EXCEL_FOLDER = "excel"
DB_FOLDER = "deck_db"
LOG_FOLDER = "logs"
folders = [EXCEL_FOLDER, DB_FOLDER, LOG_FOLDER]

# exist_ok=True makes each call a no-op when the directory already exists,
# so no separate (race-prone) existence check is needed.
for folder in folders:
    os.makedirs(folder, exist_ok=True)

# Per-category sub-folder that receives the Excel reports.
os.makedirs(f"{EXCEL_FOLDER}/{TARGET_CATEGORY}", exist_ok=True)

In [27]:
# loading db
# With DEBUG_PARSE set, start from an empty deck dict and target test.json;
# otherwise target store.json and resume from it when it already exists.
decks = {}
store_file_name = f"{DB_FOLDER}/test.json"
if not DEBUG_PARSE:
    store_file_name = f"{DB_FOLDER}/store.json"
    if os.path.exists(store_file_name):
        # The store contains Japanese text: read it as UTF-8 explicitly
        # instead of relying on the platform's default encoding.
        with open(store_file_name, "r", encoding="utf-8") as f:
            decks = json.load(f)

# Re-run the project's categorisation over whatever was loaded
# (see deck_crawler.parse_deck.reassign_category for the rules).
decks = reassign_category(decks)
print("categories in previous result:")
print(decks.keys())

# Flat list of every stored deck code, across all categories.
store_code_list = [d["deck_code"] for category in decks for d in decks[category]]
print("\n")
print("number of decks in the previous result:")
print(len(store_code_list))

categories in previous result:
dict_keys(['ルナトーン', 'アル_ジュラルドン', 'かがやくムゲンダイナ', 'アル_レジドラゴVSTAR', 'Other_ギラティナVSTAR', 'ルギアVSTAR', 'レジエレキVMAX', 'Other_Lost', 'LTB', 'こくばバドレックスVMAX', 'アル_そらをとぶピカチュウ', 'キュレムVMAX', 'クロススイッチャー', 'レジドラゴVSTAR', 'ガラル マタドガス', 'ハピナスV', 'ミュウツーV-UNION', 'オリジンパルキアVSTAR', 'プテラVSTAR', 'LOST_ギラティナVSTAR', 'オリジンディアルガVSTAR', 'LTB_リザードン', 'LTB_ウッウ', 'LTB_ヤミラミ_リザードン', 'LTB_カイオーガ', 'LTB_カイリュー', 'LTB_空の封印石_水超鋼雷', 'LTB_空の封印石_草超鋼雷', 'LTB_空の封印石_水超闘雷', 'LTB_空の封印石_水超雷', 'LTB_空の封印石_水草超雷', 'LTB_空の封印石_水草超闘雷', 'LTB_空の封印石_水超鋼闘雷', 'LTB_空の封印石_other', 'LTB_空の封印石_水超鋼', 'LTB_空の封印石_超鋼雷', 'LTB_空の封印石_水草超', 'LTB_空の封印石_草超闘雷', 'LTB_空の封印石_草超鋼闘雷', 'LTB_空の封印石_水草超鋼雷', 'LTB_空の封印石_水草超鋼闘雷', 'ミュウVMAX', 'ヒスイ ゾロアークVSTAR', 'ムゲンダイナVMAX', 'ゾロア', 'ヒスイ ダイケンキVSTAR', 'アルセウス裏工作', 'レジ', 'others', 'ゾロア_ウインディ', 'ロトムVSTAR'])


number of decks in the previous result:
4292


In [28]:
# parse events
# Crawl limits forwarded to the parser as keyword arguments.
page_limits = {
    "result_page_limit": NUM_RESULT_PAGE,
    "event_page_limit": NUM_EVENT_PAGE,
    "deck_page_limit": NUM_DECK_PAGE,
}

t1 = time.time()
if RUN_PARSE or DEBUG_PARSE:
    # The return value is ignored; presumably the parser fills `decks` and
    # `store_code_list` in place -- confirm against deck_crawler.parse_deck.
    parse_events_from_official(decks, store_code_list, **page_limits)
t2 = time.time()

# Elapsed wall-clock time (near zero when parsing was skipped).
print()
print(f"{t2 - t1} seconds")


6.604194641113281e-05 seconds


In [29]:
# save to json
# ensure_ascii=False writes the Japanese text verbatim, so pin the file
# encoding to UTF-8 instead of depending on the platform default (which can
# fail or produce a non-portable file on non-UTF-8 locales).
with open(store_file_name, "w", encoding="utf-8") as f:
    json.dump(decks, f, ensure_ascii=False, indent=4)

In [30]:
# show all categories we have
# Print the deck count of each category, then the grand total.
total = 0
for k, category_decks in decks.items():
    category_size = len(category_decks)
    print(f"[{k}]:\n{category_size}\n")
    total += category_size

print(total)

[ルナトーン]:
25

[アル_ジュラルドン]:
242

[かがやくムゲンダイナ]:
69

[アル_レジドラゴVSTAR]:
12

[Other_ギラティナVSTAR]:
30

[ルギアVSTAR]:
1024

[レジエレキVMAX]:
83

[Other_Lost]:
73

[LTB]:
507

[こくばバドレックスVMAX]:
6

[アル_そらをとぶピカチュウ]:
130

[キュレムVMAX]:
87

[クロススイッチャー]:
179

[レジドラゴVSTAR]:
7

[ガラル マタドガス]:
179

[ハピナスV]:
41

[ミュウツーV-UNION]:
12

[オリジンパルキアVSTAR]:
157

[プテラVSTAR]:
17

[LOST_ギラティナVSTAR]:
122

[オリジンディアルガVSTAR]:
16

[LTB_リザードン]:
3

[LTB_ウッウ]:
1

[LTB_ヤミラミ_リザードン]:
124

[LTB_カイオーガ]:
92

[LTB_カイリュー]:
11

[LTB_空の封印石_水超鋼雷]:
41

[LTB_空の封印石_草超鋼雷]:
3

[LTB_空の封印石_水超闘雷]:
31

[LTB_空の封印石_水超雷]:
5

[LTB_空の封印石_水草超雷]:
2

[LTB_空の封印石_水草超闘雷]:
1

[LTB_空の封印石_水超鋼闘雷]:
3

[LTB_空の封印石_other]:
5

[LTB_空の封印石_水超鋼]:
3

[LTB_空の封印石_超鋼雷]:
5

[LTB_空の封印石_水草超]:
1

[LTB_空の封印石_草超闘雷]:
1

[LTB_空の封印石_草超鋼闘雷]:
1

[LTB_空の封印石_水草超鋼雷]:
2

[LTB_空の封印石_水草超鋼闘雷]:
1

[ミュウVMAX]:
400

[ヒスイ ゾロアークVSTAR]:
27

[ムゲンダイナVMAX]:
112

[ゾロア]:
62

[ヒスイ ダイケンキVSTAR]:
8

[アルセウス裏工作]:
32

[レジ]:
175

[others]:
92

[ゾロア_ウインディ]:
15

[ロトムVSTAR]:
15

4292


In [31]:
# show categories we have in the period
# Stored deck dates look like "2023年01月04日(水)", while the configured bounds
# carry no weekday suffix and may omit zero padding. Comparing the raw strings
# therefore mis-orders dates, so compare (year, month, day) tuples instead.
# The helper is defined in this cell so the cell is self-contained.
_DATE_PATTERN = re.compile(r"(\d+)年(\d+)月(\d+)日")


def _date_key(text):
    """Return (year, month, day) parsed from a 年/月/日 date string, or None if it does not match."""
    found = _DATE_PATTERN.search(str(text))
    if found is None:
        return None
    return tuple(int(part) for part in found.groups())


period_start = _date_key(START_DATE)
period_end = _date_key(END_DATE)
if period_start is None or period_end is None:
    raise ValueError(f"Cannot parse period bounds: {START_DATE!r} / {END_DATE!r}")

number_decks = 0
for k in decks.keys():
    deck_cnt = 0
    for deck in decks[k]:
        day = _date_key(deck["date"])
        # Decks whose date cannot be parsed are counted as outside the period.
        if day is not None and period_start <= day <= period_end:
            deck_cnt += 1

    print(f"{k}\t{deck_cnt}\n")
    # Remember the count of the category under analysis for the guard cell.
    if k == TARGET_CATEGORY:
        number_decks = deck_cnt

ルナトーン	15

アル_ジュラルドン	141

かがやくムゲンダイナ	69

アル_レジドラゴVSTAR	8

Other_ギラティナVSTAR	15

ルギアVSTAR	494

レジエレキVMAX	40

Other_Lost	59

LTB	299

こくばバドレックスVMAX	6

アル_そらをとぶピカチュウ	96

キュレムVMAX	42

クロススイッチャー	89

レジドラゴVSTAR	4

ガラル マタドガス	132

ハピナスV	29

ミュウツーV-UNION	8

オリジンパルキアVSTAR	81

プテラVSTAR	8

LOST_ギラティナVSTAR	68

オリジンディアルガVSTAR	9

LTB_リザードン	0

LTB_ウッウ	0

LTB_ヤミラミ_リザードン	47

LTB_カイオーガ	66

LTB_カイリュー	11

LTB_空の封印石_水超鋼雷	41

LTB_空の封印石_草超鋼雷	3

LTB_空の封印石_水超闘雷	31

LTB_空の封印石_水超雷	5

LTB_空の封印石_水草超雷	2

LTB_空の封印石_水草超闘雷	1

LTB_空の封印石_水超鋼闘雷	3

LTB_空の封印石_other	5

LTB_空の封印石_水超鋼	3

LTB_空の封印石_超鋼雷	5

LTB_空の封印石_水草超	1

LTB_空の封印石_草超闘雷	1

LTB_空の封印石_草超鋼闘雷	1

LTB_空の封印石_水草超鋼雷	2

LTB_空の封印石_水草超鋼闘雷	1

ミュウVMAX	222

ヒスイ ゾロアークVSTAR	15

ムゲンダイナVMAX	84

ゾロア	22

ヒスイ ダイケンキVSTAR	6

アルセウス裏工作	19

レジ	102

others	59

ゾロア_ウインディ	6

ロトムVSTAR	4



In [32]:
# number of decks for the target category
print(number_decks)
if number_decks < 1:  # nothing to analyse: stop the notebook here
    no_decks_message = f"No decks for {TARGET_CATEGORY} during {START_DATE} to {END_DATE}"
    raise Exception(no_decks_message)

11


In [33]:
# Analysis
common_cols = ["date", "prefecture", "num_players", "rank"]
int_cols = ["num_players", "rank"]

# Stored deck dates look like "2023年01月04日(水)", while the configured bounds
# carry no weekday suffix and may omit zero padding, so dates are compared as
# (year, month, day) tuples rather than raw strings.
# The helper is defined in this cell so the cell is self-contained.
_DATE_PATTERN = re.compile(r"(\d+)年(\d+)月(\d+)日")


def _date_key(text):
    """Return (year, month, day) parsed from a 年/月/日 date string, or None if it does not match."""
    found = _DATE_PATTERN.search(str(text))
    if found is None:
        return None
    return tuple(int(part) for part in found.groups())


def build_card_table(category_decks, card_type, start_date, end_date):
    """Build the usage table of one card type for the decks inside a period.

    Args:
        category_decks: list of deck dicts; each needs "deck_code", "date" and
            a ``card_type`` entry mapping card name -> number of copies.
        card_type: key of the card mapping to tabulate (e.g. "pokemons").
        start_date, end_date: inclusive period bounds as 年/月/日 strings.

    Returns:
        DataFrame whose first two rows are "pick_rate" and "avg_num_used",
        followed by one row per in-period deck (indexed by deck code, sorted
        by date descending). Columns are ``common_cols`` followed by the card
        columns ordered by descending pick rate.

    Raises:
        ValueError: if a bound cannot be parsed or no deck is in the period.
    """
    first_day, last_day = _date_key(start_date), _date_key(end_date)
    if first_day is None or last_day is None:
        raise ValueError(f"Cannot parse period bounds: {start_date!r} / {end_date!r}")

    # Collect one record per in-period deck and build the frame in one go
    # (instead of concatenating a one-row frame per deck).
    deck_codes, rows = [], []
    for deck in category_decks:
        day = _date_key(deck["date"])
        if day is None or not (first_day <= day <= last_day):
            continue
        row = OrderedDict((col, deck.get(col, "")) for col in common_cols)
        row.update(deck[card_type])
        deck_codes.append(deck["deck_code"])
        rows.append(row)
    if not rows:
        raise ValueError(f"No decks between {start_date} and {end_date}")

    # A card missing from a deck means zero copies of it.
    table = pd.DataFrame(rows, index=deck_codes).fillna(0)

    # sort rows by date
    table = table.sort_values(by=["date"], ascending=False)

    # select cols for analysis
    card_cols = [col for col in table.columns if col not in common_cols]

    # calculate
    num_decks = table.shape[0]
    num_used = table[card_cols].sum(axis="rows", numeric_only=True)
    num_picked = table[card_cols].astype(bool).sum(axis="rows")
    summary = pd.DataFrame(
        {
            "pick_rate": num_picked / num_decks,  # share of decks playing the card
            "avg_num_used": num_used / num_picked,  # mean copies among decks playing it
        }
    ).T.reindex(columns=card_cols)

    # put the summary rows on top of the per-deck rows
    table = pd.concat([summary, table])

    # sort cols by pick rate; common columns (NaN in that row) are moved back to the front
    table = table.sort_values("pick_rate", axis=1, ascending=False)
    ranked_cards = [col for col in table.columns if col not in common_cols]
    table = table[common_cols + ranked_cards]

    # format data type: nullable integers, since the summary rows hold no value here
    return table.astype({col: "Int64" for col in int_cols})


# One table per card type, in the order the Excel sheets are written.
df_list = []
for card_type in ["pokemons", "tools", "supporters", "stadiums", "energies"]:
    df = build_card_table(decks[TARGET_CATEGORY], card_type, START_DATE, END_DATE)
    df_list.append(df)

In [34]:
# excel writer
sheet_names = ["pokemons", "tools", "supporters", "stadiums", "energies"]
excel_path = f"{EXCEL_FOLDER}/{TARGET_CATEGORY}/{TARGET_CATEGORY}-{START_DATE}-{END_DATE}.xlsx"

# The context manager saves and closes the workbook exactly once on exit.
# (Closing the workbook by hand and then calling writer.save() closed it twice,
# and ExcelWriter.save() no longer exists in pandas 2.x.)
with pd.ExcelWriter(excel_path, engine="xlsxwriter", mode="w") as writer:
    workbook = writer.book
    header_format = workbook.add_format(
        {"bold": True, "text_wrap": True, "valign": "top"}
    )

    for sheet_id, sheet_name in enumerate(sheet_names):
        sheet_df = df_list[sheet_id]
        sheet_df.to_excel(writer, sheet_name=sheet_name, float_format="%.2f")

        # Get the xlsxwriter worksheet object for this sheet
        worksheet = writer.sheets[sheet_name]

        # Make each deck code in column A a hyperlink to its deck page.
        for i, deck_id in enumerate(sheet_df.index):
            if deck_id in ["avg_num_used", "pick_rate"]:
                continue  # summary rows are not decks

            # Sheet row 1 is the header, so frame row i sits on sheet row i + 2.
            row = i + 2

            worksheet.write_url(
                f"A{row}",
                f"https://www.pokemon-card.com/deck/confirm.html/deckID/{deck_id}",
                string=f"{deck_id}",
            )

        # formatting: rewrite the header cells with the wrapped/bold format
        # (column 0 holds the index, hence the + 1 offset)
        for col_num, value in enumerate(sheet_df.columns.values):
            worksheet.write(0, col_num + 1, value, header_format)
        worksheet.autofit()

In [35]:
# Usage table for the "pokemons" card type (first entry of df_list).
df_list[0]

Unnamed: 0,date,prefecture,num_players,rank,ヤミラミ\nS11 044/100,カイリューV\nS7R 042/067,キュワワー\nS11 049/100,カイオーガ\nS8a 007/028,ライコウV\nS12a 038/172,かがやくゲッコウガ\nS12a 033/172,かがやくゲッコウガ\nS9a 026/067,ライコウV\nSI 137/414,ネオラントV\nSPD 002/020,ドラピオンV\nS12a 085/172,キュワワー\nS12a 064/172,ウッウ\nS11 033/100,ネオラントV\nS9 104/100,マナフィ\nS12a 029/172,コオリッポ\nSI 123/414,ネオラントV\nS9 030/100,カイリューV\nS10b 078/071,ネオラントV\nSPZ 001/020,ネオラントV\nS9 105/100,ヤレユータン\nS8b 212/184,ガラル サンダーV\nSI 217/414,ネオラントV\nS12a 026/172
pick_rate,,,,,1.0,0.909091,0.818182,0.727273,0.545455,0.545455,0.454545,0.454545,0.363636,0.272727,0.181818,0.181818,0.181818,0.181818,0.181818,0.181818,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909
avg_num_used,,,,,1.0,1.0,4.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
pSypy2-7wzfiA-pyXRXM,2023年01月04日(水),奈良県,96.0,10.0,1.0,1.0,4.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
Y48ccY-vlgUnf-KYxxDx,2023年01月02日(月),東京都,32.0,3.0,1.0,1.0,4.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
dvvwkk-PURvbj-kkFVkv,2022年12月30日(金),群馬県,32.0,2.0,1.0,1.0,4.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
yySSXM-6Ii86T-yyyER3,2022年12月30日(金),東京都,64.0,6.0,1.0,1.0,4.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1kVfkV-EP6eYK-kkkVfk,2022年12月29日(木),埼玉県,70.0,10.0,1.0,1.0,4.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
JKc8cx-o2eLmv-888D4c,2022年12月29日(木),鹿児島県,48.0,12.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
iNnngn-A0P52C-ngnnLL,2022年12月28日(水),京都,32.0,6.0,1.0,1.0,4.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
kkkVVk-9cCUy3-FFvkFv,2022年12月25日(日),大阪府,32.0,3.0,1.0,1.0,4.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [36]:
# Usage table for the "tools" card type (second entry of df_list).
df_list[1]

Unnamed: 0,date,prefecture,num_players,rank,バトルVIPパス,クイックボール,ヒスイのヘビーボール,回収ネット,あなぬけのヒモ,いれかえカート,ミラージュゲート,ふつうのつりざお,エネルギーリサイクル,森の封印石,ロストスイーパー,トレッキングシューズ,ふうせん,霧の水晶,空の封印石
pick_rate,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.727273,0.454545,0.272727,0.090909
avg_num_used,,,,,4.0,2.727273,1.0,4.0,3.636364,3.181818,4.0,2.181818,1.454545,2.545455,3.0,2.125,1.0,1.0,1.0
pSypy2-7wzfiA-pyXRXM,2023年01月04日(水),奈良県,96.0,10.0,4.0,2.0,1.0,4.0,3.0,4.0,4.0,3.0,1.0,2.0,3.0,0.0,0.0,1.0,0.0
Y48ccY-vlgUnf-KYxxDx,2023年01月02日(月),東京都,32.0,3.0,4.0,3.0,1.0,4.0,3.0,3.0,4.0,3.0,1.0,2.0,3.0,0.0,1.0,1.0,0.0
dvvwkk-PURvbj-kkFVkv,2022年12月30日(金),群馬県,32.0,2.0,4.0,3.0,1.0,4.0,3.0,3.0,4.0,3.0,1.0,2.0,3.0,0.0,1.0,1.0,0.0
yySSXM-6Ii86T-yyyER3,2022年12月30日(金),東京都,64.0,6.0,4.0,2.0,1.0,4.0,4.0,2.0,4.0,2.0,1.0,2.0,3.0,2.0,0.0,0.0,1.0
1kVfkV-EP6eYK-kkkVfk,2022年12月29日(木),埼玉県,70.0,10.0,4.0,3.0,1.0,4.0,4.0,4.0,4.0,2.0,2.0,3.0,3.0,1.0,0.0,0.0,0.0
JKc8cx-o2eLmv-888D4c,2022年12月29日(木),鹿児島県,48.0,12.0,4.0,3.0,1.0,4.0,4.0,3.0,4.0,2.0,2.0,3.0,3.0,2.0,1.0,0.0,0.0
iNnngn-A0P52C-ngnnLL,2022年12月28日(水),京都,32.0,6.0,4.0,2.0,1.0,4.0,4.0,3.0,4.0,1.0,2.0,2.0,3.0,3.0,1.0,0.0,0.0
kkkVVk-9cCUy3-FFvkFv,2022年12月25日(日),大阪府,32.0,3.0,4.0,3.0,1.0,4.0,3.0,3.0,4.0,2.0,2.0,3.0,3.0,2.0,1.0,0.0,0.0


In [37]:
# Usage table for the "supporters" card type (third entry of df_list).
df_list[2]

Unnamed: 0,date,prefecture,num_players,rank,アクロマの実験
pick_rate,,,,,1.0
avg_num_used,,,,,3.272727
pSypy2-7wzfiA-pyXRXM,2023年01月04日(水),奈良県,96.0,10.0,4.0
Y48ccY-vlgUnf-KYxxDx,2023年01月02日(月),東京都,32.0,3.0,3.0
dvvwkk-PURvbj-kkFVkv,2022年12月30日(金),群馬県,32.0,2.0,3.0
yySSXM-6Ii86T-yyyER3,2022年12月30日(金),東京都,64.0,6.0,4.0
1kVfkV-EP6eYK-kkkVfk,2022年12月29日(木),埼玉県,70.0,10.0,3.0
JKc8cx-o2eLmv-888D4c,2022年12月29日(木),鹿児島県,48.0,12.0,3.0
iNnngn-A0P52C-ngnnLL,2022年12月28日(水),京都,32.0,6.0,4.0
kkkVVk-9cCUy3-FFvkFv,2022年12月25日(日),大阪府,32.0,3.0,3.0


In [38]:
# Usage table for the "stadiums" card type (fourth entry of df_list).
df_list[3]

Unnamed: 0,date,prefecture,num_players,rank,ポケストップ
pick_rate,,,,,1.0
avg_num_used,,,,,2.181818
pSypy2-7wzfiA-pyXRXM,2023年01月04日(水),奈良県,96.0,10.0,2.0
Y48ccY-vlgUnf-KYxxDx,2023年01月02日(月),東京都,32.0,3.0,3.0
dvvwkk-PURvbj-kkFVkv,2022年12月30日(金),群馬県,32.0,2.0,3.0
yySSXM-6Ii86T-yyyER3,2022年12月30日(金),東京都,64.0,6.0,2.0
1kVfkV-EP6eYK-kkkVfk,2022年12月29日(木),埼玉県,70.0,10.0,2.0
JKc8cx-o2eLmv-888D4c,2022年12月29日(木),鹿児島県,48.0,12.0,2.0
iNnngn-A0P52C-ngnnLL,2022年12月28日(水),京都,32.0,6.0,2.0
kkkVVk-9cCUy3-FFvkFv,2022年12月25日(日),大阪府,32.0,3.0,2.0


In [39]:
# Usage table for the "energies" card type (fifth entry of df_list).
df_list[4]

Unnamed: 0,date,prefecture,num_players,rank,基本水エネルギー,基本雷エネルギー,基本超エネルギー,基本闘エネルギー
pick_rate,,,,,1.0,1.0,1.0,0.090909
avg_num_used,,,,,4.636364,3.0,2.0,1.0
pSypy2-7wzfiA-pyXRXM,2023年01月04日(水),奈良県,96.0,10.0,5.0,3.0,2.0,1.0
Y48ccY-vlgUnf-KYxxDx,2023年01月02日(月),東京都,32.0,3.0,5.0,3.0,2.0,0.0
dvvwkk-PURvbj-kkFVkv,2022年12月30日(金),群馬県,32.0,2.0,5.0,3.0,2.0,0.0
yySSXM-6Ii86T-yyyER3,2022年12月30日(金),東京都,64.0,6.0,4.0,3.0,2.0,0.0
1kVfkV-EP6eYK-kkkVfk,2022年12月29日(木),埼玉県,70.0,10.0,5.0,3.0,2.0,0.0
JKc8cx-o2eLmv-888D4c,2022年12月29日(木),鹿児島県,48.0,12.0,4.0,3.0,2.0,0.0
iNnngn-A0P52C-ngnnLL,2022年12月28日(水),京都,32.0,6.0,5.0,3.0,2.0,0.0
kkkVVk-9cCUy3-FFvkFv,2022年12月25日(日),大阪府,32.0,3.0,5.0,3.0,2.0,0.0


In [40]:
# Display every deck stored under the "others" category key.
# NOTE(review): this prints the full list; slice it if only a sample is needed.
decks["others"]

[{'deck_link': 'https://www.pokemon-card.com/deck/confirm.html/deckID/kvFf1v-vebuMc-kffFVV',
  'deck_code': 'kvFf1v-vebuMc-kffFVV',
  'pokemons': {'アルセウスV\nS-P 267/S-P': 4,
   'アルセウスVSTAR\nS9 084/100': 3,
   'ビッパ\nS9 081/100': 2,
   'ビーダル\nS12a 122/172': 2,
   'ガラル ファイヤー\nS7D 026/067': 2,
   'かがやくサーナイト\nS12a 055/172': 1,
   'ノコッチ\nS12a 118/172': 1},
  'tools': {'クイックボール': 4,
   'ハイパーボール': 4,
   'しんかのおこう': 1,
   'あなぬけのヒモ': 1,
   'ともだちてちょう': 1,
   'ふつうのつりざお': 1,
   'こだわりベルト': 1,
   '大きなおまもり': 2,
   'ふうせん': 1},
  'supporters': {'博士の研究': 1,
   'マリィ': 4,
   'チェレンの気くばり': 2,
   'ボスの指令': 1,
   'セレナ': 1,
   'ツツジ': 1,
   'キバナ': 1,
   'クララ': 1},
  'stadiums': {'頂への雪道': 2, 'シンオウ神殿': 1},
  'energies': {'Vガードエネルギー': 1, 'ダブルターボエネルギー': 4, '基本悪エネルギー': 9},
  'rank': 8,
  'num_players': 96,
  'date': '2023年01月04日(水)',
  'prefecture': '奈良県'},
 {'deck_link': 'https://www.pokemon-card.com/deck/confirm.html/deckID/pMypyy-egU9OJ-y3Sppy',
  'deck_code': 'pMypyy-egU9OJ-y3Sppy',
  'pokemons': {'ドラパルトVMAX\nS4a 31

In [41]:
# Number of decks stored under the "others" category key.
len(decks["others"])

92