In [1]:
import time
import re

import pandas as pd
from collections import OrderedDict

import json
import os

# Lift pandas' display limits (columns, rows, cell width) by setting each
# option to None, so frames are rendered without truncation.
for display_option in ('display.max_columns', 'display.max_rows', 'display.max_colwidth'):
    pd.set_option(display_option, None)

In [2]:
from deck_crawler.parse_deck import parse_events_from_official
from deck_crawler.parse_deck import reassign_category

In [3]:
# flags
# The crawl cell calls parse_events_from_official when either RUN_PARSE or
# DEBUG_PARSE is true. DEBUG_PARSE also decides, in the loading cell, which
# store file is used and whether a previous result is read back.
RUN_PARSE = True
DEBUG_PARSE = True

# Limits forwarded to parse_events_from_official as result_page_limit,
# event_page_limit and deck_page_limit respectively.
# NOTE(review): presumably the number of pages crawled at each level - confirm
# against deck_crawler.parse_deck.
NUM_RESULT_PAGE = 2
NUM_EVENT_PAGE = 50
NUM_DECK_PAGE = 2

In [4]:
# create some folders
# Output locations used by the notebook: Excel reports, the JSON deck store,
# and logs.
EXCEL_FOLDER = "excel"
DB_FOLDER = "deck_db"
LOG_FOLDER = "logs"
folders = [EXCEL_FOLDER, DB_FOLDER, LOG_FOLDER]
for folder in folders:
    # exist_ok=True keeps the cell safe to re-run and removes the
    # check-then-create race of os.path.exists() followed by os.makedirs().
    # A non-directory already occupying the name now raises FileExistsError
    # here instead of being silently skipped.
    os.makedirs(folder, exist_ok=True)

In [5]:
# loading
# Start from an empty deck collection; a previously saved result is only read
# back when DEBUG_PARSE is off.
# NOTE(review): debug runs target 'store.json' while non-debug runs target
# 'test.json'. The file names suggest the opposite pairing was intended -
# confirm before relying on either file as the long-lived store.
decks = {}
if DEBUG_PARSE:
    store_file_name = 'deck_db/store.json'
else:
    store_file_name = 'deck_db/test.json'
    if os.path.exists(store_file_name):
        with open(store_file_name, 'r') as stored:
            decks = json.loads(stored.read())

# Re-bucket whatever was loaded before reporting on it.
decks = reassign_category(decks)
print("categories in previous result:", decks.keys(), sep="\n")

# Deck codes already present in the previous result; handed to the crawler
# in the parse cell.
store_code_list = [
    entry["deck_code"]
    for category in decks.keys()
    for entry in decks[category]
]
print('\n', "number of decks in the previous result:", len(store_code_list), sep="\n")

categories in previous result:
dict_keys([])


number of decks in the previous result:
0


In [6]:
# parse events
# Crawl the official result pages and time the run. parse_events_from_official
# receives `decks` and `store_code_list` together with the page limits; its
# return value is not used here.
t1 = time.time()
if RUN_PARSE or DEBUG_PARSE:
    page_limits = {
        "result_page_limit": NUM_RESULT_PAGE,
        "event_page_limit": NUM_EVENT_PAGE,
        "deck_page_limit": NUM_DECK_PAGE,
    }
    parse_events_from_official(decks, store_code_list, **page_limits)
t2 = time.time()

# Blank line first, then the wall-clock duration of the cell.
elapsed_seconds = t2 - t1
print(f"\n{elapsed_seconds} seconds")

Processing result page: 0: 100%|██████████| 20/20 [03:56<00:00, 11.81s/it]
Processing result page: 1: 100%|██████████| 20/20 [03:03<00:00,  9.17s/it]

426.4015920162201 seconds


In [7]:
# save to json
# Serialize before opening the file: open(..., 'w') truncates immediately, so
# encoding straight into the handle would leave a half-written store behind
# if `decks` turned out not to be JSON-serializable.
serialized_decks = json.dumps(decks, ensure_ascii=False, indent=4)

# NOTE(review): the file is written with the platform default text encoding,
# the same way the loading cell reads it. If an explicit encoding='utf-8' is
# introduced, change the reader and the writer together.
with open(store_file_name, 'w') as f:
    f.write(serialized_decks)

In [8]:
# show all categories we have
# Print every category with its deck count, followed by the grand total.
category_sizes = {name: len(decks[name]) for name in decks.keys()}
for name, size in category_sizes.items():
    print(f"[{name}]:\n{size}\n")

total = sum(category_sizes.values())
print(total)

[ルギアVSTAR]:
98

[アル_ジュラルドン]:
37

[ミュウVMAX]:
37

[LTB_カイオーガ]:
8

[ムゲンダイナVMAX]:
22

[Other_Lost]:
13

[ヒスイ ダイケンキVSTAR]:
1

[LOST_ギラティナVSTAR]:
18

[アル_そらをとぶピカチュウ]:
34

[レジ]:
21

[ハピナスV]:
6

[オリジンパルキアVSTAR]:
10

[アル_レジドラゴVSTAR]:
2

[LTB]:
9

[ゾロア_ウインディ]:
2

[キュレムVMAX]:
13

[ガラル マタドガス]:
4

[others]:
19

[レジエレキVMAX]:
7

[LTB_ヤミラミ_リザードン]:
10

[ヒスイ ゾロアークVSTAR]:
5

[LTB_空の封印石_水草超闘雷]:
1

[こくばバドレックスVMAX]:
2

[アルセウス裏工作]:
4

[LTB_空の封印石_水超鋼闘雷]:
2

[LTB_空の封印石_水超鋼雷]:
7

[LTB_空の封印石_水超闘雷]:
6

[ロトムVSTAR]:
1

[LTB_空の封印石_other]:
2

[LTB_空の封印石_水超鋼]:
2

[ゾロア]:
1

[ミュウツーV-UNION]:
1

[プテラVSTAR]:
1

[オリジンディアルガVSTAR]:
4

[LTB_空の封印石_草超鋼雷]:
1

[ルナトーン]:
2

[LTB_空の封印石_超鋼雷]:
1

[Other_ギラティナVSTAR]:
1

415


In [9]:
# Reporting period as 'YYYY年MM月DD日' strings; both ends are meant to be inclusive.
# NOTE(review): stored deck dates carry a weekday suffix (e.g. '2022年12月29日(木)'),
# so a plain string comparison against these bounds ranks a deck dated exactly
# on end_date *after* end_date. Compare parsed dates, not raw strings.
start_date = "2022年12月18日"  # include
end_date = "2022年12月31日"  # include

In [10]:
target_category = "こくばバドレックスVMAX"
number_decks = 0


def _date_key(text):
    """Convert a 'YYYY年MM月DD日...' string into a sortable YYYYMMDD integer.

    Anything after '日' (such as a weekday suffix like '(木)') is ignored, so
    deck dates and the period bounds can be compared on equal terms.

    Raises:
        ValueError: if `text` does not start with a year/month/day date.
    """
    found = re.match(r"\s*(\d+)年(\d+)月(\d+)日", text)
    if found is None:
        raise ValueError(f"Unrecognized date format: {text!r}")
    year, month, day = (int(part) for part in found.groups())
    return year * 10000 + month * 100 + day


# Comparing the raw strings would drop decks dated exactly on end_date,
# because '2022年12月31日(土)' sorts after '2022年12月31日'.
period_start = _date_key(start_date)
period_end = _date_key(end_date)

# show categories we have in the period
for k in decks.keys():
    deck_cnt = sum(
        1
        for deck in decks[k]
        if period_start <= _date_key(deck["date"]) <= period_end
    )

    print(f"{k}\t{deck_cnt}\n")
    # Remember the count for the category analysed in the following cells.
    if k == target_category:
        number_decks = deck_cnt

ルギアVSTAR	98

アル_ジュラルドン	37

ミュウVMAX	37

LTB_カイオーガ	8

ムゲンダイナVMAX	22

Other_Lost	13

ヒスイ ダイケンキVSTAR	1

LOST_ギラティナVSTAR	18

アル_そらをとぶピカチュウ	34

レジ	21

ハピナスV	6

オリジンパルキアVSTAR	10

アル_レジドラゴVSTAR	2

LTB	9

ゾロア_ウインディ	2

キュレムVMAX	13

ガラル マタドガス	4

others	19

レジエレキVMAX	7

LTB_ヤミラミ_リザードン	10

ヒスイ ゾロアークVSTAR	5

LTB_空の封印石_水草超闘雷	1

こくばバドレックスVMAX	2

アルセウス裏工作	4

LTB_空の封印石_水超鋼闘雷	2

LTB_空の封印石_水超鋼雷	7

LTB_空の封印石_水超闘雷	6

ロトムVSTAR	1

LTB_空の封印石_other	2

LTB_空の封印石_水超鋼	2

ゾロア	1

ミュウツーV-UNION	1

プテラVSTAR	1

オリジンディアルガVSTAR	4

LTB_空の封印石_草超鋼雷	1

ルナトーン	2

LTB_空の封印石_超鋼雷	1

Other_ギラティナVSTAR	1



In [11]:
# number of decks for the target category
# Show the count, then stop the notebook here if there is nothing to analyse.
print(number_decks)
no_decks_found = number_decks <= 0
if no_decks_found:
    raise Exception(f"No decks for {target_category} during {start_date} to {end_date}")

2


In [12]:
# Analysis
# Build one usage table per card type for the target category.
common_cols = ["date", "num_players", "rank"]
card_types = ["pokemons", "tools", "supporters", "stadiums", "energies"]


def _date_key(text):
    """Convert a 'YYYY年MM月DD日...' string into a sortable YYYYMMDD integer.

    Anything after '日' (such as a weekday suffix like '(木)') is ignored.
    Kept local to this cell so the cell does not rely on helpers defined
    elsewhere in the notebook.

    Raises:
        ValueError: if `text` does not start with a year/month/day date.
    """
    found = re.match(r"\s*(\d+)年(\d+)月(\d+)日", text)
    if found is None:
        raise ValueError(f"Unrecognized date format: {text!r}")
    year, month, day = (int(part) for part in found.groups())
    return year * 10000 + month * 100 + day


def build_usage_table(deck_records, card_type, period_start, period_end):
    """Summarise how often each card of `card_type` is played.

    Args:
        deck_records: iterable of deck dicts; each is read for "deck_link",
            the `common_cols` keys and a `card_type` mapping of card -> count.
        card_type: key of the per-deck card mapping to analyse.
        period_start: first accepted day as a `_date_key` integer (inclusive).
        period_end: last accepted day as a `_date_key` integer (inclusive).

    Returns:
        DataFrame indexed by deck link, newest deck first, preceded by two
        summary rows: "pick_rate" (share of decks playing the card) and
        "avg_num_used" (mean copies among decks that play it). Columns are
        `common_cols` followed by the cards in descending pick rate.

    Raises:
        ValueError: if no deck falls inside the period or a date is malformed.
    """
    rows = []
    deck_links = []
    for deck in deck_records:
        # Compare parsed dates: the raw strings carry a weekday suffix, which
        # made decks dated exactly on end_date compare as later than it.
        if not period_start <= _date_key(deck["date"]) <= period_end:
            continue
        row = {col: deck[col] for col in common_cols}
        row.update(deck[card_type])
        rows.append(row)
        deck_links.append(deck["deck_link"])  # row id

    if not rows:
        raise ValueError(f"No decks in the selected period for card type {card_type!r}")

    # Build the frame once instead of concatenating row by row; cards a deck
    # does not play become 0.
    table = pd.DataFrame(rows, index=deck_links).fillna(0)

    # sort rows by date (newest first), using the parsed date, not the string
    table = table.sort_values(
        by="date",
        key=lambda dates: dates.map(_date_key),
        ascending=False,
        kind="stable",
    )

    # select cols for analysis
    card_cols = [col for col in table.columns if col not in common_cols]
    counts = table[card_cols]

    # calculate
    num_decks = len(table)
    num_used = counts.sum(axis='rows', numeric_only=True)
    num_picked = counts.astype(bool).sum(axis='rows')
    avg_num_used = num_used / num_picked
    pick_rate = num_picked / num_decks

    # Summary rows go on top; they have no date/num_players/rank, so those
    # cells come out as NaN after the concat.
    summary = pd.DataFrame({"pick_rate": pick_rate, "avg_num_used": avg_num_used}).T
    combined = pd.concat([summary, table])

    # sort cols by pick rate; a stable sort keeps tied cards in a
    # deterministic order
    by_pick_rate = list(pick_rate.sort_values(ascending=False, kind="stable").index)
    return combined[common_cols + by_pick_rate]


analysis_start = _date_key(start_date)
analysis_end = _date_key(end_date)

# One table per card type, in the order of `card_types`.
df_list = [
    build_usage_table(decks[target_category], card_type, analysis_start, analysis_end)
    for card_type in card_types
]

In [13]:
# excel writer
# Write the five usage tables to one workbook, one sheet per card type.
excel_path = f"{EXCEL_FOLDER}/{target_category}-{start_date}-{end_date}.xlsx"
sheet_names = ['pokemons', 'tools', 'supporters', 'stadiums', 'energies']

# Use the writer as a context manager: it saves and closes the workbook on
# exit (even if a sheet fails to write). ExcelWriter.save() was deprecated in
# pandas 1.5 and removed in pandas 2.0.
with pd.ExcelWriter(excel_path, engine='xlsxwriter') as writer:
    for position, sheet_name in enumerate(sheet_names):
        # df_list holds the tables in the same order as sheet_names.
        df_list[position].to_excel(writer, sheet_name=sheet_name)

In [14]:
# Usage table for the "pokemons" card type (first table appended to df_list).
df_list[0]

Unnamed: 0,date,num_players,rank,エーフィVMAX,エーフィV,こくばバドレックスVMAX,こくばバドレックスV,クロバットV,ネオラントV,マホイップVMAX,マホイップV,ガラル フリーザー,かがやくムゲンダイナ,メタモンV,アルセウスV,アルセウスVSTAR
pick_rate,,,,1.0,1.0,1.0,1.0,1.0,1.0,0.5,0.5,0.5,0.5,0.5,0.5,0.5
avg_num_used,,,,1.0,1.0,4.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0
https://www.pokemon-card.com/deck/confirm.html/deckID/HnngPL-j61c5o-LNnnng,2022年12月29日(木),32.0,5.0,1.0,1.0,4.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0
https://www.pokemon-card.com/deck/confirm.html/deckID/fkf1kF-0G0Smz-VFkkVv,2022年12月28日(水),64.0,5.0,1.0,1.0,4.0,4.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0


In [15]:
# Usage table for the "tools" card type (second table appended to df_list).
df_list[1]

Unnamed: 0,date,num_players,rank,霧の水晶,ふうせん,ハイパーボール,ポケモンいれかえ,エネルギー回収,クイックボール,こだわりベルト,エネルギーつけかえ,しんかのおこう,バトルVIPパス,回収ネット,森の封印石,ロストスイーパー,やまびこホーン,ふつうのつりざお,ヒスイのヘビーボール,あなぬけのヒモ
pick_rate,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
avg_num_used,,,,3.5,1.0,2.0,1.5,1.0,3.5,1.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
https://www.pokemon-card.com/deck/confirm.html/deckID/HnngPL-j61c5o-LNnnng,2022年12月29日(木),32.0,5.0,4.0,1.0,2.0,2.0,1.0,4.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
https://www.pokemon-card.com/deck/confirm.html/deckID/fkf1kF-0G0Smz-VFkkVv,2022年12月28日(水),64.0,5.0,3.0,1.0,2.0,1.0,1.0,3.0,1.0,1.0,1.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [16]:
# Usage table for the "supporters" card type (third table appended to df_list).
df_list[2]

Unnamed: 0,date,num_players,rank,博士の研究,マリィ,セレナ,ツツジ,ボスの指令,ネジキ
pick_rate,,,,1.0,1.0,1.0,1.0,1.0,0.5
avg_num_used,,,,1.5,2.0,2.0,1.0,1.0,1.0
https://www.pokemon-card.com/deck/confirm.html/deckID/HnngPL-j61c5o-LNnnng,2022年12月29日(木),32.0,5.0,1.0,2.0,2.0,1.0,1.0,1.0
https://www.pokemon-card.com/deck/confirm.html/deckID/fkf1kF-0G0Smz-VFkkVv,2022年12月28日(水),64.0,5.0,2.0,2.0,2.0,1.0,1.0,0.0


In [17]:
# Usage table for the "stadiums" card type (fourth table appended to df_list).
df_list[3]

Unnamed: 0,date,num_players,rank,シンオウ神殿
pick_rate,,,,1.0
avg_num_used,,,,2.5
https://www.pokemon-card.com/deck/confirm.html/deckID/HnngPL-j61c5o-LNnnng,2022年12月29日(木),32.0,5.0,2.0
https://www.pokemon-card.com/deck/confirm.html/deckID/fkf1kF-0G0Smz-VFkkVv,2022年12月28日(水),64.0,5.0,3.0


In [18]:
# Usage table for the "energies" card type (fifth table appended to df_list).
df_list[4]

Unnamed: 0,date,num_players,rank,基本超エネルギー,ダブルターボエネルギー
pick_rate,,,,1.0,0.5
avg_num_used,,,,13.0,1.0
https://www.pokemon-card.com/deck/confirm.html/deckID/HnngPL-j61c5o-LNnnng,2022年12月29日(木),32.0,5.0,13.0,0.0
https://www.pokemon-card.com/deck/confirm.html/deckID/fkf1kF-0G0Smz-VFkkVv,2022年12月28日(水),64.0,5.0,13.0,1.0


In [19]:
# Inspect the raw deck records stored under the "others" category key.
decks["others"]

[{'deck_link': 'https://www.pokemon-card.com/deck/confirm.html/deckID/XyMy2y-gwZ51h-yRUMyy',
  'deck_code': 'XyMy2y-gwZ51h-yRUMyy',
  'pokemons': {'ポワルン あまみずのすがた': 3,
   'ライコウV': 2,
   'エモンガ': 1,
   'ドラピオンV': 1,
   'スターミーV': 1,
   'ガラル サンダーV': 1,
   'ネオラントV': 1,
   'かがやくゲッコウガ': 1},
  'tools': {'クイックボール': 4,
   'バトルVIPパス': 4,
   'ヒスイのヘビーボール': 1,
   'エネルギー転送': 3,
   'クロススイッチャー': 4,
   'あなぬけのヒモ': 2,
   '回収ネット': 3,
   'トレッキングシューズ': 3,
   '森の封印石': 2,
   '空の封印石': 1,
   'ともだちてちょう': 1,
   'ふつうのつりざお': 1},
  'supporters': {'メロン': 4},
  'stadiums': {'ポケストップ': 3},
  'energies': {'スピード雷エネルギー': 3, '基本雷エネルギー': 2, '基本水エネルギー': 6, '基本闘エネルギー': 2},
  'rank': 9,
  'num_players': 70,
  'date': '2022年12月29日(木)'},
 {'deck_link': 'https://www.pokemon-card.com/deck/confirm.html/deckID/8ax444-0wttB9-48xJ88',
  'deck_code': '8ax444-0wttB9-48xJ88',
  'pokemons': {'インテレオン': 3,
   'ジメレオン': 4,
   'メッソン': 4,
   'マナフィ': 1,
   'かがやくリザードン': 1,
   'ライコウV': 1,
   'ガラル サンダーV': 1,
   'ドラピオンV': 1},
  'tools': {'レベルボール': 4,
  

In [20]:
# Number of deck records stored under the "others" category key.
len(decks["others"])

19