In [1]:
import time
import re

import pandas as pd
from collections import OrderedDict

import json
import os

# Lift pandas' display limits (columns, rows, cell width) so frames are rendered in full.
for option_name in ('display.max_columns', 'display.max_rows', 'display.max_colwidth'):
    pd.set_option(option_name, None)

In [2]:
# Create the working folders used by this notebook.
# exist_ok=True makes the call a no-op when a folder is already there and removes
# the gap between "check that it exists" and "create it".
folders = ["excel", "deck_db", "logs"]
for folder in folders:
    os.makedirs(folder, exist_ok=True)

In [3]:
from parse_deck import parse_events_from_official
from parse_deck import reassign_category
from deck_category_helper import find_category

In [4]:
# Switch for the parsing cell further down: parse_events_from_official is only
# called when this is True. Set it to False to work with the stored decks only.
RUN_PARSE = True

In [5]:
# Layout of `decks` (the same structure is kept in the JSON store):
# {
#     "<deck category 1>": [
#         {
#             "deck_link": "",      # used as the row id of the usage tables
#             "deck_code": "",
#             "pokemons": {},
#             "tools": {},
#             "supporters": {},
#             "stadiums": {},
#             "energies": {},
#             "rank": 1,
#             "num_people": 32,
#             "date": "..."         # presumably a "YYYY年MM月DD日" string: it is compared
#                                   # against start_date/end_date strings later on
#         }, ...
#     ],
#     "<deck category 2>": ...
# }
#
# NOTE(review): the store is opened with the platform default encoding here and in
# the save cell; if that is ever changed, change both places together.

store_file_name = 'deck_db/store.json'

# Start from an empty collection and replace it with the stored one when the file exists.
decks = {}
if os.path.exists(store_file_name):
    with open(store_file_name, 'r') as f:
        decks = json.load(f)

# Re-run the categorisation on whatever was loaded, then show the resulting categories.
decks = reassign_category(decks)
print(decks.keys())

# Flat list of the deck codes of every deck in every category.
store_code_list = [
    stored_deck["deck_code"]
    for category_name in decks
    for stored_deck in decks[category_name]
]

dict_keys(['レジ', 'ルギアVSTAR', 'オリジンパルキアVSTAR', 'ムゲンダイナVMAX', 'LOST_ギラティナVSTAR', 'キュレムVMAX', 'Other_Lost', 'ルナトーン', 'ミュウVMAX', 'アル_ジュラルドン', 'アル_そらをとぶピカチュウ', 'others', 'レジエレキVMAX', 'LTB_空の封印石_水超闘雷', 'LTB_空の封印石_水超鋼雷', 'LTB_空の封印石_水草超鋼闘雷', 'LTB_空の封印石_水超雷', 'LTB_空の封印石_水超鋼', 'LTB_空の封印石_超鋼雷', 'アルセウス裏工作', 'LTB_カイオーガ', 'ハピナスV', 'LTB', 'ヒスイ ゾロアークVSTAR', 'ヒスイ ダイケンキVSTAR', 'ロトムVSTAR', 'ゾロア', 'プテラVSTAR', 'ガラル マタドガス', 'LTB_ヤミラミ_リザードン', 'Other_ギラティナVSTAR', 'アル_レジドラゴVSTAR', 'ミュウツーV-UNION', 'LTB_空の封印石_other', 'レジドラゴVSTAR', 'オリジンディアルガVSTAR', 'こくばバドレックスVMAX', 'ゾロア_ウインディ', 'LTB_空の封印石_水超鋼闘雷', 'LTB_空の封印石_草超闘雷', 'LTB_空の封印石_草超鋼雷', 'LTB_空の封印石_草超鋼闘雷', 'LTB_空の封印石_水草超鋼雷'])


In [6]:
# Number of deck codes collected from the loaded store.
len(store_code_list)

1532

In [None]:
# Parse events (only when RUN_PARSE is True) and print how long the cell took.
# The return value of parse_events_from_official is ignored, so it presumably
# updates `decks` in place - confirm in parse_deck.
# time.perf_counter() is used for the measurement because it is a monotonic,
# high-resolution clock; time.time() can jump when the system clock is adjusted.
t1 = time.perf_counter()
if RUN_PARSE:
    parse_events_from_official(decks, store_code_list, result_page_limit=10, deck_page_limit=2)
t2 = time.perf_counter()

print()
print(t2 - t1)  # elapsed seconds

In [None]:
# Save to JSON.
# Opening the store itself with 'w' would truncate it before json.dump has produced
# anything, so an error during the dump would wipe the existing store. Write to a
# temporary file next to it instead and move that over the store once it is complete.
tmp_store_file_name = store_file_name + '.tmp'
with open(tmp_store_file_name, 'w') as f:
    json.dump(decks, f, ensure_ascii=False, indent=4)
os.replace(tmp_store_file_name, store_file_name)

In [None]:
# Show every category with the number of decks it holds, followed by the grand total.
category_sizes = {name: len(entries) for name, entries in decks.items()}
for name, size in category_sizes.items():
    print(f"[{name}]:\n{size}\n")

total = sum(category_sizes.values())
print(total)

In [None]:
# Reporting window; both bounds are inclusive.
# The bounds are compared with each deck's "date" using plain string comparison,
# so they should be written in the same format as the stored dates
# (presumably this zero-padded year/month/day form - confirm against the store).
start_date = "2022年12月02日"  # include
end_date = "2022年12月25日"  # include

In [None]:
# show categories we have in the period
for k in decks.keys():
    deck_cnt = 0
    for deck in decks[k]:
        if deck["date"] >= start_date and deck["date"] <= end_date:
            deck_cnt += 1

    print(f"{k}\t{deck_cnt}\n")

In [None]:
target_category = "アル_レジドラゴVSTAR"
common_cols = ["date", "num_people", "rank"]
card_types = ["pokemons", "tools", "supporters", "stadiums", "energies"]


def _build_card_usage_table(deck_list, card_type, first_day, last_day):
    """Build the usage table of one card type for the decks dated in a window.

    Parameters
    ----------
    deck_list : list of dict
        Decks of one category. Each deck must provide "deck_link", the keys in
        ``common_cols`` and ``card_type`` (a mapping of card name to the number
        of copies - assumed numeric, TODO confirm against the store).
    card_type : str
        Key of the per-deck card mapping to tabulate, e.g. "pokemons".
    first_day, last_day : str
        Inclusive bounds, compared with ``deck["date"]`` as plain strings.

    Returns
    -------
    pandas.DataFrame
        Rows "pick_rate" and "avg_num_used" on top, followed by one row per
        deck (indexed by deck link, newest date first). Columns are
        ``common_cols`` followed by the card names ordered by descending pick
        rate. When no deck falls inside the window the table contains only the
        two summary rows instead of raising ``KeyError: 'date'``.
    """
    # Collect all rows first and build the frame once; concatenating inside the
    # loop copies the whole frame on every iteration.
    records = []
    row_ids = []
    for deck in deck_list:
        if deck["date"] < first_day or deck["date"] > last_day:
            continue
        record = {col: deck[col] for col in common_cols}
        record.update(deck[card_type])
        records.append(record)
        row_ids.append(deck["deck_link"])

    if records:
        # A card missing from a deck means zero copies of it.
        table = pd.DataFrame(records, index=row_ids).fillna(0)
    else:
        table = pd.DataFrame(columns=common_cols)

    # sort rows by date
    table = table.sort_values(by=['date'], ascending=False)

    # select cols for analysis
    card_cols = [c for c in table.columns if c not in common_cols]

    # calculate
    num_decks = table.shape[0]
    num_used = table[card_cols].sum(axis='rows', numeric_only=True)
    num_picked = table[card_cols].astype(bool).sum(axis='rows')  # decks with >= 1 copy
    avg_num_used = num_used / num_picked  # average over the decks that run the card
    pick_rate = num_picked / num_decks

    # Summary rows go on top, "pick_rate" first. The common columns have no
    # summary value and stay NaN.
    summary = pd.DataFrame({"pick_rate": pick_rate, "avg_num_used": avg_num_used}).T
    summary = summary.reindex(columns=table.columns)
    table = pd.concat([summary, table]) if num_decks else summary

    # sort cols by pick rate, then put the common columns back in front
    table = table.sort_values('pick_rate', axis=1, ascending=False)
    ranked_cards = [c for c in table.columns if c not in common_cols]
    return table[common_cols + ranked_cards]


# One table per card type, in the order given by card_types.
df_list = [
    _build_card_usage_table(decks[target_category], card_type, start_date, end_date)
    for card_type in card_types
]

In [None]:
# Number of decks for the target category inside the date window.
# Every table in df_list carries two extra summary rows ("avg_num_used" and
# "pick_rate") besides the deck rows, hence the -2.
len(df_list[0]) - 2

In [None]:
# Prepare the per-category output folder.
# exist_ok=True makes this safe to re-run and avoids the check-then-create gap.
excel_folder = f"./excel/{target_category}"
os.makedirs(excel_folder, exist_ok=True)

In [None]:
# Write the five usage tables into one workbook, one sheet per card type.
# The `with` block saves and closes the workbook on exit (also when a to_excel call
# fails); ExcelWriter.save() was deprecated in pandas 1.5 and removed in pandas 2.0.
excel_path = f"{excel_folder}/{target_category}-{start_date}-{end_date}.xlsx"
sheet_names = ['pokemons', 'tools', 'supporters', 'stadiums', 'energies']
with pd.ExcelWriter(excel_path, engine='xlsxwriter') as writer:
    # df_list holds the tables in the same order as sheet_names.
    for sheet_name, sheet_df in zip(sheet_names, df_list):
        sheet_df.to_excel(writer, sheet_name=sheet_name)

In [None]:
# Usage table for card type "pokemons".
df_list[0]

In [None]:
# Usage table for card type "tools".
df_list[1]

In [None]:
# Usage table for card type "supporters".
df_list[2]

In [None]:
# Usage table for card type "stadiums".
df_list[3]

In [None]:
# Usage table for card type "energies".
df_list[4]

In [None]:
# Inspect every deck stored under the "others" category.
decks["others"]

In [None]:
# Number of decks stored under the "others" category.
len(decks["others"])