In [55]:
import requests


# Penguin Statistics v2 API endpoints read by this notebook.
item_url = "https://penguin-stats.io/PenguinStats/api/v2/items"
matrix_url = "https://penguin-stats.io/PenguinStats/api/v2/result/matrix?show_closed_zones=true"
stage_url = "https://penguin-stats.io/PenguinStats/api/v2/stages"
zone_url = "https://penguin-stats.io/PenguinStats/api/v2/zones"

# Seconds to wait on each request; without a timeout a stalled connection
# would block the kernel indefinitely.
REQUEST_TIMEOUT_SECONDS = 30


def fetch_json(url):
    """Download ``url`` and return its decoded JSON body.

    Raises ``requests.HTTPError`` for a 4xx/5xx response so that an error page
    is reported as such instead of failing later during JSON decoding, and
    whatever ``requests`` raises on connection failure or timeout.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    return response.json()


item_data = fetch_json(item_url)
matrix_data = fetch_json(matrix_url)
stage_data = fetch_json(stage_url)
zone_data = fetch_json(zone_url)


In [145]:
def _index_by(records, key):
    """Map ``record[key]`` to the record itself for every entry of ``records``.

    When several records share the same key value, the last one wins.
    """
    return dict((record[key], record) for record in records)


# Lookup tables keyed by the primary id of each API payload.
item_dict = _index_by(item_data, "itemId")
stage_dict = _index_by(stage_data, "stageId")
zone_dict = _index_by(zone_data, "zoneId")


In [147]:
from datetime import datetime, timezone, timedelta


def get_item_type(item_id):
    """Return the ``itemType`` field of the item stored under ``item_id``.

    Raises ``KeyError`` if ``item_id`` is not a key of ``item_dict``.
    """
    item_info = item_dict[item_id]
    return item_info["itemType"]


def get_item_name(item_id):
    """Return the ``name`` field of the item stored under ``item_id``.

    Raises ``KeyError`` if ``item_id`` is not a key of ``item_dict``.
    """
    item_info = item_dict[item_id]
    return item_info["name"]


def get_item_id_by_name(item_name):
    """Return the id of the first item in ``item_dict`` named ``item_name``.

    Items are scanned in the dictionary's iteration order, so when several
    items share a name the earliest one is returned.

    Raises ``ValueError`` when no item carries that name.
    """
    matching_ids = (
        candidate_id
        for candidate_id, candidate_info in item_dict.items()
        if candidate_info["name"] == item_name
    )
    # A private sentinel distinguishes "no match" from any legitimate id value.
    no_match = object()
    first_match = next(matching_ids, no_match)
    if first_match is no_match:
        raise ValueError(f"Item name {item_name} not found.")
    return first_match


def get_stage_name(stage_id):
    """Return the ``code`` field of the stage stored under ``stage_id``.

    Raises ``KeyError`` if ``stage_id`` is not a key of ``stage_dict``.
    """
    stage_info = stage_dict[stage_id]
    return stage_info["code"]


def get_stage_open_timestamp(stage_id, server):
    """Return the raw ``openTime`` recorded for ``stage_id`` on ``server``.

    The value is read from ``stage_dict[stage_id]["existence"][server]`` and
    returned unchanged. Raises ``KeyError`` if the stage, the server entry or
    the ``openTime`` field is missing.
    """
    existence_by_server = stage_dict[stage_id]["existence"]
    server_entry = existence_by_server[server]
    return server_entry["openTime"]


# UTC offset in whole hours for each server code; consumed by get_timezone().
TIMEZONE_OFFSET_HOURS_DICT = dict(CN=+8, US=-5, JP=+9, KR=+9)


def get_timezone(server):
    """Return a fixed-offset ``timezone`` for ``server``.

    The offset comes from ``TIMEZONE_OFFSET_HOURS_DICT``; an unknown server
    code raises ``KeyError``.
    """
    offset_hours = TIMEZONE_OFFSET_HOURS_DICT[server]
    utc_offset = timedelta(hours=offset_hours)
    return timezone(utc_offset)


def get_stage_type(stage_id):
    """Return the ``stageType`` field of the stage stored under ``stage_id``.

    Raises ``KeyError`` if ``stage_id`` is not a key of ``stage_dict``.
    """
    stage_info = stage_dict[stage_id]
    return stage_info["stageType"]


def get_stage_zone_id(stage_id):
    """Return the ``zoneId`` field of the stage stored under ``stage_id``.

    Raises ``KeyError`` if ``stage_id`` is not a key of ``stage_dict``.
    """
    stage_info = stage_dict[stage_id]
    return stage_info["zoneId"]


def get_stage_open_time(stage_id, server):
    """Return the opening time of ``stage_id`` on ``server`` as an aware datetime.

    The timestamp returned by ``get_stage_open_timestamp`` is divided by 1000
    (i.e. it is treated as epoch milliseconds) and expressed in the fixed
    offset returned by ``get_timezone(server)``.

    Raises ``KeyError`` if the stage, the server entry or the server's
    timezone offset is unknown.
    """
    open_timestamp_ms = get_stage_open_timestamp(stage_id, server)
    # Convert straight into the target zone. Building a naive local datetime
    # first and calling astimezone() on it routes the value through the host
    # machine's local-time rules, which is unnecessary and platform dependent.
    return datetime.fromtimestamp(open_timestamp_ms / 1000, tz=get_timezone(server))


def get_stage_ap_cost(stage_id):
    """Return the ``apCost`` field of the stage stored under ``stage_id``.

    Raises ``KeyError`` if ``stage_id`` is not a key of ``stage_dict``.
    """
    stage_info = stage_dict[stage_id]
    return stage_info["apCost"]


def get_zone_name(zone_id):
    """Return the ``zoneName`` field of the zone stored under ``zone_id``.

    Raises ``KeyError`` if ``zone_id`` is not a key of ``zone_dict``.
    """
    zone_info = zone_dict[zone_id]
    return zone_info["zoneName"]


In [149]:
# Analysis parameters.
# SERVER selects which server's stage opening time is reported.
# NOTE(review): the matrix URL carries no server parameter - confirm the drop
# data actually corresponds to this server.
SERVER = "CN"
# Minimum value of an entry's "times" field for the drop to be considered later.
TIMES_THRESHOLD = 256

# stage id -> {item id -> (quantity, times)}; every known stage starts empty.
stage_drop_info = {}
for known_stage_id in stage_dict:
    stage_drop_info[known_stage_id] = {}

for entry in matrix_data["matrix"]:
    stage_id, item_id = entry["stageId"], entry["itemId"]
    times, quantity = entry["times"], entry["quantity"]
    drops_of_stage = stage_drop_info[stage_id]
    # Each (stage, item) pair is expected to appear at most once in the matrix.
    assert item_id not in drops_of_stage
    drops_of_stage[item_id] = (quantity, times)


In [154]:
import pandas as pd

# Columns of the per-stage table, in output order.
RECORD_COLUMNS = [
    '作战名称',          # stage code
    '活动名称',          # name of the zone (event) the stage belongs to
    '作战开放时间',      # stage opening time on SERVER
    '作战理智消耗',      # AP cost of one run
    '作战掉落物品名称',  # name of the single dropped material
    '掉落数',            # total quantity dropped
    '样本数',            # number of sampled runs
]
records = []

for stage_id, drop_info in stage_drop_info.items():
    if get_stage_type(stage_id) != "ACTIVITY":
        continue
    # Keep only MATERIAL drops backed by at least TIMES_THRESHOLD sampled runs.
    drop_info_filtered = {
        item_id: (quantity, times)
        for item_id, (quantity, times) in drop_info.items()
        if times >= TIMES_THRESHOLD and get_item_type(item_id) == "MATERIAL"}
    # Only stages with exactly one such material are of interest.
    if len(drop_info_filtered) != 1:
        continue
    # The filter above already guarantees the item is a MATERIAL, so no
    # further item-type check is needed here.
    (item_id, (quantity, times)), = drop_info_filtered.items()
    records.append({
        '作战名称': get_stage_name(stage_id),
        '活动名称': get_zone_name(get_stage_zone_id(stage_id)),
        '作战开放时间': get_stage_open_time(stage_id, SERVER),
        '作战理智消耗': get_stage_ap_cost(stage_id),
        '作战掉落物品名称': get_item_name(item_id),
        '掉落数': quantity,
        '样本数': times,
    })

# Passing the columns explicitly keeps the frame's schema even when no stage
# qualifies, so the derived columns below do not fail with a KeyError.
df = pd.DataFrame(records, columns=RECORD_COLUMNS)
# Derived metrics: items per run, items per AP, and expected AP per item.
df["作战掉落物品数量"] = df["掉落数"] / df["样本数"]
df["单位理智掉落物品数量"] = df["作战掉落物品数量"] / df["作战理智消耗"]
df["单件期望理智"] = 1 / df["单位理智掉落物品数量"]
df.to_csv("掉单一蓝材料的活动.csv")
df


Unnamed: 0,作战名称,活动名称,作战开放时间,作战理智消耗,作战掉落物品名称,掉落数,样本数,作战掉落物品数量,单位理智掉落物品数量,单件期望理智
0,GT-5,骑兵与猎人,2019-05-30 10:00:00+08:00,15,扭转醇,8073,12211,0.661125,0.044075,22.688592
1,GT-6,骑兵与猎人,2019-05-30 10:00:00+08:00,15,研磨石,5208,10649,0.489060,0.032604,30.671083
2,DM-6,生于黑夜,2020-04-21 16:00:00+08:00,15,糖组,7947,11459,0.693516,0.046234,21.628917
3,DM-7,生于黑夜,2020-04-21 16:00:00+08:00,18,扭转醇,18756,23847,0.786514,0.043695,22.885797
4,DM-8,生于黑夜,2020-04-21 16:00:00+08:00,18,异铁组,29777,44625,0.667272,0.037071,26.975518
...,...,...,...,...,...,...,...,...,...,...
168,CV-7,不义之财・复刻,2024-09-19 16:00:00+08:00,21,研磨石,677328,1000203,0.677191,0.032247,31.010475
169,CV-8,不义之财・复刻,2024-09-19 16:00:00+08:00,21,转质盐组,228570,366243,0.624094,0.029719,33.648786
170,GO-7,追迹日落以西,2024-10-09 16:00:00+08:00,21,固源岩组,127511,116427,1.095201,0.052152,19.174557
171,GO-8,追迹日落以西,2024-10-09 16:00:00+08:00,21,凝胶,45160,64234,0.703054,0.033479,29.869663


In [156]:
import numpy as np
from scipy import stats


def chi2_test(作战理智消耗, 掉落数, 样本数):
    """Chi-square test of the observed drops against a single pooled drop rate.

    The pooled rate is the total of ``掉落数`` divided by the total AP spent
    (the dot product of ``样本数`` and ``作战理智消耗``). Each entry's expected
    count is that rate times its own ``样本数 * 作战理智消耗``.

    Returns the result of ``scipy.stats.chisquare(observed, expected)``, whose
    first element is the statistic and second the p-value.
    """
    total_drops = np.sum(掉落数)
    total_ap_spent = np.dot(样本数, 作战理智消耗)
    pooled_rate = total_drops / total_ap_spent
    expected_drops = pooled_rate * 样本数 * 作战理智消耗
    return stats.chisquare(掉落数, expected_drops)


In [157]:
groups = []
records = []
# Visit the per-item groups ordered by item id (resolved from the item name).
for item_name, group in sorted(df.groupby("作战掉落物品名称"), key=lambda x: get_item_id_by_name(x[0])):
    总掉落数 = group["掉落数"].sum()
    总样本数 = group["样本数"].sum()
    # Total AP spent across all sampled runs of the stages dropping this item.
    总消耗理智 = (group["样本数"] * group["作战理智消耗"]).sum()
    单位理智掉落物品数量 = 总掉落数 / 总消耗理智
    单件期望理智 = 1 / 单位理智掉落物品数量
    # Expected drops per stage under the pooled rate. assign() returns a new
    # frame, so the frame yielded by groupby is never written to in place
    # (avoids SettingWithCopyWarning).
    group = group.assign(**{
        "理论掉落数": 单位理智掉落物品数量 * group["样本数"] * group["作战理智消耗"],
    })
    chi2_result = chi2_test(group["作战理智消耗"], group["掉落数"], group["样本数"])
    records.append({
        "作战掉落物品名称": item_name,
        "总掉落数": 总掉落数,
        "总样本数": 总样本数,
        "总消耗理智": 总消耗理智,
        "单位理智掉落物品数量": 单位理智掉落物品数量,
        "单件期望理智": 单件期望理智,
        "卡方检验p值": chi2_result.pvalue,
    })
    groups.append(group)

dfp = pd.DataFrame.from_records(records)
dfp


Unnamed: 0,作战掉落物品名称,总掉落数,总样本数,总消耗理智,单位理智掉落物品数量,单件期望理智,卡方检验p值
0,固源岩组,25464335,23331286,488835267,0.052092,19.19686,0.9984505
1,糖组,12432069,13146895,268662879,0.046274,21.610472,0.9980974
2,聚酸酯组,8662306,8937454,187191279,0.046275,21.609867,0.9934143
3,异铁组,23825115,30655795,643472121,0.037026,27.008143,1.3848550000000002e-23
4,酮凝集组,17447695,22441881,470827320,0.037058,26.985073,0.1898807
5,全新装置,14280116,25038611,516383202,0.027654,36.160995,1.3993460000000002e-31
6,扭转醇,22467873,24532178,512999331,0.043797,22.832572,4.377015e-32
7,轻锰矿,20437297,26371086,551416239,0.037063,26.980879,0.01940804
8,研磨石,17956431,27732391,557979951,0.032181,31.074101,7.698221e-15
9,RMA70-12,15204095,26122730,548067099,0.027741,36.047335,7.61064e-06


In [158]:
# Stack the per-item groups into one frame and save it as a single CSV file.
stages_by_item = pd.concat(groups)
stages_by_item.to_csv("掉单一蓝材料的活动_按物品分组.csv")
