In [28]:
# 获取企鹅物流数据

import json

import requests

# Penguin Statistics REST API (v2) endpoints fetched by this notebook.
# Item definitions; indexed by "itemId" further down.
item_url = "https://penguin-stats.io/PenguinStats/api/v2/items"
# Drop matrix; the query string asks the API to include closed zones as well.
matrix_url = "https://penguin-stats.io/PenguinStats/api/v2/result/matrix?show_closed_zones=true"
# Stage definitions; indexed by "stageId" further down.
stage_url = "https://penguin-stats.io/PenguinStats/api/v2/stages"
# Zone definitions; indexed by "zoneId" further down.
zone_url = "https://penguin-stats.io/PenguinStats/api/v2/zones"

def get_data(url, path, use_cache=False, timeout=60):
    """Return the JSON document served at ``url``, optionally cached at ``path``.

    The previous implementation disabled its own cache with a bare ``raise``
    inside the ``try`` block and a commented-out write. That intent is now an
    explicit flag whose default keeps the "always download" behaviour.

    Args:
        url: HTTP(S) endpoint that returns a JSON body.
        path: Location of the local JSON cache file. It is only read or
            written when ``use_cache`` is true.
        use_cache: When false (default) the data is always downloaded and the
            cache file is left untouched. When true, a readable cache file is
            returned as-is; a missing or unparsable one is replaced by a fresh
            download.
        timeout: Timeout in seconds handed to ``requests.get``.

    Returns:
        The decoded JSON value.

    Raises:
        requests.RequestException: On network failure or a non-success HTTP
            status (via ``raise_for_status``).
    """
    if use_cache:
        try:
            with open(path, "r", encoding="utf-8") as fp:
                return json.load(fp)
        except (OSError, ValueError):
            # Missing/unreadable file or invalid JSON (JSONDecodeError is a
            # ValueError): fall through and download a fresh copy.
            pass

    with requests.get(url, timeout=timeout) as response:
        # Fail loudly instead of parsing an error page as data.
        response.raise_for_status()
        data = response.json()

    if use_cache:
        with open(path, "w", encoding="utf-8") as fp:
            json.dump(data, fp, ensure_ascii=False)
    return data

# Download the four datasets; the second argument is the cache file name
# that get_data associates with each endpoint.
item_data = get_data(url=item_url, path="items.json")
matrix_data = get_data(url=matrix_url, path="matrix.json")
stage_data = get_data(url=stage_url, path="stages.json")
zone_data = get_data(url=zone_url, path="zones.json")


In [29]:
def _index_by_field(entries, field):
    """Map ``entry[field]`` to ``entry`` for every entry; later duplicates win."""
    indexed = {}
    for entry in entries:
        indexed[entry[field]] = entry
    return indexed


# Lookup tables from id to the full record returned by the API.
item_dict = _index_by_field(item_data, "itemId")
stage_dict = _index_by_field(stage_data, "stageId")
zone_dict = _index_by_field(zone_data, "zoneId")


In [30]:
from datetime import datetime, timezone, timedelta


def get_item_type(item_id):
    """Return the ``itemType`` field of the item stored under ``item_id``.

    Raises:
        KeyError: If ``item_id`` is not a key of ``item_dict``.
    """
    item_info = item_dict[item_id]
    return item_info["itemType"]


def get_item_name(item_id):
    """Return the ``name`` field of the item stored under ``item_id``.

    Raises:
        KeyError: If ``item_id`` is not a key of ``item_dict``.
    """
    item_info = item_dict[item_id]
    return item_info["name"]


def get_item_rarity(item_id):
    """Return the ``rarity`` field of the item stored under ``item_id``.

    Raises:
        KeyError: If ``item_id`` is not a key of ``item_dict``.
    """
    item_info = item_dict[item_id]
    return item_info["rarity"]


def get_item_id_by_name(item_name):
    """Return the id of the first item in ``item_dict`` whose name matches.

    The lookup is a linear scan in ``item_dict`` iteration order.

    Raises:
        ValueError: If no item has ``name`` equal to ``item_name``.
    """
    for candidate_id in item_dict:
        if item_dict[candidate_id]["name"] == item_name:
            return candidate_id
    raise ValueError(f"Item name {item_name} not found.")


def get_stage_name(stage_id):
    """Return the ``code`` field of the stage stored under ``stage_id``.

    Raises:
        KeyError: If ``stage_id`` is not a key of ``stage_dict``.
    """
    stage_info = stage_dict[stage_id]
    return stage_info["code"]


def get_stage_open_timestamp(stage_id, server):
    """Return the raw ``openTime`` value of a stage for one server.

    The value is read from ``stage_dict[stage_id]["existence"][server]``.
    ``get_stage_open_time`` divides it by 1000 before building a datetime,
    i.e. it is treated there as epoch milliseconds.

    Raises:
        KeyError: If the stage, the server entry or ``openTime`` is missing.
    """
    existence_by_server = stage_dict[stage_id]["existence"]
    return existence_by_server[server]["openTime"]


# Fixed UTC offset, in whole hours, applied for each server code.
TIMEZONE_OFFSET_HOURS_DICT = {"CN": 8, "US": -5, "JP": 9, "KR": 9}


def get_timezone(server):
    """Return a fixed-offset ``timezone`` for ``server``.

    The server code is upper-cased before the lookup, so it is
    case-insensitive.

    Raises:
        KeyError: If the upper-cased code is not in
            ``TIMEZONE_OFFSET_HOURS_DICT``.
    """
    offset_hours = TIMEZONE_OFFSET_HOURS_DICT[server.upper()]
    return timezone(timedelta(hours=offset_hours))


def get_stage_type(stage_id):
    """Return the ``stageType`` field of the stage stored under ``stage_id``.

    Raises:
        KeyError: If ``stage_id`` is not a key of ``stage_dict``.
    """
    stage_info = stage_dict[stage_id]
    return stage_info["stageType"]


def get_stage_zone_id(stage_id):
    """Return the ``zoneId`` field of the stage stored under ``stage_id``.

    Raises:
        KeyError: If ``stage_id`` is not a key of ``stage_dict``.
    """
    stage_info = stage_dict[stage_id]
    return stage_info["zoneId"]


def get_stage_open_time(stage_id, server):
    """Return the stage's opening time as an aware datetime in the server's zone.

    The raw timestamp is divided by 1000 (milliseconds to seconds) and
    converted directly with the fixed-offset zone from ``get_timezone``.
    """
    epoch_seconds = get_stage_open_timestamp(stage_id, server) / 1000
    return datetime.fromtimestamp(epoch_seconds, tz=get_timezone(server))


def get_stage_ap_cost(stage_id):
    """Return the ``apCost`` field of the stage stored under ``stage_id``.

    Raises:
        KeyError: If ``stage_id`` is not a key of ``stage_dict``.
    """
    stage_info = stage_dict[stage_id]
    return stage_info["apCost"]


def get_zone_name(zone_id):
    """Return the ``zoneName`` field of the zone stored under ``zone_id``.

    Raises:
        KeyError: If ``zone_id`` is not a key of ``zone_dict``.
    """
    zone_info = zone_dict[zone_id]
    return zone_info["zoneName"]


In [31]:
# --- Analysis configuration -------------------------------------------------
# Server whose stage opening times are reported.
SERVER = "CN"
# Minimum `times` (sample count) a drop entry needs to be considered.
TIMES_THRESHOLD = 0
# Not referenced by the cells shown here.
STAGE_BLACKLIST = []
# Zone names excluded from every per-activity table below.
ZONE_BLACKLIST = ["崔林特尔梅之金", "覆潮之下・复刻"]

# stage_id -> {item_id: (quantity, times)}; every known stage starts empty.
stage_drop_info = {known_stage_id: dict() for known_stage_id in stage_dict}
for element in matrix_data["matrix"]:
    # Read the four fields in the same order as before so a malformed entry
    # fails at the same key.
    stage_id, item_id, times, quantity = (
        element[field] for field in ("stageId", "itemId", "times", "quantity")
    )
    drops_of_stage = stage_drop_info[stage_id]
    # Each (stage, item) pair is expected to appear at most once in the matrix.
    assert item_id not in drops_of_stage
    drops_of_stage[item_id] = (quantity, times)


In [32]:
# 卡方检验

from scipy import stats


def chi2_test(作战理智消耗, 掉落数, 样本数):
    """Chi-square test of observed drops against a single pooled drops-per-AP rate.

    The pooled rate is ``sum(drops) / sum(samples * ap_cost)``; each entry's
    expected drop count is that rate times its own ``samples * ap_cost``.

    Args:
        作战理智消耗: AP cost per run for each stage (array-like supporting
            element-wise ``*`` and ``.sum()``, e.g. a pandas Series).
        掉落数: Observed drop count for each stage.
        样本数: Number of recorded runs for each stage.

    Returns:
        The result of ``scipy.stats.chisquare`` (statistic, p-value).
    """
    total_ap_spent = (样本数 * 作战理智消耗).sum()
    pooled_rate = 掉落数.sum() / total_ap_spent
    expected_drops = pooled_rate * 样本数 * 作战理智消耗
    return stats.chisquare(掉落数, expected_drops)


In [33]:
# 掉单一蓝材料的活动

import pandas as pd

# One record per ACTIVITY stage whose only MATERIAL drop is a rarity-2 item.
records = []
for stage_id, drop_info in stage_drop_info.items():
    if get_stage_type(stage_id) != "ACTIVITY":
        continue

    # Keep MATERIAL drops that meet the sample-count threshold.
    material_drops = [
        (item_id, quantity, times)
        for item_id, (quantity, times) in drop_info.items()
        if times >= TIMES_THRESHOLD and get_item_type(item_id) == "MATERIAL"
    ]
    if len(material_drops) != 1:
        continue

    stage_name = get_stage_name(stage_id)
    zone_name = get_zone_name(get_stage_zone_id(stage_id))
    if zone_name in ZONE_BLACKLIST:
        continue
    stage_open_time = get_stage_open_time(stage_id, SERVER)
    ap_cost = get_stage_ap_cost(stage_id)
    item_id, quantity, times = material_drops[0]
    item_name = get_item_name(item_id)
    if get_item_rarity(item_id) != 2:
        continue

    drops_per_run = quantity / times
    drops_per_ap = drops_per_run / ap_cost
    ap_per_drop = 1 / drops_per_ap

    records.append({
        "作战名称": stage_name,
        "活动名称": zone_name,
        "作战开放时间": stage_open_time,
        "作战理智消耗": ap_cost,
        "作战掉落物品名称": item_name,
        "掉落数": quantity,
        "样本数": times,
        "作战掉落物品数量": drops_per_run,
        "单位理智掉落物品数量": drops_per_ap,
        "单件期望理智": ap_per_drop,
    })

df = pd.DataFrame.from_records(records)
df.to_csv("掉单一蓝材料的活动.csv")


In [34]:
# Single-blue-material activity stages, grouped by dropped item.

groups = []
records = []
# Groups are ordered by the item id that corresponds to each item name.
for item_name, group in sorted(df.groupby("作战掉落物品名称"), key=lambda x: get_item_id_by_name(x[0])):
    总掉落数 = group["掉落数"].sum()
    总样本数 = group["样本数"].sum()
    总消耗理智 = (group["样本数"] * group["作战理智消耗"]).sum()
    单位理智掉落物品数量 = 总掉落数 / 总消耗理智
    单件期望理智 = 1 / 单位理智掉落物品数量
    # Expected drops per stage under the pooled rate. assign() builds a new
    # frame instead of writing a column into the frame produced by groupby,
    # which is what triggers pandas' SettingWithCopyWarning.
    group = group.assign(**{
        "理论掉落数": 单位理智掉落物品数量 * group["样本数"] * group["作战理智消耗"],
    })
    chi2, p_value = chi2_test(group["作战理智消耗"], group["掉落数"], group["样本数"])
    records.append({
        "作战掉落物品名称": item_name,
        "总掉落数": 总掉落数,
        "总样本数": 总样本数,
        "总消耗理智": 总消耗理智,
        "单位理智掉落物品数量": 单位理智掉落物品数量,
        "单件期望理智": 单件期望理智,
        "卡方检验p值": p_value,
    })
    groups.append(group.sort_values("单位理智掉落物品数量"))

df_grouped = pd.concat(groups)
df_grouped.to_csv("掉单一蓝材料的活动_按物品分组.csv")

df_material = pd.DataFrame.from_records(records)
df_material


Unnamed: 0,作战掉落物品名称,总掉落数,总样本数,总消耗理智,单位理智掉落物品数量,单件期望理智,卡方检验p值
0,固源岩组,25349656,23184391,486631842,0.052092,19.196783,0.9971324
1,糖组,12432069,13146895,268662879,0.046274,21.610472,0.9980974
2,聚酸酯组,8662306,8937454,187191279,0.046275,21.609867,0.9934143
3,异铁组,20507213,26364461,553354107,0.03706,26.983389,0.3962349
4,酮凝集组,17447695,22441881,470827320,0.037058,26.985073,0.1898807
5,全新装置,5970960,10621916,215121147,0.027756,36.0279,0.05416389
6,扭转醇,19411810,21185963,442728816,0.043846,22.807189,0.9489345
7,轻锰矿,20437297,26371086,551416239,0.037063,26.980879,0.01940804
8,研磨石,17956431,27732391,557979951,0.032181,31.074101,7.698221e-15
9,RMA70-12,15204095,26122730,548067099,0.027741,36.047335,7.61064e-06


In [35]:
# 掉两种绿材料的活动

def sort_key(drop_info_item):
    """Ranking key for a ``(item_id, (quantity, times))`` drop entry.

    Returns the quantity divided by a per-item weight; ids without a listed
    weight use 1. The sample count is unpacked but not used.
    """
    item_id, (quantity, _times) = drop_info_item
    # NOTE(review): weights presumably reflect each item's relative drop
    # frequency so the two drops can be compared — confirm.
    weight_by_item = {
        "30012": 15,
        "30022": 10,
        "30032": 10,
        "30042": 8,
        "30052": 8,
        "30062": 6,
    }
    divisor = weight_by_item[item_id] if item_id in weight_by_item else 1
    return quantity / divisor


# One record per ACTIVITY stage whose MATERIAL drops are exactly two rarity-1
# items; the drop ranked higher by sort_key is reported as the main drop.
records = []
for stage_id, drop_info in stage_drop_info.items():
    if get_stage_type(stage_id) != "ACTIVITY":
        continue

    # Keep MATERIAL drops that meet the sample-count threshold.
    drop_info_filtered_items = [
        (item_id, (quantity, times))
        for item_id, (quantity, times) in drop_info.items()
        if times >= TIMES_THRESHOLD and get_item_type(item_id) == "MATERIAL"
    ]
    if len(drop_info_filtered_items) != 2:
        continue
    if not all(get_item_rarity(dropped_id) == 1 for dropped_id, _counts in drop_info_filtered_items):
        continue

    stage_name = get_stage_name(stage_id)
    zone_name = get_zone_name(get_stage_zone_id(stage_id))
    if zone_name in ZONE_BLACKLIST:
        continue
    stage_open_time = get_stage_open_time(stage_id, SERVER)
    ap_cost = get_stage_ap_cost(stage_id)

    main_drop, side_drop = sorted(drop_info_filtered_items, key=sort_key, reverse=True)
    item_id_0, (quantity_0, times_0) = main_drop
    item_id_1, (quantity_1, times_1) = side_drop
    item_name_0 = get_item_name(item_id_0)
    item_name_1 = get_item_name(item_id_1)
    # Both drops come from the same stage, so their sample counts must agree.
    assert times_0 == times_1

    main_per_run = quantity_0 / times_0
    main_per_ap = main_per_run / ap_cost
    ap_per_main = 1 / main_per_ap
    side_per_run = quantity_1 / times_1
    side_per_ap = side_per_run / ap_cost
    ap_per_side = 1 / side_per_ap

    records.append({
        "作战名称": stage_name,
        "活动名称": zone_name,
        "作战开放时间": stage_open_time,
        "作战理智消耗": ap_cost,
        "样本数": times_0,
        "主掉落物品名称": item_name_0,
        "主掉落数": quantity_0,
        "单次作战主掉落数量": main_per_run,
        "单位理智主掉落数量": main_per_ap,
        "主掉落单件期望理智": ap_per_main,
        "副掉落物品名称": item_name_1,
        "副掉落数": quantity_1,
        "单次作战副掉落数量": side_per_run,
        "单位理智副掉落数量": side_per_ap,
        "副掉落单件期望理智": ap_per_side,
    })

df = pd.DataFrame.from_records(records)
df.to_csv("掉两种绿材料的活动.csv")


In [36]:
# Two-green-material activity stages, grouped by main drop.

groups = []
records = []
# Groups are ordered by the item id that corresponds to each item name.
for item_name, group in sorted(df.groupby("主掉落物品名称"), key=lambda x: get_item_id_by_name(x[0])):
    总掉落数 = group["主掉落数"].sum()
    总样本数 = group["样本数"].sum()
    总消耗理智 = (group["样本数"] * group["作战理智消耗"]).sum()
    单位理智掉落物品数量 = 总掉落数 / 总消耗理智
    单件期望理智 = 1 / 单位理智掉落物品数量
    # Expected main drops per stage under the pooled rate. assign() builds a
    # new frame instead of writing a column into the frame produced by
    # groupby, which is what triggers pandas' SettingWithCopyWarning.
    group = group.assign(**{
        "理论主掉落数": 单位理智掉落物品数量 * group["样本数"] * group["作战理智消耗"],
    })
    chi2, p_value = chi2_test(group["作战理智消耗"], group["主掉落数"], group["样本数"])
    records.append({
        "主掉落物品名称": item_name,
        "总掉落数": 总掉落数,
        "总样本数": 总样本数,
        "总消耗理智": 总消耗理智,
        "单位理智主掉落数量": 单位理智掉落物品数量,
        "单件期望理智": 单件期望理智,
        "卡方检验p值": p_value,
    })
    groups.append(group.sort_values("单位理智主掉落数量"))

df_grouped = pd.concat(groups)
df_grouped.to_csv("掉两种绿材料的活动_按主掉落分组.csv")

df_material = pd.DataFrame.from_records(records)
df_material


Unnamed: 0,主掉落物品名称,总掉落数,总样本数,总消耗理智,单位理智主掉落数量,单件期望理智,卡方检验p值
0,固源岩,18124235,10872888,130474656,0.13891,7.198906,0.999955
1,糖,12463192,10611010,134630436,0.092573,10.802244,0.999825
2,聚酸酯,3021620,2707494,32640933,0.092571,10.802461,1.0
3,异铁,870861,894864,11753736,0.074092,13.496684,0.999998
4,酮凝集,1447184,1578065,19553448,0.074012,13.511377,0.999998
5,装置,3166463,4630998,56695740,0.05585,17.905069,3e-05


In [37]:
# Two-green-material activity stages, grouped by secondary drop.

groups = []
records = []
# Groups are ordered by the item id that corresponds to each item name.
for item_name, group in sorted(df.groupby("副掉落物品名称"), key=lambda x: get_item_id_by_name(x[0])):
    总掉落数 = group["副掉落数"].sum()
    总样本数 = group["样本数"].sum()
    总消耗理智 = (group["样本数"] * group["作战理智消耗"]).sum()
    单位理智掉落物品数量 = 总掉落数 / 总消耗理智
    单件期望理智 = 1 / 单位理智掉落物品数量
    # Expected secondary drops per stage under the pooled rate. assign()
    # builds a new frame instead of writing a column into the frame produced
    # by groupby, which is what triggers pandas' SettingWithCopyWarning.
    group = group.assign(**{
        "理论副掉落数": 单位理智掉落物品数量 * group["样本数"] * group["作战理智消耗"],
    })
    chi2, p_value = chi2_test(group["作战理智消耗"], group["副掉落数"], group["样本数"])
    records.append({
        "副掉落物品名称": item_name,
        "总掉落数": 总掉落数,
        "总样本数": 总样本数,
        "总消耗理智": 总消耗理智,
        "单位理智副掉落数量": 单位理智掉落物品数量,
        "单件期望理智": 单件期望理智,
        "卡方检验p值": p_value,
    })
    groups.append(group.sort_values("单位理智副掉落数量"))

df_grouped = pd.concat(groups)
df_grouped.to_csv("掉两种绿材料的活动_按副掉落分组.csv")

df_material = pd.DataFrame.from_records(records)
df_material


Unnamed: 0,副掉落物品名称,总掉落数,总样本数,总消耗理智,单位理智副掉落数量,单件期望理智,卡方检验p值
0,固源岩,13308313,15323958,191730450,0.069412,14.406819,0.9999767
1,糖,692781,1232768,14944221,0.046358,21.571349,0.04101086
2,聚酸酯,3697634,6648852,79786224,0.046344,21.577642,0.9305069
3,异铁,831955,1815163,22492563,0.036988,27.035793,0.4717177
4,酮凝集,1888641,4221472,51070821,0.036981,27.041042,0.9665484
5,装置,710439,2053106,25724670,0.027617,36.209541,1.050709e-68


In [38]:
# Activity stages that drop the six white materials.

# Item id -> column-name prefix, listed in output column order. Replaces six
# copy-pasted column pairs; the generated names match the f-string pattern
# the pooled summary cell uses to read them back.
white_material_labels = {
    "30011": "源岩",
    "30021": "代糖",
    "30031": "酯原料",
    "30041": "异铁碎片",
    "30051": "双酮",
    "30061": "破损装置",
}

records = []
for stage_id, drop_info in stage_drop_info.items():
    if get_stage_type(stage_id) != "ACTIVITY":
        continue

    # Keep MATERIAL drops that meet the sample-count threshold.
    drop_info_filtered = {
        item_id: (quantity, times)
        for item_id, (quantity, times) in drop_info.items()
        if times >= TIMES_THRESHOLD and get_item_type(item_id) == "MATERIAL"}
    if len(drop_info_filtered) != 6:
        continue

    stage_name = get_stage_name(stage_id)
    zone_name = get_zone_name(get_stage_zone_id(stage_id))
    if zone_name in ZONE_BLACKLIST:
        continue
    stage_open_time = get_stage_open_time(stage_id, SERVER)
    ap_cost = get_stage_ap_cost(stage_id)

    # Only stages whose material drops are exactly the six listed ids qualify.
    if set(drop_info_filtered) != set(white_material_labels):
        continue
    times = drop_info_filtered["30011"][1]
    # All six drops belong to the same stage, so their sample counts must agree.
    assert all(times == v[1] for v in drop_info_filtered.values())

    record = {
        "作战名称": stage_name,
        "活动名称": zone_name,
        "作战开放时间": stage_open_time,
        "作战理智消耗": ap_cost,
        "样本数": times,
    }
    for item_id, label in white_material_labels.items():
        quantity = drop_info_filtered[item_id][0]
        record[f"{label}掉落数"] = quantity
        # Total AP spent on the stage divided by the number of items dropped.
        record[f"{label}单件期望理智"] = (times * ap_cost) / quantity
    records.append(record)

df = pd.DataFrame.from_records(records)
df.to_csv("掉白材料的活动.csv")


In [39]:
# White-material activity stages: pooled per-item summary.

item_names = ["源岩", "代糖", "酯原料", "异铁碎片", "双酮", "破损装置"]
records = []
# Only stages with more than 50000 recorded runs are pooled.
df = df.loc[df["样本数"] > 50000]

# These totals are the same for every item, so compute them once.
total_runs = df["样本数"].sum()
total_ap_spent = (df["样本数"] * df["作战理智消耗"]).sum()

for item_name in item_names:
    drops_column = df[f"{item_name}掉落数"]
    total_drops = drops_column.sum()
    drops_per_ap = total_drops / total_ap_spent
    ap_per_drop = 1 / drops_per_ap
    chi2, p_value = chi2_test(df["作战理智消耗"], drops_column, df["样本数"])
    records.append({
        "物品名称": item_name,
        "总掉落数": total_drops,
        "总样本数": total_runs,
        "总消耗理智": total_ap_spent,
        "单位理智掉落物品数量": drops_per_ap,
        "单件期望理智": ap_per_drop,
        "卡方检验p值": p_value,
    })

df_material = pd.DataFrame.from_records(records)
df_material


Unnamed: 0,物品名称,总掉落数,总样本数,总消耗理智,单位理智掉落物品数量,单件期望理智,卡方检验p值
0,源岩,975030,1563939,14075451,0.069272,14.435916,0.966294
1,代糖,649537,1563939,14075451,0.046147,21.669976,0.569755
2,酯原料,648046,1563939,14075451,0.046041,21.719833,0.82231
3,异铁碎片,517871,1563939,14075451,0.036792,27.179454,0.073427
4,双酮,516426,1563939,14075451,0.03669,27.255504,0.48819
5,破损装置,386943,1563939,14075451,0.027491,36.376032,0.559283
