In [35]:
import json
import os
from datetime import datetime, timedelta, timezone
from typing import Iterable, Literal

import matplotlib.pyplot as plt
import pandas as pd
import requests
from matplotlib.figure import Figure, SubFigure
from matplotlib.ticker import LogFormatter
from scipy import stats

# Notebook-wide Matplotlib defaults, applied one key at a time.
# Presumably this font is chosen so the Chinese labels render — confirm it is installed.
plt.rcParams["font.sans-serif"] = "Source Han Sans CN"
plt.rcParams["figure.dpi"] = 144
plt.rcParams["figure.constrained_layout.use"] = True
plt.rcParams["savefig.bbox"] = "tight"


class LogFormatterPlain(LogFormatter):
    """Log-axis tick formatter whose label text comes from a caller-supplied format.

    ``format_str_or_function`` is either a ``str.format`` template that receives the
    tick value as ``x`` (default ``"{x:,.0f}"``), or a callable that takes the tick
    value and returns the label. All remaining arguments go to ``LogFormatter``.
    """

    def __init__(self, format_str_or_function="{x:,.0f}", *args, **kwargs):
        super().__init__(*args, **kwargs)
        if not isinstance(format_str_or_function, str):
            # Already a callable: use it as-is.
            self.function = format_str_or_function
            return

        template = format_str_or_function

        def render(x):
            return template.format(x=x)

        self.function = render

    def _num_to_string(self, x, vmin, vmax):
        # NOTE(review): overrides a private LogFormatter method; vmin/vmax are ignored.
        return self.function(x)


In [36]:
# Fetch the Penguin Statistics data

# All four endpoints share the same API root.
penguin_api_root = "https://penguin-stats.io/PenguinStats/api/v2"

item_url = f"{penguin_api_root}/items"
matrix_url = f"{penguin_api_root}/result/matrix?show_closed_zones=true"
stage_url = f"{penguin_api_root}/stages"
zone_url = f"{penguin_api_root}/zones"


def get_data(url, path, timeout=30):
    """Return the JSON document cached at ``path``, downloading it from ``url`` on a miss.

    Args:
        url: Address to fetch when no usable cache file exists.
        path: Location of the UTF-8 JSON cache file.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON value.

    Raises:
        requests.HTTPError: The server answered with an error status. Nothing is
            written to ``path`` in that case, so an error page is never cached.
    """
    try:
        with open(path, "r", encoding="utf-8") as fp:
            return json.load(fp)
    except (OSError, ValueError):
        # Missing or unreadable file (OSError), or undecodable / malformed JSON
        # (both are ValueError subclasses): fall through and download again.
        pass

    with requests.get(url, timeout=timeout) as response:
        # Fail loudly here instead of caching the body of an error response.
        response.raise_for_status()
        data = response.json()
    with open(path, "w", encoding="utf-8") as fp:
        json.dump(data, fp, ensure_ascii=False)
    return data

# Load each dataset; the file named by `path` is the local cache that get_data reads and writes.
item_data = get_data(url=item_url, path="items.json")
matrix_data = get_data(url=matrix_url, path="matrix.json")
stage_data = get_data(url=stage_url, path="stages.json")
zone_data = get_data(url=zone_url, path="zones.json")


In [37]:
# Index each record list by its id field for direct lookup.
item_dict = dict((info["itemId"], info) for info in item_data)
stage_dict = dict((info["stageId"], info) for info in stage_data)
zone_dict = dict((info["zoneId"], info) for info in zone_data)


In [38]:
def get_item_type(item_id):
    """Return the ``itemType`` field of the item stored under ``item_id`` in ``item_dict``."""
    item_info = item_dict[item_id]
    return item_info["itemType"]


def get_item_name(item_id):
    """Return the ``name`` field of the item stored under ``item_id`` in ``item_dict``."""
    item_info = item_dict[item_id]
    return item_info["name"]


def get_item_rarity(item_id):
    """Return the ``rarity`` field of the item stored under ``item_id`` in ``item_dict``."""
    item_info = item_dict[item_id]
    return item_info["rarity"]


def get_item_id_by_name(item_name):
    """Return the id of the first item in ``item_dict`` whose ``name`` equals ``item_name``.

    Raises:
        ValueError: No item carries that name.
    """
    not_found = object()
    matching_ids = (
        candidate_id
        for candidate_id, candidate in item_dict.items()
        if candidate["name"] == item_name
    )
    first_match = next(matching_ids, not_found)
    if first_match is not_found:
        raise ValueError(f"Item name {item_name} not found.")
    return first_match


def get_stage_name(stage_id):
    """Return the ``code`` field of the stage stored under ``stage_id`` in ``stage_dict``."""
    stage_info = stage_dict[stage_id]
    return stage_info["code"]


def get_stage_open_timestamp(stage_id, server):
    """Return the raw ``openTime`` value recorded for ``stage_id`` on ``server``."""
    existence_by_server = stage_dict[stage_id]["existence"]
    return existence_by_server[server]["openTime"]


# Fixed UTC offset, in whole hours, for each supported server code.
TIMEZONE_OFFSET_HOURS_DICT = dict(CN=+8, US=-5, JP=+9, KR=+9)


def get_timezone(server):
    """Return the fixed-offset ``timezone`` for ``server`` (matched case-insensitively).

    Raises:
        KeyError: ``server`` is not one of the known server codes.
    """
    offset_hours = TIMEZONE_OFFSET_HOURS_DICT[server.upper()]
    return timezone(timedelta(hours=offset_hours))


def get_stage_type(stage_id):
    """Return the ``stageType`` field of the stage stored under ``stage_id`` in ``stage_dict``."""
    stage_info = stage_dict[stage_id]
    return stage_info["stageType"]


def get_stage_zone_id(stage_id):
    """Return the ``zoneId`` field of the stage stored under ``stage_id`` in ``stage_dict``."""
    stage_info = stage_dict[stage_id]
    return stage_info["zoneId"]


def get_stage_open_time(stage_id, server):
    """Return the opening time of ``stage_id`` on ``server`` as an aware ``datetime``.

    The stored ``openTime`` is divided by 1000 before being used as a POSIX
    timestamp, i.e. it is treated as milliseconds. The result carries the
    server's fixed UTC offset from ``get_timezone``.
    """
    opened_ms = get_stage_open_timestamp(stage_id, server)
    # Build the aware datetime directly in the target zone; going through a naive
    # local-time value first would depend on the machine's own timezone settings.
    return datetime.fromtimestamp(opened_ms / 1000, tz=get_timezone(server))


def get_stage_ap_cost(stage_id):
    """Return the ``apCost`` field of the stage stored under ``stage_id`` in ``stage_dict``."""
    stage_info = stage_dict[stage_id]
    return stage_info["apCost"]


def get_zone_name(zone_id):
    """Return the ``zoneName`` field of the zone stored under ``zone_id`` in ``zone_dict``."""
    zone_info = zone_dict[zone_id]
    return zone_info["zoneName"]


In [39]:
# Re-index the sparse drop matrix so it is stored per stage

server = "CN"
times_threshold = 50000  # minimum sample count a stage needs to enter the aggregated tables
stage_blacklist = []  # stage names excluded from the aggregated tables
zone_blacklist = ["崔林特尔梅之金", "覆潮之下・复刻"]  # zone names excluded from the aggregated tables

# Every known stage starts with an empty {item_id: (quantity, times)} mapping.
stage_drop_info = {}
for stage_id in stage_dict:
    stage_drop_info[stage_id] = {}

for element in matrix_data["matrix"]:
    stage_id, item_id = element["stageId"], element["itemId"]
    times, quantity = element["times"], element["quantity"]
    drops_of_stage = stage_drop_info[stage_id]
    # Each (stage, item) pair is expected to appear only once in the matrix.
    assert item_id not in drops_of_stage
    drops_of_stage[item_id] = (quantity, times)


In [40]:
# Chi-square test

def chi2_test(作战理智消耗, 掉落数, 样本数):
    """Chi-square test of the observed drops against one pooled drop rate per sanity point.

    Args:
        作战理智消耗: Sanity cost of one run, per stage.
        掉落数: Observed drop count, per stage.
        样本数: Number of recorded runs, per stage.

    Returns:
        The result of ``scipy.stats.chisquare`` (statistic, p-value).
    """
    sanity_spent = 样本数 * 作战理智消耗
    # Pooled rate: total drops divided by total sanity spent across all stages.
    p = 掉落数.sum() / sanity_spent.sum()
    expected_drops = p * 样本数 * 作战理智消耗
    return stats.chisquare(掉落数, f_exp=expected_drops)


In [41]:
# Event stages that drop exactly one blue material

records = []
for stage_id, drop_info in stage_drop_info.items():
    if get_stage_type(stage_id) != "ACTIVITY":
        continue

    # Keep only the drops whose item type is MATERIAL.
    drop_info_filtered = {}
    for candidate_id, counts in drop_info.items():
        if get_item_type(candidate_id) == "MATERIAL":
            drop_info_filtered[candidate_id] = counts
    if len(drop_info_filtered) != 1:
        continue

    stage_name = get_stage_name(stage_id)
    zone_name = get_zone_name(get_stage_zone_id(stage_id))
    stage_open_time = get_stage_open_time(stage_id, server)
    ap_cost = get_stage_ap_cost(stage_id)
    [(item_id, (quantity, times))] = drop_info_filtered.items()
    item_name = get_item_name(item_id)
    # Only rarity-2 items are kept (the "blue" materials of the cell title).
    if get_item_rarity(item_id) != 2:
        continue

    drops_per_run = quantity / times
    drops_per_sanity = drops_per_run / ap_cost
    sanity_per_drop = 1 / drops_per_sanity

    record = {
        "作战名称": stage_name,
        "活动名称": zone_name,
        "作战开放时间": stage_open_time,
        "作战理智消耗": ap_cost,
        "作战掉落物品名称": item_name,
        "掉落数": quantity,
        "样本数": times,
        "作战掉落物品数量": drops_per_run,
        "单位理智掉落物品数量": drops_per_sanity,
        "单件期望理智": sanity_per_drop,
    }
    records.append(record)

df = pd.DataFrame.from_records(records)
df.to_csv("掉单一蓝材料的活动.csv")


In [42]:
# 蓝材料图表

# legend_position = {
#     "固源岩组": "upper right",
#     "糖组": "upper center",
#     "聚酸酯组": "upper center",
#     "异铁组": "upper center",
#     "酮凝集组": "upper right",
#     "全新装置": "upper left",
#     "扭转醇": "upper right",
#     "轻锰矿": "upper right",
#     "研磨石": "upper right",
#     "RMA70-12": "upper right",
#     "凝胶": "upper left",
#     "炽合金": "upper right",
#     "晶体元件": "upper left",
#     "半自然溶剂": "upper left",
#     "化合切削液": "upper left",
#     "转质盐组": "upper right",
#     "褐素纤维": "upper right",
#     "环烃聚质": "upper right",
# }


In [43]:
# Blue-material charts

def plot(fig: Figure | SubFigure, item_name: str, group: pd.DataFrame):
    """Draw one material's history: sample counts as bars, drop rate as a line.

    Args:
        fig: Figure or sub-figure to draw into; one Axes and a twin y-axis are created on it.
        item_name: Used as the panel title.
        group: One row per stage, with the columns "活动名称", "作战名称",
            "样本数" and "单位理智掉落物品数量".
    """
    left_color = "tab:blue"
    right_color = "tab:red"

    def rate_label(rate, _pos=None):
        # Two-line label: drops per sanity point, then its reciprocal (sanity per drop).
        # A value of exactly 0 (e.g. a tick the locator places at 0) has no finite
        # reciprocal, so it is written out instead of dividing by zero.
        if rate == 0:
            return f"{0:.5f}\n+∞"
        return f"{rate:.5f}\n{1/rate:.4f}"

    positions = range(len(group))
    stage_labels = [
        f"{zone_name} {stage_name}"
        for zone_name, stage_name in zip(group["活动名称"], group["作战名称"])
    ]

    ax1 = fig.subplots()
    ax2 = ax1.twinx()

    # Left axis: sample counts on a log scale, with plain (non-exponent) tick labels.
    ax1.set_title(f"{item_name}")
    ax1.set_xticks(positions)
    ax1.set_xticklabels(stage_labels, rotation=45, horizontalalignment="right")
    ax1.set_xlim(-1, len(group))
    ax1.set_ylabel("样本数", color=left_color)
    ax1.tick_params(axis="y", which="both", colors=left_color)
    ax1.set_yscale("log")
    ax1.yaxis.set_major_formatter(LogFormatterPlain())
    ax1.yaxis.set_minor_formatter(LogFormatterPlain())

    # Right axis: drop rate per sanity point.
    ax2.set_ylabel("单位理智掉落物品数量\n单件期望理智", color=right_color)
    ax2.tick_params(axis="y", which="both", colors=right_color)
    ax2.yaxis.set_major_formatter(rate_label)

    ax1.bar(positions, group["样本数"], color=left_color, alpha=0.7, label="样本数")
    ax2.plot(positions, group["单位理智掉落物品数量"], color=right_color,
             label="单位理智掉落物品数量\n单件期望理智", marker="o")

    for i, rate in enumerate(group["单位理智掉落物品数量"]):
        # Alternate labels below / above the points so neighbours do not overlap.
        above = i % 2 == 1
        ax2.annotate(rate_label(rate), (i, rate),
                     horizontalalignment="center",
                     verticalalignment="bottom" if above else "top",
                     fontsize=9, textcoords="offset points",
                     xytext=(0, 8) if above else (0, -8),
                     bbox=dict(boxstyle="round,pad=0.2", edgecolor="none", facecolor="white", alpha=0.7))
    fig.legend(loc="lower center", bbox_to_anchor=(1, 1), bbox_transform=ax1.transAxes)


# One (item_name, rows) pair per material, ordered by item id.
grouped = sorted(df.groupby("作战掉落物品名称"), key=lambda x: get_item_id_by_name(x[0]))

# Combined figure: one stacked panel per material. The panel count follows the data,
# so no material is dropped when there are more groups than a fixed number of panels.
panel_count = max(len(grouped), 1)
fig = plt.figure(figsize=(12, panel_count * 7))
# squeeze=False always returns a 2-D array, so this also works for a single panel.
subfigs: Iterable[SubFigure] = fig.subfigures(panel_count, 1, squeeze=False).ravel()  # type: ignore
fig.suptitle("SideStory历史掉率（掉单一蓝材料的作战）", verticalalignment="bottom")
for subfig, (item_name, group) in zip(subfigs, grouped):
    plot(subfig, item_name, group)  # type: ignore
fig.savefig("蓝材料掉落图表.png")
plt.close(fig)

# Individual figures, one file per material. savefig does not create directories.
os.makedirs("蓝材料图表", exist_ok=True)
for item_name, group in grouped:
    fig = plt.figure(figsize=(12, 7))
    plot(fig, item_name, group)  # type: ignore
    fig.savefig(f"蓝材料图表/{item_name}.png")
    plt.close(fig)


In [44]:
# Single-blue-material event stages, aggregated per item

# Filter into a separately named frame: reassigning `df` here would silently change
# what the plotting cell draws if it were re-run afterwards.
df_blue_filtered = df[
    ~df["作战名称"].isin(stage_blacklist)
    & ~df["活动名称"].isin(zone_blacklist)
    & (df["样本数"] >= times_threshold)
]

records = []
for item_name, group in sorted(df_blue_filtered.groupby("作战掉落物品名称"), key=lambda x: get_item_id_by_name(x[0])):
    total_drops = group["掉落数"].sum()
    total_runs = group["样本数"].sum()
    total_sanity = (group["样本数"] * group["作战理智消耗"]).sum()
    drops_per_sanity = total_drops / total_sanity
    sanity_per_drop = 1 / drops_per_sanity
    # Only the p-value is reported. chi2_test derives the expected counts itself,
    # so no per-group expected-count column is materialised here.
    _chi2_statistic, p_value = chi2_test(group["作战理智消耗"], group["掉落数"], group["样本数"])
    records.append({
        "作战掉落物品名称": item_name,
        "总掉落数": total_drops,
        "总样本数": total_runs,
        "总消耗理智": total_sanity,
        "单位理智掉落物品数量": drops_per_sanity,
        "单件期望理智": sanity_per_drop,
        "卡方检验p值": p_value,
    })

df_material = pd.DataFrame.from_records(records)
df_material


Unnamed: 0,作战掉落物品名称,总掉落数,总样本数,总消耗理智,单位理智掉落物品数量,单件期望理智,卡方检验p值
0,固源岩组,25828419,23610734,495825414,0.052092,19.196894,0.9928503
1,糖组,12368715,13066217,267292746,0.046274,21.610389,0.9995857
2,聚酸酯组,8629427,8894907,186479253,0.046276,21.609691,0.9142374
3,异铁组,20440907,26264603,551556663,0.03706,26.982984,0.8009785
4,酮凝集组,17387659,22352349,469215744,0.037057,26.985562,0.1070448
5,全新装置,5970960,10621916,215121147,0.027756,36.0279,0.05416389
6,扭转醇,19328839,21073472,440829858,0.043846,22.806846,0.9153161
7,轻锰矿,20398388,26313185,550374021,0.037063,26.981251,0.008432727
8,研磨石,17913000,27653346,556631220,0.032181,31.074148,1.395744e-16
9,RMA70-12,15119428,25952653,545005713,0.027742,36.046715,3.576746e-06


In [45]:
# Event stages that drop two green materials

def sort_key(drop_info_item):
    """Ranking key for an ``(item_id, (quantity, times))`` pair.

    Returns the drop quantity divided by a per-item weight (1 for items not listed).
    The caller sorts in descending order and treats the first entry as the main drop.
    """
    item_id, (quantity, _times) = drop_info_item
    byproduct_weight = {
        "30012": 15,
        "30022": 10,
        "30032": 10,
        "30042": 8,
        "30052": 8,
        "30062": 6,
    }
    divisor = byproduct_weight[item_id] if item_id in byproduct_weight else 1
    return quantity / divisor


records = []
for stage_id, drop_info in stage_drop_info.items():
    if get_stage_type(stage_id) != "ACTIVITY":
        continue

    # Keep only the drops whose item type is MATERIAL.
    drop_info_filtered = {}
    for candidate_id, counts in drop_info.items():
        if get_item_type(candidate_id) == "MATERIAL":
            drop_info_filtered[candidate_id] = counts
    if len(drop_info_filtered) != 2:
        continue

    # Both remaining drops must be rarity 1 (the "green" materials of the cell title).
    drop_info_filtered_items = list(drop_info_filtered.items())
    if any(get_item_rarity(candidate_id) != 1 for candidate_id, _counts in drop_info_filtered_items):
        continue

    stage_name = get_stage_name(stage_id)
    zone_name = get_zone_name(get_stage_zone_id(stage_id))
    stage_open_time = get_stage_open_time(stage_id, server)
    ap_cost = get_stage_ap_cost(stage_id)

    # Highest sort_key first: index 0 is treated as the main drop, index 1 as the secondary drop.
    ranked_items = sorted(drop_info_filtered_items, key=sort_key, reverse=True)
    (item_id_0, (quantity_0, times_0)), (item_id_1, (quantity_1, times_1)) = ranked_items
    item_name_0 = get_item_name(item_id_0)
    item_name_1 = get_item_name(item_id_1)
    assert times_0 == times_1

    main_per_run = quantity_0 / times_0
    main_per_sanity = main_per_run / ap_cost
    main_sanity_per_drop = 1 / main_per_sanity
    sub_per_run = quantity_1 / times_1
    sub_per_sanity = sub_per_run / ap_cost
    sub_sanity_per_drop = 1 / sub_per_sanity

    record = {
        "作战名称": stage_name,
        "活动名称": zone_name,
        "作战开放时间": stage_open_time,
        "作战理智消耗": ap_cost,
        "样本数": times_0,
        "主掉落物品名称": item_name_0,
        "主掉落数": quantity_0,
        "单次作战主掉落数量": main_per_run,
        "单位理智主掉落数量": main_per_sanity,
        "主掉落单件期望理智": main_sanity_per_drop,
        "副掉落物品名称": item_name_1,
        "副掉落数": quantity_1,
        "单次作战副掉落数量": sub_per_run,
        "单位理智副掉落数量": sub_per_sanity,
        "副掉落单件期望理智": sub_sanity_per_drop,
    }
    records.append(record)

df = pd.DataFrame.from_records(records)
df.to_csv("掉两种绿材料的活动.csv")


In [46]:
# Green-material charts

def plot(fig: Figure | SubFigure, item_name: str, group: pd.DataFrame, drop_type: Literal["主", "副"]):
    """Draw one material's history: sample counts as bars, drop rate as a line.

    Args:
        fig: Figure or sub-figure to draw into; one Axes and a twin y-axis are created on it.
        item_name: Material name shown in the panel title.
        group: One row per stage, with the columns "活动名称", "作战名称", "样本数"
            and "单位理智{drop_type}掉落数量".
        drop_type: "主" or "副"; selects the rate column and appears in the title and labels.
    """
    left_color = "tab:blue"
    right_color = "tab:red"
    rate_column = f"单位理智{drop_type}掉落数量"

    def rate_label(rate, _pos=None):
        # Two-line label: drops per sanity point, then its reciprocal (sanity per drop).
        # A value of exactly 0 (e.g. a tick the locator places at 0) has no finite
        # reciprocal, so it is written out instead of dividing by zero.
        if rate == 0:
            return f"{0:.5f}\n+∞"
        return f"{rate:.5f}\n{1/rate:.4f}"

    positions = range(len(group))
    stage_labels = [
        f"{zone_name} {stage_name}"
        for zone_name, stage_name in zip(group["活动名称"], group["作战名称"])
    ]

    ax1 = fig.add_subplot()
    ax2 = ax1.twinx()

    # Left axis: sample counts on a log scale, with plain (non-exponent) tick labels.
    ax1.set_title(f"{item_name}（作为{drop_type}掉落）")
    ax1.set_xticks(positions)
    ax1.set_xticklabels(stage_labels, rotation=45, horizontalalignment="right")
    ax1.set_xlim(-1, len(group))
    ax1.set_ylabel("样本数", color=left_color)
    ax1.tick_params(axis="y", which="both", colors=left_color)
    ax1.set_yscale("log")
    ax1.yaxis.set_major_formatter(LogFormatterPlain())
    ax1.yaxis.set_minor_formatter(LogFormatterPlain())

    # Right axis: drop rate per sanity point.
    ax2.set_ylabel(f"单位理智{drop_type}掉落数量\n单件期望理智", color=right_color)
    ax2.tick_params(axis="y", which="both", colors=right_color)
    ax2.yaxis.set_major_formatter(rate_label)

    ax1.bar(positions, group["样本数"], color=left_color, alpha=0.7, label="样本数")
    ax2.plot(positions, group[rate_column], color=right_color,
             label=f"单位理智{drop_type}掉落数量\n单件期望理智", marker="o")

    for i, rate in enumerate(group[rate_column]):
        # Alternate labels below / above the points so neighbours do not overlap.
        above = i % 2 == 1
        ax2.annotate(rate_label(rate), (i, rate),
                     horizontalalignment="center",
                     verticalalignment="bottom" if above else "top",
                     fontsize=7, textcoords="offset points",
                     xytext=(0, 8) if above else (0, -8),
                     bbox=dict(boxstyle="round,pad=0.2", edgecolor="none", facecolor="white", alpha=0.5))
    fig.legend(loc="lower center", bbox_to_anchor=(1, 1), bbox_transform=ax1.transAxes)


# (drop_type, item_name, rows) triples, each list ordered by item id.
main_groups = [
    ("主", item_name, group)
    for item_name, group in sorted(df.groupby("主掉落物品名称"), key=lambda x: get_item_id_by_name(x[0]))
]
sub_groups = [
    ("副", item_name, group)
    for item_name, group in sorted(df.groupby("副掉落物品名称"), key=lambda x: get_item_id_by_name(x[0]))
]
grouped = [*main_groups, *sub_groups]

# Combined figure: left column = main drops, right column = secondary drops.
# Rows follow the data, and each list is placed in its own column explicitly, so the
# columns stay aligned even when the two lists differ in length.
row_count = max(len(main_groups), len(sub_groups), 1)
fig = plt.figure(figsize=(2 * 15, row_count * 7))
subfigs = fig.subfigures(row_count, 2, squeeze=False)  # always a 2-D array
fig.suptitle("SideStory历史掉率（掉两种绿材料的作战）", verticalalignment="bottom")
for column, column_groups in enumerate((main_groups, sub_groups)):
    for row, (drop_type, item_name, group) in enumerate(column_groups):
        plot(subfigs[row, column], item_name, group, drop_type)  # type: ignore
fig.savefig("绿材料掉落图表.png")
plt.close(fig)

# Individual figures, one file per (drop type, material).
for drop_type, item_name, group in grouped:
    output_dir = f"绿材料{drop_type}掉落图表"
    os.makedirs(output_dir, exist_ok=True)  # savefig does not create directories
    fig = plt.figure(figsize=(12, 7))
    plot(fig, item_name, group, drop_type)  # type: ignore
    fig.savefig(f"{output_dir}/{item_name}.png")
    plt.close(fig)


In [47]:
# Two-green-material event stages, aggregated by main drop

# Filter into a separately named frame: reassigning `df` here would silently change
# what the plotting cell draws if it were re-run afterwards.
df_green_filtered = df[
    ~df["作战名称"].isin(stage_blacklist)
    & ~df["活动名称"].isin(zone_blacklist)
    & (df["样本数"] >= times_threshold)
]

records = []
for item_name, group in sorted(df_green_filtered.groupby("主掉落物品名称"), key=lambda x: get_item_id_by_name(x[0])):
    total_drops = group["主掉落数"].sum()
    total_runs = group["样本数"].sum()
    total_sanity = (group["样本数"] * group["作战理智消耗"]).sum()
    drops_per_sanity = total_drops / total_sanity
    sanity_per_drop = 1 / drops_per_sanity
    # Only the p-value is reported. chi2_test derives the expected counts itself,
    # so no per-group expected-count column is materialised here.
    _chi2_statistic, p_value = chi2_test(group["作战理智消耗"], group["主掉落数"], group["样本数"])
    records.append({
        "主掉落物品名称": item_name,
        "总掉落数": total_drops,
        "总样本数": total_runs,
        "总消耗理智": total_sanity,
        "单位理智主掉落数量": drops_per_sanity,
        "单件期望理智": sanity_per_drop,
        "卡方检验p值": p_value,
    })

df_material = pd.DataFrame.from_records(records)
df_material


Unnamed: 0,主掉落物品名称,总掉落数,总样本数,总消耗理智,单位理智主掉落数量,单件期望理智,卡方检验p值
0,固源岩,18738625,11241613,134899356,0.138908,7.199,0.999551
1,糖,12370558,10532125,133631628,0.092572,10.802393,0.986317
2,聚酸酯,2749647,2475191,29702292,0.092574,10.80222,0.999773
3,异铁,528852,521069,7137924,0.07409,13.497016,0.97484
4,酮凝集,1232431,1352808,16651647,0.074013,13.511221,0.997578
5,装置,3064135,4499698,54851223,0.055863,17.901046,0.060082


In [48]:
# Two-green-material event stages, aggregated by secondary drop

# Filter into a separately named frame so this cell gives the same result whether or
# not `df` was already narrowed by another cell, and never overwrites `df` itself.
df_green_filtered = df[
    ~df["作战名称"].isin(stage_blacklist)
    & ~df["活动名称"].isin(zone_blacklist)
    & (df["样本数"] >= times_threshold)
]

records = []
for item_name, group in sorted(df_green_filtered.groupby("副掉落物品名称"), key=lambda x: get_item_id_by_name(x[0])):
    total_drops = group["副掉落数"].sum()
    total_runs = group["样本数"].sum()
    total_sanity = (group["样本数"] * group["作战理智消耗"]).sum()
    drops_per_sanity = total_drops / total_sanity
    sanity_per_drop = 1 / drops_per_sanity
    # Only the p-value is reported. chi2_test derives the expected counts itself,
    # so no per-group expected-count column is materialised here.
    _chi2_statistic, p_value = chi2_test(group["作战理智消耗"], group["副掉落数"], group["样本数"])
    records.append({
        "副掉落物品名称": item_name,
        "总掉落数": total_drops,
        "总样本数": total_runs,
        "总消耗理智": total_sanity,
        "单位理智副掉落数量": drops_per_sanity,
        "单件期望理智": sanity_per_drop,
        "卡方检验p值": p_value,
    })

df_material = pd.DataFrame.from_records(records)
df_material


Unnamed: 0,副掉落物品名称,总掉落数,总样本数,总消耗理智,单位理智副掉落数量,单件期望理智,卡方检验p值
0,固源岩,13193804,15201463,190081635,0.069411,14.406886,0.9997183
1,糖,539023,969446,11633352,0.046334,21.582292,0.653357
2,聚酸酯,3868233,6955977,83471724,0.046342,21.578774,0.8784809
3,异铁,746458,1644163,20171646,0.037005,27.023149,0.3045945
4,酮凝集,1791529,4002730,48445917,0.03698,27.041659,0.8967066
5,装置,636769,1848725,23069796,0.027602,36.229458,4.154296e-68


In [49]:
# Event stages that drop the white materials

item_names = ["源岩", "代糖", "酯原料", "异铁碎片", "双酮", "破损装置"]
# Item ids in the same order as item_names.
white_item_ids = ["30011", "30021", "30031", "30041", "30051", "30061"]
records = []
for stage_id, drop_info in stage_drop_info.items():
    if get_stage_type(stage_id) != "ACTIVITY":
        continue

    # Keep only the drops whose item type is MATERIAL.
    drop_info_filtered = {}
    for candidate_id, counts in drop_info.items():
        if get_item_type(candidate_id) == "MATERIAL":
            drop_info_filtered[candidate_id] = counts
    if len(drop_info_filtered) != 6:
        continue

    stage_name = get_stage_name(stage_id)
    zone_name = get_zone_name(get_stage_zone_id(stage_id))
    stage_open_time = get_stage_open_time(stage_id, server)
    ap_cost = get_stage_ap_cost(stage_id)

    # The six material drops must be exactly the six listed ids.
    if set(drop_info_filtered) != set(white_item_ids):
        continue
    times = drop_info_filtered["30011"][1]
    # All six drops are expected to share one sample count.
    assert all(times == v[1] for v in drop_info_filtered.values())

    record = {
        "作战名称": stage_name,
        "活动名称": zone_name,
        "作战开放时间": stage_open_time,
        "作战理智消耗": ap_cost,
        "样本数": times,
    }
    for white_name, white_id in zip(item_names, white_item_ids):
        dropped = drop_info_filtered[white_id][0]
        record[f"{white_name}掉落数"] = dropped
        record[f"{white_name}单件期望理智"] = (times * ap_cost) / dropped
    records.append(record)

df = pd.DataFrame.from_records(records)
df.to_csv("掉白材料的活动.csv")


In [50]:
# White-material chart: sample counts as bars, one drop-rate line per item.
fig = plt.figure(figsize=(40, 7))
ax1 = fig.add_subplot()
ax2 = ax1.twinx()

left_color = "black"
positions = range(len(df))
stage_labels = [
    f"{zone_name} {stage_name}"
    for zone_name, stage_name in zip(df["活动名称"], df["作战名称"])
]


def _white_rate_label(x, _):
    # Two-line tick label: rate, then its reciprocal; 0 has no finite reciprocal.
    if x != 0:
        return f"{x:.2f}\n{1/x:.2f}"
    return "0.00\n+∞"


# Left axis: sample counts on a log scale, with plain (non-exponent) tick labels.
ax1.set_title("SideStory历史掉率（掉全部白材料的作战）")
ax1.set_xticks(positions)
ax1.set_xticklabels(stage_labels, rotation=45, horizontalalignment="right")
ax1.set_xlim(-1, len(df))
ax1.set_ylabel("样本数", color=left_color)
ax1.tick_params(axis="y", which="both", colors=left_color)
ax1.set_yscale("log")
ax1.yaxis.set_major_formatter(LogFormatterPlain())
ax1.yaxis.set_minor_formatter(LogFormatterPlain())

# Right axis: drop rate per sanity point, with a fixed range and grid lines.
ax2.set_ylabel("单位理智掉落物品数量\n单件期望理智")
ax2.yaxis.set_major_formatter(_white_rate_label)
ax2.set_ylim(0, 0.08)
ax2.yaxis.minorticks_on()
ax2.grid(axis="y", which="major")
ax2.grid(axis="y", which="minor", linewidth=0.5)

bar = ax1.bar(positions, df["样本数"], color=left_color, alpha=0.3, label="样本数")
markers = ["o", "s", "D", "^", "v", "P"]
for i, item_name in enumerate(item_names):
    # The stored column is sanity per drop; its reciprocal is drops per sanity point.
    rate_per_sanity = 1 / df[f"{item_name}单件期望理智"]
    ax2.plot(positions, rate_per_sanity,
             label=f"{item_name}单位理智掉落物品数量\n{item_name}单件期望理智",
             marker=markers[i])
fig.legend(loc="lower right", ncols=7, bbox_to_anchor=(1, 1), bbox_transform=ax1.transAxes)

fig.savefig("白材料掉落图表.png")
plt.close(fig)


In [51]:
# White-material event stages, aggregated per item

# NOTE(review): this narrows `df` in place, so re-running the chart cell afterwards
# would plot only the filtered stages. The name is kept in case later cells rely on it.
df = df[
    ~df["作战名称"].isin(stage_blacklist)
    & ~df["活动名称"].isin(zone_blacklist)
    & (df["样本数"] >= times_threshold)
]

# Every item shares the same stages, so these totals are computed once, outside the loop.
total_runs = df["样本数"].sum()
total_sanity = (df["样本数"] * df["作战理智消耗"]).sum()

records = []
for item_name in item_names:
    total_drops = df[f"{item_name}掉落数"].sum()
    drops_per_sanity = total_drops / total_sanity
    sanity_per_drop = 1 / drops_per_sanity
    # Only the p-value is reported; the statistic itself is not used.
    _chi2_statistic, p_value = chi2_test(df["作战理智消耗"], df[f"{item_name}掉落数"], df["样本数"])
    records.append({
        "物品名称": item_name,
        "总掉落数": total_drops,
        "总样本数": total_runs,
        "总消耗理智": total_sanity,
        "单位理智掉落物品数量": drops_per_sanity,
        "单件期望理智": sanity_per_drop,
        "卡方检验p值": p_value,
    })

df_material = pd.DataFrame.from_records(records)
df_material


Unnamed: 0,物品名称,总掉落数,总样本数,总消耗理智,单位理智掉落物品数量,单件期望理智,卡方检验p值
0,源岩,975030,1563939,14075451,0.069272,14.435916,0.966294
1,代糖,649537,1563939,14075451,0.046147,21.669976,0.569755
2,酯原料,648046,1563939,14075451,0.046041,21.719833,0.82231
3,异铁碎片,517871,1563939,14075451,0.036792,27.179454,0.073427
4,双酮,516426,1563939,14075451,0.03669,27.255504,0.48819
5,破损装置,386943,1563939,14075451,0.027491,36.376032,0.559283
