In [1]:
import pandas as pd
from scipy import stats

from common import data_folder, item_names_T1, item_names_T2, item_names_T3


In [2]:
# Load the three event tables (T3 / T2 / T1, in that order). Each CSV is read
# with its first column as the index and "作战开放时间" parsed as datetimes.
df_T3, df_T2, df_T1 = (
    pd.read_csv(data_folder / csv_name, index_col=0, parse_dates=["作战开放时间"])
    for csv_name in (
        "掉单一蓝材料的活动.csv",
        "掉两种绿材料的活动.csv",
        "掉全部白材料的活动.csv",
    )
)


In [3]:
# Rows whose "样本数" is below this value are dropped by the filters in the
# analysis cells.
times_threshold = 50_000

# Values of "作战名称" to exclude from the analysis (currently none).
stage_blacklist = []

# Values of "活动名称" to exclude from the analysis.
zone_blacklist = [
    "崔林特尔梅之金",
    "覆潮之下・复刻",
]


In [4]:
# Chi-square test

def chi2_test(作战理智消耗, 掉落数, 样本数):
    """Chi-square goodness-of-fit of observed drops against a pooled rate.

    A single rate is estimated over all rows as
    ``sum(掉落数) / sum(样本数 * 作战理智消耗)``; the expected count for each
    row is that rate times the row's ``样本数 * 作战理智消耗``.

    Parameters
    ----------
    作战理智消耗, 掉落数, 样本数
        Element-wise aligned array-likes (the notebook passes pandas Series)
        that support ``*`` and ``.sum()``.

    Returns
    -------
    The result of ``scipy.stats.chisquare`` (statistic and p-value), with the
    default ``ddof=0``.
    """
    sanity_spent = 样本数 * 作战理智消耗
    pooled_rate = 掉落数.sum() / sanity_spent.sum()
    # Multiply in the order rate * samples * cost so the floating-point
    # results match the established outputs exactly.
    expected_drops = pooled_rate * 样本数 * 作战理智消耗
    return stats.chisquare(掉落数, f_exp=expected_drops)


In [5]:
# Events that drop a single blue material, grouped by item

# Keep rows that are not blacklisted (by stage name or event name) and that
# have at least `times_threshold` samples.
df_T3_filtered = df_T3[
    ~df_T3["作战名称"].isin(stage_blacklist)
    & ~df_T3["活动名称"].isin(zone_blacklist)
    & (df_T3["样本数"] >= times_threshold)
]

records = []
# Iterate over the groups in the order the items appear in item_names_T3.
for item_name, group in sorted(
    df_T3_filtered.groupby("作战掉落物品名称"),
    key=lambda pair: item_names_T3.index(pair[0]),  # type: ignore
):
    samples = group["样本数"]
    sanity_cost = group["作战理智消耗"]
    drops = group["掉落数"]

    total_drops = drops.sum()
    total_sanity = (samples * sanity_cost).sum()
    drops_per_sanity = total_drops / total_sanity

    records.append({
        "作战掉落物品名称": item_name,
        "总掉落数": total_drops,
        "总样本数": samples.sum(),
        "总消耗理智": total_sanity,
        "单位理智掉落物品数量": drops_per_sanity,
        "单件期望理智": 1 / drops_per_sanity,
        "卡方检验p值": chi2_test(sanity_cost, drops, samples).pvalue,
    })

df_T3_material = pd.DataFrame.from_records(records)
df_T3_material


Unnamed: 0,作战掉落物品名称,总掉落数,总样本数,总消耗理智,单位理智掉落物品数量,单件期望理智,卡方检验p值
0,固源岩组,25828419,23610734,495825414,0.052092,19.196894,0.9928503
1,糖组,12368715,13066217,267292746,0.046274,21.610389,0.9995857
2,聚酸酯组,8629427,8894907,186479253,0.046276,21.609691,0.9142374
3,异铁组,20440907,26264603,551556663,0.03706,26.982984,0.8009785
4,酮凝集组,17387659,22352349,469215744,0.037057,26.985562,0.1070448
5,全新装置,5970960,10621916,215121147,0.027756,36.0279,0.05416389
6,扭转醇,19328839,21073472,440829858,0.043846,22.806846,0.9153161
7,轻锰矿,20398388,26313185,550374021,0.037063,26.981251,0.008432727
8,研磨石,17913000,27653346,556631220,0.032181,31.074148,1.395744e-16
9,RMA70-12,15119428,25952653,545005713,0.027742,36.046715,3.576746e-06


In [6]:
# Events that drop two green materials, grouped by the main drop

# Keep rows that are not blacklisted (by stage name or event name) and that
# have at least `times_threshold` samples.
df_T2_filtered = df_T2[
    ~df_T2["作战名称"].isin(stage_blacklist)
    & ~df_T2["活动名称"].isin(zone_blacklist)
    & (df_T2["样本数"] >= times_threshold)
]

records = []
# Iterate over the groups in the order the items appear in item_names_T2.
for item_name, group in sorted(
    df_T2_filtered.groupby("主掉落物品名称"),
    key=lambda pair: item_names_T2.index(pair[0]),  # type: ignore
):
    samples = group["样本数"]
    sanity_cost = group["作战理智消耗"]
    main_drops = group["主掉落数"]

    total_drops = main_drops.sum()
    total_sanity = (samples * sanity_cost).sum()
    drops_per_sanity = total_drops / total_sanity

    records.append({
        "主掉落物品名称": item_name,
        "总掉落数": total_drops,
        "总样本数": samples.sum(),
        "总消耗理智": total_sanity,
        "单位理智主掉落数量": drops_per_sanity,
        "单件期望理智": 1 / drops_per_sanity,
        "卡方检验p值": chi2_test(sanity_cost, main_drops, samples).pvalue,
    })

df_T2_material_main = pd.DataFrame.from_records(records)
df_T2_material_main


Unnamed: 0,主掉落物品名称,总掉落数,总样本数,总消耗理智,单位理智主掉落数量,单件期望理智,卡方检验p值
0,固源岩,18738625,11241613,134899356,0.138908,7.199,0.999551
1,糖,12370558,10532125,133631628,0.092572,10.802393,0.986317
2,聚酸酯,2749647,2475191,29702292,0.092574,10.80222,0.999773
3,异铁,528852,521069,7137924,0.07409,13.497016,0.97484
4,酮凝集,1232431,1352808,16651647,0.074013,13.511221,0.997578
5,装置,3064135,4499698,54851223,0.055863,17.901046,0.060082


In [7]:
# Events that drop two green materials, grouped by the secondary drop
# (reuses df_T2_filtered built in the main-drop cell)

records = []
# Iterate over the groups in the order the items appear in item_names_T2.
for item_name, group in sorted(
    df_T2_filtered.groupby("副掉落物品名称"),
    key=lambda pair: item_names_T2.index(pair[0]),  # type: ignore
):
    samples = group["样本数"]
    sanity_cost = group["作战理智消耗"]
    sub_drops = group["副掉落数"]

    total_drops = sub_drops.sum()
    total_sanity = (samples * sanity_cost).sum()
    drops_per_sanity = total_drops / total_sanity

    records.append({
        "副掉落物品名称": item_name,
        "总掉落数": total_drops,
        "总样本数": samples.sum(),
        "总消耗理智": total_sanity,
        "单位理智副掉落数量": drops_per_sanity,
        "单件期望理智": 1 / drops_per_sanity,
        "卡方检验p值": chi2_test(sanity_cost, sub_drops, samples).pvalue,
    })

df_T2_material_sub = pd.DataFrame.from_records(records)
df_T2_material_sub


Unnamed: 0,副掉落物品名称,总掉落数,总样本数,总消耗理智,单位理智副掉落数量,单件期望理智,卡方检验p值
0,固源岩,13193804,15201463,190081635,0.069411,14.406886,0.9997183
1,糖,539023,969446,11633352,0.046334,21.582292,0.653357
2,聚酸酯,3868233,6955977,83471724,0.046342,21.578774,0.8784809
3,异铁,746458,1644163,20171646,0.037005,27.023149,0.3045945
4,酮凝集,1791529,4002730,48445917,0.03698,27.041659,0.8967066
5,装置,636769,1848725,23069796,0.027602,36.229458,4.154296e-68


In [8]:
# Events that drop white materials

# Keep rows that are not blacklisted (by stage name or event name) and that
# have at least `times_threshold` samples.
df_T1_filtered = df_T1[
    ~df_T1["作战名称"].isin(stage_blacklist)
    & ~df_T1["活动名称"].isin(zone_blacklist)
    & (df_T1["样本数"] >= times_threshold)
]

# Every item is evaluated over the same set of rows, so the sample and sanity
# totals do not depend on the item; compute them once instead of per iteration.
samples = df_T1_filtered["样本数"]
sanity_cost = df_T1_filtered["作战理智消耗"]
总样本数 = samples.sum()
总消耗理智 = (samples * sanity_cost).sum()

records = []
for item_name in item_names_T1:
    # Each item has its own "<item>掉落数" column in the T1 table.
    drops = df_T1_filtered[f"{item_name}掉落数"]
    总掉落数 = drops.sum()
    单位理智掉落物品数量 = 总掉落数 / 总消耗理智

    records.append({
        "物品名称": item_name,
        "总掉落数": 总掉落数,
        "总样本数": 总样本数,
        "总消耗理智": 总消耗理智,
        "单位理智掉落物品数量": 单位理智掉落物品数量,
        "单件期望理智": 1 / 单位理智掉落物品数量,
        "卡方检验p值": chi2_test(sanity_cost, drops, samples).pvalue,
    })

df_material = pd.DataFrame.from_records(records)
df_material


Unnamed: 0,物品名称,总掉落数,总样本数,总消耗理智,单位理智掉落物品数量,单件期望理智,卡方检验p值
0,源岩,975030,1563939,14075451,0.069272,14.435916,0.966294
1,代糖,649537,1563939,14075451,0.046147,21.669976,0.569755
2,酯原料,648046,1563939,14075451,0.046041,21.719833,0.82231
3,异铁碎片,517871,1563939,14075451,0.036792,27.179454,0.073427
4,双酮,516426,1563939,14075451,0.03669,27.255504,0.48819
5,破损装置,386943,1563939,14075451,0.027491,36.376032,0.559283
