In [None]:
import os
import pickle
import re

from collections import defaultdict

import pandas as pd
import matplotlib.pyplot as plt

### 全局变量

In [None]:
# Inclusive (year, month) period of the monthly data files to load.
START_YEAR = 2024
START_MONTH = 4
END_YEAR = 2024
END_MONTH = 4
# Relative path of the folder that holds the monthly pickle files. It is built
# with os.path.join so the separator matches the host OS; the former literal
# '..\data' relied on the invalid escape sequence '\d' and only resolved as a
# path on Windows.
DATA_FOLDER_PATH = os.path.join(os.pardir, 'data')


In [None]:
# Matplotlib font setup: draw text with the SimHei font and turn off the
# Unicode minus glyph so that negative signs are displayed correctly.
plt.rcParams.update({
    'font.family': 'SimHei',
    'axes.unicode_minus': False,
})


### File I/O

In [None]:
def unpickle(file):
    """
    Read one pickled object from disk.

    :param file: path of the pickle file to read

    :return: the unpickled object, or an empty dict when the path does not exist
    """
    if os.path.exists(file):
        with open(file, 'rb') as handle:
            # NOTE(review): unpickling can execute arbitrary code; only point
            # this at files from a trusted source.
            return pickle.load(handle, encoding='bytes')
    return {}

In [None]:
def load_and_merge_data(start_year, start_month, end_year, end_month):
    """
    Load and merge equity data from specified period into a single DataFrame.

    Every calendar month from (start_year, start_month) through
    (end_year, end_month), both inclusive, is visited, so the period may cross
    year boundaries (e.g. 2023-11 .. 2024-02). A month contributes nothing when
    `unpickle` returns an empty result for its file or the loaded data has no
    "equity_data" key.

    :param start_year: int, start year
    :param start_month: int, start month
    :param end_year: int, end year
    :param end_month: int, end month

    :return: DataFrame built from all collected records; empty when none were found
    """
    # Number the months linearly (year * 12 + zero-based month). Looping over
    # the month range separately inside each year would skip months as soon as
    # the period spans more than one year.
    first_index = start_year * 12 + (start_month - 1)
    last_index = end_year * 12 + (end_month - 1)

    all_equity_data = []
    for month_index in range(first_index, last_index + 1):
        year, month_offset = divmod(month_index, 12)
        # Build the file name for this month
        filename = f"momozhen_{year}_{month_offset + 1}.pkl"
        file_path = os.path.join(DATA_FOLDER_PATH, filename)
        data = unpickle(file_path)
        if data and "equity_data" in data:
            all_equity_data.extend(data["equity_data"])

    # Merge all collected equity records into one DataFrame
    if all_equity_data:
        return pd.DataFrame(all_equity_data)
    else:
        return pd.DataFrame()
    

### Helper functions

In [None]:
def plot_attribute_percentages(percentages, attribute_name, min_percentage=50, max_percentage=150):
    """
    Show a frequency plot of the percentage values recorded for one attribute.

    :param percentages: mapping of attribute name -> {percentage: count}
    :param attribute_name: key of the attribute to plot
    :param min_percentage: lowest percentage that gets its own bin
    :param max_percentage: highest percentage that gets its own bin

    :return: None; prints a notice instead of plotting when the attribute is absent
    """
    if attribute_name not in percentages:
        print(f"No data available for {attribute_name}")
        return
    data = percentages[attribute_name]
    x = list(data.keys())
    y = list(data.values())

    # One unit-wide bin per percentage. The edges run up to max_percentage + 1
    # because the last histogram bin is closed on both sides: stopping at
    # max_percentage would merge the counts of max_percentage - 1 and
    # max_percentage into a single bar.
    bin_edges = range(min_percentage, max_percentage + 2)

    plt.figure(figsize=(10, 6))
    plt.hist(x, bins=bin_edges, weights=y, color='skyblue')  # attribute values are expected to fall within 50 - 150%
    plt.xlabel('Percentage')
    plt.ylabel('Frequency')
    plt.title(f'{attribute_name}')
    plt.grid(axis='y')
    plt.show()

In [None]:
def plot_attribute_percentages_all(percentages, min_percentage=50, max_percentage=150):
    """
    Combine the percentage counts of all attributes into one frequency plot.

    :param percentages: mapping of attribute name -> {percentage: count}
    :param min_percentage: lowest percentage that gets its own bin
    :param max_percentage: highest percentage that gets its own bin

    :return: None
    """
    # Sum the counts per percentage across every attribute
    data = defaultdict(int)
    for attribute_data in percentages.values():
        for percentage, count in attribute_data.items():
            data[percentage] += count

    x = list(data.keys())
    y = list(data.values())

    # One unit-wide bin per percentage. The edges run up to max_percentage + 1
    # because the last histogram bin is closed on both sides: stopping at
    # max_percentage would merge the counts of max_percentage - 1 and
    # max_percentage into a single bar.
    bin_edges = range(min_percentage, max_percentage + 2)

    plt.figure(figsize=(10, 6))
    plt.hist(x, bins=bin_edges, weights=y, color='skyblue')  # attribute values are expected to fall within 50 - 150%
    plt.xlabel('Percentage')
    plt.ylabel('Frequency')
    plt.title('All attributes')
    plt.grid(axis='y')
    plt.show()

In [None]:
def plot_level(df, min_level=1, max_level=300):
    """
    Show a histogram of equipment levels.

    :param df: DataFrame (or mapping) whose 'level' entry holds the levels to plot
    :param min_level: lowest level that gets its own bin
    :param max_level: highest level that gets its own bin

    :return: None
    """
    x = df['level']

    # One unit-wide bin per level. The edges run up to max_level + 1 because
    # the last histogram bin is closed on both sides: stopping at max_level
    # would merge the counts of max_level - 1 and max_level, and would leave
    # no bin at all when min_level == max_level.
    bin_edges = range(min_level, max_level + 2)

    plt.figure(figsize=(10, 6))
    plt.hist(x, bins=bin_edges, color='skyblue')
    plt.xlabel('Level')
    plt.ylabel('Frequency')
    plt.title(f'等级分布 ({min_level} - {max_level})')
    plt.grid(axis='y')
    plt.show()

### 数据分析

In [None]:
# Load the data of the configured period and merge it into one DataFrame
df = load_and_merge_data(
    start_year=START_YEAR,
    start_month=START_MONTH,
    end_year=END_YEAR,
    end_month=END_MONTH,
)

In [None]:
# Add the df['mysterious'] column: True when at least one entry of the row's
# attributes list contains the text '神秘属性'.
def _has_mysterious_attribute(attribute_list):
    """Return True if any entry of attribute_list contains '神秘属性'."""
    for text in attribute_list:
        if '神秘属性' in text:
            return True
    return False

df['mysterious'] = df['attributes'].apply(_has_mysterious_attribute)

In [None]:
# Example value of `attributes`: `['生命偷取 +19.4% (79%)', '伤害反弹 +8.8% (60%)', '附加物防 +178.2 (81%)', '附加魔防 +220 (100%)', '[神秘属性]专属强化命的“先兆感知”，攻击满血满盾的对手会恢复先兆感知。']`

# Extract the percentage of every attribute entry. Entries containing '神秘属性'
# are not parsed; their occurrences are counted separately.
mysterious_count = 0
attr_perc_dict = defaultdict(lambda: defaultdict(int))

# Groups: (1) attribute name, (2) numeric bonus value, (3) percentage in parentheses.
# The bonus value may carry a trailing '%' (as in '生命偷取 +19.4% (79%)'); the
# optional '%?' lets such entries match instead of being silently skipped.
attribute_pattern = re.compile(r'([^\[]+)\s+\+(\d+\.?\d*)%?\s+\((\d+)%\)')


In [None]:
# Start from fresh counters so that re-running this cell does not add the same
# rows to the tallies a second time.
mysterious_count = 0
attr_perc_dict = defaultdict(lambda: defaultdict(int))

for attributes in df['attributes']:
    for attribute in attributes:
        if '神秘属性' in attribute:
            # Entries with the marker are only counted, never parsed
            mysterious_count += 1
            continue
        match = attribute_pattern.match(attribute)
        if match:
            attr_name = match.group(1).strip()
            percent = int(match.group(3))
            # Tally: attribute name -> percentage -> number of occurrences
            attr_perc_dict[attr_name][percent] += 1

In [None]:
# Report how many attribute entries contained the '神秘属性' marker
print(f"神秘属性出现次数: {mysterious_count}")

In [None]:
# Frequency plot of the percentages recorded for the '附加物防' attribute
plot_attribute_percentages(percentages=attr_perc_dict, attribute_name='附加物防')

In [None]:
# Frequency plot of the percentages of all attributes combined
plot_attribute_percentages_all(percentages=attr_perc_dict)

In [None]:
# Plot the level histogram over the range of levels found in the data
min_level, max_level = df['level'].min(), df['level'].max()
plot_level(df, min_level=min_level, max_level=max_level)


In [None]:
# Split the equipment into two groups: with and without a mysterious attribute.
# The boolean 'mysterious' column is used directly as the row mask (and its
# negation) instead of being compared against True / False.
mysterious_df = df[df['mysterious']]
non_mysterious_df = df[~df['mysterious']]