In [None]:
### Q1. Step 1. 按照年份拆分数据，以防出现场次只被遍历一次的情况

In [None]:
import pandas as pd

# Load the pre-cleaned CSV file.
df = pd.read_csv('/Users/Keanu/Desktop/all_sorted_cleaned.csv')

# Parse the date column; values that do not match YYYY-MM-DD become NaT.
df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='coerce')

# Rows whose date failed to parse cannot be assigned to a year. groupby would
# drop them silently, so report how many are being left out.
invalid_date_mask = df['日期'].isna()
if invalid_date_mask.any():
    print(f"警告: 有 {invalid_date_mask.sum()} 行的日期无法解析，未写入任何年份文件")

# Split the remaining rows by calendar year and write one CSV per year.
valid_rows = df[~invalid_date_mask]
# Cast the year to int: a date column containing NaT yields float years
# (2021.0), which would produce file names such as all_sorted_2021.0.csv
# instead of the all_sorted_2021.csv name that the merge step reads.
for year, data in valid_rows.groupby(valid_rows['日期'].dt.year.astype(int)):
    output_path = f'/Users/Keanu/Desktop/all_sorted_{year}.csv'
    data.to_csv(output_path, index=False)
    print(f"年份 {year} 的数据已保存为 {output_path}")


In [None]:
### Step 2. 提取每场最高分的记录，再把各年份的结果合并起来

In [None]:
import pandas as pd

def extract_and_merge_max_scores(years, input_dir, output_file, group_cols='場'):
    """Collect the top-scoring rows of every group and merge them across years.

    For each year, reads ``{input_dir}/all_sorted_{year}.csv``, converts the
    "分" column to numbers (unparseable values become NaN) and keeps every row
    whose "分" equals the maximum within its group; tied rows are all kept.
    The per-year results are concatenated in the order of ``years`` and
    written to ``output_file`` without the index.

    Args:
        years: Iterable of year labels used to build the input file names.
        input_dir: Directory that holds the per-year CSV files.
        output_file: Path of the merged CSV file to write.
        group_cols: Column name, or list of column names, that defines one
            group. Defaults to "場". Pass e.g. ``['日期', '場']`` when the
            "場" values alone repeat within a single year file.

    Returns:
        The merged DataFrame that was written to ``output_file``.

    Raises:
        ValueError: If ``years`` is empty.
        FileNotFoundError: If a per-year CSV file does not exist.
    """
    years = list(years)
    if not years:
        # pd.concat on an empty list fails with an unhelpful message.
        raise ValueError("years must contain at least one year")

    yearly_top_rows = []
    for year in years:
        input_path = f'{input_dir}/all_sorted_{year}.csv'
        df = pd.read_csv(input_path)

        # Non-numeric scores become NaN and can never equal a group maximum.
        df['分'] = pd.to_numeric(df['分'], errors='coerce')

        # Broadcast each group's maximum back onto its rows so that every row
        # tied for the top score is retained.
        group_max = df.groupby(group_cols)['分'].transform('max')
        yearly_top_rows.append(df[df['分'] == group_max])

        print(f"{year}年每一場中分数最高的数据已提取")

    # Stack the per-year results into a single frame with a fresh index.
    df_merged = pd.concat(yearly_top_rows, ignore_index=True)

    df_merged.to_csv(output_file, index=False)
    print(f"所有文件已合并并保存为 {output_file}")
    return df_merged

# Run parameters: where the per-year files live, where the merged result is
# written, and which years to process.
input_directory = '/Users/Keanu/Desktop'
output_filepath = '/Users/Keanu/Desktop/merged_max_score_events.csv'
years = ['2021', '2022', '2023', '2024']

# Extract the top-scoring rows for every year and merge them into one file.
extract_and_merge_max_scores(
    years=years,
    input_dir=input_directory,
    output_file=output_filepath,
)


In [None]:
###Step3.再次清洗数据，相同评分的不予考虑

In [None]:
import pandas as pd

# Input: the merged top-score rows. Output: the de-duplicated copy.
file_path = '/Users/Keanu/Desktop/merged_max_score_events.csv'
output_path = '/Users/Keanu/Desktop/cleaned_max_score_events.csv'

df = pd.read_csv(file_path)

# Keep only the first row of every (日期, 場, 分) combination.
# NOTE(review): the step heading says tied scores should be disregarded, while
# keep='first' retains one row per tie — confirm which behaviour is intended.
df_cleaned = df[~df.duplicated(subset=['日期', '場', '分'], keep='first')]

# Write the cleaned rows without the index and report where they went.
df_cleaned.to_csv(output_path, index=False)
print(f"清洗后的数据集已保存为 {output_path}")


In [None]:
###Step4.计算比例

In [None]:
import pandas as pd

# Load the de-duplicated top-score rows.
df = pd.read_csv('/Users/Keanu/Desktop/cleaned_max_score_events.csv')

# Select the rows whose rank ("名") is 1. The column is coerced to numbers
# first so that a column read as text (because it holds some non-numeric
# entries) still matches; unparseable values become NaN and never count as 1.
df_rank_1 = df[pd.to_numeric(df['名'], errors='coerce') == 1]

# Share of rank-1 rows among all rows.
rank_1_count = len(df_rank_1)
total_count = len(df)
# An empty input file yields NaN instead of raising ZeroDivisionError.
rank_1_ratio = rank_1_count / total_count if total_count else float('nan')

# Save the rank-1 rows to their own CSV file.
df_rank_1.to_csv('/Users/Keanu/Desktop/rank_1_data.csv', index=False)

# Report the count and the share.
print(f"名为1的数据条数: {rank_1_count}")
print(f"名为1的数据占总体的比例: {rank_1_ratio:.2%}")

In [None]:
### Step 5. 可视化（饼状图）

In [None]:
# 所有名次分布

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm

# Load the de-duplicated top-score rows.
df = pd.read_csv('/Users/Keanu/Desktop/cleaned_max_score_events.csv')

# Number of rows for each rank value, most frequent first.
rank_counts = df['名'].value_counts()

# One viridis colour per rank (any other colormap name can be substituted).
# pyplot.get_cmap is used because matplotlib.cm.get_cmap was removed in
# Matplotlib 3.9.
colors = plt.get_cmap('viridis', len(rank_counts))

# Draw the pie chart with one wedge per rank.
plt.figure(figsize=(8, 8))
wedges, texts, autotexts = plt.pie(rank_counts, labels=rank_counts.index, autopct='%1.1f%%', startangle=90, colors=[colors(i) for i in range(len(rank_counts))])

# Put the legend to the right of the pie.
plt.legend(wedges, rank_counts.index, title="Ranking", loc="center left", bbox_to_anchor=(1, 0, 0.5, 1))

# Style the percentage labels drawn inside the wedges.
for autotext in autotexts:
    autotext.set_fontsize(10)
    autotext.set_color('white')


plt.title('Ranking Distribution')
plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.

# Save the figure; the relative path resolves against the working directory.
plt.savefig('ranking_pie_chart.png', bbox_inches='tight')

plt.show()

In [None]:
# 第一名占比

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the de-duplicated top-score rows.
df = pd.read_csv('/Users/Keanu/Desktop/cleaned_max_score_events.csv')

# Share of rows whose rank ("名") is 1. The column is coerced to numbers so
# that a column read as text still matches; unparseable values become NaN and
# never count as rank 1.
df_rank_1 = df[pd.to_numeric(df['名'], errors='coerce') == 1]
rank_1_count = len(df_rank_1)
total_count = len(df)
if total_count == 0:
    # Fail with a clear message instead of a ZeroDivisionError below.
    raise ValueError("cleaned_max_score_events.csv contains no rows; nothing to plot")
rank_1_ratio = rank_1_count / total_count

# Two-slice pie: rank 1 versus every other rank.
labels = ['Rank 1', 'Other Ranks']
sizes = [rank_1_ratio, 1 - rank_1_ratio]
colors = ['lightcoral', 'lightskyblue']
explode = (0.1, 0)  # pull the rank-1 slice out of the pie

plt.figure(figsize=(6, 6))
plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct='%1.1f%%', shadow=True, startangle=140)
plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
plt.title('Proportion of Rank 1')
plt.show()