In [2]:
import pandas as pd
import numpy as np

# Load the clustered ETH dataset from disk into a DataFrame.
file_path = 'Processed_Data/overall/ETH_clustered.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Number of consecutive 'cluster1' labels that make up one pattern. The
# 'Change' value of the row right after the window is the observation that
# gets recorded for that pattern.
WINDOW_SIZE = 5

# Maps each WINDOW_SIZE-tuple of cluster labels to a record of the 'Change'
# values that followed it:
#   {'changes': [value, ...], 'positive_count': number of values > 0}
cluster_groups = {}

# Extract both columns once, instead of doing a column lookup and an .iloc
# slice on every iteration of the loop.
cluster_labels = df['cluster1'].tolist()
change_values = df['Change'].to_numpy()

# Stop WINDOW_SIZE rows before the end so that every window still has a
# following row to read the 'Change' value from. For inputs with
# WINDOW_SIZE rows or fewer the range is empty and nothing is recorded.
for i in range(len(cluster_labels) - WINDOW_SIZE):
    # The pattern: WINDOW_SIZE consecutive labels, hashable so it can be a key.
    cluster_group = tuple(cluster_labels[i:i + WINDOW_SIZE])

    # The observation: the 'Change' value immediately after the window.
    sixth_data_change = change_values[i + WINDOW_SIZE]

    # setdefault creates the record the first time a pattern is seen, so new
    # and already-seen patterns share a single update path.
    group = cluster_groups.setdefault(
        cluster_group, {'changes': [], 'positive_count': 0}
    )
    group['changes'].append(sixth_data_change)
    if sixth_data_change > 0:
        group['positive_count'] += 1

# Summarise every cluster pattern as a 3-tuple:
#   (mean of the recorded 'changes',
#    percentage of those changes counted as positive,
#    number of recorded changes)
results = {}
for pattern, stats in cluster_groups.items():
    observed = stats['changes']
    n_observed = len(observed)
    results[pattern] = (
        np.mean(observed),
        (stats['positive_count'] / n_observed) * 100,
        n_observed,
    )

# Drop patterns with fewer than 10 recorded changes, then rank the remaining
# ones by their positive-change percentage, highest first.
filtered_results = {}
for pattern, summary in results.items():
    if summary[2] >= 10:
        filtered_results[pattern] = summary
sorted_results = sorted(
    filtered_results.items(),
    key=lambda item: item[1][1],
    reverse=True,
)

# Report each ranked pattern with its mean following change, its
# positive-change percentage and the number of recorded changes.
for entry in sorted_results:
    cluster_group = entry[0]
    average_change, positive_percentage, data_count = entry[1]
    print(f"Cluster Group: {cluster_group}, Average Sixth Data Change: {average_change:.2f}, Positive Change Percentage: {positive_percentage:.2f}%, Data Count: {data_count}")


Cluster Group: (0, 0, 2, 1, 2), Average Sixth Data Change: 0.81, Positive Change Percentage: 90.00%, Data Count: 10
Cluster Group: (1, 2, 2, 1, 2), Average Sixth Data Change: 0.51, Positive Change Percentage: 80.00%, Data Count: 10
Cluster Group: (1, 0, 1, 2, 2), Average Sixth Data Change: 0.55, Positive Change Percentage: 76.92%, Data Count: 13
Cluster Group: (1, 1, 2, 0, 2), Average Sixth Data Change: 0.24, Positive Change Percentage: 75.00%, Data Count: 16
Cluster Group: (2, 1, 2, 1, 2), Average Sixth Data Change: 0.25, Positive Change Percentage: 75.00%, Data Count: 12
Cluster Group: (2, 1, 0, 1, 2), Average Sixth Data Change: 0.35, Positive Change Percentage: 75.00%, Data Count: 12
Cluster Group: (1, 2, 2, 2, 1), Average Sixth Data Change: 0.29, Positive Change Percentage: 72.73%, Data Count: 11
Cluster Group: (1, 0, 2, 0, 1), Average Sixth Data Change: 0.51, Positive Change Percentage: 72.22%, Data Count: 18
Cluster Group: (0, 2, 1, 2, 1), Average Sixth Data Change: 0.22, Positiv