In [None]:
# 多数据集匹配
import os
import re
import numpy as np
import pandas as pd

# Folder that is scanned for per-repository dataset files.
# The commented-out assignment is the alternative ("Top") sample folder.
# folder_path = 'dataset/Top_random100_from500'
folder_path = 'dataset/Bottom_random100_from500'

# Regular expression: lazily captures (group 1) everything in a file name
# that precedes the first "_all_roll_ewma_span28_normalize".
pattern = r"^(.*?)(?=(_all_roll_ewma_span28_normalize))"

# Receives the captured name prefix of every matching file.
repoName = []

# Logistic (sigmoid) function
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    ``x`` may be a scalar or a NumPy array; ``np.exp`` is applied
    element-wise, so an array input yields an array of the same shape.
    """
    decay = np.exp(-x)
    denominator = 1 + decay
    return 1 / denominator

# Walk the entries of the dataset folder and remember the name prefix of
# every regular file whose name matches the pattern.
for entry_name in os.listdir(folder_path):
    # Sub-directories and other non-file entries are ignored.
    if not os.path.isfile(os.path.join(folder_path, entry_name)):
        continue
    matched = re.search(pattern, entry_name)
    if matched is None:
        continue
    # Group 1 is the part of the file name in front of the suffix.
    repoName.append(matched.group(1))

# # 打印结果
# print(len(repoName))

# One summary row (dict) per repository is accumulated here.
results = []

# Analysis settings, hoisted out of the loop because they never change.
pred_len = 84      # number of forecast steps / dataset rows that are compared
num_features = 5   # number of leading forecast channels that feed the score
test_ratio = 0.2   # trailing fraction of each dataset counted as test data
output_path = 'workresult/results_summary_averageSlope.csv'

for name in repoName:
    # ---- Score derived from the stored forecast --------------------------
    try:
        pred = np.load(f'results/long_term_forecast_{name}_all_roll_ewma_span28_normalize_84_84_PatchTST_custom_ftM_sl84_ll84_pl84_dm512_nh2_el1_dl1_df2048_expand2_dc4_fc3_ebtimeF_dtTrue_Exp_0/pred.npy')
    except FileNotFoundError:
        print(f"未找到 {name} 的预测文件，跳过...")
        continue

    # First entry of the forecast array, cut to the compared horizon and the
    # scored channels.
    # NOTE(review): assumes pred is laid out as (window, time step, channel)
    # -- confirm against the code that wrote pred.npy.
    first_element = pred[0, :pred_len, :num_features]

    # Average along axis 0 of the slice, squash each average with the
    # sigmoid, then add the squashed values up.
    mean_values = np.mean(first_element, axis=0)
    sigmoid_values = sigmoid(mean_values)
    sum_sigmoid_values = np.sum(sigmoid_values)

    # sigmoid(0) == 0.5, so 0.5 per channel is the neutral sum; the score is
    # the relative deviation from that neutral sum, in percent.
    neutral_sum = 0.5 * num_features
    score = ((sum_sigmoid_values - neutral_sum) / neutral_sum) * 100

    # ---- Average slope of the matching ground-truth window ---------------
    # Build the path from folder_path so that switching the dataset folder
    # in one place cannot leave this read pointing at the other sample.
    file_path = os.path.join(folder_path, f'{name}_all_normalize.csv')
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        # Same policy as for a missing forecast: report and move on instead
        # of aborting the whole run.
        print(f"未找到 {name} 的数据文件，跳过...")
        continue

    dataset_len = len(df)
    num_test = int(dataset_len * test_ratio)  # number of test rows

    # Window of pred_len rows starting at the first test row (shorter when
    # fewer rows remain; slicing past the end is silently truncated).
    start_index = dataset_len - num_test
    end_index = start_index + pred_len
    data_range = df.iloc[start_index:end_index].drop(columns=['date'])

    # A slope needs at least two rows; fewer would raise IndexError below
    # (empty window) or divide zero by zero (single row).
    time_steps = len(data_range) - 1
    if time_steps < 1:
        print(f"{name} 的数据不足，无法计算斜率，跳过...")
        continue

    # Running total of every column over the window.
    cumulative_data = data_range.cumsum()

    # The first cumulative row equals the first raw row, so this is the
    # total change of the cumulative series between first and last row.
    total_changes = cumulative_data.iloc[-1] - data_range.iloc[0]

    # Per-column average slope of the cumulative series, and its mean.
    slopes = total_changes / time_steps
    average_slope = slopes.mean()

    # Summary row: fixed fields first, then one slope per data column.
    result_dict = {'name': name, 'score': score, 'average_slope': average_slope}
    result_dict.update(zip(data_range.columns, slopes))
    results.append(result_dict)

# Turn the rows into a DataFrame, sort by score and write the CSV summary.
results_df = pd.DataFrame(results)
if results_df.empty:
    # No repository produced a row: sorting by 'score' would raise KeyError,
    # and an existing summary file should not be replaced by an empty one.
    print("没有可用的结果，未生成汇总文件")
else:
    results_df = results_df.sort_values(by='score', ascending=True)
    # to_csv does not create missing parent directories.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    results_df.to_csv(output_path, index=False)

    print("结果已保存到 results_summary_averageSlope.csv")

In [None]:
import os

def count_subdirectories(folder_path):
    """
    Count the directories located directly inside a folder.

    Only the immediate entries of ``folder_path`` are inspected; nested
    levels are not descended into.

    :param folder_path: path of the folder to inspect
    :return: number of entries for which ``is_dir()`` is true
    """
    with os.scandir(folder_path) as listing:
        # Each directory entry contributes 1 to the total.
        return sum(1 for item in listing if item.is_dir())

# Replace with your own folder path.
# NOTE: this rebinds the module-level name folder_path, which the dataset
# scan earlier in the file also assigns and reads.
folder_path = "results"

# Call the function and print the result.
subdirectory_count = count_subdirectories(folder_path)
print(f"Subdirectories count: {subdirectory_count}")