In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import re

In [2]:
# Experiment switches. Their values are interpolated into the names of the
# result files that the following cells load.
arg_component_balance = False
arg_add_new_dataset = False
arg_add_transformer = False

In [3]:
# Folder that holds one result archive (.npz) per dataset.
result_path = './meta/results'

# Benchmark datasets; two more are appended when arg_add_new_dataset is set.
datasets = [
    'ETTh1', 'ETTh2', 'ETTm1', 'ETTm2',
    'ili', 'weather', 'ECL', 'Exchange', 'traffic',
]
if arg_add_new_dataset:
    datasets.extend(['covid-19', 'fred-md'])

# Horizon pair encoded in the result file names.
pred_len_1 = 96
pred_len_2 = 24

# One result file name per dataset. The pieces below are adjacent f-string
# literals, so they concatenate into a single name.
file_list = [
    f'{dataset}-component_balance_{arg_component_balance}'
    f'-add_new_dataset_{arg_add_new_dataset}'
    f'-add_transformer_{arg_add_transformer}'
    f'_{pred_len_1}_{pred_len_2}.npz'
    for dataset in datasets
]

# Baseline model names that are looked up when ranks are collected.
baselines = [
    'iTransformer', 'MambaSimple', 'PatchTST', 'TimeMixer', 'Koopa',
    'SegRNN', 'DLinear', 'FEDformer', 'Crossformer', 'Informer',
    'PAttn', 'ETSformer', 'TimeXer', 'LightTS', 'Nonstationary',
    'TiDE', 'Pyraformer', 'MICN', 'Transformer', 'FiLM',
    'TimesNet', 'TSMixer', 'Autoformer', 'Reformer', 'DUET',
]
print(len(file_list))

9


In [4]:
# sota performance
def _list_run_dirs(directory, pred_len, dataset=None):
    """List run folders in `directory` whose name contains ``pl{pred_len}``.

    When `dataset` is given, the folder name must contain it as well.
    Returns an empty list when `directory` cannot be listed (for example
    because it does not exist).
    """
    try:
        names = os.listdir(directory)
    except OSError:
        return []
    horizon_tag = f'pl{pred_len}'
    return [name for name in names
            if horizon_tag in name and (dataset is None or dataset in name)]


def _model_label(run_name):
    """Extract the model name from a run-folder name.

    Names containing 'LTF' or 'STF' carry the model in their second
    '_'-separated field; every other name carries it in the seventh.
    """
    fields = run_name.split('_')
    return fields[1] if 'LTF' in run_name or 'STF' in run_name else fields[6]


def search_sota_performance(dataset, pred_lens=(96,),
                            path_old='./results',
                            path='./results_long_term_forecasting/results'):
    """Collect the stored baseline scores for one dataset.

    Run folders are searched in two places:

    * directly under `path_old`, keeping names that contain both
      ``pl{pred_len}`` and `dataset`;
    * under ``path/dataset``, keeping names that contain ``pl{pred_len}``.

    A location that cannot be listed contributes no runs. For every run the
    second entry of its ``metrics.npy`` is stored; the returned frame labels
    that value ``'mse'``.

    Parameters
    ----------
    dataset : str
        Dataset name used for filtering / locating run folders.
    pred_lens : sequence of int
        Horizons to scan. Every horizon is loaded, but only the results of
        ``pred_lens[0]`` end up in the returned frame.
    path_old, path : str
        Roots of the two result layouts described above.

    Returns
    -------
    pandas.DataFrame
        One row per run of ``pred_lens[0]``, indexed by model name, with a
        single column ``'mse'`` sorted ascending. The frame is empty (and has
        no columns) when no run was found or `pred_lens` is empty.

    Raises
    ------
    FileNotFoundError
        If a matching run folder has no ``metrics.npy`` in any location it
        was found in.
    """
    if len(pred_lens) == 0:
        return pd.DataFrame()

    new_root = os.path.join(path, dataset)
    result_dict = {}
    for pred_len in pred_lens:
        # Remember every root a run name was seen in, old layout first, so a
        # run is loaded from where it actually lives instead of relying on a
        # "try one path, on any error try the other" fallback.
        run_roots = {}
        for name in _list_run_dirs(path_old, pred_len, dataset):
            run_roots.setdefault(name, []).append(path_old)
        for name in _list_run_dirs(new_root, pred_len):
            run_roots.setdefault(name, []).append(new_root)

        result_dict[pred_len] = {}
        for name, roots in run_roots.items():
            candidates = [os.path.join(root, name, 'metrics.npy') for root in roots]
            metrics_file = next((p for p in candidates if os.path.isfile(p)), None)
            if metrics_file is None:
                raise FileNotFoundError(
                    f'no metrics.npy found for run {name!r}; looked in: {candidates}')
            # NOTE(review): allow_pickle=True can execute arbitrary code when
            # loading an untrusted file; keep it only if the metrics files
            # really need pickling.
            result = np.load(metrics_file, allow_pickle=True)
            result_dict[pred_len][name] = result[1]

    df = pd.DataFrame.from_dict(result_dict[pred_lens[0]], orient='index')
    if not df.empty:
        df.columns = ['mse']
        df = df.sort_values(by='mse')
        df.index = [_model_label(name) for name in df.index]

    return df

In [None]:
# TSGym vs best sota: average rank of every model across datasets, per horizon.
dfs_rank = []
for pred_len_1, pred_len_2 in zip([96, 192, 336, 720], [24, 36, 48, 60]):
    file_list = [f'{dataset}-component_balance_{arg_component_balance}-add_new_dataset_{arg_add_new_dataset}-add_transformer_{arg_add_transformer}_{pred_len_1}_{pred_len_2}.npz' for dataset in datasets]
    dfs = []
    for file in file_list:
        # Dataset names may contain '-' themselves (e.g. 'covid-19'), so cut
        # the file name at the '-component' marker instead of splitting on '-'.
        dataset = file[:re.search('-component', file).start()]

        # These datasets are looked up with the second horizon of the pair.
        pred_len = pred_len_2 if dataset in ['ili', 'covid-19', 'fred-md'] else pred_len_1
        df = search_sota_performance(dataset, pred_lens=[pred_len])

        if df.empty:
            # No baseline results for this dataset: give up on this horizon.
            break

        # Load the TSGym result archive; the context manager closes the
        # underlying file handle once the needed arrays have been read.
        with np.load(os.path.join(result_path, file), allow_pickle=True) as perf_epoch:
            top1_perf_epoch = perf_epoch['top1_perf_epoch']
            best_epoch = perf_epoch['best_epoch'].item()

        # TSGym's score is top1_perf_epoch taken at index best_epoch.
        df.loc['TSGym'] = top1_perf_epoch[best_epoch]
        df = df.dropna().sort_values(by='mse').reset_index()
        df.columns = ['model', dataset]
        # TODO: some models appear more than once with different results;
        # the frame is sorted ascending, so keep='first' keeps the lowest one.
        df = df.drop_duplicates(subset='model', keep='first')
        df = df.set_index('model')
        dfs.append(df)

    # Skip the horizon unless every dataset produced a frame. This replaces
    # the check on the loop variable `df` leaking out of the loop above.
    if not dfs or len(dfs) < len(file_list):
        continue
    # model_names = set.intersection(*map(set, [_.index.tolist() for _ in dfs]))
    # df = df[[_ in model_names for _ in df.index]]
    dfs = [df.sort_values(by=df.columns[0]) for df in dfs]

    # Rank = 1-based position of a model in the ascending per-dataset frame.
    ranks = {k: [] for k in baselines}
    ranks['TSGym'] = []
    for df in dfs:
        positions = {model: pos for pos, model in enumerate(df.index, start=1)}
        for baseline in baselines:
            if baseline in positions:
                ranks[baseline].append(positions[baseline])
        # Raises KeyError if the TSGym row was removed by dropna above.
        ranks['TSGym'].append(positions['TSGym'])

    dfs = pd.concat(dfs, axis=1)
    dfs = dfs.round(4)
    dfs.index = dfs.index.str.replace('TemporalFusionTransformer', 'TFT')
    # dfs.to_excel(f'./meta/perf_component_balance_{arg_component_balance}-add_new_dataset_{arg_add_new_dataset}-add_transformer_{arg_add_transformer}_{pred_len_1}_{pred_len_2}.xlsx', index=True)

    # Mean rank over the datasets on which a model has an entry; models
    # without any entry are left out.
    df_rank = pd.Series({k: np.mean(v) for k, v in ranks.items() if len(v) > 0})
    df_rank = df_rank.sort_values()
    dfs_rank.append(df_rank)

# dfs_rank = pd.concat(dfs_rank, axis=1)
# dfs_rank.columns = [str(_) for _ in [96, 192, 336, 720]]
# dfs_rank.index = dfs_rank.index.str.replace('TemporalFusionTransformer', 'TFT')
# dfs_rank = dfs_rank.round(2)
# dfs_rank.to_excel(f'./meta/rank_component_balance_{arg_component_balance}-add_new_dataset_{arg_add_new_dataset}-add_transformer_{arg_add_transformer}.xlsx', index=True)

In [89]:
# Mean rank per model for the first horizon that was ranked (lower is better).
dfs_rank[0]

TSGym             1.555556
PAttn             3.222222
MICN              4.888889
TimesNet          5.000000
DLinear           7.555556
TiDE              7.555556
Nonstationary     7.888889
LightTS           9.000000
Autoformer       10.222222
Pyraformer       11.888889
dtype: float64

In [80]:
# Models that have a score on every dataset in `dfs`. `model_names` is
# otherwise only assigned in a commented-out line of the ranking cell, so it
# is recomputed here to keep the cell working on a fresh kernel.
# Assumes `dfs` is the concatenated per-dataset table left by the ranking cell
# (its index already uses the shortened 'TFT' label).
model_names = set(dfs.dropna().index)
model_names

{'Autoformer',
 'MICN',
 'Mamba',
 'Nonstationary',
 'PatchTST',
 'Pyraformer',
 'SegRNN',
 'TSGym',
 'TSMixer'}

In [79]:
# Per-dataset score table; presumably the rounded, concatenated frame left
# over from the last horizon the ranking cell processed - confirm.
dfs

Unnamed: 0_level_0,ETTh1,ETTh2,ETTm1,ETTm2,ili,weather,ECL,Exchange,traffic
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
SegRNN,0.4486,0.4228,0.4535,0.3873,4.5968,0.3564,0.2606,1.0145,0.8595
TSGym,0.495,0.4094,0.4347,0.3598,2.5674,0.3177,0.2464,0.9461,0.4565
PatchTST,0.5349,0.4396,0.461,0.4129,1.9889,0.3576,0.2458,0.9235,0.5399
Autoformer,0.5396,0.4838,0.5238,0.434,2.8931,0.4442,0.2516,1.1266,0.6552
Mamba,0.6043,0.5767,0.6371,0.5745,3.5959,0.3902,0.2399,1.7361,0.7206
TSMixer,0.7384,2.4228,0.6059,2.4974,6.4026,0.3165,0.2646,0.5829,0.6594
MICN,0.8083,0.8366,0.4953,0.5111,2.9754,0.3505,0.2134,0.7933,0.5738
Nonstationary,0.8093,0.6995,0.6166,0.6294,2.4043,0.43,0.2202,1.4276,0.6677
Pyraformer,0.9673,4.1336,0.8452,3.9508,4.5446,0.3878,1.3415,1.8688,1.8364


In [81]:
# NOTE(review): the ranking cell leaves `dfs_rank` as a list of Series (the
# concat into one table is commented out there), so a table output for this
# cell presumably comes from an earlier run - confirm before relying on it.
dfs_rank

Unnamed: 0,96,192,336,720
TSGym,1.56,1.67,1.89,2.22
PAttn,3.22,,,
MICN,4.89,5.89,5.78,4.11
TimesNet,5.0,6.33,5.78,
DLinear,7.56,,,
TiDE,7.56,,,
Nonstationary,7.89,,7.33,5.89
LightTS,9.0,,,
Autoformer,10.22,8.56,7.78,5.11
Pyraformer,11.89,11.11,11.11,8.44


## Differences between meta-features

In [65]:
# meta_feature_path = '/data/coding/chaochuan/TSGym/meta_learner_cc/meta_feature_copy/meta_feature'

In [66]:
# meta_features_dict = {}
# for file in os.listdir(meta_feature_path):
#     if file.endswith('.npz'):
#         meta_feature = np.load(os.path.join(meta_feature_path, file), allow_pickle=True)
#         meta_features_dict[file.replace('.npz', '').replace('meta_feature_', '')] = meta_feature['meta_feature']

In [67]:
# meta_features = np.stack(list(meta_features_dict.values()))
# meta_features = np.clip(meta_features, -1e4, 1e4)

# # z-score
# mu = np.nanmean(meta_features, axis=0, keepdims=True)
# std = np.nanstd(meta_features, axis=0, keepdims=True)

# meta_features = (meta_features - mu) / (std + 1e-6)
# meta_features = np.nan_to_num(meta_features, nan=0.0)

In [68]:
# fig = plt.figure(figsize=(20, 20))

# for seed in range(1, 5):
#     tsne = TSNE(n_components=2, random_state=seed, perplexity=10)
#     meta_features_tsne = tsne.fit_transform(meta_features) 
#     meta_features_dict_tsne = {k: v for k, v in zip(meta_features_dict.keys(), meta_features_tsne)}
#     fig.add_subplot(2, 2, seed)
#     for k, v in meta_features_dict_tsne.items():
#         plt.scatter(v[0], v[1])
#         plt.text(v[0], v[1], k, fontsize=12)
# fig.suptitle('TSNE', fontsize=16)
# plt.show()


In [69]:
# fig = plt.figure(figsize=(20, 20))

# pca = PCA(n_components=2, random_state=42)
# meta_features_pca = pca.fit_transform(meta_features) 
# meta_features_dict_pca = {k: v for k, v in zip(meta_features_dict.keys(), meta_features_pca)}
# for k, v in meta_features_dict_pca.items():
#     plt.scatter(v[0], v[1])
#     plt.text(v[0], v[1], k, fontsize=16)
# plt.title('PCA', fontsize=16)
# plt.show()
