In [1]:
import numpy as np
import pandas as pd
import os
import re
from collections import defaultdict

In [2]:
list_a = ['dataset', 'horizon', 'feature']
list_b = ['in', 'sd', 'fusion', 'embed', 'ff']


map = {
    'un': 'in',
    'ud': 'sd',
    'fu': 'fusion',
    'eb': 'embed',
    'ff': 'ff'
}

def parse_experiments(folder_path, top_k=None, component_names=list_a, prefix_map=map):
    rows = []

    for filename in os.listdir(folder_path):
        if not filename.endswith('.txt'):
            continue

        exp_name = filename.replace('.txt', '')
        components = exp_name.split('_')

        if len(components) != len(component_names):
            print(f"Skipping file with unexpected name: {filename}")
            continue

        component_data = dict(zip(component_names, components))

        filepath = os.path.join(folder_path, filename)
        with open(filepath, 'r') as f:
            lines = f.readlines()[1:]  # skip header

            if top_k is not None:
                lines = lines[:top_k]

            for line in lines:
                parts = line.strip().split('|')
                if len(parts) < 2:
                    continue

                config_str = parts[0].strip()
                rank_match = re.search(r'Avg Rank:\s*([0-9.]+)', parts[1])
                if not rank_match:
                    continue

                try:
                    rank = float(rank_match.group(1))
                except ValueError:
                    continue

                config_items = config_str.split('_')
                config = {}
                for item in config_items:
                    for prefix, key in prefix_map.items():
                        if item.startswith(prefix):
                            config[key] = item[len(prefix):]
                            break

                row = {**component_data, **config, 'rank': rank}
                rows.append(row)

    df = pd.DataFrame(rows)
    return df


In [3]:
res = parse_experiments("./rank_book", top_k=30)

In [4]:
# res[res['dataset']=='M4']['dataset'] = res[res['dataset']=='M4']['dataset'] + res[res['dataset']=='M4']['horizon']
mask = res['dataset'] == 'M4'
res.loc[mask, 'dataset'] = res.loc[mask, 'dataset'] + res.loc[mask, 'horizon'].astype(str)

In [5]:
res[res['dataset']=='M4Daily']

Unnamed: 0,dataset,horizon,feature,in,sd,fusion,embed,ff,rank
1500,M4Daily,Daily,s,False,True,temporal,none,mlp,1.0
1501,M4Daily,Daily,s,False,True,temporal,freq,mlp,2.0
1502,M4Daily,Daily,s,True,True,feature,patch,rnn,3.33
1503,M4Daily,Daily,s,True,False,feature,patch,rnn,3.67
1504,M4Daily,Daily,s,True,True,feature,token,rnn,5.0
1505,M4Daily,Daily,s,True,True,temporal,none,mlp,6.33
1506,M4Daily,Daily,s,True,True,temporal,invert,mlp,8.0
1507,M4Daily,Daily,s,True,False,temporal,patch,mlp,8.33
1508,M4Daily,Daily,s,True,False,temporal,freq,mlp,9.67
1509,M4Daily,Daily,s,True,True,feature,invert,rnn,10.67


In [6]:
import pandas as pd

def add_hlr_column(df):
    # Define dataset-specific lookback rules
    def get_lookback(dataset):
        return 36 if dataset.lower() == 'ili' else 96

    # Define fallback mapping for non-numeric horizon values
    horizon_fallback_map = {
        'Hourly': 1.0/10.0,
        'Daily': 1.0/10.0,
        'Weekly': 1.0/10.0,
        'Monthly': 1.0/1.5,
        'Quarterly': 1.0/1.5,
        'Yearly': 1.0/1.5,
        # Add more mappings as needed
    }

    # Apply lookback per row
    df['lookback'] = df['dataset'].apply(get_lookback)

    # Try to convert horizon to float; invalid entries become NaN
    def compute_hlr(row):
        try:
            return float(row['horizon']) / row['lookback']
        except (ValueError, TypeError):
            # return horizon_fallback_map.get(str(row['horizon']).lower(), None)
            return horizon_fallback_map[str(row['horizon'])]

    df['hlr'] = df.apply(compute_hlr, axis=1)


    return df


In [7]:
res = add_hlr_column(res)

In [8]:
path_map ={
    'etth1s':'../characteristics/ETT-small/TFB_characteristics_ETTh1.csv',
    'etth2s':'../characteristics/ETT-small/TFB_characteristics_ETTh2.csv',
    'ettm1s':'../characteristics/ETT-small/TFB_characteristics_ETTm1.csv',
    'ettm2s':'../characteristics/ETT-small/TFB_characteristics_ETTm2.csv',
    'exchanges':'../characteristics/exchange_rate/TFB_characteristics_exchange_rate.csv',
    'ilis':'../characteristics/illness/TFB_characteristics_national_illness.csv',
    'ecls':'../characteristics/electricity/TFB_characteristics_electricity.csv',
    'weathers':'../characteristics/weather/TFB_characteristics_weather.csv',
    'PEMS03s':'../characteristics/pems/TFB_characteristics_PEMS03.csv',
    'PEMS04s':'../characteristics/pems/TFB_characteristics_PEMS04.csv',
    'PEMS07s':'../characteristics/pems/TFB_characteristics_PEMS07.csv',
    'PEMS08s':'../characteristics/pems/TFB_characteristics_PEMS08.csv',
    'M4Dailys': '../characteristics/m4/mean_TFB_characteristics_daily.csv',
    'M4Hourlys': '../characteristics/m4/mean_TFB_characteristics_hourly.csv',
    'M4Weeklys': '../characteristics/m4/mean_TFB_characteristics_weekly.csv',
    'M4Monthlys': '../characteristics/m4/mean_TFB_characteristics_monthly.csv',
    'M4Quarterlys': '../characteristics/m4/mean_TFB_characteristics_quarterly.csv',
    'M4Yearlys': '../characteristics/m4/mean_TFB_characteristics_yearly.csv',
}

In [9]:
def merge_metadata(df, path_map):
    meta_frames = []

    for key, path in path_map.items():
        if not os.path.exists(path):
            print(f"Warning: Path does not exist: {path}")
            continue

        try:
            meta = pd.read_csv(path).tail(1)
        except Exception as e:
            print(f"Error reading {path}: {e}")
            continue

        # Add dataset-feature key to metadata
        meta['meta_key'] = key.lower()
        meta_frames.append(meta)

    # Combine all meta files
    meta_all = pd.concat(meta_frames, ignore_index=True)

    # Create key in experiment df
    df['meta_key'] = (df['dataset'] + df['feature']).str.lower()

    # Merge on key
    merged_df = df.merge(meta_all, on='meta_key', how='inner')

    # Drop the helper key
    merged_df = merged_df.drop(columns=['meta_key'])

    return merged_df

In [10]:
res_all = merge_metadata(res, path_map)

In [11]:
res_all

Unnamed: 0,dataset,horizon,feature,in,sd,fusion,embed,ff,rank,lookback,hlr,Correlation,Transition,Shifting,Seasonality,Trend,Stationarity,Short_term_jsd,Long_term_jsd
0,PEMS04,24,s,False,True,feature,patch,trans,1.0,96,0.25,,0.007731,0.136123,0.884784,0.037360,1.381057e-29,0.049628,0.066581
1,PEMS04,24,s,False,False,feature,invert,trans,2.0,96,0.25,,0.007731,0.136123,0.884784,0.037360,1.381057e-29,0.049628,0.066581
2,PEMS04,24,s,False,False,feature,token,rnn,3.5,96,0.25,,0.007731,0.136123,0.884784,0.037360,1.381057e-29,0.049628,0.066581
3,PEMS04,24,s,False,True,feature,invert,trans,6.5,96,0.25,,0.007731,0.136123,0.884784,0.037360,1.381057e-29,0.049628,0.066581
4,PEMS04,24,s,False,True,feature,none,trans,8.5,96,0.25,,0.007731,0.136123,0.884784,0.037360,1.381057e-29,0.049628,0.066581
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1615,ettm1,720,s,True,True,temporal,patch,trans,25.0,96,7.50,,0.074074,0.905848,0.596210,0.988068,6.808664e-04,0.203616,0.111870
1616,ettm1,720,s,True,True,feature,token,trans,26.5,96,7.50,,0.074074,0.905848,0.596210,0.988068,6.808664e-04,0.203616,0.111870
1617,ettm1,720,s,True,False,feature,none,rnn,27.0,96,7.50,,0.074074,0.905848,0.596210,0.988068,6.808664e-04,0.203616,0.111870
1618,ettm1,720,s,True,True,feature,freq,mlp,27.0,96,7.50,,0.074074,0.905848,0.596210,0.988068,6.808664e-04,0.203616,0.111870


In [12]:
res_all= res_all.drop('Correlation', axis=1)

In [13]:
res_filter = res_all[res_all['feature'] == 's']

In [14]:
def rearrange_columns(df):
    # Drop unwanted columns
    df = df.drop(columns=['horizon', 'Short_term_jsd', 'Long_term_jsd'], errors='ignore')

    # Define desired column order (case-insensitive for metadata)
    ordered_cols = [
        'dataset', 'nfeature', 'hlr',
        'Correlation', 'Transition', 'Shifting', 'Seasonality', 'Trend', 'Stationarity',
        'in', 'sd', 'fusion', 'embed', 'ff',
        'rank'
    ]

    # Ensure correct case in column names (if metadata cols are lowercase)
    col_map = {col.lower(): col for col in df.columns}
    final_cols = [col_map[col.lower()] for col in ordered_cols if col.lower() in col_map]

    # Reorder and return
    return df[final_cols]

In [15]:
res_final = rearrange_columns(res_filter)

In [16]:
res_final

Unnamed: 0,dataset,hlr,Transition,Shifting,Seasonality,Trend,Stationarity,in,sd,fusion,embed,ff,rank
0,PEMS04,0.25,0.007731,0.136123,0.884784,0.037360,1.381057e-29,False,True,feature,patch,trans,1.0
1,PEMS04,0.25,0.007731,0.136123,0.884784,0.037360,1.381057e-29,False,False,feature,invert,trans,2.0
2,PEMS04,0.25,0.007731,0.136123,0.884784,0.037360,1.381057e-29,False,False,feature,token,rnn,3.5
3,PEMS04,0.25,0.007731,0.136123,0.884784,0.037360,1.381057e-29,False,True,feature,invert,trans,6.5
4,PEMS04,0.25,0.007731,0.136123,0.884784,0.037360,1.381057e-29,False,True,feature,none,trans,8.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1615,ettm1,7.50,0.074074,0.905848,0.596210,0.988068,6.808664e-04,True,True,temporal,patch,trans,25.0
1616,ettm1,7.50,0.074074,0.905848,0.596210,0.988068,6.808664e-04,True,True,feature,token,trans,26.5
1617,ettm1,7.50,0.074074,0.905848,0.596210,0.988068,6.808664e-04,True,False,feature,none,rnn,27.0
1618,ettm1,7.50,0.074074,0.905848,0.596210,0.988068,6.808664e-04,True,True,feature,freq,mlp,27.0


In [17]:
def sort_by_dataset_hlr_rank(df):
    # Ensure correct types
    df['hlr'] = pd.to_numeric(df['hlr'], errors='coerce')
    df['rank'] = pd.to_numeric(df['rank'], errors='coerce')

    # Sort by dataset → hlr → rank
    df = df.sort_values(by=['dataset', 'hlr', 'rank'], ascending=[True, True, True])

    return df

In [18]:
res_final = sort_by_dataset_hlr_rank(res_final).reset_index(drop=True)
res_final = res_final.rename(columns={'in': 'norm'})

In [19]:
res_final

Unnamed: 0,dataset,hlr,Transition,Shifting,Seasonality,Trend,Stationarity,norm,sd,fusion,embed,ff,rank
0,M4Daily,0.1,0.093569,0.513197,0.659342,0.892813,0.499141,False,True,temporal,none,mlp,1.00
1,M4Daily,0.1,0.093569,0.513197,0.659342,0.892813,0.499141,False,True,temporal,freq,mlp,2.00
2,M4Daily,0.1,0.093569,0.513197,0.659342,0.892813,0.499141,True,True,feature,patch,rnn,3.33
3,M4Daily,0.1,0.093569,0.513197,0.659342,0.892813,0.499141,True,False,feature,patch,rnn,3.67
4,M4Daily,0.1,0.093569,0.513197,0.659342,0.892813,0.499141,True,True,feature,token,rnn,5.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1615,weather,7.5,0.006987,0.263141,0.372548,0.867136,0.000000,True,True,feature,freq,mlp,25.50
1616,weather,7.5,0.006987,0.263141,0.372548,0.867136,0.000000,True,True,temporal,none,rnn,27.00
1617,weather,7.5,0.006987,0.263141,0.372548,0.867136,0.000000,True,True,feature,none,trans,28.50
1618,weather,7.5,0.006987,0.263141,0.372548,0.867136,0.000000,True,True,feature,invert,trans,29.00


In [20]:
res_final.to_csv('res_s_processed.csv', index=False)