In [57]:
import numpy as np
import pandas as pd
import os
import re
from collections import defaultdict

In [58]:
exp_list = [
    'etth1_96_m', 'etth1_192_m', 'etth1_336_m', 'etth1_720_m', 
    'etth2_96_m', 'etth2_192_m', 'etth2_336_m', 'etth2_720_m',
    'ettm1_96_m', 'ettm1_192_m', 'ettm1_336_m', 'ettm1_720_m',
    'ettm2_96_m', 'ettm2_192_m', 'ettm2_336_m', 'ettm2_720_m',
    'exchange_96_m', 'exchange_192_m', 'exchange_336_m', 'exchange_720_m',
    'ili_24_m', 'ili_36_m', 'ili_48_m', 'ili_60_m',
    'ecl_96_m', 'ecl_192_m', 'ecl_336_m', 'ecl_720_m',
    'weather_96_m', 'weather_192_m', 'weather_336_m', 'weather_720_m',

    # 'pems03_12_m', 'pems03_24_m', 'pems03_36_m', 'pems03_48_m',
    # 'pems04_12_m', 'pems04_24_m', 'pems04_36_m', 'pems04_48_m',
    # 'pems07_12_m', 'pems07_24_m', 'pems07_36_m', 'pems07_48_m',
    # 'pems08_12_m', 'pems08_24_m', 'pems08_36_m', 'pems08_48_m',

    # 'etth1_96_s', 'etth1_192_s', 'etth1_336_s', 'etth1_720_s', 
    # 'etth2_96_s', 'etth2_192_s', 'etth2_336_s', 'etth2_720_s',
    # 'ettm1_96_s', 'ettm1_192_s', 'ettm1_336_s', 'ettm1_720_s',
    # 'ettm2_96_s', 'ettm2_192_s', 'ettm2_336_s', 'ettm2_720_s',
    # 'exchange_96_s', 'exchange_192_s', 'exchange_336_s', 'exchange_720_s',
    # 'ili_24_s', 'ili_36_s', 'ili_48_s', 'ili_60_s',
    # 'ecl_96_s', 'ecl_192_s', 'ecl_336_s', 'ecl_720_s',
    # 'weather_96_s', 'weather_192_s', 'weather_336_s', 'weather_720_s',

    # 'pems03_12_s', 'pems03_24_s', 'pems03_36_s', 'pems03_48_s',
    # 'pems04_12_s', 'pems04_24_s', 'pems04_36_s', 'pems04_48_s',
    # 'pems07_12_s', 'pems07_24_s', 'pems07_36_s', 'pems07_48_s',
    # 'pems08_12_s', 'pems08_24_s', 'pems08_36_s', 'pems08_48_s',

]

In [59]:
list_a = ['dataset', 'horizon', 'feature']
list_b = ['in', 'sd', 'fusion', 'embed', 'ff']


map = {
    'un': 'in',
    'ud': 'sd',
    'fu': 'fusion',
    'eb': 'embed',
    'ff': 'ff'
}

def parse_experiments(folder_path, top_k=None, component_names=list_a, prefix_map=map):
    rows = []

    for filename in os.listdir(folder_path):
        if not filename.endswith('.txt'):
            continue

        exp_name = filename.replace('.txt', '')
        components = exp_name.split('_')

        if len(components) != len(component_names):
            print(f"Skipping file with unexpected name: {filename}")
            continue

        component_data = dict(zip(component_names, components))

        filepath = os.path.join(folder_path, filename)
        with open(filepath, 'r') as f:
            lines = f.readlines()[1:]  # skip header

            if top_k is not None:
                lines = lines[:top_k]

            for line in lines:
                parts = line.strip().split('|')
                if len(parts) < 2:
                    continue

                config_str = parts[0].strip()
                rank_match = re.search(r'Avg Rank:\s*([0-9.]+)', parts[1])
                if not rank_match:
                    continue

                try:
                    rank = float(rank_match.group(1))
                except ValueError:
                    continue

                config_items = config_str.split('_')
                config = {}
                for item in config_items:
                    for prefix, key in prefix_map.items():
                        if item.startswith(prefix):
                            config[key] = item[len(prefix):]
                            break

                row = {**component_data, **config, 'rank': rank}
                rows.append(row)

    df = pd.DataFrame(rows)
    return df


In [60]:
res = parse_experiments("./rank_book", top_k=30)

In [61]:
res

Unnamed: 0,dataset,horizon,feature,in,sd,fusion,embed,ff,rank
0,PEMS04,24,s,False,True,feature,patch,trans,1.0
1,PEMS04,24,s,False,False,feature,invert,trans,2.0
2,PEMS04,24,s,False,False,feature,token,rnn,3.5
3,PEMS04,24,s,False,True,feature,invert,trans,6.5
4,PEMS04,24,s,False,True,feature,none,trans,8.5
...,...,...,...,...,...,...,...,...,...
2995,exchange,336,m,True,True,feature,token,trans,26.0
2996,exchange,336,m,True,True,feature,invert,rnn,27.5
2997,exchange,336,m,True,False,feature,none,rnn,29.5
2998,exchange,336,m,True,True,feature,token,rnn,29.5


In [82]:
import pandas as pd

def add_hlr_column(df):
    # Define dataset-specific lookback rules
    def get_lookback(dataset):
        return 36 if dataset.lower() == 'ili' else 96

    # Define fallback mapping for non-numeric horizon values
    horizon_fallback_map = {
        'Hourly': 48*10,
        'Daily': 14*10,
        'Weekly': 13*10,
        'Monthly': 18*1.5,
        'Quarterly': 8*1.5,
        'Yearly': 6*1.5,
        # Add more mappings as needed
    }

    # Apply lookback per row
    df['lookback'] = df['dataset'].apply(get_lookback)

    # Try to convert horizon to float; invalid entries become NaN
    def compute_hlr(row):
        try:
            return float(row['horizon']) / row['lookback']
        except (ValueError, TypeError):
            # return horizon_fallback_map.get(str(row['horizon']).lower(), None)
            return horizon_fallback_map[str(row['horizon'])]

    df['hlr'] = df.apply(compute_hlr, axis=1)


    return df


In [83]:
res = add_hlr_column(res)

In [84]:
res[res['dataset']=='M4']

Unnamed: 0,dataset,horizon,feature,in,sd,fusion,embed,ff,rank,lookback,hlr,meta_key
480,M4,Weekly,s,True,True,feature,invert,trans,1.00,96,130.0,m4s
481,M4,Weekly,s,True,True,feature,patch,trans,2.00,96,130.0,m4s
482,M4,Weekly,s,True,False,feature,patch,trans,3.00,96,130.0,m4s
483,M4,Weekly,s,True,False,feature,patch,mlp,4.33,96,130.0,m4s
484,M4,Weekly,s,True,True,temporal,patch,mlp,5.67,96,130.0,m4s
...,...,...,...,...,...,...,...,...,...,...,...,...
2725,M4,Yearly,s,False,False,feature,invert,mlp,26.00,96,9.0,m4s
2726,M4,Yearly,s,True,False,temporal,none,mlp,26.00,96,9.0,m4s
2727,M4,Yearly,s,True,False,feature,invert,rnn,26.33,96,9.0,m4s
2728,M4,Yearly,s,False,False,feature,patch,mlp,28.00,96,9.0,m4s


In [85]:
path_map ={
    'etth1m':'../characteristics/ETT-small/mean_TFB_characteristics_ETTh1.csv',
    'etth2m':'../characteristics/ETT-small/mean_TFB_characteristics_ETTh2.csv',
    'ettm1m':'../characteristics/ETT-small/mean_TFB_characteristics_ETTm1.csv',
    'ettm2m':'../characteristics/ETT-small/mean_TFB_characteristics_ETTm2.csv',
    'exchangem':'../characteristics/exchange_rate/mean_TFB_characteristics_exchange_rate.csv',
    'ilim':'../characteristics/illness/mean_TFB_characteristics_national_illness.csv',
    'eclm':'../characteristics/electricity/mean_TFB_characteristics_electricity.csv',
    'weatherm':'../characteristics/weather/mean_TFB_characteristics_weather.csv',
    'PEMS03m':'../characteristics/pems/mean_TFB_characteristics_PEMS03.csv',
    'PEMS04m':'../characteristics/pems/mean_TFB_characteristics_PEMS04.csv',
    'PEMS07m':'../characteristics/pems/mean_TFB_characteristics_PEMS07.csv',
    'PEMS08m':'../characteristics/pems/mean_TFB_characteristics_PEMS08.csv',
}

In [86]:
def merge_metadata(df, path_map):
    meta_frames = []

    for key, path in path_map.items():
        if not os.path.exists(path):
            print(f"Warning: Path does not exist: {path}")
            continue

        try:
            meta = pd.read_csv(path)
        except Exception as e:
            print(f"Error reading {path}: {e}")
            continue

        # Add dataset-feature key to metadata
        meta['meta_key'] = key.lower()
        meta_frames.append(meta)

    # Combine all meta files
    meta_all = pd.concat(meta_frames, ignore_index=True)

    # Create key in experiment df
    df['meta_key'] = (df['dataset'] + df['feature']).str.lower()

    # Merge on key
    merged_df = df.merge(meta_all, on='meta_key', how='inner')

    # Drop the helper key
    merged_df = merged_df.drop(columns=['meta_key'])

    return merged_df

In [87]:
res_all = merge_metadata(res, path_map)

In [88]:
res_all

Unnamed: 0,dataset,horizon,feature,in,sd,fusion,embed,ff,rank,lookback,hlr,Correlation,Transition,Shifting,Seasonality,Trend,Stationarity,Short_term_jsd,Long_term_jsd
0,exchange,192,m,True,False,temporal,patch,mlp,1.0,96,2.0,0.56548,0.062335,0.552311,0.540304,0.879264,0.359774,0.107166,0.081097
1,exchange,192,m,True,False,temporal,none,mlp,3.0,96,2.0,0.56548,0.062335,0.552311,0.540304,0.879264,0.359774,0.107166,0.081097
2,exchange,192,m,True,False,feature,patch,mlp,3.0,96,2.0,0.56548,0.062335,0.552311,0.540304,0.879264,0.359774,0.107166,0.081097
3,exchange,192,m,True,True,temporal,patch,rnn,4.0,96,2.0,0.56548,0.062335,0.552311,0.540304,0.879264,0.359774,0.107166,0.081097
4,exchange,192,m,True,True,temporal,invert,mlp,4.0,96,2.0,0.56548,0.062335,0.552311,0.540304,0.879264,0.359774,0.107166,0.081097
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1435,exchange,336,m,True,True,feature,token,trans,26.0,96,3.5,0.56548,0.062335,0.552311,0.540304,0.879264,0.359774,0.107166,0.081097
1436,exchange,336,m,True,True,feature,invert,rnn,27.5,96,3.5,0.56548,0.062335,0.552311,0.540304,0.879264,0.359774,0.107166,0.081097
1437,exchange,336,m,True,False,feature,none,rnn,29.5,96,3.5,0.56548,0.062335,0.552311,0.540304,0.879264,0.359774,0.107166,0.081097
1438,exchange,336,m,True,True,feature,token,rnn,29.5,96,3.5,0.56548,0.062335,0.552311,0.540304,0.879264,0.359774,0.107166,0.081097


In [89]:
feature_map ={
    'etth1': 7,
    'etth2': 7 ,
    'ettm1': 7,
    'ettm2': 7,
    'exchange': 8,
    'ili': 7,
    'ecl': 321,
    'weather': 21,
    'PEMS03': 358,
    'PEMS04': 307,
    'PEMS07': 883,
    'PEMS08': 170,
}

In [90]:
res_filter = res_all[res_all['feature'] == 'm']
res_filter['nfeature'] = res_filter['dataset'].map(feature_map)

In [91]:
def rearrange_columns(df):
    # Drop unwanted columns
    df = df.drop(columns=['horizon', 'Short_term_jsd', 'Long_term_jsd'], errors='ignore')

    # Define desired column order (case-insensitive for metadata)
    ordered_cols = [
        'dataset', 'nfeature', 'hlr',
        'Correlation', 'Transition', 'Shifting', 'Seasonality', 'Trend', 'Stationarity',
        'in', 'sd', 'fusion', 'embed', 'ff',
        'rank'
    ]

    # Ensure correct case in column names (if metadata cols are lowercase)
    col_map = {col.lower(): col for col in df.columns}
    final_cols = [col_map[col.lower()] for col in ordered_cols if col.lower() in col_map]

    # Reorder and return
    return df[final_cols]

In [92]:
res_final = rearrange_columns(res_filter)

In [93]:
res_final

Unnamed: 0,dataset,nfeature,hlr,Correlation,Transition,Shifting,Seasonality,Trend,Stationarity,in,sd,fusion,embed,ff,rank
0,exchange,8,2.0,0.56548,0.062335,0.552311,0.540304,0.879264,0.359774,True,False,temporal,patch,mlp,1.0
1,exchange,8,2.0,0.56548,0.062335,0.552311,0.540304,0.879264,0.359774,True,False,temporal,none,mlp,3.0
2,exchange,8,2.0,0.56548,0.062335,0.552311,0.540304,0.879264,0.359774,True,False,feature,patch,mlp,3.0
3,exchange,8,2.0,0.56548,0.062335,0.552311,0.540304,0.879264,0.359774,True,True,temporal,patch,rnn,4.0
4,exchange,8,2.0,0.56548,0.062335,0.552311,0.540304,0.879264,0.359774,True,True,temporal,invert,mlp,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1435,exchange,8,3.5,0.56548,0.062335,0.552311,0.540304,0.879264,0.359774,True,True,feature,token,trans,26.0
1436,exchange,8,3.5,0.56548,0.062335,0.552311,0.540304,0.879264,0.359774,True,True,feature,invert,rnn,27.5
1437,exchange,8,3.5,0.56548,0.062335,0.552311,0.540304,0.879264,0.359774,True,False,feature,none,rnn,29.5
1438,exchange,8,3.5,0.56548,0.062335,0.552311,0.540304,0.879264,0.359774,True,True,feature,token,rnn,29.5


In [94]:
def sort_by_dataset_hlr_rank(df):
    # Ensure correct types
    df['hlr'] = pd.to_numeric(df['hlr'], errors='coerce')
    df['rank'] = pd.to_numeric(df['rank'], errors='coerce')

    # Sort by dataset → hlr → rank
    df = df.sort_values(by=['dataset', 'hlr', 'rank'], ascending=[True, True, True])

    return df

In [95]:
res_final = sort_by_dataset_hlr_rank(res_final).reset_index(drop=True)
res_final = res_final.rename(columns={'in': 'norm'})

In [96]:
res_final

Unnamed: 0,dataset,nfeature,hlr,Correlation,Transition,Shifting,Seasonality,Trend,Stationarity,norm,sd,fusion,embed,ff,rank
0,PEMS03,358,0.125,0.800413,0.006313,0.071985,0.870186,0.095749,2.260248e-29,False,True,feature,invert,rnn,1.5
1,PEMS03,358,0.125,0.800413,0.006313,0.071985,0.870186,0.095749,2.260248e-29,True,True,feature,invert,trans,2.0
2,PEMS03,358,0.125,0.800413,0.006313,0.071985,0.870186,0.095749,2.260248e-29,False,False,feature,invert,rnn,2.5
3,PEMS03,358,0.125,0.800413,0.006313,0.071985,0.870186,0.095749,2.260248e-29,False,False,feature,none,rnn,4.0
4,PEMS03,358,0.125,0.800413,0.006313,0.071985,0.870186,0.095749,2.260248e-29,False,True,feature,none,rnn,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1435,weather,21,7.500,0.662655,0.054499,0.229994,0.652002,0.648986,1.036509e-08,True,False,temporal,invert,mlp,32.0
1436,weather,21,7.500,0.662655,0.054499,0.229994,0.652002,0.648986,1.036509e-08,True,False,feature,none,trans,33.0
1437,weather,21,7.500,0.662655,0.054499,0.229994,0.652002,0.648986,1.036509e-08,True,True,temporal,patch,mlp,33.0
1438,weather,21,7.500,0.662655,0.054499,0.229994,0.652002,0.648986,1.036509e-08,True,True,feature,token,rnn,33.0


In [97]:
res_final.to_csv('res_m_processed.csv', index=False)