
# TimeMixer Hyperparameter Sweep (Metrics-Only)

This notebook runs your `Exp_Long_Term_Forecast` experiments across a grid of hyperparameters, collects **MSE, MAE, msIC, msIR**, and saves results to a CSV.

**Privacy note:** After each test run, the notebook **deletes** `./test_results/<setting>/` and `./results/<setting>/` to avoid storing predictions or large artifacts. Only aggregated metrics and the final CSV are kept.

---


In [5]:

import os
import shutil
import time
import itertools
import pandas as pd
from argparse import Namespace
import torch

# Import your project modules (assumes you're running from repo root)
from exp.exp_long_term_forecasting import Exp_Long_Term_Forecast

# ========== CONFIGS ==========
# Model name; passed to the experiment as `model` and embedded in each run's
# `setting` string.
MODEL_NAME = "TimeMixer"
# Directory holding the dataset CSV files; passed to the experiment as `root_path`.
ROOT_PATH = "./dataset/FBD/"

# Per-dataset settings, keyed by the names used in grid["data"]:
#   data_path   - CSV file name (passed as `data_path`)
#   target      - target column name (passed as `target`)
#   channel_num - channel count, used for enc_in / dec_in / c_out in build_args
# Recreate your shellscript's dataset-specific choices
DATASETS = {
    "OPTION": {"data_path": "OPTION.csv", "target": "t", "channel_num": 22},
    "BTCF":   {"data_path": "BTCF.csv",   "target": "taker_buy_volume_spot", "channel_num": 12},
    # Add GSMI here if needed:
    # "GSMI": {"data_path": "GSMI.csv", "target": "volume.SZSE", "channel_num": 100},
}

# Hyperparameter grid to sweep.
# Each key maps to the list of values to try; the sweep runs the Cartesian
# product of all lists (see combos). With the values below that is
# 4 pred_len values x 2 datasets = 8 runs.
# Every name in grid["data"] must be a key of DATASETS.
grid = {
    "pred_len": [5, 21, 63, 126],
    "seq_len": [512],
    "e_layers": [2],
    "down_sampling_layers": [3],
    "down_sampling_window": [2],
    "learning_rate": [0.01],
    "d_model": [16],
    "d_ff": [32],
    "train_epochs": [10],
    "patience": [10],
    "batch_size": [128],
    "data": ["OPTION", "BTCF"],
}

# Output
# The CSV file name carries the current Unix timestamp (whole seconds), so each
# execution of this cell targets a new file instead of overwriting an older sweep.
RESULTS_DIR = "./sweeps"
os.makedirs(RESULTS_DIR, exist_ok=True)
CSV_PATH = os.path.join(RESULTS_DIR, f"sweep_results_{int(time.time())}.csv")
print("Will write CSV to:", CSV_PATH)

def combos(g):
    """Yield one ``{name: value}`` dict per point of the grid's Cartesian product.

    ``g`` maps each hyperparameter name to the list of values to try. Dicts are
    produced lazily, keep the key order of ``g``, and vary the last key fastest.
    A grid containing an empty list yields nothing; an empty grid yields a
    single empty dict.
    """
    names = list(g)
    for choice in itertools.product(*(g[name] for name in names)):
        yield dict(zip(names, choice))


Will write CSV to: ./sweeps/sweep_results_1759258031.csv


In [6]:

def build_args(params, ds_cfg):
    """Assemble the argument namespace for one sweep run.

    The field names are intended to mirror run.py's arguments as closely as
    possible.

    Args:
        params: One grid point; must provide ``data``, ``seq_len``,
            ``pred_len``, ``d_model``, ``e_layers``, ``d_ff``,
            ``down_sampling_layers``, ``down_sampling_window``,
            ``train_epochs``, ``batch_size``, ``patience`` and
            ``learning_rate``.
        ds_cfg: Dataset entry providing ``data_path``, ``target`` and
            ``channel_num``.

    Returns:
        An ``argparse.Namespace`` holding every field below plus ``device``,
        which is ``cuda:0`` when CUDA is available and ``cpu`` otherwise.
    """
    fields = {}

    # basic
    fields.update(
        task_name="long_term_forecast",
        is_training=1,
        model_id=f"{params['data']}_{params['seq_len']}_{params['pred_len']}",
        model=MODEL_NAME,
    )

    # data loader
    fields.update(
        data="custom",
        root_path=ROOT_PATH,
        data_path=ds_cfg["data_path"],
        features="M",
        target=ds_cfg["target"],
        freq="h",
        checkpoints="./checkpoints/",
    )

    # forecasting task
    fields.update(
        seq_len=params["seq_len"],
        label_len=0,
        pred_len=params["pred_len"],
        seasonal_patterns="Monthly",
        inverse=False,
    )

    # model define (only the swept values come from params; the rest are fixed)
    fields.update(
        enc_in=ds_cfg["channel_num"],
        dec_in=ds_cfg["channel_num"],
        c_out=ds_cfg["channel_num"],
        d_model=params["d_model"],
        n_heads=8,
        e_layers=params["e_layers"],
        d_layers=1,
        d_ff=params["d_ff"],
        moving_avg=25,
        factor=1,
        distil=True,
        dropout=0.1,
        embed="timeF",
        activation="gelu",
        channel_independence=1,
        decomp_method="moving_avg",
        use_norm=1,
        down_sampling_layers=params["down_sampling_layers"],
        down_sampling_window=params["down_sampling_window"],
        down_sampling_method="avg",
        seg_len=48,
    )

    # optimization
    fields.update(
        num_workers=10,
        itr=1,
        train_epochs=params["train_epochs"],
        batch_size=params["batch_size"],
        patience=params["patience"],
        learning_rate=params["learning_rate"],
        des="Exp",
        loss="MSE",
        lradj="type1",
        use_amp=False,
    )

    # GPU
    fields.update(
        use_gpu=torch.cuda.is_available(),
        gpu=0,
        gpu_type="cuda",
        use_multi_gpu=False,
        devices="0",
    )

    # others
    fields.update(
        p_hidden_dims=[128, 128],
        p_hidden_layers=2,
        use_dtw=False,
        augmentation_ratio=0,
        seed=2,
    )
    # Every augmentation switch is turned off for the sweep.
    augmentation_flags = (
        "jitter", "scaling", "permutation", "randompermutation",
        "magwarp", "timewarp", "windowslice", "windowwarp",
        "rotation", "spawner", "dtwwarp", "shapedtwwarp",
        "wdba", "discdtw", "discsdtw",
    )
    fields.update(dict.fromkeys(augmentation_flags, False))
    fields.update(
        extra_tag="",
        patch_len=16,
    )

    args = Namespace(**fields)

    # Device
    if args.use_gpu:
        args.device = torch.device("cuda:0")
    else:
        args.device = torch.device("cpu")
    return args

def safe_cleanup(setting):
    """Remove the per-run artifact directories so only aggregated metrics remain.

    Deletes ``./test_results/<setting>`` and ``./results/<setting>`` (relative
    to the current working directory) when they exist. Removal is best-effort:
    errors raised while deleting are ignored.
    """
    # These are the directories written by Exp_Long_Term_Forecast.test
    for parent in ("test_results", "results"):
        artifact_dir = f"./{parent}/{setting}"
        if not os.path.exists(artifact_dir):
            continue
        shutil.rmtree(artifact_dir, ignore_errors=True)


In [None]:

# Run every grid point: train, test, delete the per-run artifacts, and record
# one row of metrics. The CSV is rewritten after each finished run so that an
# interrupted sweep keeps everything completed so far.
all_rows = []

# Hyperparameter columns copied from params into each row, in output order.
PARAM_COLUMNS = (
    "data",
    "pred_len",
    "seq_len",
    "e_layers",
    "down_sampling_layers",
    "down_sampling_window",
    "learning_rate",
    "d_model",
    "d_ff",
    "train_epochs",
    "patience",
    "batch_size",
)

for params in combos(grid):
    ds_name = params["data"]
    ds_cfg = DATASETS[ds_name]

    # Construct a 'setting' string consistent with your run.py scheme (shorter but unique).
    # NOTE: train_epochs and patience are not encoded here, so two grid points
    # differing only in those values would share a setting name.
    setting = (
        f"{params['data']}_{MODEL_NAME}_ftM_sl{params['seq_len']}_pl{params['pred_len']}"
        f"_dm{params['d_model']}_el{params['e_layers']}_df{params['d_ff']}"
        f"_dsl{params['down_sampling_layers']}_dsw{params['down_sampling_window']}"
        f"_lr{params['learning_rate']}_bs{params['batch_size']}"
    )

    args = build_args(params, ds_cfg)
    exp = Exp_Long_Term_Forecast(args)

    print("\n=== RUNNING:", setting, "===\n")
    try:
        exp.train(setting)
        mse, mae, msIC, msIR = exp.test(setting)
    finally:
        # Clean up heavy artifacts even when the run fails or is interrupted,
        # so predictions are never left on disk.
        safe_cleanup(setting)

    # Record a single row of metrics + params
    row = {"setting": setting}
    row.update((name, params[name]) for name in PARAM_COLUMNS)
    row.update({"MSE": mse, "MAE": mae, "msIC": msIC, "msIR": msIR})
    all_rows.append(row)

    # Checkpoint: persist all finished runs now rather than only at the end.
    pd.DataFrame(all_rows).to_csv(CSV_PATH, index=False)

# Build DataFrame and save
df = pd.DataFrame(all_rows)
if not df.empty:
    # Sorting an empty frame would fail because the sort columns do not exist.
    df.sort_values(by=["data", "pred_len", "d_model", "e_layers"], inplace=True)
df.to_csv(CSV_PATH, index=False)
df


Use GPU: cuda:0

=== RUNNING: OPTION_TimeMixer_ftM_sl512_pl5_dm16_el2_df32_dsl3_dsw2_lr0.01_bs128 ===

train 25685
val 3740
test 7482


KeyboardInterrupt: 

: 


### Optional: Resume/Skip Logic
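To resume a long sweep and **skip** already-finished settings, load the results CSV from the earlier run before the loop and `continue` whenever the current `setting` already appears in its `setting` column. Note that `CSV_PATH` contains a timestamp, so a resumed run must be pointed at the earlier file explicitly rather than at a freshly generated path.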
