In [1]:
import importlib
from ucr_benchmark_template.modeling import train_noprop

# Re-execute the already-imported module in this kernel. Presumably this is
# here so that edits to train_noprop.py are picked up without a kernel restart.
importlib.reload(train_noprop)

# Import the helpers only *after* the reload: a `from ... import` binds the
# objects that exist at import time, so doing it earlier would leave these
# names pointing at the pre-reload definitions.
from ucr_benchmark_template.modeling.train_noprop import (
    load_dataset, make_model, train, predict, evaluate, save_model
)

[32m2025-10-09 07:32:09.264[0m | [1mINFO    [0m | [36mucr_benchmark_template.config[0m:[36m<module>[0m:[36m11[0m - [1mPROJ_ROOT path is: /home/jovyan/UCRBenchmarkTemplate[0m


In [10]:
import optuna
import numpy as np
import pandas as pd
import os
import logging
import traceback

LOG_DIR = "."
LOG_FILE = os.path.join(LOG_DIR, "optuna_run.log")


def configure_logging(log_file=LOG_FILE, level=logging.INFO):
    """(Re)configure the root logger to write to ``log_file`` and the console.

    Safe to call repeatedly (e.g. when this cell is re-run): handlers already
    attached to the root logger are detached *and closed* first, so re-running
    neither duplicates log lines nor leaks the previous file handle.

    Args:
        log_file: Path of the log file; records are appended to it (UTF-8).
        level: Minimum level handled by the root logger.
    """
    # Ensure the folder exists
    log_dir = os.path.dirname(log_file)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    # Clear any existing handlers (important if Optuna or Jupyter adds one).
    # Removing a handler does not close it, so close it explicitly; otherwise
    # every re-run of the cell leaves one more open file on the log.
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
        handler.close()

    # Now configure logging
    logging.basicConfig(
        level=level,
        format="%(asctime)s [%(levelname)s] %(message)s",
        handlers=[
            logging.FileHandler(log_file, mode='a', encoding='utf-8'),
            logging.StreamHandler(),  # optional: also show logs in console
        ],
    )


configure_logging()
logging.info("=== Logging system initialized ===")

def _suggest_hyperparameters(trial):
    """Sample one hyperparameter configuration from ``trial``.

    Returns:
        dict with the sampled scalars (``T``, ``emb_d``, ``eta``, ``lr``,
        ``k_size``, ``dropout``) and the derived layer-size lists
        (``channels``, ``fc_layers``, ``fc_merged``).
    """
    T = trial.suggest_int("T", 2, 15)
    emb_d = trial.suggest_categorical("embedding_dim", [0, 16, 32, 64, 128])
    eta = trial.suggest_float("eta", 0.1, 1, step=0.1)
    lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    k_size = trial.suggest_categorical("k_size", [3, 5, 7])
    dropout = trial.suggest_float("dropout", 0.1, 0.4, step=0.1)

    # Channel widths double with each additional block.
    n_blocks = trial.suggest_int("n_blocks", 1, 3)
    base_ch = trial.suggest_categorical("base_channels", [8, 16, 32, 64])
    channels = [base_ch * (2 ** i) for i in range(n_blocks)]

    n_layers = trial.suggest_int("n_layers", 1, 3)
    fc_layers = n_layers * [256]

    # Merged layer widths halve per layer, but never drop below 32.
    n_merged = trial.suggest_int("n_merged", 1, 3)
    base = trial.suggest_categorical("base_merged", [512, 256, 128])
    fc_merged = [max(base // (2 ** i), 32) for i in range(n_merged)]

    return {
        "T": T, "emb_d": emb_d, "eta": eta, "lr": lr, "k_size": k_size,
        "dropout": dropout, "channels": channels, "fc_layers": fc_layers,
        "fc_merged": fc_merged,
    }


def _append_trial_to_csv(csv_path, trial_name, datasets, accuracies, avg_acc, hp_str):
    """Add one trial's per-dataset scores (plus the AVERAGE row) to the results CSV.

    Scores are matched to rows by dataset name rather than by position, so an
    existing file whose rows differ from ``datasets`` (other order, other
    subset) is extended instead of causing a length-mismatch error. The
    hyperparameter string is stored in ``<trial_name>_params`` on the AVERAGE
    row only.
    """
    row_names = list(datasets) + ["AVERAGE"]
    scores = pd.Series(list(accuracies) + [avg_acc], index=row_names)

    if os.path.exists(csv_path):
        df = pd.read_csv(csv_path)
        known = set(df["Dataset"])
        missing = [name for name in row_names if name not in known]
        if missing:
            df = pd.concat([df, pd.DataFrame({"Dataset": missing})], ignore_index=True)
    else:
        df = pd.DataFrame({"Dataset": row_names})

    df[trial_name] = df["Dataset"].map(scores)

    # Explicitly create string column for hyperparameters
    col_name = f"{trial_name}_params"
    if col_name not in df.columns:
        df[col_name] = pd.Series(dtype="object")
    df.loc[df["Dataset"] == "AVERAGE", col_name] = hp_str

    df.to_csv(csv_path, index=False)


def objective(trial):
    """Optuna objective: mean test accuracy of one configuration over several datasets.

    For every dataset a fresh model is built, trained and evaluated. A dataset
    that raises is logged and counted as NaN so the remaining datasets still
    run. The per-dataset accuracies and their NaN-ignoring mean are appended to
    ``optuna_results_overnight.csv`` as a new ``Trial_<n>`` column.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters.

    Returns:
        The mean accuracy over the datasets that succeeded, or NaN when every
        dataset failed or an unexpected error occurred outside the per-dataset
        handling.
    """
    try:
        datasets = [
            "WordSynonyms", "ShapesAll", "Phoneme", "PigAirwayPressure",
            "PigArtPressure", "PigCVP", "NonInvasiveFetalECGThorax1",
            "NonInvasiveFetalECGThorax2", "FiftyWords", "Crop", "Adiac"
        ]

        # --- Hyperparameters ---
        hp = _suggest_hyperparameters(trial)
        hp_str = (
            f"T={hp['T']}, emb={hp['emb_d']}, eta={hp['eta']}, lr={hp['lr']:.1e}, "
            f"k={hp['k_size']}, dpout={hp['dropout']}, "
            f"ch={hp['channels']}, fc={hp['fc_layers']}, merg={hp['fc_merged']}"
        )

        logging.info(f"=== Starting Trial {trial.number} ===")
        logging.info(f"Hyperparameters: {hp_str}")

        accuracies = []
        for dataset in datasets:
            try:
                logging.info(f"Starting dataset: {dataset}")
                trainloader, testloader = load_dataset(dataset, batch_size=32)
                model = make_model(
                    dataset, hp["emb_d"], hp["T"], hp["k_size"], hp["dropout"],
                    hp["channels"], hp["fc_layers"], hp["fc_merged"],
                )
                # NOTE(review): the positional `1` looks like the epoch count and
                # `1e-5` like a weight decay -- confirm against train_noprop.train.
                model, _ = train(model, trainloader, 1, hp["T"], hp["eta"], hp["lr"], 1e-5)
                y_true, y_pred = predict(model, testloader)
                acc = evaluate(y_true, y_pred)["accuracy"]

                accuracies.append(acc)
                logging.info(f"Finished {dataset} → Accuracy: {acc:.4f}")

            except Exception as e:
                # Keep going: one broken dataset should not abort the whole trial.
                logging.error(f"Error on dataset {dataset}: {e}")
                logging.error(traceback.format_exc())
                accuracies.append(np.nan)

        if all(np.isnan(acc) for acc in accuracies):
            # np.nanmean on an all-NaN list only warns ("Mean of empty slice");
            # report the situation explicitly instead.
            logging.error(f"Trial {trial.number}: every dataset failed, no accuracy to average")
            avg_acc = np.nan
        else:
            avg_acc = np.nanmean(accuracies)
        logging.info(f"Trial {trial.number} completed with average accuracy: {avg_acc:.4f}")

        # --- Save results to CSV ---
        # NOTE(review): columns are numbered trial.number + 1 while the log lines
        # use trial.number; a new study restarts at 0, so re-running against an
        # existing CSV overwrites the same-named columns -- confirm this is intended.
        csv_path = "optuna_results_overnight.csv"
        trial_name = f"Trial_{trial.number+1}"
        try:
            _append_trial_to_csv(csv_path, trial_name, datasets, accuracies, avg_acc, hp_str)
        except Exception as e:
            # Bookkeeping must not throw away a score that was already computed.
            logging.error(f"Could not save results of trial {trial.number} to {csv_path}: {e}")
            logging.error(traceback.format_exc())

        return avg_acc

    except Exception as e:
        logging.error(f"Trial {trial.number} failed with error: {e}")
        logging.error(traceback.format_exc())
        return np.nan

2025-10-09 08:09:09,484 [INFO] === Logging system initialized ===


In [11]:
N_TRIALS = 50       # number of configurations to evaluate
SAMPLER_SEED = 42   # fixes the sequence of sampled hyperparameters

# TPE is what create_study uses by default for a single objective; passing it
# explicitly only adds the seed so that the sampled configurations are the same
# on every run. NOTE(review): this does not seed model training itself --
# confirm whether train_noprop sets its own seeds.
# The study is kept in memory only, so it is gone once the kernel is restarted.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=SAMPLER_SEED),
)
study.optimize(objective, n_trials=N_TRIALS)

[I 2025-10-09 08:09:14,795] A new study created in memory with name: no-name-4fc4cae2-444e-4d18-a3ae-3b9d38db89bd
2025-10-09 08:09:14,801 [INFO] === Starting Trial 0 ===
2025-10-09 08:09:14,803 [INFO] Hyperparameters: T=8, emb=128, eta=0.6, lr=2.4e-03, k=3, dpout=0.4, ch=[32], fc=[256, 256], merg=[512, 256, 128]
2025-10-09 08:09:14,803 [INFO] Starting dataset: WordSynonyms
Training Epoch 1 Block 1: 100%|██████████| 8/8 [00:00<00:00, 70.69it/s, loss=0.0872, lr=0.00236]
Training Epoch 1 Block 2: 100%|██████████| 8/8 [00:00<00:00, 76.74it/s, loss=0.287, lr=0.00236]
Training Epoch 1 Block 3: 100%|██████████| 8/8 [00:00<00:00, 77.27it/s, loss=0.577, lr=0.00236]
Training Epoch 1 Block 4: 100%|██████████| 8/8 [00:00<00:00, 76.17it/s, loss=1.11, lr=0.00236]
Training Epoch 1 Block 5: 100%|██████████| 8/8 [00:00<00:00, 70.42it/s, loss=2.31, lr=0.00236]
Training Epoch 1 Block 6: 100%|██████████| 8/8 [00:00<00:00, 74.00it/s, loss=6.23, lr=0.00236]
Training Epoch 1 Block 7: 100%|██████████| 8/8 [00

KeyboardInterrupt: 