In [1]:
import sys
from pathlib import Path
# Project root is taken to be the parent of the current working directory
# (i.e. the notebook is assumed to be launched from a sub-folder such as
# `notebooks/` -- TODO confirm). Resolved once and reused below.
PROJECT_ROOT = Path.cwd().parent
DATA_DIR = PROJECT_ROOT / "data"
OUTPUTS_DIR = PROJECT_ROOT / "outputs"

# Make the project root importable so the `src` package import that follows
# works. Guarded so that re-running this cell does not keep appending
# duplicate entries to sys.path.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

from src.pipeline import Config, build_xy, FlagsDataset, CNN1D, LABELS, class_weights, train_one_epoch, eval_epoch

In [2]:
import numpy as np
import pandas as pd

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

In [3]:
def run_cnn_cv_for_config(X, y, cfg, dropout, weight_decay, n_splits=5,
                          patience=10, min_delta=1e-4):
    """Run stratified k-fold cross-validation of ``CNN1D`` for one hyper-parameter setting.

    For every fold a fresh model is trained with AdamW and a class-weighted
    cross-entropy loss. Training stops early once the validation loss has not
    improved by more than ``min_delta`` for ``patience`` consecutive epochs, and
    the weights from the best validation epoch are restored before scoring.

    NOTE: the same validation fold is used both to pick the best checkpoint and
    to compute the reported metrics, so the scores are optimistically biased.
    They are suitable for comparing configurations, not as an unbiased estimate
    of generalisation performance.

    Parameters
    ----------
    X, y : array-like
        Features and labels; indexed with integer index arrays, and
        ``X.shape[1]`` is passed to the model as ``in_ch``.
        (Presumably X is (n_samples, channels, length) -- TODO confirm
        against ``build_xy``.)
    cfg : Config
        Supplies ``seed``, ``batch_size``, ``lr`` and ``epochs``.
    dropout : float
        Forwarded to ``CNN1D(dropout=...)``.
    weight_decay : float
        Forwarded to ``torch.optim.AdamW(weight_decay=...)``.
    n_splits : int, default 5
        Number of stratified folds.
    patience : int, default 10
        Number of consecutive non-improving epochs tolerated before stopping.
    min_delta : float, default 1e-4
        Minimum decrease of the validation loss that counts as an improvement.

    Returns
    -------
    dict
        Mean and (population, ddof=0) standard deviation across folds of
        accuracy, macro-F1 and weighted-F1, under the keys ``acc_mean``,
        ``acc_std``, ``macroF1_mean``, ``macroF1_std``, ``weightedF1_mean``
        and ``weightedF1_std``.
    """
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=cfg.seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_classes = len(LABELS)

    fold_metrics = []

    for fold, (tr_idx, va_idx) in enumerate(skf.split(X, y), 1):
        # Seed torch per fold so weight initialisation, dropout masks and the
        # DataLoader shuffle order are repeatable between runs. Only done when
        # the configured seed is a plain integer (random_state also accepts
        # None / RandomState objects, which cannot be offset by the fold index).
        if isinstance(cfg.seed, (int, np.integer)):
            torch.manual_seed(int(cfg.seed) + fold)

        Xtr, Xva = X[tr_idx], X[va_idx]
        ytr, yva = y[tr_idx], y[va_idx]

        train_ds = FlagsDataset(Xtr, ytr)
        val_ds = FlagsDataset(Xva, yva)
        train_loader = DataLoader(train_ds, batch_size=cfg.batch_size, shuffle=True)
        val_loader = DataLoader(val_ds, batch_size=cfg.batch_size, shuffle=False)

        model = CNN1D(in_ch=X.shape[1], n_classes=n_classes, dropout=dropout).to(device)

        # Class weights are derived from the training labels of this fold only.
        w = class_weights(ytr, n_classes=n_classes).to(device)
        loss_fn = nn.CrossEntropyLoss(weight=w)
        opt = torch.optim.AdamW(model.parameters(), lr=cfg.lr, weight_decay=weight_decay)

        best_val = float("inf")
        best_state = None
        bad = 0  # consecutive epochs without a sufficient improvement

        for _epoch in range(cfg.epochs):
            train_one_epoch(model, train_loader, opt, loss_fn, device)
            # First element of eval_epoch's 4-tuple is used as the validation loss.
            va_loss, _, _, _ = eval_epoch(model, val_loader, loss_fn, device)

            if va_loss < best_val - min_delta:
                best_val = va_loss
                # Snapshot on CPU so the checkpoint does not alias live weights.
                best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
                bad = 0
            else:
                bad += 1
                if bad >= patience:
                    break

        # best_state stays None if no epoch ran or the loss never improved on inf.
        if best_state is not None:
            model.load_state_dict(best_state)

        # Last two elements of eval_epoch's result are used as true / predicted labels.
        _, _, y_true, y_pred = eval_epoch(model, val_loader, loss_fn, device)

        acc = accuracy_score(y_true, y_pred)
        f1m = f1_score(y_true, y_pred, average="macro", zero_division=0)
        f1w = f1_score(y_true, y_pred, average="weighted", zero_division=0)
        fold_metrics.append((acc, f1m, f1w))

    # Columns: 0 = accuracy, 1 = macro-F1, 2 = weighted-F1.
    fold_metrics = np.array(fold_metrics)
    return {
        "acc_mean": float(fold_metrics[:, 0].mean()),
        "acc_std": float(fold_metrics[:, 0].std()),
        "macroF1_mean": float(fold_metrics[:, 1].mean()),
        "macroF1_std": float(fold_metrics[:, 1].std()),
        "weightedF1_mean": float(fold_metrics[:, 2].mean()),
        "weightedF1_std": float(fold_metrics[:, 2].std()),
    }

In [4]:
# Ablation grid: cross-validate the CNN for every combination of
# pre_bars x dropout x weight_decay and collect the fold-averaged metrics.

# Not referenced in this cell; kept in case later cells rely on it.
base_cfg = Config()

CSV_PATH = DATA_DIR / "EURUSD_15m_007.csv"
JSON_PATH = DATA_DIR / "EURUSD_15m_007_cimkezett.json"

# Hyper-parameter values explored by the grid.
pre_bars_list = [32, 64, 96]
dropout_list = [0.25, 0.5]
wd_list = [1e-4, 1e-3]

rows = []

for pre_bars in pre_bars_list:
    # Fresh config per pre_bars value so the override never leaks between iterations.
    cfg = Config()
    cfg.pre_bars = pre_bars

    # The dataset only depends on pre_bars, so build it once per outer iteration
    # and reuse it for every dropout / weight_decay combination.
    X, y = build_xy(CSV_PATH, JSON_PATH, L=cfg.L, pre_bars=cfg.pre_bars)

    for dropout in dropout_list:
        for wd in wd_list:
            print(f"\n=== pre_bars={pre_bars} dropout={dropout} weight_decay={wd} ===")
            res = run_cnn_cv_for_config(X, y, cfg, dropout=dropout, weight_decay=wd, n_splits=5)

            row = {
                "pre_bars": pre_bars,
                "dropout": dropout,
                "weight_decay": wd,
                **res
            }
            rows.append(row)

            print(f"macroF1 = {res['macroF1_mean']:.3f} ± {res['macroF1_std']:.3f} | acc = {res['acc_mean']:.3f} ± {res['acc_std']:.3f}")

# Best configurations first: rank by macro-F1, break ties on accuracy.
df = pd.DataFrame(rows).sort_values(["macroF1_mean", "acc_mean"], ascending=False)
print("\n=== SUMMARY (sorted) ===")
print(df.to_string(index=False))

# Create the output folder if needed so a long grid run is not lost to a
# missing-directory error at the very last step.
OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)
df.to_csv(OUTPUTS_DIR / "cnn_ablation_results.csv", index=False)
print("\nSaved: cnn_ablation_results.csv")


=== pre_bars=32 dropout=0.25 weight_decay=0.0001 ===
macroF1 = 0.227 ± 0.045 | acc = 0.250 ± 0.032

=== pre_bars=32 dropout=0.25 weight_decay=0.001 ===
macroF1 = 0.285 ± 0.122 | acc = 0.310 ± 0.128

=== pre_bars=32 dropout=0.5 weight_decay=0.0001 ===
macroF1 = 0.304 ± 0.058 | acc = 0.330 ± 0.081

=== pre_bars=32 dropout=0.5 weight_decay=0.001 ===
macroF1 = 0.338 ± 0.079 | acc = 0.350 ± 0.077

=== pre_bars=64 dropout=0.25 weight_decay=0.0001 ===
macroF1 = 0.321 ± 0.129 | acc = 0.340 ± 0.111

=== pre_bars=64 dropout=0.25 weight_decay=0.001 ===
macroF1 = 0.268 ± 0.067 | acc = 0.310 ± 0.086

=== pre_bars=64 dropout=0.5 weight_decay=0.0001 ===
macroF1 = 0.300 ± 0.088 | acc = 0.330 ± 0.093

=== pre_bars=64 dropout=0.5 weight_decay=0.001 ===
macroF1 = 0.330 ± 0.059 | acc = 0.380 ± 0.068

=== pre_bars=96 dropout=0.25 weight_decay=0.0001 ===
macroF1 = 0.338 ± 0.051 | acc = 0.370 ± 0.051

=== pre_bars=96 dropout=0.25 weight_decay=0.001 ===
macroF1 = 0.362 ± 0.050 | acc = 0.390 ± 0.058

=== pre_