### 0. Setup

In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset and the
# saved model files used below are read from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, root_mean_squared_error
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import ParameterGrid

from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm

import torch.nn.functional as F
import torch.optim as optim
import torch.nn as nn
import torch

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import random
import pywt
import copy
import json

In [None]:
# Pick the compute device once: CUDA when a GPU is visible, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

Using device: cuda


In [None]:
def set_seed(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's global generator and PyTorch (CPU and
    all CUDA devices), then switches cuDNN to deterministic mode with
    benchmarking turned off.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Python and NumPy global generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch: CPU generator, current CUDA device, then every CUDA device.
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # cuDNN: no benchmark-based algorithm selection, deterministic mode on.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


set_seed(42)

In [None]:
# Read the training CSV from Drive and index the rows by the 'timestamp' column.
df = pd.read_csv(
    '/content/drive/MyDrive/WaveletFrequencyDecomposed_CNN_Transformer/data/train_data.csv'
).set_index('timestamp')

### 1. Data Preprocessing

##### 1.1 Wavelet Frequency-Decomposed Correlation

In [None]:
def wavelet_decomposed_corr(df, input_window_width=30, label_window_width=10, wavelet='db4', level=3):
    """Build (input, target) correlation tensors from sliding windows of ``df``.

    For every split point ``t`` the ``input_window_width`` rows before ``t``
    are decomposed column by column with ``pywt.wavedec``. Three coefficient
    bands are kept per column: the approximation (``coeffs[0]``), the coarsest
    detail (``coeffs[1]``) and the finest detail (``coeffs[-1]``). For each
    band a correlation matrix across columns is computed with ``np.corrcoef``.
    The target is the correlation matrix of the ``label_window_width`` rows
    starting at ``t``.

    Args:
        df: DataFrame whose columns are the series to correlate. ``df.values``
            is used directly, so the columns are assumed to be numeric.
        input_window_width: Number of rows in each input window.
        label_window_width: Number of rows in each label window.
        wavelet: Wavelet name forwarded to ``pywt.wavedec``.
        level: Decomposition level forwarded to ``pywt.wavedec``. Any level
            >= 1 works; with ``level=1`` the "mid" and "high" bands are the
            same coefficients. With ``level=3`` the bands are cA3, cD3, cD1,
            exactly as before.

    Returns:
        Tuple ``(X, Y)`` of float32 tensors. For a frame with two or more
        columns, ``X`` has shape (num_windows, 3, n_cols, n_cols) and ``Y``
        has shape (num_windows, n_cols, n_cols).

    Raises:
        ValueError: If ``df`` has fewer than
            ``input_window_width + label_window_width`` rows, so that no
            window can be built.

    Note:
        A series that is constant inside a window has zero variance, so
        ``np.corrcoef`` yields NaN for its entries; they are passed through.
    """
    # NOTE(review): for a 30-row window and 'db4', pywt.dwt_max_level appears
    # to be 2, so level=3 may emit a boundary-effect warning. Confirm intent.
    data = df.values
    n_rows = len(df)

    if n_rows < input_window_width + label_window_width:
        raise ValueError(
            f"Need at least {input_window_width + label_window_width} rows to build one "
            f"window pair, got {n_rows}"
        )

    X, Y = [], []
    for t in range(input_window_width, n_rows - label_window_width + 1):
        window_data = data[t - input_window_width:t]

        low_band, mid_band, high_band = [], [], []
        for i in range(window_data.shape[1]):
            # wavedec returns [cA_n, cD_n, ..., cD_1]; index instead of
            # unpacking so the function works for any decomposition level.
            coeffs = pywt.wavedec(window_data[:, i], wavelet, level=level)
            low_band.append(coeffs[0])
            mid_band.append(coeffs[1])
            high_band.append(coeffs[-1])

        # One correlation matrix (columns x columns) per frequency band.
        band_corrs = np.stack([
            np.corrcoef(low_band),
            np.corrcoef(mid_band),
            np.corrcoef(high_band),
        ])
        X.append(torch.tensor(band_corrs, dtype=torch.float32))

        # Target: plain correlation of the rows that follow the input window.
        label_window = data[t:t + label_window_width]
        Y.append(torch.tensor(np.corrcoef(label_window.T), dtype=torch.float32))

    return torch.stack(X), torch.stack(Y)

In [None]:
# Build every (input, target) pair, then split them in order (no shuffling)
# into 80% train, 10% validation and the remainder as test.
# NOTE(review): consecutive samples come from windows shifted by one row, so
# samples on either side of a split boundary share rows. Confirm this overlap
# between the splits is acceptable.
X_tensor, Y_tensor = wavelet_decomposed_corr(df)

total_size = len(X_tensor)
train_size = int(total_size * 0.8)
val_size   = int(total_size * 0.1)
test_size  = total_size - train_size - val_size  # whatever is left after train + val

split_sizes = [train_size, val_size, test_size]
X_train, X_val, X_test = torch.split(X_tensor, split_sizes)
Y_train, Y_val, Y_test = torch.split(Y_tensor, split_sizes)

train_ds, val_ds, test_ds = (
    TensorDataset(inputs, targets)
    for inputs, targets in ((X_train, Y_train), (X_val, Y_val), (X_test, Y_test))
)



### 2. Modeling

##### 2.1 Model Architecture

In [None]:
class CorrPredictorCNN(nn.Module):
    """Three-layer 2D CNN that maps a stack of matrices to a single matrix.

    The network is Conv -> ReLU -> BatchNorm, repeated twice, followed by a
    final convolution down to one channel. Every convolution pads by
    ``kernel_size // 2``, which keeps the spatial size unchanged for odd
    kernel sizes.

    Args:
        num_channels: Number of input channels.
        conv_channels: Accepted but not used by the network; the middle layer
            is fixed at 64 channels.
        kernel_size: Kernel size shared by all three convolutions.
        d_model: Number of output channels of the first convolution.
    """

    def __init__(
            self,
            num_channels=3,
            conv_channels=32,
            kernel_size=3,
            d_model=128
            ):
        super().__init__()

        same_pad = kernel_size // 2
        hidden_width = 64

        def conv(in_ch, out_ch):
            # All convolutions share the same kernel size and padding.
            return nn.Conv2d(in_ch, out_ch, kernel_size, padding=same_pad)

        # Layer order is kept as-is so state_dict keys (cnn.0 ... cnn.6) match.
        layers = [
            conv(num_channels, d_model),
            nn.ReLU(),
            nn.BatchNorm2d(d_model),

            conv(d_model, hidden_width),
            nn.ReLU(),
            nn.BatchNorm2d(hidden_width),

            conv(hidden_width, 1),
        ]
        self.cnn = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the CNN and drop the single output channel (dim 1)."""
        return self.cnn(x).squeeze(1)

##### 2.2 Training

In [None]:
def train_model(model, train_loader, val_loader, optimizer_name='Adam', lr=5e-4, epochs=70, device='cuda'):
    """Train ``model`` with MSE loss and return its best validation loss.

    After every epoch the validation loss is computed as the mean over all
    validation samples (batches are weighted by their size, so a smaller last
    batch does not bias the result). The learning rate is halved by
    ``ReduceLROnPlateau`` after 5 epochs without improvement. The weights of
    the epoch with the lowest validation loss are restored into ``model``
    before returning, so the returned loss describes the model the caller
    gets back.

    Args:
        model: ``nn.Module`` to train; it is moved to ``device`` in place.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches;
            must support ``len()`` and be non-empty.
        optimizer_name: One of ``'Adam'``, ``'RMSprop'`` or ``'AdamW'``.
        lr: Initial learning rate.
        epochs: Number of passes over ``train_loader``.
        device: Device (or device string) used for the model and the batches.

    Returns:
        The lowest validation loss observed, as a Python float. If no epoch
        produced a comparable loss (e.g. every loss was NaN, or
        ``epochs == 0``), the last computed value is returned instead
        (``inf`` when no epoch ran).

    Raises:
        ValueError: If ``optimizer_name`` is not supported or ``val_loader``
            is empty.
    """
    optimizers = {
        'Adam': torch.optim.Adam,
        'RMSprop': torch.optim.RMSprop,
        'AdamW': torch.optim.AdamW,
    }
    if optimizer_name not in optimizers:
        raise ValueError(f"Unsupported optimizer: {optimizer_name}")
    if len(val_loader) == 0:
        raise ValueError("val_loader is empty; cannot compute a validation loss")

    model.to(device)

    # Select the optimizer
    opt = optimizers[optimizer_name](model.parameters(), lr=lr)

    # Loss & LR scheduler. The deprecated ``verbose`` flag is intentionally
    # not passed; query ``scheduler.get_last_lr()`` if the LR must be logged.
    criterion = nn.MSELoss()
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        opt, mode='min', factor=0.5, patience=5
    )

    best_val_loss = float('inf')
    best_state = None
    val_loss = float('inf')

    for epoch in range(epochs):
        model.train()
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            opt.zero_grad()

            pred = model(xb)
            loss = criterion(pred, yb)
            loss.backward()
            opt.step()

        # Validation: accumulate a sample-weighted sum, then divide once.
        model.eval()
        loss_sum, n_samples = 0.0, 0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                batch_size = yb.size(0)
                loss_sum += criterion(model(xb), yb).item() * batch_size
                n_samples += batch_size
        val_loss = loss_sum / n_samples

        # Snapshot the weights whenever validation improves.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }

        # Let the scheduler react to the validation loss
        scheduler.step(val_loss)

    if best_state is None:
        return val_loss

    model.load_state_dict(best_state)
    return best_val_loss

In [None]:
# Drive folder used for the best weights/config written by the grid search
# and for the evaluation results saved later in the notebook.
model_save_path = '/content/drive/MyDrive/WaveletFrequencyDecomposed_CNN_Transformer/best_model'

In [None]:
# Grid search over architecture and optimisation hyperparameters.
# 2 kernel sizes x 3 widths x 2 learning rates x 3 optimizers x 4 batch sizes
# = 144 configurations, each trained from scratch with train_model's defaults.
param_grid = {
    'kernel_size': [3, 5],
    'd_model': [32, 64, 128],
    'lr': [0.001, 5e-4],
    'optimizer': ['Adam', 'RMSprop', 'AdamW'],
    'batch_size': [64, 128, 256, 512]
}

best_loss = float('inf')
best_config = None
# NOTE(review): best_model is never assigned inside this loop; it is only
# rebuilt from the saved files in the test cell.
best_model = None

for config in tqdm(ParameterGrid(param_grid)):
    # NOTE(review): the training loader is not shuffled. Confirm this is
    # intentional for the time-ordered samples.
    train_loader = DataLoader(train_ds, batch_size=config['batch_size'], shuffle=False)
    val_loader   = DataLoader(val_ds, batch_size=config['batch_size'], shuffle=False)

    model = CorrPredictorCNN(
        kernel_size=config['kernel_size'],
        d_model=config['d_model'],
    )
    # `loss` is the validation loss returned by train_model for this config.
    loss = train_model(model, train_loader, val_loader,
                       optimizer_name=config['optimizer'],
                       lr=config['lr'], device=device)

    print(f"Config: {config}, Loss: {loss:.4f}")
    if loss < best_loss:
        best_loss = loss
        best_config = config

        # Persist the best model so far: weights as a state_dict (.pt) and the
        # winning hyperparameters as JSON. The folder must already exist.
        torch.save(model.state_dict(), f"{model_save_path}/best_model_window10per30_WC_weights.pt")
        with open(f'{model_save_path}/best_model_window10per30_WC_config.json', 'w') as f:
            json.dump(best_config, f, indent=4)

# Final result
print(f"\n‚úÖ Best Config: {best_config}")
print(f"‚úÖ Best Loss: {best_loss:.4f}")

  1%|          | 1/144 [00:05<13:02,  5.47s/it]

Config: {'batch_size': 64, 'd_model': 32, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0698


  1%|‚ñè         | 2/144 [00:10<12:48,  5.41s/it]

Config: {'batch_size': 64, 'd_model': 32, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0758


  2%|‚ñè         | 3/144 [00:16<12:47,  5.44s/it]

Config: {'batch_size': 64, 'd_model': 32, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0735


  3%|‚ñé         | 4/144 [00:21<12:43,  5.45s/it]

Config: {'batch_size': 64, 'd_model': 32, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0739


  3%|‚ñé         | 5/144 [00:27<12:31,  5.41s/it]

Config: {'batch_size': 64, 'd_model': 32, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0745


  4%|‚ñç         | 6/144 [00:32<12:33,  5.46s/it]

Config: {'batch_size': 64, 'd_model': 32, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0722


  5%|‚ñç         | 7/144 [00:38<12:39,  5.54s/it]

Config: {'batch_size': 64, 'd_model': 32, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0746


  6%|‚ñå         | 8/144 [00:43<12:30,  5.52s/it]

Config: {'batch_size': 64, 'd_model': 32, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0947


  6%|‚ñã         | 9/144 [00:49<12:30,  5.56s/it]

Config: {'batch_size': 64, 'd_model': 32, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0739


  7%|‚ñã         | 10/144 [00:55<12:26,  5.57s/it]

Config: {'batch_size': 64, 'd_model': 32, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0726


  8%|‚ñä         | 11/144 [01:00<12:17,  5.55s/it]

Config: {'batch_size': 64, 'd_model': 32, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0848


  8%|‚ñä         | 12/144 [01:06<12:18,  5.60s/it]

Config: {'batch_size': 64, 'd_model': 32, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0741


  9%|‚ñâ         | 13/144 [01:11<12:13,  5.60s/it]

Config: {'batch_size': 64, 'd_model': 64, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0717


 10%|‚ñâ         | 14/144 [01:17<11:58,  5.53s/it]

Config: {'batch_size': 64, 'd_model': 64, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0764


 10%|‚ñà         | 15/144 [01:22<11:51,  5.52s/it]

Config: {'batch_size': 64, 'd_model': 64, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0738


 11%|‚ñà         | 16/144 [01:28<11:43,  5.50s/it]

Config: {'batch_size': 64, 'd_model': 64, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0731


 12%|‚ñà‚ñè        | 17/144 [01:33<11:32,  5.45s/it]

Config: {'batch_size': 64, 'd_model': 64, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0746


 12%|‚ñà‚ñé        | 18/144 [01:39<11:33,  5.50s/it]

Config: {'batch_size': 64, 'd_model': 64, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0734


 13%|‚ñà‚ñé        | 19/144 [01:44<11:30,  5.52s/it]

Config: {'batch_size': 64, 'd_model': 64, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0778


 14%|‚ñà‚ñç        | 20/144 [01:50<11:24,  5.52s/it]

Config: {'batch_size': 64, 'd_model': 64, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0839


 15%|‚ñà‚ñç        | 21/144 [01:55<11:22,  5.55s/it]

Config: {'batch_size': 64, 'd_model': 64, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0743


 15%|‚ñà‚ñå        | 22/144 [02:01<11:19,  5.57s/it]

Config: {'batch_size': 64, 'd_model': 64, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0753


 16%|‚ñà‚ñå        | 23/144 [02:06<11:10,  5.54s/it]

Config: {'batch_size': 64, 'd_model': 64, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0796


 17%|‚ñà‚ñã        | 24/144 [02:12<11:09,  5.58s/it]

Config: {'batch_size': 64, 'd_model': 64, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0748


 17%|‚ñà‚ñã        | 25/144 [02:18<11:03,  5.58s/it]

Config: {'batch_size': 64, 'd_model': 128, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0744


 18%|‚ñà‚ñä        | 26/144 [02:23<10:52,  5.53s/it]

Config: {'batch_size': 64, 'd_model': 128, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0809


 19%|‚ñà‚ñâ        | 27/144 [02:29<10:47,  5.54s/it]

Config: {'batch_size': 64, 'd_model': 128, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0745


 19%|‚ñà‚ñâ        | 28/144 [02:34<10:43,  5.55s/it]

Config: {'batch_size': 64, 'd_model': 128, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0752


 20%|‚ñà‚ñà        | 29/144 [02:40<10:32,  5.50s/it]

Config: {'batch_size': 64, 'd_model': 128, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0789


 21%|‚ñà‚ñà        | 30/144 [02:45<10:28,  5.52s/it]

Config: {'batch_size': 64, 'd_model': 128, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0730


 22%|‚ñà‚ñà‚ñè       | 31/144 [02:51<10:30,  5.58s/it]

Config: {'batch_size': 64, 'd_model': 128, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0823


 22%|‚ñà‚ñà‚ñè       | 32/144 [02:56<10:20,  5.54s/it]

Config: {'batch_size': 64, 'd_model': 128, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0824


 23%|‚ñà‚ñà‚ñé       | 33/144 [03:02<10:23,  5.62s/it]

Config: {'batch_size': 64, 'd_model': 128, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0782


 24%|‚ñà‚ñà‚ñé       | 34/144 [03:08<10:19,  5.64s/it]

Config: {'batch_size': 64, 'd_model': 128, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0771


 24%|‚ñà‚ñà‚ñç       | 35/144 [03:13<10:12,  5.62s/it]

Config: {'batch_size': 64, 'd_model': 128, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0861


 25%|‚ñà‚ñà‚ñå       | 36/144 [03:19<10:07,  5.63s/it]

Config: {'batch_size': 64, 'd_model': 128, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0768


 26%|‚ñà‚ñà‚ñå       | 37/144 [03:22<08:44,  4.90s/it]

Config: {'batch_size': 128, 'd_model': 32, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0723


 26%|‚ñà‚ñà‚ñã       | 38/144 [03:25<07:42,  4.37s/it]

Config: {'batch_size': 128, 'd_model': 32, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0779


 27%|‚ñà‚ñà‚ñã       | 39/144 [03:29<07:02,  4.02s/it]

Config: {'batch_size': 128, 'd_model': 32, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0725


 28%|‚ñà‚ñà‚ñä       | 40/144 [03:32<06:31,  3.76s/it]

Config: {'batch_size': 128, 'd_model': 32, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0718


 28%|‚ñà‚ñà‚ñä       | 41/144 [03:35<06:07,  3.57s/it]

Config: {'batch_size': 128, 'd_model': 32, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0734


 29%|‚ñà‚ñà‚ñâ       | 42/144 [03:38<05:53,  3.46s/it]

Config: {'batch_size': 128, 'd_model': 32, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0732


 30%|‚ñà‚ñà‚ñâ       | 43/144 [03:41<05:44,  3.41s/it]

Config: {'batch_size': 128, 'd_model': 32, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0714


 31%|‚ñà‚ñà‚ñà       | 44/144 [03:45<05:33,  3.34s/it]

Config: {'batch_size': 128, 'd_model': 32, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0752


 31%|‚ñà‚ñà‚ñà‚ñè      | 45/144 [03:48<05:29,  3.33s/it]

Config: {'batch_size': 128, 'd_model': 32, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0720


 32%|‚ñà‚ñà‚ñà‚ñè      | 46/144 [03:51<05:24,  3.32s/it]

Config: {'batch_size': 128, 'd_model': 32, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0736


 33%|‚ñà‚ñà‚ñà‚ñé      | 47/144 [03:54<05:19,  3.30s/it]

Config: {'batch_size': 128, 'd_model': 32, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0740


 33%|‚ñà‚ñà‚ñà‚ñé      | 48/144 [03:58<05:18,  3.32s/it]

Config: {'batch_size': 128, 'd_model': 32, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0713


 34%|‚ñà‚ñà‚ñà‚ñç      | 49/144 [04:01<05:13,  3.30s/it]

Config: {'batch_size': 128, 'd_model': 64, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0716


 35%|‚ñà‚ñà‚ñà‚ñç      | 50/144 [04:04<05:06,  3.26s/it]

Config: {'batch_size': 128, 'd_model': 64, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0718


 35%|‚ñà‚ñà‚ñà‚ñå      | 51/144 [04:08<05:03,  3.26s/it]

Config: {'batch_size': 128, 'd_model': 64, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0746


 36%|‚ñà‚ñà‚ñà‚ñå      | 52/144 [04:11<04:59,  3.25s/it]

Config: {'batch_size': 128, 'd_model': 64, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0754


 37%|‚ñà‚ñà‚ñà‚ñã      | 53/144 [04:14<04:54,  3.23s/it]

Config: {'batch_size': 128, 'd_model': 64, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0725


 38%|‚ñà‚ñà‚ñà‚ñä      | 54/144 [04:17<04:50,  3.23s/it]

Config: {'batch_size': 128, 'd_model': 64, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0749


 38%|‚ñà‚ñà‚ñà‚ñä      | 55/144 [04:20<04:49,  3.25s/it]

Config: {'batch_size': 128, 'd_model': 64, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0720


 39%|‚ñà‚ñà‚ñà‚ñâ      | 56/144 [04:24<04:46,  3.26s/it]

Config: {'batch_size': 128, 'd_model': 64, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0760


 40%|‚ñà‚ñà‚ñà‚ñâ      | 57/144 [04:27<04:44,  3.27s/it]

Config: {'batch_size': 128, 'd_model': 64, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0757


 40%|‚ñà‚ñà‚ñà‚ñà      | 58/144 [04:30<04:41,  3.27s/it]

Config: {'batch_size': 128, 'd_model': 64, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0727


 41%|‚ñà‚ñà‚ñà‚ñà      | 59/144 [04:34<04:37,  3.27s/it]

Config: {'batch_size': 128, 'd_model': 64, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0742


 42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 60/144 [04:37<04:35,  3.28s/it]

Config: {'batch_size': 128, 'd_model': 64, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0730


 42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 61/144 [04:40<04:31,  3.27s/it]

Config: {'batch_size': 128, 'd_model': 128, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0724


 43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 62/144 [04:43<04:26,  3.25s/it]

Config: {'batch_size': 128, 'd_model': 128, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0715


 44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 63/144 [04:47<04:25,  3.28s/it]

Config: {'batch_size': 128, 'd_model': 128, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0744


 44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 64/144 [04:50<04:22,  3.28s/it]

Config: {'batch_size': 128, 'd_model': 128, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0757


 45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 65/144 [04:53<04:16,  3.25s/it]

Config: {'batch_size': 128, 'd_model': 128, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0724


 46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 66/144 [04:56<04:13,  3.25s/it]

Config: {'batch_size': 128, 'd_model': 128, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0749


 47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 67/144 [05:01<04:33,  3.56s/it]

Config: {'batch_size': 128, 'd_model': 128, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0734


 47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 68/144 [05:05<04:45,  3.75s/it]

Config: {'batch_size': 128, 'd_model': 128, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0719


 48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 69/144 [05:09<04:52,  3.90s/it]

Config: {'batch_size': 128, 'd_model': 128, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0703


 49%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 70/144 [05:13<04:56,  4.01s/it]

Config: {'batch_size': 128, 'd_model': 128, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0737


 49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 71/144 [05:18<04:57,  4.07s/it]

Config: {'batch_size': 128, 'd_model': 128, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0744


 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 72/144 [05:22<04:57,  4.13s/it]

Config: {'batch_size': 128, 'd_model': 128, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0714


 51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 73/144 [05:24<04:09,  3.52s/it]

Config: {'batch_size': 256, 'd_model': 32, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0721


 51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 74/144 [05:26<03:38,  3.12s/it]

Config: {'batch_size': 256, 'd_model': 32, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0734


 52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 75/144 [05:28<03:13,  2.81s/it]

Config: {'batch_size': 256, 'd_model': 32, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0734


 53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 76/144 [05:30<02:55,  2.59s/it]

Config: {'batch_size': 256, 'd_model': 32, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0709


 53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 77/144 [05:33<02:46,  2.48s/it]

Config: {'batch_size': 256, 'd_model': 32, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0712


 54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 78/144 [05:35<02:36,  2.37s/it]

Config: {'batch_size': 256, 'd_model': 32, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0735


 55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 79/144 [05:37<02:30,  2.32s/it]

Config: {'batch_size': 256, 'd_model': 32, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0707


 56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 80/144 [05:39<02:27,  2.31s/it]

Config: {'batch_size': 256, 'd_model': 32, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0711


 56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 81/144 [05:41<02:22,  2.27s/it]

Config: {'batch_size': 256, 'd_model': 32, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0703


 57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 82/144 [05:44<02:21,  2.28s/it]

Config: {'batch_size': 256, 'd_model': 32, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0708


 58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 83/144 [05:46<02:16,  2.24s/it]

Config: {'batch_size': 256, 'd_model': 32, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0703


 58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 84/144 [05:48<02:13,  2.23s/it]

Config: {'batch_size': 256, 'd_model': 32, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0715


 59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 85/144 [05:50<02:11,  2.24s/it]

Config: {'batch_size': 256, 'd_model': 64, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0739


 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 86/144 [05:52<02:06,  2.19s/it]

Config: {'batch_size': 256, 'd_model': 64, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0716


 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 87/144 [05:54<02:03,  2.17s/it]

Config: {'batch_size': 256, 'd_model': 64, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0718


 61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 88/144 [05:57<02:03,  2.21s/it]

Config: {'batch_size': 256, 'd_model': 64, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0731


 62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 89/144 [05:59<01:59,  2.18s/it]

Config: {'batch_size': 256, 'd_model': 64, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0728


 62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 90/144 [06:01<01:56,  2.16s/it]

Config: {'batch_size': 256, 'd_model': 64, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0733


 63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 91/144 [06:04<02:05,  2.36s/it]

Config: {'batch_size': 256, 'd_model': 64, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0691


 64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 92/144 [06:06<02:07,  2.45s/it]

Config: {'batch_size': 256, 'd_model': 64, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0682


 65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 93/144 [06:09<02:10,  2.57s/it]

Config: {'batch_size': 256, 'd_model': 64, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0690


 65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 94/144 [06:12<02:10,  2.60s/it]

Config: {'batch_size': 256, 'd_model': 64, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0708


 66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 95/144 [06:15<02:08,  2.61s/it]

Config: {'batch_size': 256, 'd_model': 64, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0740


 67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 96/144 [06:17<02:08,  2.68s/it]

Config: {'batch_size': 256, 'd_model': 64, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0711


 67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 97/144 [06:20<01:58,  2.52s/it]

Config: {'batch_size': 256, 'd_model': 128, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0712


 68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 98/144 [06:22<01:50,  2.40s/it]

Config: {'batch_size': 256, 'd_model': 128, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0711


 69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 99/144 [06:24<01:46,  2.37s/it]

Config: {'batch_size': 256, 'd_model': 128, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0727


 69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 100/144 [06:26<01:40,  2.29s/it]

Config: {'batch_size': 256, 'd_model': 128, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0751


 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 101/144 [06:28<01:37,  2.28s/it]

Config: {'batch_size': 256, 'd_model': 128, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0737


 71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 102/144 [06:30<01:33,  2.23s/it]

Config: {'batch_size': 256, 'd_model': 128, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0725


 72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 103/144 [06:34<01:53,  2.76s/it]

Config: {'batch_size': 256, 'd_model': 128, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0689


 72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 104/144 [06:39<02:06,  3.17s/it]

Config: {'batch_size': 256, 'd_model': 128, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0685


 73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 105/144 [06:43<02:13,  3.42s/it]

Config: {'batch_size': 256, 'd_model': 128, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0695


 74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 106/144 [06:47<02:16,  3.59s/it]

Config: {'batch_size': 256, 'd_model': 128, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0702


 74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 107/144 [06:51<02:18,  3.75s/it]

Config: {'batch_size': 256, 'd_model': 128, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0693


 75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 108/144 [06:55<02:17,  3.82s/it]

Config: {'batch_size': 256, 'd_model': 128, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0712


 76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 109/144 [06:56<01:51,  3.18s/it]

Config: {'batch_size': 512, 'd_model': 32, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0710


 76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 110/144 [06:58<01:31,  2.69s/it]

Config: {'batch_size': 512, 'd_model': 32, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0727


 77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 111/144 [07:00<01:18,  2.39s/it]

Config: {'batch_size': 512, 'd_model': 32, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0727


 78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 112/144 [07:01<01:08,  2.14s/it]

Config: {'batch_size': 512, 'd_model': 32, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0714


 78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 113/144 [07:03<01:01,  2.00s/it]

Config: {'batch_size': 512, 'd_model': 32, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0819


 79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 114/144 [07:04<00:55,  1.86s/it]

Config: {'batch_size': 512, 'd_model': 32, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0751


 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 115/144 [07:06<00:55,  1.92s/it]

Config: {'batch_size': 512, 'd_model': 32, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0679


 81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 116/144 [07:08<00:53,  1.91s/it]

Config: {'batch_size': 512, 'd_model': 32, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0734


 81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 117/144 [07:10<00:52,  1.96s/it]

Config: {'batch_size': 512, 'd_model': 32, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0682


 82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 118/144 [07:12<00:50,  1.94s/it]

Config: {'batch_size': 512, 'd_model': 32, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0706


 83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 119/144 [07:14<00:49,  1.97s/it]

Config: {'batch_size': 512, 'd_model': 32, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0708


 83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 120/144 [07:16<00:46,  1.95s/it]

Config: {'batch_size': 512, 'd_model': 32, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0700


 84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 121/144 [07:18<00:43,  1.88s/it]

Config: {'batch_size': 512, 'd_model': 64, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0745


 85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 122/144 [07:19<00:39,  1.78s/it]

Config: {'batch_size': 512, 'd_model': 64, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0724


 85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 123/144 [07:21<00:36,  1.76s/it]

Config: {'batch_size': 512, 'd_model': 64, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0716


 86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 124/144 [07:23<00:35,  1.75s/it]

Config: {'batch_size': 512, 'd_model': 64, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0723


 87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 125/144 [07:24<00:32,  1.69s/it]

Config: {'batch_size': 512, 'd_model': 64, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0759


 88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 126/144 [07:26<00:30,  1.69s/it]

Config: {'batch_size': 512, 'd_model': 64, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0745


 88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 127/144 [07:29<00:33,  1.94s/it]

Config: {'batch_size': 512, 'd_model': 64, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0682


 89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 128/144 [07:31<00:34,  2.16s/it]

Config: {'batch_size': 512, 'd_model': 64, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0682


 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 129/144 [07:34<00:34,  2.29s/it]

Config: {'batch_size': 512, 'd_model': 64, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0678


 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 130/144 [07:37<00:33,  2.41s/it]

Config: {'batch_size': 512, 'd_model': 64, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0697


 91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 131/144 [07:39<00:31,  2.45s/it]

Config: {'batch_size': 512, 'd_model': 64, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0698


 92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 132/144 [07:42<00:30,  2.52s/it]

Config: {'batch_size': 512, 'd_model': 64, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0725


 92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 133/144 [07:44<00:25,  2.32s/it]

Config: {'batch_size': 512, 'd_model': 128, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0725


 93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 134/144 [07:46<00:22,  2.23s/it]

Config: {'batch_size': 512, 'd_model': 128, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0699


 94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 135/144 [07:48<00:19,  2.11s/it]

Config: {'batch_size': 512, 'd_model': 128, 'kernel_size': 3, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0704


 94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 136/144 [07:50<00:16,  2.07s/it]

Config: {'batch_size': 512, 'd_model': 128, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0752


 95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 137/144 [07:51<00:13,  2.00s/it]

Config: {'batch_size': 512, 'd_model': 128, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0721


 96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 138/144 [07:53<00:11,  1.99s/it]

Config: {'batch_size': 512, 'd_model': 128, 'kernel_size': 3, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0765


 97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 139/144 [07:57<00:12,  2.55s/it]

Config: {'batch_size': 512, 'd_model': 128, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'Adam'}, Loss: 0.0670


 97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 140/144 [08:01<00:11,  2.97s/it]

Config: {'batch_size': 512, 'd_model': 128, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'RMSprop'}, Loss: 0.0661


 98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 141/144 [08:05<00:09,  3.23s/it]

Config: {'batch_size': 512, 'd_model': 128, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'AdamW'}, Loss: 0.0679


 99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 142/144 [08:09<00:06,  3.45s/it]

Config: {'batch_size': 512, 'd_model': 128, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'Adam'}, Loss: 0.0703


 99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 143/144 [08:13<00:03,  3.56s/it]

Config: {'batch_size': 512, 'd_model': 128, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'RMSprop'}, Loss: 0.0685


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 144/144 [08:17<00:00,  3.45s/it]

Config: {'batch_size': 512, 'd_model': 128, 'kernel_size': 5, 'lr': 0.0005, 'optimizer': 'AdamW'}, Loss: 0.0682

‚úÖ Best Config: {'batch_size': 512, 'd_model': 128, 'kernel_size': 5, 'lr': 0.001, 'optimizer': 'RMSprop'}
‚úÖ Best Loss: 0.0661





##### 2.3 Test

In [None]:
def evaluate_model(model, test_loader, device='cuda'):
    """Run ``model`` over ``test_loader`` and collect predictions and targets.

    The model is moved to ``device`` and put in eval mode, and gradients are
    disabled while the batches are processed.

    Args:
        model: Module to evaluate.
        test_loader: Iterable yielding ``(inputs, targets)`` batches.
        device: Device (or device string) the model and batches are moved to.

    Returns:
        Tuple ``(preds_tensor, targets_tensor)``: the per-batch model outputs
        and targets, each concatenated along dim 0 and left on ``device``.
    """
    model.to(device)
    model.eval()

    collected_preds = []
    collected_targets = []
    with torch.no_grad():
        for features, labels in test_loader:
            features = features.to(device)
            labels = labels.to(device)
            collected_preds.append(model(features))
            collected_targets.append(labels)

    # Merge the per-batch results into one tensor each along the batch dim.
    return torch.cat(collected_preds, dim=0), torch.cat(collected_targets, dim=0)

# Rebuild the best architecture from the config written by the grid search.
with open(f'{model_save_path}/best_model_window10per30_WC_config.json', 'r') as f:
    best_config = json.load(f)

best_model = CorrPredictorCNN(
    kernel_size=best_config['kernel_size'],
    d_model=best_config['d_model'],
)
# The grid search saves the weights as "..._WC_weights.pt"; the file name here
# must match it exactly (the previous ".pth" suffix pointed at a file that the
# notebook never writes). map_location lets a checkpoint saved on GPU load on
# whatever device this runtime provides.
best_model.load_state_dict(
    torch.load(
        f"{model_save_path}/best_model_window10per30_WC_weights.pt",
        map_location=device,
    )
)

test_loader = DataLoader(test_ds, batch_size=best_config['batch_size'], shuffle=False)
preds_tensor, targets_tensor = evaluate_model(best_model, test_loader, device=device)

# Save predictions and targets for later analysis
torch.save({
    'preds': preds_tensor,
    'targets': targets_tensor
}, f"{model_save_path}/best_model_window10per30_WC_result.pt")

In [None]:
# Performance metrics

# Flatten every predicted/target matrix to one row per sample.
preds_flat = preds_tensor.reshape(preds_tensor.size(0), -1).cpu().numpy()
targets_flat = targets_tensor.reshape(targets_tensor.size(0), -1).cpu().numpy()

mse = mean_squared_error(targets_flat, preds_flat)
mae = mean_absolute_error(targets_flat, preds_flat)
rmse = np.sqrt(mse)

# Cosine similarity between each prediction and its own target.
# Computed row-wise; building the full N x N pairwise matrix only to read its
# diagonal costs O(N^2) time and memory. A zero-norm row gives similarity 0.
row_dots = np.einsum('ij,ij->i', targets_flat, preds_flat)
row_norms = np.linalg.norm(targets_flat, axis=1) * np.linalg.norm(preds_flat, axis=1)
cos_sim_per_sample = row_dots / np.where(row_norms == 0, 1.0, row_norms)
mean_cos_sim = cos_sim_per_sample.mean()

# Frobenius norm of the per-sample error matrix.
# torch.linalg.matrix_norm replaces the deprecated torch.norm(p='fro').
diff = preds_tensor - targets_tensor
frobenius_per_sample = torch.linalg.matrix_norm(diff, ord='fro', dim=(1, 2))
mean_frobenius = frobenius_per_sample.mean().item()

print(f"\nüìä Evaluation Results:")
print(f"MSE               : {mse:.5f}")
print(f"MAE               : {mae:.5f}")
print(f"RMSE              : {rmse:.5f}")
print(f"Cosine Similarity : {mean_cos_sim:.5f}")
print(f"Frobenius Norm    : {mean_frobenius:.5f}")


üìä Evaluation Results:
MSE               : 0.06904
MAE               : 0.18716
RMSE              : 0.26275
Cosine Similarity : 0.93967
Frobenius Norm    : 1.91697
