In [1]:
from torchmetrics.functional import auc, mean_squared_error
from torchmetrics import F1Score
from tools import *
from CONSTANT import *
from models import CNNBiLSTM, CNNTransformer
from config import Params
from torch.utils.data import (
    TensorDataset, DataLoader, SequentialSampler, WeightedRandomSampler)
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau
import torch.optim as optim
import torch.nn.functional as F
from torch import nn
import torch
from tqdm import tqdm
import pandas as pd
import numpy as np
import math
import time
import datetime
import os
import sys
import time
import warnings

# load baseline results

In [2]:
bs_results = load_dict_model(r'./output/KEC/valence_CTransformer_loso_0.0001_64_32/results.pkl')
parse_res(bs_results)
# valence CTransformer

0.56804

In [3]:
bs_results = load_dict_model(r'./output/KEC/arousal_CLSTM_loso_0.0001_64_32/results.pkl')
parse_res(bs_results)
# arousal CLSTM

0.9817

# set params

In [4]:
ckpt_path = r'./output/HKU956/valence_CTransformer_loso_0.0001_256_32/fold1_checkpoint.pt'

args = Params(dataset='KEC', 
              model='CTransformer',
              target='arousal', 
              debug=False, 
              fcn_input=12608,
              batch_size=64
              )

# load data

In [5]:
spliter = load_model(args.spliter)
data = pd.read_pickle(args.data)

for i, k in enumerate(spliter[args.valid]):
    train_index = k['train_index']
    test_index = k['test_index']
    break

dataprepare = DataPrepare(args,
                        target=args.target, data=data, train_index=train_index, test_index=test_index, device=args.device, batch_size=args.batch_size
                        )

train_dataloader, test_dataloader = dataprepare.get_data()

(2837, 4, 400) (2837, 1) (608, 4, 400) (608, 1)


# load pretrain model

In [6]:
kec_fcn = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(args.fcn_input, 128),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(128, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

In [7]:
model = CNNTransformer.CTransformer(args)
model.load_state_dict(torch.load(ckpt_path))
model.fcn = kec_fcn
model = model.to(args.device)

# train and eval

In [8]:
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    train_loss_list = []
    loss_fn = nn.MSELoss()
    for batch_idx, (data, target) in tqdm(enumerate(train_loader)):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, target.float())
        loss.backward()
        optimizer.step()
        train_loss_list.append(loss.item())
    return np.mean(train_loss_list)


def eval(model, device, val_loader):
    model.eval()
    val_loss = []
    loss_fn = nn.MSELoss()
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = loss_fn(output, target.float())
            val_loss.append(loss.item())
    return np.mean(val_loss)

In [9]:
def run(train_loader, val_loader, ckpt_path):
    best_score = float('inf')
    patience = 25
    stop_count = 0
    optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5,
                                    verbose=True, threshold_mode='rel',
                                    cooldown=0, min_lr=0, eps=1e-08
                                    )
    for epoch in range(1, args.epochs + 1):
        train_loss = train(model, args.device, train_loader, optimizer, epoch)
        val_loss = eval(model, args.device, val_loader)
        scheduler.step(val_loss)
        print('[Epoch{}] | train_loss:{:.4f} | val_loss:{:.4f} | lr:{:e}'.format(epoch, train_loss, val_loss, optimizer.param_groups[0]['lr']))

        if val_loss < best_score:
            best_score = val_loss
            torch.save(model.state_dict(), ckpt_path)
            print("<<<<<< reach best {0} >>>>>>".format(val_loss))
            stop_count = 0
        else:
            model.load_state_dict(torch.load(ckpt_path))
            stop_count += 1
            if stop_count >= patience:
                print("<<<<<< without improvement in {} epoch, early stopping, best score {:.4f} >>>>>>".format(patience, best_score))
                break
        # wandb.log({'train_loss': train_loss, 'val_loss': val_loss})
    print('best score', best_score)
    return model

In [10]:
model = run(train_dataloader, test_dataloader, ckpt_path=os.path.join(args.save_path, 'arousal_checkpoint_dp02.pt'))

45it [00:06,  7.31it/s]


[Epoch1] | train_loss:1.6805 | val_loss:1.3359 | lr:1.000000e-04
<<<<<< reach best 1.3358946472406388 >>>>>>


45it [00:03, 12.38it/s]


[Epoch2] | train_loss:1.2357 | val_loss:1.7690 | lr:1.000000e-04


45it [00:03, 12.43it/s]


[Epoch3] | train_loss:1.2426 | val_loss:1.5200 | lr:1.000000e-04


45it [00:03, 12.39it/s]


[Epoch4] | train_loss:1.2166 | val_loss:1.5087 | lr:1.000000e-04


45it [00:03, 12.32it/s]


[Epoch5] | train_loss:1.2165 | val_loss:1.6437 | lr:1.000000e-04


45it [00:03, 12.37it/s]


[Epoch6] | train_loss:1.2147 | val_loss:1.5626 | lr:1.000000e-04


45it [00:03, 12.43it/s]


Epoch 00007: reducing learning rate of group 0 to 5.0000e-05.
[Epoch7] | train_loss:1.2124 | val_loss:1.5899 | lr:5.000000e-05


45it [00:03, 12.37it/s]


[Epoch8] | train_loss:1.1247 | val_loss:1.3112 | lr:5.000000e-05
<<<<<< reach best 1.3111931949853897 >>>>>>


45it [00:03, 12.36it/s]


[Epoch9] | train_loss:1.1011 | val_loss:1.3267 | lr:5.000000e-05


45it [00:03, 12.36it/s]


[Epoch10] | train_loss:1.0921 | val_loss:1.3639 | lr:5.000000e-05


45it [00:03, 12.37it/s]


[Epoch11] | train_loss:1.0643 | val_loss:1.3209 | lr:5.000000e-05


45it [00:03, 12.35it/s]


[Epoch12] | train_loss:1.0983 | val_loss:1.3146 | lr:5.000000e-05


45it [00:03, 12.38it/s]


[Epoch13] | train_loss:1.1191 | val_loss:1.3086 | lr:5.000000e-05
<<<<<< reach best 1.3086405456066132 >>>>>>


45it [00:03, 12.38it/s]


[Epoch14] | train_loss:1.0750 | val_loss:1.3147 | lr:5.000000e-05


45it [00:03, 12.37it/s]


[Epoch15] | train_loss:1.0885 | val_loss:1.3100 | lr:5.000000e-05


45it [00:03, 12.37it/s]


[Epoch16] | train_loss:1.0782 | val_loss:1.3180 | lr:5.000000e-05


45it [00:03, 12.36it/s]


[Epoch17] | train_loss:1.0851 | val_loss:1.3417 | lr:5.000000e-05


45it [00:03, 12.37it/s]


[Epoch18] | train_loss:1.0661 | val_loss:1.3267 | lr:5.000000e-05


45it [00:03, 12.36it/s]


Epoch 00019: reducing learning rate of group 0 to 2.5000e-05.
[Epoch19] | train_loss:1.0792 | val_loss:1.3161 | lr:2.500000e-05


45it [00:03, 12.37it/s]


[Epoch20] | train_loss:1.0560 | val_loss:1.3332 | lr:2.500000e-05


45it [00:03, 12.36it/s]


[Epoch21] | train_loss:1.0590 | val_loss:1.3186 | lr:2.500000e-05


45it [00:03, 12.42it/s]


[Epoch22] | train_loss:1.0537 | val_loss:1.3131 | lr:2.500000e-05


45it [00:03, 12.42it/s]


[Epoch23] | train_loss:1.0177 | val_loss:1.3198 | lr:2.500000e-05


45it [00:03, 12.43it/s]


[Epoch24] | train_loss:1.0590 | val_loss:1.3196 | lr:2.500000e-05


45it [00:03, 12.42it/s]


Epoch 00025: reducing learning rate of group 0 to 1.2500e-05.
[Epoch25] | train_loss:1.0428 | val_loss:1.3297 | lr:1.250000e-05


45it [00:03, 12.38it/s]


[Epoch26] | train_loss:1.0707 | val_loss:1.2112 | lr:1.250000e-05
<<<<<< reach best 1.211243775486946 >>>>>>


45it [00:03, 12.39it/s]


[Epoch27] | train_loss:1.0023 | val_loss:1.2139 | lr:1.250000e-05


45it [00:03, 12.43it/s]


[Epoch28] | train_loss:1.0154 | val_loss:1.2127 | lr:1.250000e-05


45it [00:03, 12.41it/s]


[Epoch29] | train_loss:1.0022 | val_loss:1.2157 | lr:1.250000e-05


45it [00:03, 12.41it/s]


[Epoch30] | train_loss:1.0153 | val_loss:1.2223 | lr:1.250000e-05


45it [00:03, 12.45it/s]


[Epoch31] | train_loss:1.0067 | val_loss:1.2156 | lr:1.250000e-05


45it [00:03, 12.42it/s]


Epoch 00032: reducing learning rate of group 0 to 6.2500e-06.
[Epoch32] | train_loss:1.0122 | val_loss:1.2185 | lr:6.250000e-06


45it [00:03, 12.41it/s]


[Epoch33] | train_loss:0.9907 | val_loss:1.1988 | lr:6.250000e-06
<<<<<< reach best 1.1988089382648468 >>>>>>


45it [00:03, 12.41it/s]


[Epoch34] | train_loss:1.0028 | val_loss:1.1960 | lr:6.250000e-06
<<<<<< reach best 1.195952335000038 >>>>>>


45it [00:03, 12.41it/s]


[Epoch35] | train_loss:0.9831 | val_loss:1.1974 | lr:6.250000e-06


45it [00:03, 12.40it/s]


[Epoch36] | train_loss:1.0027 | val_loss:1.1969 | lr:6.250000e-06


45it [00:03, 12.42it/s]


[Epoch37] | train_loss:0.9821 | val_loss:1.1963 | lr:6.250000e-06


45it [00:03, 12.42it/s]


[Epoch38] | train_loss:0.9936 | val_loss:1.1967 | lr:6.250000e-06


45it [00:03, 12.43it/s]


[Epoch39] | train_loss:0.9937 | val_loss:1.1987 | lr:6.250000e-06


45it [00:03, 12.42it/s]


Epoch 00040: reducing learning rate of group 0 to 3.1250e-06.
[Epoch40] | train_loss:0.9923 | val_loss:1.1979 | lr:3.125000e-06


45it [00:03, 12.34it/s]


[Epoch41] | train_loss:0.9684 | val_loss:1.1954 | lr:3.125000e-06
<<<<<< reach best 1.1954187542200088 >>>>>>


45it [00:03, 12.33it/s]


[Epoch42] | train_loss:0.9863 | val_loss:1.1949 | lr:3.125000e-06
<<<<<< reach best 1.1949182778596878 >>>>>>


45it [00:03, 12.39it/s]


[Epoch43] | train_loss:0.9723 | val_loss:1.1948 | lr:3.125000e-06
<<<<<< reach best 1.1947668462991714 >>>>>>


45it [00:03, 12.43it/s]


[Epoch44] | train_loss:0.9716 | val_loss:1.1952 | lr:3.125000e-06


45it [00:03, 12.42it/s]


[Epoch45] | train_loss:0.9812 | val_loss:1.1949 | lr:3.125000e-06


45it [00:03, 12.40it/s]


[Epoch46] | train_loss:0.9754 | val_loss:1.1952 | lr:3.125000e-06


45it [00:03, 12.39it/s]


[Epoch47] | train_loss:0.9579 | val_loss:1.1953 | lr:3.125000e-06


45it [00:03, 12.29it/s]


[Epoch48] | train_loss:0.9714 | val_loss:1.1950 | lr:3.125000e-06


45it [00:03, 12.28it/s]


Epoch 00049: reducing learning rate of group 0 to 1.5625e-06.
[Epoch49] | train_loss:0.9660 | val_loss:1.1950 | lr:1.562500e-06


45it [00:03, 12.25it/s]


[Epoch50] | train_loss:0.9664 | val_loss:1.1947 | lr:1.562500e-06
<<<<<< reach best 1.1946853280067444 >>>>>>


45it [00:03, 12.12it/s]


[Epoch51] | train_loss:0.9656 | val_loss:1.1948 | lr:1.562500e-06


45it [00:03, 12.40it/s]


[Epoch52] | train_loss:0.9700 | val_loss:1.1947 | lr:1.562500e-06


45it [00:03, 12.41it/s]


[Epoch53] | train_loss:0.9671 | val_loss:1.1948 | lr:1.562500e-06


45it [00:03, 12.43it/s]


[Epoch54] | train_loss:0.9718 | val_loss:1.1947 | lr:1.562500e-06


45it [00:03, 12.42it/s]


Epoch 00055: reducing learning rate of group 0 to 7.8125e-07.
[Epoch55] | train_loss:0.9631 | val_loss:1.1948 | lr:7.812500e-07


45it [00:03, 12.42it/s]


[Epoch56] | train_loss:0.9695 | val_loss:1.1947 | lr:7.812500e-07
<<<<<< reach best 1.1946550816297532 >>>>>>


45it [00:03, 12.42it/s]


[Epoch57] | train_loss:0.9617 | val_loss:1.1947 | lr:7.812500e-07


45it [00:03, 12.41it/s]


[Epoch58] | train_loss:0.9837 | val_loss:1.1947 | lr:7.812500e-07


45it [00:03, 12.41it/s]


[Epoch59] | train_loss:0.9655 | val_loss:1.1947 | lr:7.812500e-07


45it [00:03, 12.43it/s]


[Epoch60] | train_loss:0.9660 | val_loss:1.1947 | lr:7.812500e-07


45it [00:03, 12.38it/s]


Epoch 00061: reducing learning rate of group 0 to 3.9063e-07.
[Epoch61] | train_loss:0.9484 | val_loss:1.1947 | lr:3.906250e-07


45it [00:03, 12.42it/s]


[Epoch62] | train_loss:0.9540 | val_loss:1.1946 | lr:3.906250e-07
<<<<<< reach best 1.194635882973671 >>>>>>


45it [00:03, 12.41it/s]


[Epoch63] | train_loss:0.9618 | val_loss:1.1947 | lr:3.906250e-07


45it [00:03, 12.42it/s]


[Epoch64] | train_loss:0.9668 | val_loss:1.1947 | lr:3.906250e-07


45it [00:03, 12.42it/s]


[Epoch65] | train_loss:0.9743 | val_loss:1.1947 | lr:3.906250e-07


45it [00:03, 12.40it/s]


[Epoch66] | train_loss:0.9572 | val_loss:1.1947 | lr:3.906250e-07


45it [00:03, 12.42it/s]


[Epoch67] | train_loss:0.9538 | val_loss:1.1947 | lr:3.906250e-07


45it [00:03, 12.29it/s]


Epoch 00068: reducing learning rate of group 0 to 1.9531e-07.
[Epoch68] | train_loss:0.9591 | val_loss:1.1947 | lr:1.953125e-07


45it [00:03, 12.13it/s]


[Epoch69] | train_loss:0.9614 | val_loss:1.1946 | lr:1.953125e-07
<<<<<< reach best 1.1946299374103546 >>>>>>


45it [00:03, 12.35it/s]


[Epoch70] | train_loss:0.9613 | val_loss:1.1946 | lr:1.953125e-07


45it [00:03, 12.40it/s]


[Epoch71] | train_loss:0.9751 | val_loss:1.1946 | lr:1.953125e-07


45it [00:03, 12.41it/s]


[Epoch72] | train_loss:0.9691 | val_loss:1.1946 | lr:1.953125e-07
<<<<<< reach best 1.194629767537117 >>>>>>


45it [00:03, 12.38it/s]


[Epoch73] | train_loss:0.9555 | val_loss:1.1946 | lr:1.953125e-07


45it [00:03, 12.39it/s]


Epoch 00074: reducing learning rate of group 0 to 9.7656e-08.
[Epoch74] | train_loss:0.9737 | val_loss:1.1946 | lr:9.765625e-08


45it [00:03, 12.40it/s]


[Epoch75] | train_loss:0.9450 | val_loss:1.1946 | lr:9.765625e-08


45it [00:03, 12.42it/s]


[Epoch76] | train_loss:0.9468 | val_loss:1.1946 | lr:9.765625e-08


45it [00:03, 12.41it/s]


[Epoch77] | train_loss:0.9633 | val_loss:1.1946 | lr:9.765625e-08


45it [00:03, 12.41it/s]


[Epoch78] | train_loss:0.9684 | val_loss:1.1946 | lr:9.765625e-08
<<<<<< reach best 1.1946296691894531 >>>>>>


45it [00:03, 12.40it/s]


[Epoch79] | train_loss:0.9557 | val_loss:1.1946 | lr:9.765625e-08


45it [00:03, 12.40it/s]


Epoch 00080: reducing learning rate of group 0 to 4.8828e-08.
[Epoch80] | train_loss:0.9905 | val_loss:1.1946 | lr:4.882813e-08


45it [00:03, 12.38it/s]


[Epoch81] | train_loss:0.9600 | val_loss:1.1946 | lr:4.882813e-08


45it [00:03, 12.40it/s]


[Epoch82] | train_loss:0.9543 | val_loss:1.1946 | lr:4.882813e-08


45it [00:03, 12.39it/s]


[Epoch83] | train_loss:0.9693 | val_loss:1.1946 | lr:4.882813e-08


45it [00:03, 12.41it/s]


[Epoch84] | train_loss:0.9625 | val_loss:1.1946 | lr:4.882813e-08


45it [00:03, 12.40it/s]


[Epoch85] | train_loss:0.9546 | val_loss:1.1946 | lr:4.882813e-08


45it [00:03, 12.37it/s]


Epoch 00086: reducing learning rate of group 0 to 2.4414e-08.
[Epoch86] | train_loss:0.9727 | val_loss:1.1946 | lr:2.441406e-08


45it [00:03, 12.39it/s]


[Epoch87] | train_loss:0.9615 | val_loss:1.1946 | lr:2.441406e-08


45it [00:03, 12.42it/s]


[Epoch88] | train_loss:0.9540 | val_loss:1.1946 | lr:2.441406e-08


45it [00:03, 12.42it/s]


[Epoch89] | train_loss:0.9727 | val_loss:1.1946 | lr:2.441406e-08


45it [00:03, 12.37it/s]


[Epoch90] | train_loss:0.9571 | val_loss:1.1946 | lr:2.441406e-08


45it [00:03, 12.40it/s]


[Epoch91] | train_loss:0.9508 | val_loss:1.1946 | lr:2.441406e-08


45it [00:03, 12.42it/s]


Epoch 00092: reducing learning rate of group 0 to 1.2207e-08.
[Epoch92] | train_loss:0.9637 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.41it/s]


[Epoch93] | train_loss:0.9627 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.41it/s]


[Epoch94] | train_loss:0.9508 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.40it/s]


[Epoch95] | train_loss:0.9692 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.43it/s]


[Epoch96] | train_loss:0.9525 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.41it/s]


[Epoch97] | train_loss:0.9735 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.38it/s]


[Epoch98] | train_loss:0.9747 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.38it/s]


[Epoch99] | train_loss:0.9685 | val_loss:1.1946 | lr:1.220703e-08
<<<<<< reach best 1.194629517197609 >>>>>>


45it [00:03, 12.39it/s]


[Epoch100] | train_loss:0.9555 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.42it/s]


[Epoch101] | train_loss:0.9736 | val_loss:1.1946 | lr:1.220703e-08
<<<<<< reach best 1.1946291387081147 >>>>>>


45it [00:03, 12.43it/s]


[Epoch102] | train_loss:0.9700 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.40it/s]


[Epoch103] | train_loss:0.9702 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.39it/s]


[Epoch104] | train_loss:0.9721 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.41it/s]


[Epoch105] | train_loss:0.9557 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.42it/s]


[Epoch106] | train_loss:0.9739 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.42it/s]


[Epoch107] | train_loss:0.9681 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.41it/s]


[Epoch108] | train_loss:0.9670 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.40it/s]


[Epoch109] | train_loss:0.9484 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.42it/s]


[Epoch110] | train_loss:0.9647 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.41it/s]


[Epoch111] | train_loss:0.9561 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.41it/s]


[Epoch112] | train_loss:0.9619 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.41it/s]


[Epoch113] | train_loss:0.9589 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.41it/s]


[Epoch114] | train_loss:0.9588 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.40it/s]


[Epoch115] | train_loss:0.9738 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.44it/s]


[Epoch116] | train_loss:0.9621 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.42it/s]


[Epoch117] | train_loss:0.9637 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.27it/s]


[Epoch118] | train_loss:0.9640 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.40it/s]


[Epoch119] | train_loss:0.9599 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.43it/s]


[Epoch120] | train_loss:0.9572 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.43it/s]


[Epoch121] | train_loss:0.9572 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.41it/s]


[Epoch122] | train_loss:0.9555 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.35it/s]


[Epoch123] | train_loss:0.9567 | val_loss:1.1946 | lr:1.220703e-08
<<<<<< reach best 1.1946289479732513 >>>>>>


45it [00:03, 12.43it/s]


[Epoch124] | train_loss:0.9580 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.39it/s]


[Epoch125] | train_loss:0.9558 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.40it/s]


[Epoch126] | train_loss:0.9763 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.40it/s]


[Epoch127] | train_loss:0.9420 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.22it/s]


[Epoch128] | train_loss:0.9563 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.32it/s]


[Epoch129] | train_loss:0.9581 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.34it/s]


[Epoch130] | train_loss:0.9769 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.31it/s]


[Epoch131] | train_loss:0.9628 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.39it/s]


[Epoch132] | train_loss:0.9676 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.38it/s]


[Epoch133] | train_loss:0.9577 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.38it/s]


[Epoch134] | train_loss:0.9751 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.38it/s]


[Epoch135] | train_loss:0.9484 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.38it/s]


[Epoch136] | train_loss:0.9671 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.40it/s]


[Epoch137] | train_loss:0.9871 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.39it/s]


[Epoch138] | train_loss:0.9651 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.38it/s]


[Epoch139] | train_loss:0.9640 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.39it/s]


[Epoch140] | train_loss:0.9642 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.40it/s]


[Epoch141] | train_loss:0.9696 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.41it/s]


[Epoch142] | train_loss:0.9524 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.41it/s]


[Epoch143] | train_loss:0.9657 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.38it/s]


[Epoch144] | train_loss:0.9689 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.39it/s]


[Epoch145] | train_loss:0.9586 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.36it/s]


[Epoch146] | train_loss:0.9721 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.38it/s]


[Epoch147] | train_loss:0.9690 | val_loss:1.1946 | lr:1.220703e-08


45it [00:03, 12.38it/s]


[Epoch148] | train_loss:0.9532 | val_loss:1.1946 | lr:1.220703e-08
<<<<<< without improvement in 25 epoch, early stopping, best score 1.1946 >>>>>>
best score 1.1946289479732513
