
- Basically a 1D CNN starter with bandpass. Filter size hard-coded from [https://www.kaggle.com/kit716/grav-wave-detection](https://www.kaggle.com/kit716/grav-wave-detection) which uses the simple architecture from https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.120.141103 
- Added inference to @hidehisaarai1213 's PyTorch starter, iteration order changed from Y.Nakama's pipeline: "iter on loader first then load model" to "load model first then iter the loader"


## Libraries

In [1]:
import os
import time
import math
import random
from pathlib import Path

import numpy as np
import pandas as pd
import scipy as sp
from scipy import signal
import tensorflow as tf  # for reading TFRecord Dataset
import tensorflow_datasets as tfds  # for making tf.data.Dataset to return numpy arrays
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
#from kaggle_datasets import KaggleDatasets
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold
from tqdm import tqdm
import os

In [2]:
SAVEDIR = Path("./")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## CFG

In [3]:
class CFG:
    debug = False
    print_freq = 1000
    num_workers = 4
    scheduler = "CosineAnnealingLR"
    model_name = "1dcnn"
    epochs = 5
    T_max = 3
    lr = 1e-4
    min_lr = 1e-7
    batch_size = 1024
    val_batch_size = 100
    weight_decay = 1e-5
    gradient_accumulation_steps = 1
    max_grad_norm = 1000
    seed = 42
    target_size = 1
    target_col = "target"
    n_fold = 5
    trn_fold = [0, 1, 2, 3]  # [0, 1, 2, 3, 4]
    train = True
    bandpass_params = dict(lf=20, 
                           hf=500)

## Utils

In [4]:
# ====================================================
# Utils
# ====================================================
def get_score(y_true, y_pred):
    score = roc_auc_score(y_true, y_pred)
    return score


def init_logger(log_file=SAVEDIR / 'train.log'):
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = init_logger()


def seed_torch(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

seed_torch(seed=CFG.seed)

## TFRecord Loader

This is the heart of this notebook. Instead of using PyTorch's Dataset and DataLoader, here I define custom Loader that reads samples from TFRecords.

FYI, there's a library that does the same thing, but its implementation is not optimized, so it's slower.

https://github.com/vahidk/tfrecord

In [5]:
gcs_paths = []
all_files = []
path = "./data1/train/"
n_trial = 0
#print(j)
for dirname, _, filenames in os.walk(path):
    for filename in filenames:
        #print(filename)
        #print(os.path.join(dirname, filename))
        gcs_path = os.path.join(dirname, filename)
        gcs_paths.append(gcs_path)
        print(gcs_path)
        all_files.extend(np.sort(np.array(tf.io.gfile.glob(gcs_path))))
            
    
print("train_files: ", len(all_files))
all_files = np.array(all_files)

./data1/train/train0.tfrecords
./data1/train/train1.tfrecords
./data1/train/train10.tfrecords
./data1/train/train11.tfrecords
./data1/train/train12.tfrecords
./data1/train/train13.tfrecords
./data1/train/train14.tfrecords
./data1/train/train15.tfrecords
./data1/train/train16.tfrecords
./data1/train/train17.tfrecords
./data1/train/train18.tfrecords
./data1/train/train19.tfrecords
./data1/train/train2.tfrecords
./data1/train/train3.tfrecords
./data1/train/train4.tfrecords
./data1/train/train5.tfrecords
./data1/train/train6.tfrecords
./data1/train/train7.tfrecords
./data1/train/train8.tfrecords
./data1/train/train9.tfrecords
train_files:  20


In [6]:

for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [7]:
def count_data_items(fileids, train=True):
    """
    Count the number of samples.
    Each of the TFRecord datasets is designed to contain 28000 samples for train
    22500 for test.
    """
    sizes = 28000 if train else 22500
    return len(fileids) * sizes


AUTO = tf.data.experimental.AUTOTUNE

## Bandpass

Modified from various notebooks and https://www.kaggle.com/c/g2net-gravitational-wave-detection/discussion/261721#1458564

In [8]:
def bandpass(x, lf=20, hf=500, order=8, sr=2048):
    '''
    Cell 33 of https://www.gw-openscience.org/LVT151012data/LOSC_Event_tutorial_LVT151012.html
    https://scipy-cookbook.readthedocs.io/items/ButterworthBandpass.html
    '''
    sos = signal.butter(order, [lf, hf], btype="bandpass", output="sos", fs=sr)
    normalization = np.sqrt((hf - lf) / (sr / 2))
    if x.ndim ==2:
        for i in range(3):
            x[i] = signal.sosfilt(sos, x[i]) * normalization
    elif x.ndim == 3: # batch
        for i in range(x.shape[0]):
            for j in range(3):
                x[i, j] = signal.sosfilt(sos, x[i, j]) * normalization
    return x

In [9]:
def prepare_wave(wave):
    wave = tf.reshape(tf.io.decode_raw(wave, tf.float64), (3, 4096))
    normalized_waves = []
    scaling = tf.constant([1.5e-20, 1.5e-20, 0.5e-20], dtype=tf.float64)
    for i in range(3):
#         normalized_wave = wave[i] / tf.math.reduce_max(wave[i])
        normalized_wave = wave[i] / scaling[i]
        normalized_waves.append(normalized_wave)
    wave = tf.stack(normalized_waves, axis=0)
    wave = tf.cast(wave, tf.float32)
    return wave


def read_labeled_tfrecord(example):
    tfrec_format = {
        "wave": tf.io.FixedLenFeature([], tf.string),
        "wave_id": tf.io.FixedLenFeature([], tf.string),
        "target": tf.io.FixedLenFeature([], tf.int64)
    }
    example = tf.io.parse_single_example(example, tfrec_format)
    return prepare_wave(example["wave"]), tf.reshape(tf.cast(example["target"], tf.float32), [1]), example["wave_id"]


def read_unlabeled_tfrecord(example, return_image_id):
    tfrec_format = {
        "wave": tf.io.FixedLenFeature([], tf.string),
        "wave_id": tf.io.FixedLenFeature([], tf.string)
    }
    example = tf.io.parse_single_example(example, tfrec_format)
    return prepare_wave(example["wave"]), example["wave_id"] if return_image_id else 0


def get_dataset(files, batch_size=16, repeat=False, cache=False, 
                shuffle=False, labeled=True, return_image_ids=True):
    ds = tf.data.TFRecordDataset(files, num_parallel_reads=AUTO, compression_type="GZIP")
    if cache:
        # You'll need around 15GB RAM if you'd like to cache val dataset, and 50~60GB RAM for train dataset.
        ds = ds.cache()

    if repeat:
        ds = ds.repeat()

    if shuffle:
        ds = ds.shuffle(1024 * 2)
        opt = tf.data.Options()
        opt.experimental_deterministic = False
        ds = ds.with_options(opt)

    if labeled:
        ds = ds.map(read_labeled_tfrecord, num_parallel_calls=AUTO)
    else:
        ds = ds.map(lambda example: read_unlabeled_tfrecord(example, return_image_ids), num_parallel_calls=AUTO)

    ds = ds.batch(batch_size)
    ds = ds.prefetch(AUTO)
    return tfds.as_numpy(ds)

In [10]:
class TFRecordDataLoader:
    def __init__(self, files, batch_size=32, cache=False, train=True, 
                              repeat=False, shuffle=False, labeled=True, 
                              return_image_ids=True):
        self.ds = get_dataset(
            files, 
            batch_size=batch_size,
            cache=cache,
            repeat=repeat,
            shuffle=shuffle,
            labeled=labeled,
            return_image_ids=return_image_ids)
        
        self.num_examples = count_data_items(files, labeled)

        self.batch_size = batch_size
        self.labeled = labeled
        self.return_image_ids = return_image_ids
        self._iterator = None
    
    def __iter__(self):
        if self._iterator is None:
            self._iterator = iter(self.ds)
        else:
            self._reset()
        return self._iterator

    def _reset(self):
        self._iterator = iter(self.ds)

    def __next__(self):
        batch = next(self._iterator)
        return batch

    def __len__(self):
        n_batches = self.num_examples // self.batch_size
        if self.num_examples % self.batch_size == 0:
            return n_batches
        else:
            return n_batches + 1

## MODEL

In [11]:
class CNN1d(nn.Module):
    """1D convolutional neural network. Classifier of the gravitational waves.
    Architecture from there https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.120.141103
    """

    def __init__(self, debug=False):
        super().__init__()
        self.cnn1 = nn.Sequential(
            nn.Conv1d(3, 64, kernel_size=64),
            nn.BatchNorm1d(64),
            nn.ELU(),
        )
        self.cnn2 = nn.Sequential(
            nn.Conv1d(64, 64, kernel_size=32),
            nn.AvgPool1d(kernel_size=8),
            nn.BatchNorm1d(64),
            nn.ELU(),
        )
        self.cnn3 = nn.Sequential(
            nn.Conv1d(64, 128, kernel_size=32),
            nn.BatchNorm1d(128),
            nn.ELU(),
        )
        self.cnn4 = nn.Sequential(
            nn.Conv1d(128, 128, kernel_size=16),
            nn.AvgPool1d(kernel_size=6),
            nn.BatchNorm1d(128),
            nn.ELU(),
        )
        self.cnn5 = nn.Sequential(
            nn.Conv1d(128, 256, kernel_size=16),
            nn.BatchNorm1d(256),
            nn.ELU(),
        )
        self.cnn6 = nn.Sequential(
            nn.Conv1d(256, 256, kernel_size=16),
            nn.MaxPool1d(kernel_size=4),
            nn.BatchNorm1d(256),
            nn.ELU(),
        )
        self.fc1 = nn.Sequential(
            nn.Linear(256 * 11, 64),
            nn.BatchNorm1d(64),
            nn.Dropout(0.4),
            nn.ELU(),
        )
        self.fc2 = nn.Sequential(
            nn.Linear(64, 64),
            nn.BatchNorm1d(64),
            nn.Dropout(0.4),
            nn.ELU(),
        )
        self.fc3 = nn.Sequential(
            nn.Linear(64, 1),
        )
        self.debug = debug

    def forward(self, x, pos=None):
        x = self.cnn1(x)
        x = self.cnn2(x)
        x = self.cnn3(x)
        x = self.cnn4(x)
        x = self.cnn5(x)
        x = self.cnn6(x)
        x = x.flatten(start_dim=1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x


## Helper functions

In [12]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (remain %s)' % (asMinutes(s), asMinutes(rs))


def max_memory_allocated():
    MB = 1024.0 * 1024.0
    mem = torch.cuda.max_memory_allocated() / MB
    return f"{mem:.0f} MB"

## Trainer

In [13]:
def train_fn(files, model, criterion, optimizer, epoch, scheduler, device):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    scores = AverageMeter()

    # switch to train mode
    model.train()
    start = end = time.time()
    global_step = 0

    train_loader = TFRecordDataLoader(
        files, batch_size=CFG.batch_size, 
        shuffle=True)
    for step, d in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        x = bandpass(d[0], **CFG.bandpass_params)
        x = torch.from_numpy(x).to(device)
        labels = torch.from_numpy(d[1]).to(device)

        batch_size = labels.size(0)
        y_preds = model(x)
        loss = criterion(y_preds.view(-1), labels.view(-1))
        # record loss
        losses.update(loss.item(), batch_size)
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        loss.backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
            global_step += 1
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        print('Epoch: [{0}/{1}][{2}/{3}] '
              'Loss: {loss.val:.4f}({loss.avg:.4f}) '
              'Grad: {grad_norm:.4f}  '
              'LR: {lr:.6f}  '
              'Elapsed: {remain:s} '
              'Max mem: {mem:s}'
              .format(
               epoch+1, CFG.epochs, step, len(train_loader),
               loss=losses,
               grad_norm=grad_norm,
               lr=scheduler.get_last_lr()[0],
               remain=timeSince(start, float(step + 1) / len(train_loader)),
               mem=max_memory_allocated()))
    return losses.avg


def valid_fn(files, model, criterion, device):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    scores = AverageMeter()
    # switch to evaluation mode
    model.eval()
    filenames = []
    targets = []
    preds = []
    start = end = time.time()
    valid_loader = TFRecordDataLoader(
        files, batch_size=CFG.batch_size * 2, shuffle=False)
    for step, d in enumerate(valid_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        
        targets.extend(d[1].reshape(-1).tolist())
        filenames.extend([f.decode("UTF-8") for f in d[2]])
        x = bandpass(d[0], **CFG.bandpass_params)
        x = torch.from_numpy(x).to(device)
        labels = torch.from_numpy(d[1]).to(device)

        batch_size = labels.size(0)
        # compute loss
        with torch.no_grad():
            y_preds = model(x)
        loss = criterion(y_preds.view(-1), labels.view(-1))
        losses.update(loss.item(), batch_size)

        preds.append(y_preds.sigmoid().to('cpu').numpy())
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        print('EVAL: [{0}/{1}] '
              'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
              'Elapsed {remain:s} '
              'Loss: {loss.val:.4f}({loss.avg:.4f}) '
              .format(
               step, len(valid_loader), batch_time=batch_time,
               data_time=data_time, loss=losses,
               remain=timeSince(start, float(step+1)/len(valid_loader)),
               ))
    predictions = np.concatenate(preds).reshape(-1)
    return losses.avg, predictions, np.array(targets), np.array(filenames)

## Train loop

In [14]:
# ====================================================
# Train loop
# ====================================================
def train_loop(train_tfrecords: np.ndarray, val_tfrecords: np.ndarray, fold: int):
    
    LOGGER.info(f"========== fold: {fold} training ==========")
    
    # ====================================================
    # scheduler 
    # ====================================================
    def get_scheduler(optimizer):
        if CFG.scheduler=='ReduceLROnPlateau':
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 
                                                             mode='min', 
                                                             factor=CFG.factor, 
                                                             patience=CFG.patience, 
                                                             verbose=True, 
                                                             eps=CFG.eps)
        elif CFG.scheduler=='CosineAnnealingLR':
            scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, 
                                                             T_max=CFG.T_max, 
                                                             eta_min=CFG.min_lr, 
                                                             last_epoch=-1)
        elif CFG.scheduler=='CosineAnnealingWarmRestarts':
            scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 
                                                                       T_0=CFG.T_0, 
                                                                       T_mult=1, 
                                                                       eta_min=CFG.min_lr, 
                                                                       last_epoch=-1)
        return scheduler

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CNN1d()
    model.to(device)
    model = nn.DataParallel(model)


    optimizer = optim.Adam(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay)
    scheduler = get_scheduler(optimizer)

    # ====================================================
    # loop
    # ====================================================
    criterion = nn.BCEWithLogitsLoss()

    best_score = 0.
    best_loss = np.inf
    
    for epoch in range(CFG.epochs):
        print("\n\n")
        start_time = time.time()
        
        # train
        avg_loss = train_fn(train_tfrecords, model, criterion, optimizer, epoch, scheduler, device)

        # eval
        avg_val_loss, preds, targets, files = valid_fn(val_tfrecords, model, criterion, device)
        valid_result_df = pd.DataFrame({"target": targets, "preds": preds, "id": files})
        
        if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
            scheduler.step(avg_val_loss)
        elif isinstance(scheduler, optim.lr_scheduler.CosineAnnealingLR):
            scheduler.step()
        elif isinstance(scheduler, optim.lr_scheduler.CosineAnnealingWarmRestarts):
            scheduler.step()

        # scoring
        score = get_score(targets, preds)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')

        if score > best_score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(), 
                        'preds': preds},
                        SAVEDIR / f'{CFG.model_name}_fold{fold}_best_score.pth')
        
        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            LOGGER.info(f'Epoch {epoch+1} - Save Best Loss: {best_loss:.4f} Model')
            torch.save({'model': model.state_dict(), 
                        'preds': preds},
                        SAVEDIR / f'{CFG.model_name}_fold{fold}_best_loss.pth')
    
    valid_result_df["preds"] = torch.load(SAVEDIR / f"{CFG.model_name}_fold{fold}_best_loss.pth",
                                          map_location="cpu")["preds"]

    return valid_result_df

In [15]:
torch.cuda.empty_cache()

In [None]:
def get_result(result_df):
    preds = result_df['preds'].values
    labels = result_df[CFG.target_col].values
    score = get_score(labels, preds)
    LOGGER.info(f'Score: {score:<.4f}')

if CFG.train:
    # train 
    oof_df = pd.DataFrame()
    kf = KFold(n_splits=CFG.n_fold, shuffle=True, random_state=CFG.seed)

    folds = list(kf.split(all_files))
    for fold in range(CFG.n_fold):
        if fold in CFG.trn_fold:
            trn_idx, val_idx = folds[fold]
            train_files = all_files[trn_idx]
            valid_files = all_files[val_idx]
            _oof_df = train_loop(train_files, valid_files, fold)
            oof_df = pd.concat([oof_df, _oof_df])
            LOGGER.info(f"========== fold: {fold} result ==========")
            get_result(_oof_df)
    # CV result
    LOGGER.info(f"========== CV ==========")
    get_result(oof_df)
    # save result
    oof_df.to_csv(SAVEDIR / 'oof_df.csv', index=False)










Epoch: [1/5][0/438] Loss: 0.7359(0.7359) Grad: 3.6658  LR: 0.000100  Elapsed: 0m 8s (remain 58m 49s) Max mem: 2935 MB
Epoch: [1/5][1/438] Loss: 0.7347(0.7353) Grad: 3.4208  LR: 0.000100  Elapsed: 0m 10s (remain 36m 32s) Max mem: 2962 MB
Epoch: [1/5][2/438] Loss: 0.7415(0.7374) Grad: 3.2382  LR: 0.000100  Elapsed: 0m 11s (remain 28m 59s) Max mem: 2962 MB
Epoch: [1/5][3/438] Loss: 0.7387(0.7377) Grad: 3.1395  LR: 0.000100  Elapsed: 0m 14s (remain 25m 41s) Max mem: 2962 MB
Epoch: [1/5][4/438] Loss: 0.7437(0.7389) Grad: 2.9424  LR: 0.000100  Elapsed: 0m 16s (remain 23m 39s) Max mem: 2962 MB
Epoch: [1/5][5/438] Loss: 0.7345(0.7382) Grad: 2.8518  LR: 0.000100  Elapsed: 0m 18s (remain 22m 12s) Max mem: 2962 MB
Epoch: [1/5][6/438] Loss: 0.7068(0.7337) Grad: 2.6047  LR: 0.000100  Elapsed: 0m 20s (remain 21m 10s) Max mem: 2962 MB
Epoch: [1/5][7/438] Loss: 0.7304(0.7333) Grad: 2.5366  LR: 0.000100  Elapsed: 0m 22s (remain 20m 20s) Max mem: 2962 MB
Epoch: [1/5][8/438] Loss: 0.7396(0.7340) Grad: 2.

Epoch: [1/5][69/438] Loss: 0.6249(0.6873) Grad: 1.3371  LR: 0.000100  Elapsed: 2m 33s (remain 13m 25s) Max mem: 2962 MB
Epoch: [1/5][70/438] Loss: 0.6194(0.6863) Grad: 1.2632  LR: 0.000100  Elapsed: 2m 35s (remain 13m 22s) Max mem: 2962 MB
Epoch: [1/5][71/438] Loss: 0.6277(0.6855) Grad: 1.2805  LR: 0.000100  Elapsed: 2m 37s (remain 13m 18s) Max mem: 2962 MB
Epoch: [1/5][72/438] Loss: 0.6302(0.6847) Grad: 1.4031  LR: 0.000100  Elapsed: 2m 38s (remain 13m 14s) Max mem: 2962 MB
Epoch: [1/5][73/438] Loss: 0.6067(0.6837) Grad: 1.6333  LR: 0.000100  Elapsed: 2m 40s (remain 13m 10s) Max mem: 2962 MB
Epoch: [1/5][74/438] Loss: 0.6063(0.6826) Grad: 1.4012  LR: 0.000100  Elapsed: 2m 42s (remain 13m 7s) Max mem: 2962 MB
Epoch: [1/5][75/438] Loss: 0.6307(0.6820) Grad: 1.3638  LR: 0.000100  Elapsed: 2m 44s (remain 13m 5s) Max mem: 2962 MB
Epoch: [1/5][76/438] Loss: 0.6210(0.6812) Grad: 1.8665  LR: 0.000100  Elapsed: 2m 47s (remain 13m 3s) Max mem: 2962 MB
Epoch: [1/5][77/438] Loss: 0.5946(0.6801) G

Epoch: [1/5][138/438] Loss: 0.5498(0.6316) Grad: 1.2637  LR: 0.000100  Elapsed: 5m 0s (remain 10m 45s) Max mem: 2962 MB
Epoch: [1/5][139/438] Loss: 0.5369(0.6309) Grad: 1.3648  LR: 0.000100  Elapsed: 5m 2s (remain 10m 42s) Max mem: 2962 MB
Epoch: [1/5][140/438] Loss: 0.5271(0.6302) Grad: 0.8797  LR: 0.000100  Elapsed: 5m 4s (remain 10m 40s) Max mem: 2962 MB
Epoch: [1/5][141/438] Loss: 0.5371(0.6296) Grad: 1.3014  LR: 0.000100  Elapsed: 5m 6s (remain 10m 38s) Max mem: 2962 MB
Epoch: [1/5][142/438] Loss: 0.5463(0.6290) Grad: 1.3696  LR: 0.000100  Elapsed: 5m 8s (remain 10m 36s) Max mem: 2962 MB
Epoch: [1/5][143/438] Loss: 0.5271(0.6283) Grad: 1.0602  LR: 0.000100  Elapsed: 5m 10s (remain 10m 33s) Max mem: 2962 MB
Epoch: [1/5][144/438] Loss: 0.5294(0.6276) Grad: 1.7081  LR: 0.000100  Elapsed: 5m 12s (remain 10m 31s) Max mem: 2962 MB
Epoch: [1/5][145/438] Loss: 0.5443(0.6270) Grad: 2.2698  LR: 0.000100  Elapsed: 5m 14s (remain 10m 28s) Max mem: 2962 MB
Epoch: [1/5][146/438] Loss: 0.5365(0.

Epoch: [1/5][207/438] Loss: 0.5372(0.5997) Grad: 1.0094  LR: 0.000100  Elapsed: 7m 12s (remain 7m 58s) Max mem: 2962 MB
Epoch: [1/5][208/438] Loss: 0.5250(0.5993) Grad: 1.1848  LR: 0.000100  Elapsed: 7m 14s (remain 7m 55s) Max mem: 2962 MB
Epoch: [1/5][209/438] Loss: 0.5502(0.5991) Grad: 1.2271  LR: 0.000100  Elapsed: 7m 16s (remain 7m 53s) Max mem: 2962 MB
Epoch: [1/5][210/438] Loss: 0.5238(0.5987) Grad: 1.2841  LR: 0.000100  Elapsed: 7m 18s (remain 7m 51s) Max mem: 2962 MB
Epoch: [1/5][211/438] Loss: 0.5322(0.5984) Grad: 1.0154  LR: 0.000100  Elapsed: 7m 20s (remain 7m 49s) Max mem: 2962 MB
Epoch: [1/5][212/438] Loss: 0.5315(0.5981) Grad: 0.8147  LR: 0.000100  Elapsed: 7m 22s (remain 7m 47s) Max mem: 2962 MB
Epoch: [1/5][213/438] Loss: 0.5423(0.5979) Grad: 1.1771  LR: 0.000100  Elapsed: 7m 24s (remain 7m 44s) Max mem: 2962 MB
Epoch: [1/5][214/438] Loss: 0.5263(0.5975) Grad: 1.9631  LR: 0.000100  Elapsed: 7m 25s (remain 7m 42s) Max mem: 2962 MB
Epoch: [1/5][215/438] Loss: 0.5348(0.597

Epoch: [1/5][276/438] Loss: 0.5400(0.5807) Grad: 1.2347  LR: 0.000100  Elapsed: 9m 19s (remain 5m 25s) Max mem: 2962 MB
Epoch: [1/5][277/438] Loss: 0.5066(0.5804) Grad: 1.5475  LR: 0.000100  Elapsed: 9m 21s (remain 5m 22s) Max mem: 2962 MB
Epoch: [1/5][278/438] Loss: 0.5415(0.5803) Grad: 1.4915  LR: 0.000100  Elapsed: 9m 22s (remain 5m 20s) Max mem: 2962 MB
Epoch: [1/5][279/438] Loss: 0.5109(0.5800) Grad: 1.5587  LR: 0.000100  Elapsed: 9m 24s (remain 5m 18s) Max mem: 2962 MB
Epoch: [1/5][280/438] Loss: 0.5027(0.5798) Grad: 0.8757  LR: 0.000100  Elapsed: 9m 26s (remain 5m 16s) Max mem: 2962 MB
Epoch: [1/5][281/438] Loss: 0.5225(0.5796) Grad: 1.1634  LR: 0.000100  Elapsed: 9m 28s (remain 5m 14s) Max mem: 2962 MB
Epoch: [1/5][282/438] Loss: 0.5209(0.5793) Grad: 0.8615  LR: 0.000100  Elapsed: 9m 29s (remain 5m 12s) Max mem: 2962 MB
Epoch: [1/5][283/438] Loss: 0.4865(0.5790) Grad: 1.0707  LR: 0.000100  Elapsed: 9m 31s (remain 5m 10s) Max mem: 2962 MB
Epoch: [1/5][284/438] Loss: 0.5016(0.578

Epoch: [1/5][345/438] Loss: 0.5110(0.5666) Grad: 1.5554  LR: 0.000100  Elapsed: 11m 22s (remain 3m 1s) Max mem: 2962 MB
Epoch: [1/5][346/438] Loss: 0.5059(0.5664) Grad: 1.8112  LR: 0.000100  Elapsed: 11m 24s (remain 2m 59s) Max mem: 2962 MB
Epoch: [1/5][347/438] Loss: 0.4747(0.5662) Grad: 1.1198  LR: 0.000100  Elapsed: 11m 25s (remain 2m 57s) Max mem: 2962 MB
Epoch: [1/5][348/438] Loss: 0.5176(0.5660) Grad: 0.8820  LR: 0.000100  Elapsed: 11m 27s (remain 2m 55s) Max mem: 2962 MB
Epoch: [1/5][349/438] Loss: 0.5018(0.5658) Grad: 1.0429  LR: 0.000100  Elapsed: 11m 29s (remain 2m 53s) Max mem: 2962 MB
Epoch: [1/5][350/438] Loss: 0.4983(0.5657) Grad: 1.9708  LR: 0.000100  Elapsed: 11m 31s (remain 2m 51s) Max mem: 2962 MB
Epoch: [1/5][351/438] Loss: 0.4882(0.5654) Grad: 1.0909  LR: 0.000100  Elapsed: 11m 33s (remain 2m 49s) Max mem: 2962 MB
Epoch: [1/5][352/438] Loss: 0.4869(0.5652) Grad: 1.7337  LR: 0.000100  Elapsed: 11m 35s (remain 2m 47s) Max mem: 2962 MB
Epoch: [1/5][353/438] Loss: 0.488

Epoch: [1/5][413/438] Loss: 0.4914(0.5555) Grad: 1.0162  LR: 0.000100  Elapsed: 13m 24s (remain 0m 46s) Max mem: 2962 MB
Epoch: [1/5][414/438] Loss: 0.4949(0.5553) Grad: 1.7142  LR: 0.000100  Elapsed: 13m 25s (remain 0m 44s) Max mem: 2962 MB
Epoch: [1/5][415/438] Loss: 0.4721(0.5551) Grad: 1.9704  LR: 0.000100  Elapsed: 13m 27s (remain 0m 42s) Max mem: 2962 MB
Epoch: [1/5][416/438] Loss: 0.5181(0.5551) Grad: 1.2652  LR: 0.000100  Elapsed: 13m 29s (remain 0m 40s) Max mem: 2962 MB
Epoch: [1/5][417/438] Loss: 0.5119(0.5550) Grad: 1.0652  LR: 0.000100  Elapsed: 13m 31s (remain 0m 38s) Max mem: 2962 MB
Epoch: [1/5][418/438] Loss: 0.4888(0.5548) Grad: 1.0959  LR: 0.000100  Elapsed: 13m 33s (remain 0m 36s) Max mem: 2962 MB
Epoch: [1/5][419/438] Loss: 0.5206(0.5547) Grad: 1.8057  LR: 0.000100  Elapsed: 13m 35s (remain 0m 34s) Max mem: 2962 MB
Epoch: [1/5][420/438] Loss: 0.4751(0.5545) Grad: 1.1901  LR: 0.000100  Elapsed: 13m 36s (remain 0m 32s) Max mem: 2962 MB
Epoch: [1/5][421/438] Loss: 0.45

Epoch 1 - avg_train_loss: 0.5522  avg_val_loss: 0.4954  time: 968s
Epoch 1 - Score: 0.8350
Epoch 1 - Save Best Score: 0.8350 Model
Epoch 1 - Save Best Loss: 0.4954 Model


EVAL: [54/55] Data 0.011 (0.023) Elapsed 2m 2s (remain 0m 0s) Loss: 0.4750(0.4954) 







Epoch: [2/5][0/438] Loss: 0.5126(0.5126) Grad: 1.5570  LR: 0.000075  Elapsed: 0m 3s (remain 22m 53s) Max mem: 2963 MB
Epoch: [2/5][1/438] Loss: 0.4565(0.4846) Grad: 1.0155  LR: 0.000075  Elapsed: 0m 5s (remain 18m 23s) Max mem: 2963 MB
Epoch: [2/5][2/438] Loss: 0.4936(0.4876) Grad: 1.0993  LR: 0.000075  Elapsed: 0m 6s (remain 16m 28s) Max mem: 2963 MB
Epoch: [2/5][3/438] Loss: 0.4964(0.4898) Grad: 0.9877  LR: 0.000075  Elapsed: 0m 8s (remain 15m 35s) Max mem: 2963 MB
Epoch: [2/5][4/438] Loss: 0.4833(0.4885) Grad: 1.0898  LR: 0.000075  Elapsed: 0m 10s (remain 15m 6s) Max mem: 2963 MB
Epoch: [2/5][5/438] Loss: 0.4691(0.4852) Grad: 1.3614  LR: 0.000075  Elapsed: 0m 12s (remain 14m 39s) Max mem: 2963 MB
Epoch: [2/5][6/438] Loss: 0.5068(0.4883) Grad: 1.1004  LR: 0.000075  Elapsed: 0m 13s (remain 14m 19s) Max mem: 2963 MB
Epoch: [2/5][7/438] Loss: 0.4746(0.4866) Grad: 0.9388  LR: 0.000075  Elapsed: 0m 15s (remain 14m 2s) Max mem: 2963 MB
Epoch: [2/5][8/438] Loss: 0.4477(0.4823) Grad: 1.1499 

Epoch: [2/5][69/438] Loss: 0.5077(0.4837) Grad: 1.0584  LR: 0.000075  Elapsed: 2m 3s (remain 10m 49s) Max mem: 2963 MB
Epoch: [2/5][70/438] Loss: 0.4822(0.4837) Grad: 1.3267  LR: 0.000075  Elapsed: 2m 5s (remain 10m 47s) Max mem: 2963 MB
Epoch: [2/5][71/438] Loss: 0.4604(0.4834) Grad: 1.8169  LR: 0.000075  Elapsed: 2m 6s (remain 10m 44s) Max mem: 2963 MB
Epoch: [2/5][72/438] Loss: 0.4786(0.4833) Grad: 1.2748  LR: 0.000075  Elapsed: 2m 8s (remain 10m 42s) Max mem: 2963 MB
Epoch: [2/5][73/438] Loss: 0.4834(0.4833) Grad: 1.1746  LR: 0.000075  Elapsed: 2m 10s (remain 10m 40s) Max mem: 2963 MB
Epoch: [2/5][74/438] Loss: 0.4709(0.4832) Grad: 0.8028  LR: 0.000075  Elapsed: 2m 11s (remain 10m 38s) Max mem: 2963 MB
Epoch: [2/5][75/438] Loss: 0.5207(0.4837) Grad: 1.8492  LR: 0.000075  Elapsed: 2m 13s (remain 10m 36s) Max mem: 2963 MB
Epoch: [2/5][76/438] Loss: 0.4807(0.4836) Grad: 1.0949  LR: 0.000075  Elapsed: 2m 15s (remain 10m 34s) Max mem: 2963 MB
Epoch: [2/5][77/438] Loss: 0.4760(0.4835) Gr

Epoch: [2/5][138/438] Loss: 0.4486(0.4801) Grad: 1.0676  LR: 0.000075  Elapsed: 4m 3s (remain 8m 43s) Max mem: 2963 MB
Epoch: [2/5][139/438] Loss: 0.4811(0.4801) Grad: 1.0620  LR: 0.000075  Elapsed: 4m 5s (remain 8m 41s) Max mem: 2963 MB
Epoch: [2/5][140/438] Loss: 0.4943(0.4802) Grad: 1.1705  LR: 0.000075  Elapsed: 4m 6s (remain 8m 39s) Max mem: 2963 MB
Epoch: [2/5][141/438] Loss: 0.4824(0.4802) Grad: 1.1541  LR: 0.000075  Elapsed: 4m 8s (remain 8m 38s) Max mem: 2963 MB
Epoch: [2/5][142/438] Loss: 0.4641(0.4801) Grad: 1.1143  LR: 0.000075  Elapsed: 4m 10s (remain 8m 36s) Max mem: 2963 MB
Epoch: [2/5][143/438] Loss: 0.4813(0.4801) Grad: 1.2162  LR: 0.000075  Elapsed: 4m 12s (remain 8m 35s) Max mem: 2963 MB
Epoch: [2/5][144/438] Loss: 0.4553(0.4799) Grad: 1.4749  LR: 0.000075  Elapsed: 4m 14s (remain 8m 33s) Max mem: 2963 MB
Epoch: [2/5][145/438] Loss: 0.4844(0.4799) Grad: 0.9916  LR: 0.000075  Elapsed: 4m 15s (remain 8m 31s) Max mem: 2963 MB
Epoch: [2/5][146/438] Loss: 0.4661(0.4799) G

Epoch: [2/5][207/438] Loss: 0.4754(0.4786) Grad: 1.4640  LR: 0.000075  Elapsed: 6m 4s (remain 6m 42s) Max mem: 2963 MB
Epoch: [2/5][208/438] Loss: 0.4828(0.4786) Grad: 1.1125  LR: 0.000075  Elapsed: 6m 5s (remain 6m 40s) Max mem: 2963 MB
Epoch: [2/5][209/438] Loss: 0.4853(0.4786) Grad: 0.8528  LR: 0.000075  Elapsed: 6m 7s (remain 6m 39s) Max mem: 2963 MB
Epoch: [2/5][210/438] Loss: 0.5269(0.4789) Grad: 0.8567  LR: 0.000075  Elapsed: 6m 9s (remain 6m 37s) Max mem: 2963 MB
Epoch: [2/5][211/438] Loss: 0.4690(0.4788) Grad: 1.1383  LR: 0.000075  Elapsed: 6m 11s (remain 6m 35s) Max mem: 2963 MB
Epoch: [2/5][212/438] Loss: 0.4410(0.4786) Grad: 0.8949  LR: 0.000075  Elapsed: 6m 13s (remain 6m 34s) Max mem: 2963 MB
Epoch: [2/5][213/438] Loss: 0.4622(0.4786) Grad: 0.9199  LR: 0.000075  Elapsed: 6m 15s (remain 6m 32s) Max mem: 2963 MB
Epoch: [2/5][214/438] Loss: 0.4679(0.4785) Grad: 0.9148  LR: 0.000075  Elapsed: 6m 16s (remain 6m 30s) Max mem: 2963 MB
Epoch: [2/5][215/438] Loss: 0.4515(0.4784) G

Epoch: [2/5][276/438] Loss: 0.4879(0.4770) Grad: 1.2165  LR: 0.000075  Elapsed: 8m 13s (remain 4m 46s) Max mem: 2963 MB
Epoch: [2/5][277/438] Loss: 0.4589(0.4770) Grad: 0.9004  LR: 0.000075  Elapsed: 8m 15s (remain 4m 45s) Max mem: 2963 MB
Epoch: [2/5][278/438] Loss: 0.4524(0.4769) Grad: 0.8860  LR: 0.000075  Elapsed: 8m 17s (remain 4m 43s) Max mem: 2963 MB
Epoch: [2/5][279/438] Loss: 0.4609(0.4768) Grad: 1.2399  LR: 0.000075  Elapsed: 8m 18s (remain 4m 41s) Max mem: 2963 MB
Epoch: [2/5][280/438] Loss: 0.4910(0.4769) Grad: 0.8465  LR: 0.000075  Elapsed: 8m 20s (remain 4m 39s) Max mem: 2963 MB
Epoch: [2/5][281/438] Loss: 0.4707(0.4769) Grad: 0.7964  LR: 0.000075  Elapsed: 8m 22s (remain 4m 37s) Max mem: 2963 MB
Epoch: [2/5][282/438] Loss: 0.4725(0.4768) Grad: 0.8902  LR: 0.000075  Elapsed: 8m 24s (remain 4m 36s) Max mem: 2963 MB
Epoch: [2/5][283/438] Loss: 0.4894(0.4769) Grad: 0.9438  LR: 0.000075  Elapsed: 8m 26s (remain 4m 34s) Max mem: 2963 MB
Epoch: [2/5][284/438] Loss: 0.4486(0.476

Epoch: [2/5][345/438] Loss: 0.4630(0.4746) Grad: 1.0004  LR: 0.000075  Elapsed: 10m 24s (remain 2m 45s) Max mem: 2963 MB
Epoch: [2/5][346/438] Loss: 0.4623(0.4745) Grad: 1.1305  LR: 0.000075  Elapsed: 10m 26s (remain 2m 44s) Max mem: 2963 MB
Epoch: [2/5][347/438] Loss: 0.4690(0.4745) Grad: 0.8852  LR: 0.000075  Elapsed: 10m 28s (remain 2m 42s) Max mem: 2963 MB
Epoch: [2/5][348/438] Loss: 0.4827(0.4745) Grad: 0.8202  LR: 0.000075  Elapsed: 10m 30s (remain 2m 40s) Max mem: 2963 MB
Epoch: [2/5][349/438] Loss: 0.4709(0.4745) Grad: 1.0223  LR: 0.000075  Elapsed: 10m 32s (remain 2m 38s) Max mem: 2963 MB
Epoch: [2/5][350/438] Loss: 0.4302(0.4744) Grad: 0.8957  LR: 0.000075  Elapsed: 10m 34s (remain 2m 37s) Max mem: 2963 MB
Epoch: [2/5][351/438] Loss: 0.4459(0.4743) Grad: 1.1821  LR: 0.000075  Elapsed: 10m 36s (remain 2m 35s) Max mem: 2963 MB
Epoch: [2/5][352/438] Loss: 0.4368(0.4742) Grad: 1.2060  LR: 0.000075  Elapsed: 10m 38s (remain 2m 33s) Max mem: 2963 MB
Epoch: [2/5][353/438] Loss: 0.46

Epoch: [2/5][413/438] Loss: 0.4625(0.4723) Grad: 1.2055  LR: 0.000075  Elapsed: 12m 39s (remain 0m 44s) Max mem: 2963 MB
Epoch: [2/5][414/438] Loss: 0.4687(0.4723) Grad: 0.9324  LR: 0.000075  Elapsed: 12m 40s (remain 0m 42s) Max mem: 2963 MB
Epoch: [2/5][415/438] Loss: 0.4809(0.4723) Grad: 1.0492  LR: 0.000075  Elapsed: 12m 42s (remain 0m 40s) Max mem: 2963 MB
Epoch: [2/5][416/438] Loss: 0.4561(0.4722) Grad: 0.8115  LR: 0.000075  Elapsed: 12m 44s (remain 0m 38s) Max mem: 2963 MB
Epoch: [2/5][417/438] Loss: 0.4660(0.4722) Grad: 0.9522  LR: 0.000075  Elapsed: 12m 46s (remain 0m 36s) Max mem: 2963 MB
Epoch: [2/5][418/438] Loss: 0.4820(0.4723) Grad: 1.2628  LR: 0.000075  Elapsed: 12m 48s (remain 0m 34s) Max mem: 2963 MB
Epoch: [2/5][419/438] Loss: 0.4516(0.4722) Grad: 0.7566  LR: 0.000075  Elapsed: 12m 50s (remain 0m 33s) Max mem: 2963 MB
Epoch: [2/5][420/438] Loss: 0.4649(0.4722) Grad: 1.0033  LR: 0.000075  Elapsed: 12m 51s (remain 0m 31s) Max mem: 2963 MB
Epoch: [2/5][421/438] Loss: 0.45

Epoch 2 - avg_train_loss: 0.4717  avg_val_loss: 0.4551  time: 925s
Epoch 2 - Score: 0.8516
Epoch 2 - Save Best Score: 0.8516 Model
Epoch 2 - Save Best Loss: 0.4551 Model


EVAL: [54/55] Data 0.010 (0.025) Elapsed 2m 4s (remain 0m 0s) Loss: 0.4378(0.4551) 







Epoch: [3/5][0/438] Loss: 0.4405(0.4405) Grad: 0.9487  LR: 0.000025  Elapsed: 0m 3s (remain 22m 5s) Max mem: 2963 MB
Epoch: [3/5][1/438] Loss: 0.4624(0.4514) Grad: 0.9586  LR: 0.000025  Elapsed: 0m 5s (remain 18m 34s) Max mem: 2963 MB
Epoch: [3/5][2/438] Loss: 0.4362(0.4464) Grad: 0.8248  LR: 0.000025  Elapsed: 0m 6s (remain 16m 34s) Max mem: 2963 MB
Epoch: [3/5][3/438] Loss: 0.4648(0.4510) Grad: 1.1453  LR: 0.000025  Elapsed: 0m 8s (remain 15m 38s) Max mem: 2963 MB
Epoch: [3/5][4/438] Loss: 0.4468(0.4501) Grad: 0.7681  LR: 0.000025  Elapsed: 0m 10s (remain 15m 6s) Max mem: 2963 MB
Epoch: [3/5][5/438] Loss: 0.4838(0.4558) Grad: 0.8990  LR: 0.000025  Elapsed: 0m 12s (remain 14m 39s) Max mem: 2963 MB
Epoch: [3/5][6/438] Loss: 0.4475(0.4546) Grad: 0.8762  LR: 0.000025  Elapsed: 0m 13s (remain 14m 14s) Max mem: 2963 MB
Epoch: [3/5][7/438] Loss: 0.4441(0.4533) Grad: 0.9080  LR: 0.000025  Elapsed: 0m 15s (remain 13m 54s) Max mem: 2963 MB
Epoch: [3/5][8/438] Loss: 0.4532(0.4533) Grad: 1.0862 

Epoch: [3/5][69/438] Loss: 0.4771(0.4573) Grad: 0.7573  LR: 0.000025  Elapsed: 1m 54s (remain 10m 3s) Max mem: 2963 MB
Epoch: [3/5][70/438] Loss: 0.4489(0.4572) Grad: 0.7545  LR: 0.000025  Elapsed: 1m 56s (remain 10m 1s) Max mem: 2963 MB
Epoch: [3/5][71/438] Loss: 0.4397(0.4569) Grad: 1.0309  LR: 0.000025  Elapsed: 1m 57s (remain 9m 59s) Max mem: 2963 MB
Epoch: [3/5][72/438] Loss: 0.4529(0.4569) Grad: 1.2093  LR: 0.000025  Elapsed: 1m 59s (remain 9m 57s) Max mem: 2963 MB
Epoch: [3/5][73/438] Loss: 0.4374(0.4566) Grad: 0.7358  LR: 0.000025  Elapsed: 2m 1s (remain 9m 55s) Max mem: 2963 MB
Epoch: [3/5][74/438] Loss: 0.4440(0.4564) Grad: 0.8450  LR: 0.000025  Elapsed: 2m 2s (remain 9m 53s) Max mem: 2963 MB
Epoch: [3/5][75/438] Loss: 0.4716(0.4566) Grad: 0.8538  LR: 0.000025  Elapsed: 2m 4s (remain 9m 51s) Max mem: 2963 MB
Epoch: [3/5][76/438] Loss: 0.4603(0.4567) Grad: 0.7675  LR: 0.000025  Elapsed: 2m 5s (remain 9m 49s) Max mem: 2963 MB
Epoch: [3/5][77/438] Loss: 0.4606(0.4567) Grad: 0.91

Epoch: [3/5][138/438] Loss: 0.4203(0.4539) Grad: 0.9271  LR: 0.000025  Elapsed: 3m 43s (remain 7m 59s) Max mem: 2963 MB
Epoch: [3/5][139/438] Loss: 0.4655(0.4540) Grad: 0.8040  LR: 0.000025  Elapsed: 3m 44s (remain 7m 58s) Max mem: 2963 MB
Epoch: [3/5][140/438] Loss: 0.4666(0.4541) Grad: 0.8758  LR: 0.000025  Elapsed: 3m 46s (remain 7m 56s) Max mem: 2963 MB
Epoch: [3/5][141/438] Loss: 0.4648(0.4542) Grad: 0.9874  LR: 0.000025  Elapsed: 3m 47s (remain 7m 54s) Max mem: 2963 MB
Epoch: [3/5][142/438] Loss: 0.4198(0.4539) Grad: 0.8746  LR: 0.000025  Elapsed: 3m 49s (remain 7m 53s) Max mem: 2963 MB
Epoch: [3/5][143/438] Loss: 0.4320(0.4538) Grad: 0.8690  LR: 0.000025  Elapsed: 3m 50s (remain 7m 51s) Max mem: 2963 MB
Epoch: [3/5][144/438] Loss: 0.4465(0.4537) Grad: 0.9426  LR: 0.000025  Elapsed: 3m 52s (remain 7m 49s) Max mem: 2963 MB
Epoch: [3/5][145/438] Loss: 0.4291(0.4535) Grad: 0.9874  LR: 0.000025  Elapsed: 3m 53s (remain 7m 47s) Max mem: 2963 MB
Epoch: [3/5][146/438] Loss: 0.4458(0.453

Epoch: [3/5][207/438] Loss: 0.4775(0.4539) Grad: 0.9362  LR: 0.000025  Elapsed: 5m 32s (remain 6m 7s) Max mem: 2963 MB
Epoch: [3/5][208/438] Loss: 0.4654(0.4539) Grad: 1.1422  LR: 0.000025  Elapsed: 5m 33s (remain 6m 5s) Max mem: 2963 MB
Epoch: [3/5][209/438] Loss: 0.4553(0.4539) Grad: 0.8192  LR: 0.000025  Elapsed: 5m 35s (remain 6m 4s) Max mem: 2963 MB
Epoch: [3/5][210/438] Loss: 0.4802(0.4541) Grad: 1.1071  LR: 0.000025  Elapsed: 5m 37s (remain 6m 2s) Max mem: 2963 MB
Epoch: [3/5][211/438] Loss: 0.4168(0.4539) Grad: 0.8026  LR: 0.000025  Elapsed: 5m 38s (remain 6m 1s) Max mem: 2963 MB
Epoch: [3/5][212/438] Loss: 0.4542(0.4539) Grad: 0.9865  LR: 0.000025  Elapsed: 5m 40s (remain 5m 59s) Max mem: 2963 MB
Epoch: [3/5][213/438] Loss: 0.4698(0.4540) Grad: 0.9831  LR: 0.000025  Elapsed: 5m 41s (remain 5m 57s) Max mem: 2963 MB
Epoch: [3/5][214/438] Loss: 0.4453(0.4539) Grad: 0.9009  LR: 0.000025  Elapsed: 5m 43s (remain 5m 56s) Max mem: 2963 MB
Epoch: [3/5][215/438] Loss: 0.4642(0.4540) Gr

Epoch: [3/5][276/438] Loss: 0.4534(0.4540) Grad: 1.0271  LR: 0.000025  Elapsed: 7m 20s (remain 4m 16s) Max mem: 2963 MB
Epoch: [3/5][277/438] Loss: 0.4495(0.4540) Grad: 1.0883  LR: 0.000025  Elapsed: 7m 22s (remain 4m 14s) Max mem: 2963 MB
Epoch: [3/5][278/438] Loss: 0.4322(0.4539) Grad: 0.8412  LR: 0.000025  Elapsed: 7m 23s (remain 4m 12s) Max mem: 2963 MB
Epoch: [3/5][279/438] Loss: 0.4391(0.4539) Grad: 0.9000  LR: 0.000025  Elapsed: 7m 25s (remain 4m 11s) Max mem: 2963 MB
Epoch: [3/5][280/438] Loss: 0.4458(0.4538) Grad: 0.8667  LR: 0.000025  Elapsed: 7m 26s (remain 4m 9s) Max mem: 2963 MB
Epoch: [3/5][281/438] Loss: 0.4372(0.4538) Grad: 0.8797  LR: 0.000025  Elapsed: 7m 28s (remain 4m 8s) Max mem: 2963 MB
Epoch: [3/5][282/438] Loss: 0.4700(0.4538) Grad: 0.8862  LR: 0.000025  Elapsed: 7m 30s (remain 4m 6s) Max mem: 2963 MB
Epoch: [3/5][283/438] Loss: 0.4485(0.4538) Grad: 0.8641  LR: 0.000025  Elapsed: 7m 31s (remain 4m 4s) Max mem: 2963 MB
Epoch: [3/5][284/438] Loss: 0.4677(0.4539) G

Epoch: [3/5][345/438] Loss: 0.4485(0.4529) Grad: 1.0861  LR: 0.000025  Elapsed: 9m 8s (remain 2m 25s) Max mem: 2963 MB
Epoch: [3/5][346/438] Loss: 0.4494(0.4529) Grad: 1.1104  LR: 0.000025  Elapsed: 9m 10s (remain 2m 24s) Max mem: 2963 MB
Epoch: [3/5][347/438] Loss: 0.4654(0.4529) Grad: 0.7995  LR: 0.000025  Elapsed: 9m 11s (remain 2m 22s) Max mem: 2963 MB
Epoch: [3/5][348/438] Loss: 0.4690(0.4530) Grad: 1.0376  LR: 0.000025  Elapsed: 9m 13s (remain 2m 21s) Max mem: 2963 MB
Epoch: [3/5][349/438] Loss: 0.4201(0.4529) Grad: 0.8984  LR: 0.000025  Elapsed: 9m 15s (remain 2m 19s) Max mem: 2963 MB
Epoch: [3/5][350/438] Loss: 0.4339(0.4528) Grad: 0.9174  LR: 0.000025  Elapsed: 9m 16s (remain 2m 17s) Max mem: 2963 MB
Epoch: [3/5][351/438] Loss: 0.4397(0.4528) Grad: 1.0811  LR: 0.000025  Elapsed: 9m 18s (remain 2m 16s) Max mem: 2963 MB
Epoch: [3/5][352/438] Loss: 0.4207(0.4527) Grad: 0.8378  LR: 0.000025  Elapsed: 9m 19s (remain 2m 14s) Max mem: 2963 MB
Epoch: [3/5][353/438] Loss: 0.4494(0.4527

Epoch: [3/5][414/438] Loss: 0.4547(0.4517) Grad: 1.1622  LR: 0.000025  Elapsed: 10m 57s (remain 0m 36s) Max mem: 2963 MB
Epoch: [3/5][415/438] Loss: 0.4564(0.4517) Grad: 1.0120  LR: 0.000025  Elapsed: 10m 58s (remain 0m 34s) Max mem: 2963 MB
Epoch: [3/5][416/438] Loss: 0.4460(0.4517) Grad: 1.0734  LR: 0.000025  Elapsed: 11m 0s (remain 0m 33s) Max mem: 2963 MB
Epoch: [3/5][417/438] Loss: 0.4489(0.4517) Grad: 1.1683  LR: 0.000025  Elapsed: 11m 1s (remain 0m 31s) Max mem: 2963 MB
Epoch: [3/5][418/438] Loss: 0.4504(0.4517) Grad: 1.0777  LR: 0.000025  Elapsed: 11m 3s (remain 0m 30s) Max mem: 2963 MB
Epoch: [3/5][419/438] Loss: 0.4808(0.4518) Grad: 1.1976  LR: 0.000025  Elapsed: 11m 4s (remain 0m 28s) Max mem: 2963 MB
Epoch: [3/5][420/438] Loss: 0.4528(0.4518) Grad: 1.0789  LR: 0.000025  Elapsed: 11m 6s (remain 0m 26s) Max mem: 2963 MB
Epoch: [3/5][421/438] Loss: 0.4243(0.4517) Grad: 0.9653  LR: 0.000025  Elapsed: 11m 8s (remain 0m 25s) Max mem: 2963 MB
Epoch: [3/5][422/438] Loss: 0.4413(0.4

Epoch 3 - avg_train_loss: 0.4516  avg_val_loss: 0.4633  time: 806s
Epoch 3 - Score: 0.8539
Epoch 3 - Save Best Score: 0.8539 Model


EVAL: [54/55] Data 0.009 (0.022) Elapsed 1m 53s (remain 0m 0s) Loss: 0.4471(0.4633) 







Epoch: [4/5][0/438] Loss: 0.4489(0.4489) Grad: 1.1644  LR: 0.000000  Elapsed: 0m 2s (remain 20m 40s) Max mem: 2963 MB
Epoch: [4/5][1/438] Loss: 0.4287(0.4388) Grad: 0.9815  LR: 0.000000  Elapsed: 0m 4s (remain 16m 18s) Max mem: 2963 MB
Epoch: [4/5][2/438] Loss: 0.4519(0.4432) Grad: 0.9669  LR: 0.000000  Elapsed: 0m 6s (remain 14m 43s) Max mem: 2963 MB
Epoch: [4/5][3/438] Loss: 0.4579(0.4468) Grad: 1.1003  LR: 0.000000  Elapsed: 0m 7s (remain 13m 56s) Max mem: 2963 MB
Epoch: [4/5][4/438] Loss: 0.4363(0.4447) Grad: 0.9555  LR: 0.000000  Elapsed: 0m 9s (remain 13m 29s) Max mem: 2963 MB
Epoch: [4/5][5/438] Loss: 0.4536(0.4462) Grad: 1.2231  LR: 0.000000  Elapsed: 0m 10s (remain 13m 8s) Max mem: 2963 MB
Epoch: [4/5][6/438] Loss: 0.4578(0.4479) Grad: 0.9345  LR: 0.000000  Elapsed: 0m 12s (remain 12m 53s) Max mem: 2963 MB
Epoch: [4/5][7/438] Loss: 0.4390(0.4467) Grad: 1.0490  LR: 0.000000  Elapsed: 0m 14s (remain 12m 42s) Max mem: 2963 MB
Epoch: [4/5][8/438] Loss: 0.4260(0.4444) Grad: 0.9900 

Epoch: [4/5][69/438] Loss: 0.4857(0.4495) Grad: 1.1298  LR: 0.000000  Elapsed: 1m 51s (remain 9m 46s) Max mem: 2963 MB
Epoch: [4/5][70/438] Loss: 0.4476(0.4495) Grad: 0.8850  LR: 0.000000  Elapsed: 1m 53s (remain 9m 45s) Max mem: 2963 MB
Epoch: [4/5][71/438] Loss: 0.4366(0.4493) Grad: 1.0579  LR: 0.000000  Elapsed: 1m 54s (remain 9m 43s) Max mem: 2963 MB
Epoch: [4/5][72/438] Loss: 0.4667(0.4495) Grad: 0.9251  LR: 0.000000  Elapsed: 1m 56s (remain 9m 41s) Max mem: 2963 MB
Epoch: [4/5][73/438] Loss: 0.4418(0.4494) Grad: 1.0075  LR: 0.000000  Elapsed: 1m 57s (remain 9m 40s) Max mem: 2963 MB
Epoch: [4/5][74/438] Loss: 0.4601(0.4496) Grad: 1.0883  LR: 0.000000  Elapsed: 1m 59s (remain 9m 38s) Max mem: 2963 MB
Epoch: [4/5][75/438] Loss: 0.4389(0.4494) Grad: 1.2113  LR: 0.000000  Elapsed: 2m 1s (remain 9m 36s) Max mem: 2963 MB
Epoch: [4/5][76/438] Loss: 0.4548(0.4495) Grad: 0.9651  LR: 0.000000  Elapsed: 2m 2s (remain 9m 34s) Max mem: 2963 MB
Epoch: [4/5][77/438] Loss: 0.4316(0.4493) Grad: 1.

Epoch: [4/5][138/438] Loss: 0.4708(0.4478) Grad: 0.9966  LR: 0.000000  Elapsed: 3m 39s (remain 7m 51s) Max mem: 2963 MB
Epoch: [4/5][139/438] Loss: 0.4351(0.4477) Grad: 0.9935  LR: 0.000000  Elapsed: 3m 40s (remain 7m 49s) Max mem: 2963 MB
Epoch: [4/5][140/438] Loss: 0.4592(0.4478) Grad: 0.9808  LR: 0.000000  Elapsed: 3m 42s (remain 7m 48s) Max mem: 2963 MB
Epoch: [4/5][141/438] Loss: 0.4227(0.4476) Grad: 0.9593  LR: 0.000000  Elapsed: 3m 43s (remain 7m 46s) Max mem: 2963 MB
Epoch: [4/5][142/438] Loss: 0.4343(0.4475) Grad: 1.0428  LR: 0.000000  Elapsed: 3m 45s (remain 7m 44s) Max mem: 2963 MB
Epoch: [4/5][143/438] Loss: 0.4442(0.4475) Grad: 1.1161  LR: 0.000000  Elapsed: 3m 46s (remain 7m 43s) Max mem: 2963 MB
Epoch: [4/5][144/438] Loss: 0.4431(0.4474) Grad: 1.0931  LR: 0.000000  Elapsed: 3m 48s (remain 7m 41s) Max mem: 2963 MB
Epoch: [4/5][145/438] Loss: 0.4234(0.4473) Grad: 0.9349  LR: 0.000000  Elapsed: 3m 49s (remain 7m 39s) Max mem: 2963 MB
Epoch: [4/5][146/438] Loss: 0.4409(0.447

Epoch: [4/5][207/438] Loss: 0.4684(0.4480) Grad: 1.3639  LR: 0.000000  Elapsed: 5m 26s (remain 6m 0s) Max mem: 2963 MB
Epoch: [4/5][208/438] Loss: 0.4701(0.4481) Grad: 1.2085  LR: 0.000000  Elapsed: 5m 27s (remain 5m 59s) Max mem: 2963 MB
Epoch: [4/5][209/438] Loss: 0.4515(0.4481) Grad: 1.3193  LR: 0.000000  Elapsed: 5m 29s (remain 5m 57s) Max mem: 2963 MB
Epoch: [4/5][210/438] Loss: 0.4389(0.4480) Grad: 0.9694  LR: 0.000000  Elapsed: 5m 30s (remain 5m 55s) Max mem: 2963 MB
Epoch: [4/5][211/438] Loss: 0.4751(0.4482) Grad: 1.0670  LR: 0.000000  Elapsed: 5m 32s (remain 5m 54s) Max mem: 2963 MB
Epoch: [4/5][212/438] Loss: 0.4511(0.4482) Grad: 0.8766  LR: 0.000000  Elapsed: 5m 34s (remain 5m 52s) Max mem: 2963 MB
Epoch: [4/5][213/438] Loss: 0.4371(0.4481) Grad: 1.1622  LR: 0.000000  Elapsed: 5m 35s (remain 5m 51s) Max mem: 2963 MB
Epoch: [4/5][214/438] Loss: 0.4432(0.4481) Grad: 0.9960  LR: 0.000000  Elapsed: 5m 37s (remain 5m 49s) Max mem: 2963 MB
Epoch: [4/5][215/438] Loss: 0.4584(0.4482

Epoch: [4/5][276/438] Loss: 0.4597(0.4482) Grad: 1.0966  LR: 0.000000  Elapsed: 7m 13s (remain 4m 11s) Max mem: 2963 MB
Epoch: [4/5][277/438] Loss: 0.4611(0.4483) Grad: 1.1137  LR: 0.000000  Elapsed: 7m 15s (remain 4m 10s) Max mem: 2963 MB
Epoch: [4/5][278/438] Loss: 0.4529(0.4483) Grad: 1.1502  LR: 0.000000  Elapsed: 7m 16s (remain 4m 8s) Max mem: 2963 MB
Epoch: [4/5][279/438] Loss: 0.4414(0.4483) Grad: 1.0508  LR: 0.000000  Elapsed: 7m 18s (remain 4m 7s) Max mem: 2963 MB
Epoch: [4/5][280/438] Loss: 0.4456(0.4483) Grad: 0.9769  LR: 0.000000  Elapsed: 7m 19s (remain 4m 5s) Max mem: 2963 MB
Epoch: [4/5][281/438] Loss: 0.4576(0.4483) Grad: 1.1070  LR: 0.000000  Elapsed: 7m 21s (remain 4m 4s) Max mem: 2963 MB
Epoch: [4/5][282/438] Loss: 0.4572(0.4483) Grad: 0.9331  LR: 0.000000  Elapsed: 7m 22s (remain 4m 2s) Max mem: 2963 MB
Epoch: [4/5][283/438] Loss: 0.4286(0.4483) Grad: 1.0170  LR: 0.000000  Elapsed: 7m 24s (remain 4m 0s) Max mem: 2963 MB
Epoch: [4/5][284/438] Loss: 0.4452(0.4483) Gra

Epoch: [4/5][345/438] Loss: 0.4419(0.4475) Grad: 1.0535  LR: 0.000000  Elapsed: 9m 0s (remain 2m 23s) Max mem: 2963 MB
Epoch: [4/5][346/438] Loss: 0.4425(0.4475) Grad: 0.9662  LR: 0.000000  Elapsed: 9m 2s (remain 2m 22s) Max mem: 2963 MB
Epoch: [4/5][347/438] Loss: 0.4476(0.4475) Grad: 1.1504  LR: 0.000000  Elapsed: 9m 3s (remain 2m 20s) Max mem: 2963 MB
Epoch: [4/5][348/438] Loss: 0.4438(0.4475) Grad: 0.8520  LR: 0.000000  Elapsed: 9m 5s (remain 2m 19s) Max mem: 2963 MB
Epoch: [4/5][349/438] Loss: 0.4177(0.4474) Grad: 0.9893  LR: 0.000000  Elapsed: 9m 6s (remain 2m 17s) Max mem: 2963 MB
Epoch: [4/5][350/438] Loss: 0.4365(0.4474) Grad: 1.1383  LR: 0.000000  Elapsed: 9m 8s (remain 2m 15s) Max mem: 2963 MB
Epoch: [4/5][351/438] Loss: 0.4263(0.4473) Grad: 1.1651  LR: 0.000000  Elapsed: 9m 10s (remain 2m 14s) Max mem: 2963 MB
Epoch: [4/5][352/438] Loss: 0.4301(0.4473) Grad: 0.9359  LR: 0.000000  Elapsed: 9m 11s (remain 2m 12s) Max mem: 2963 MB
Epoch: [4/5][353/438] Loss: 0.4296(0.4472) Gra

Epoch: [4/5][414/438] Loss: 0.4110(0.4457) Grad: 1.0358  LR: 0.000000  Elapsed: 10m 47s (remain 0m 35s) Max mem: 2963 MB
Epoch: [4/5][415/438] Loss: 0.4428(0.4457) Grad: 0.9893  LR: 0.000000  Elapsed: 10m 49s (remain 0m 34s) Max mem: 2963 MB
Epoch: [4/5][416/438] Loss: 0.4498(0.4457) Grad: 1.0873  LR: 0.000000  Elapsed: 10m 50s (remain 0m 32s) Max mem: 2963 MB
Epoch: [4/5][417/438] Loss: 0.4404(0.4457) Grad: 1.1947  LR: 0.000000  Elapsed: 10m 52s (remain 0m 31s) Max mem: 2963 MB
Epoch: [4/5][418/438] Loss: 0.4268(0.4457) Grad: 0.9365  LR: 0.000000  Elapsed: 10m 54s (remain 0m 29s) Max mem: 2963 MB
Epoch: [4/5][419/438] Loss: 0.4482(0.4457) Grad: 0.9670  LR: 0.000000  Elapsed: 10m 55s (remain 0m 28s) Max mem: 2963 MB
Epoch: [4/5][420/438] Loss: 0.4257(0.4456) Grad: 0.9176  LR: 0.000000  Elapsed: 10m 57s (remain 0m 26s) Max mem: 2963 MB
Epoch: [4/5][421/438] Loss: 0.4353(0.4456) Grad: 0.9311  LR: 0.000000  Elapsed: 10m 58s (remain 0m 24s) Max mem: 2963 MB
Epoch: [4/5][422/438] Loss: 0.42

Epoch 4 - avg_train_loss: 0.4453  avg_val_loss: 0.4573  time: 795s
Epoch 4 - Score: 0.8543
Epoch 4 - Save Best Score: 0.8543 Model


EVAL: [54/55] Data 0.008 (0.022) Elapsed 1m 52s (remain 0m 0s) Loss: 0.4416(0.4573) 







Epoch: [5/5][0/438] Loss: 0.4248(0.4248) Grad: 1.0613  LR: 0.000025  Elapsed: 0m 2s (remain 21m 4s) Max mem: 2963 MB
Epoch: [5/5][1/438] Loss: 0.4753(0.4501) Grad: 0.9556  LR: 0.000025  Elapsed: 0m 4s (remain 16m 33s) Max mem: 2963 MB
Epoch: [5/5][2/438] Loss: 0.4372(0.4458) Grad: 0.8781  LR: 0.000025  Elapsed: 0m 6s (remain 14m 55s) Max mem: 2963 MB
Epoch: [5/5][3/438] Loss: 0.4577(0.4488) Grad: 1.3172  LR: 0.000025  Elapsed: 0m 7s (remain 14m 7s) Max mem: 2963 MB
Epoch: [5/5][4/438] Loss: 0.4648(0.4520) Grad: 1.1078  LR: 0.000025  Elapsed: 0m 9s (remain 13m 38s) Max mem: 2963 MB
Epoch: [5/5][5/438] Loss: 0.4421(0.4503) Grad: 1.0605  LR: 0.000025  Elapsed: 0m 11s (remain 13m 16s) Max mem: 2963 MB
Epoch: [5/5][6/438] Loss: 0.4475(0.4499) Grad: 0.8422  LR: 0.000025  Elapsed: 0m 12s (remain 13m 0s) Max mem: 2963 MB
Epoch: [5/5][7/438] Loss: 0.4382(0.4485) Grad: 0.9998  LR: 0.000025  Elapsed: 0m 14s (remain 12m 49s) Max mem: 2963 MB
Epoch: [5/5][8/438] Loss: 0.4271(0.4461) Grad: 1.0559  L

Epoch: [5/5][69/438] Loss: 0.4432(0.4483) Grad: 0.9555  LR: 0.000025  Elapsed: 1m 52s (remain 9m 49s) Max mem: 2963 MB
Epoch: [5/5][70/438] Loss: 0.4479(0.4483) Grad: 1.1121  LR: 0.000025  Elapsed: 1m 53s (remain 9m 47s) Max mem: 2963 MB
Epoch: [5/5][71/438] Loss: 0.4609(0.4485) Grad: 0.8977  LR: 0.000025  Elapsed: 1m 55s (remain 9m 45s) Max mem: 2963 MB
Epoch: [5/5][72/438] Loss: 0.4490(0.4485) Grad: 0.9989  LR: 0.000025  Elapsed: 1m 56s (remain 9m 43s) Max mem: 2963 MB
Epoch: [5/5][73/438] Loss: 0.4083(0.4480) Grad: 1.0559  LR: 0.000025  Elapsed: 1m 58s (remain 9m 42s) Max mem: 2963 MB
Epoch: [5/5][74/438] Loss: 0.4495(0.4480) Grad: 1.1593  LR: 0.000025  Elapsed: 1m 59s (remain 9m 40s) Max mem: 2963 MB
Epoch: [5/5][75/438] Loss: 0.4410(0.4479) Grad: 0.9731  LR: 0.000025  Elapsed: 2m 1s (remain 9m 38s) Max mem: 2963 MB
Epoch: [5/5][76/438] Loss: 0.4738(0.4482) Grad: 0.9418  LR: 0.000025  Elapsed: 2m 3s (remain 9m 37s) Max mem: 2963 MB
Epoch: [5/5][77/438] Loss: 0.4396(0.4481) Grad: 1.

Epoch: [5/5][138/438] Loss: 0.4407(0.4468) Grad: 1.0363  LR: 0.000025  Elapsed: 3m 40s (remain 7m 53s) Max mem: 2963 MB
Epoch: [5/5][139/438] Loss: 0.4566(0.4469) Grad: 1.0825  LR: 0.000025  Elapsed: 3m 41s (remain 7m 51s) Max mem: 2963 MB
Epoch: [5/5][140/438] Loss: 0.4373(0.4468) Grad: 0.9350  LR: 0.000025  Elapsed: 3m 43s (remain 7m 50s) Max mem: 2963 MB
Epoch: [5/5][141/438] Loss: 0.4335(0.4467) Grad: 0.9750  LR: 0.000025  Elapsed: 3m 44s (remain 7m 48s) Max mem: 2963 MB
Epoch: [5/5][142/438] Loss: 0.4367(0.4466) Grad: 1.0643  LR: 0.000025  Elapsed: 3m 46s (remain 7m 47s) Max mem: 2963 MB
Epoch: [5/5][143/438] Loss: 0.4475(0.4466) Grad: 1.0923  LR: 0.000025  Elapsed: 3m 47s (remain 7m 45s) Max mem: 2963 MB
Epoch: [5/5][144/438] Loss: 0.4530(0.4467) Grad: 0.9447  LR: 0.000025  Elapsed: 3m 49s (remain 7m 43s) Max mem: 2963 MB
Epoch: [5/5][145/438] Loss: 0.4227(0.4465) Grad: 1.0147  LR: 0.000025  Elapsed: 3m 51s (remain 7m 42s) Max mem: 2963 MB
Epoch: [5/5][146/438] Loss: 0.4334(0.446

Epoch: [5/5][207/438] Loss: 0.4633(0.4466) Grad: 0.9948  LR: 0.000025  Elapsed: 5m 27s (remain 6m 2s) Max mem: 2963 MB
Epoch: [5/5][208/438] Loss: 0.4466(0.4466) Grad: 1.2406  LR: 0.000025  Elapsed: 5m 29s (remain 6m 1s) Max mem: 2963 MB
Epoch: [5/5][209/438] Loss: 0.4566(0.4467) Grad: 0.9162  LR: 0.000025  Elapsed: 5m 31s (remain 5m 59s) Max mem: 2963 MB
Epoch: [5/5][210/438] Loss: 0.4728(0.4468) Grad: 1.1208  LR: 0.000025  Elapsed: 5m 32s (remain 5m 57s) Max mem: 2963 MB
Epoch: [5/5][211/438] Loss: 0.4445(0.4468) Grad: 1.2375  LR: 0.000025  Elapsed: 5m 34s (remain 5m 56s) Max mem: 2963 MB
Epoch: [5/5][212/438] Loss: 0.4389(0.4467) Grad: 0.9757  LR: 0.000025  Elapsed: 5m 35s (remain 5m 54s) Max mem: 2963 MB
Epoch: [5/5][213/438] Loss: 0.4214(0.4466) Grad: 0.8954  LR: 0.000025  Elapsed: 5m 37s (remain 5m 53s) Max mem: 2963 MB
Epoch: [5/5][214/438] Loss: 0.4302(0.4466) Grad: 1.1176  LR: 0.000025  Elapsed: 5m 38s (remain 5m 51s) Max mem: 2963 MB
Epoch: [5/5][215/438] Loss: 0.4625(0.4466)

Epoch: [5/5][276/438] Loss: 0.4712(0.4466) Grad: 1.1422  LR: 0.000025  Elapsed: 7m 23s (remain 4m 17s) Max mem: 2963 MB
Epoch: [5/5][277/438] Loss: 0.4470(0.4466) Grad: 1.0521  LR: 0.000025  Elapsed: 7m 24s (remain 4m 16s) Max mem: 2963 MB
Epoch: [5/5][278/438] Loss: 0.4687(0.4467) Grad: 1.2432  LR: 0.000025  Elapsed: 7m 26s (remain 4m 14s) Max mem: 2963 MB
Epoch: [5/5][279/438] Loss: 0.4235(0.4466) Grad: 1.0152  LR: 0.000025  Elapsed: 7m 28s (remain 4m 12s) Max mem: 2963 MB
Epoch: [5/5][280/438] Loss: 0.4477(0.4466) Grad: 0.9940  LR: 0.000025  Elapsed: 7m 29s (remain 4m 11s) Max mem: 2963 MB
Epoch: [5/5][281/438] Loss: 0.4485(0.4466) Grad: 1.1064  LR: 0.000025  Elapsed: 7m 31s (remain 4m 9s) Max mem: 2963 MB
Epoch: [5/5][282/438] Loss: 0.4371(0.4466) Grad: 0.9216  LR: 0.000025  Elapsed: 7m 33s (remain 4m 8s) Max mem: 2963 MB
Epoch: [5/5][283/438] Loss: 0.4485(0.4466) Grad: 1.1014  LR: 0.000025  Elapsed: 7m 35s (remain 4m 6s) Max mem: 2963 MB
Epoch: [5/5][284/438] Loss: 0.4422(0.4466) 

Epoch: [5/5][345/438] Loss: 0.4899(0.4457) Grad: 1.2995  LR: 0.000025  Elapsed: 9m 18s (remain 2m 28s) Max mem: 2963 MB
Epoch: [5/5][346/438] Loss: 0.4269(0.4456) Grad: 1.0965  LR: 0.000025  Elapsed: 9m 19s (remain 2m 26s) Max mem: 2963 MB
Epoch: [5/5][347/438] Loss: 0.4360(0.4456) Grad: 1.0336  LR: 0.000025  Elapsed: 9m 21s (remain 2m 25s) Max mem: 2963 MB
Epoch: [5/5][348/438] Loss: 0.4573(0.4456) Grad: 1.0091  LR: 0.000025  Elapsed: 9m 23s (remain 2m 23s) Max mem: 2963 MB
Epoch: [5/5][349/438] Loss: 0.4340(0.4456) Grad: 1.0158  LR: 0.000025  Elapsed: 9m 24s (remain 2m 22s) Max mem: 2963 MB
Epoch: [5/5][350/438] Loss: 0.4505(0.4456) Grad: 1.3821  LR: 0.000025  Elapsed: 9m 26s (remain 2m 20s) Max mem: 2963 MB
Epoch: [5/5][351/438] Loss: 0.4318(0.4456) Grad: 1.0938  LR: 0.000025  Elapsed: 9m 28s (remain 2m 18s) Max mem: 2963 MB
Epoch: [5/5][352/438] Loss: 0.4210(0.4455) Grad: 0.9762  LR: 0.000025  Elapsed: 9m 29s (remain 2m 17s) Max mem: 2963 MB
Epoch: [5/5][353/438] Loss: 0.4283(0.445

Epoch: [5/5][414/438] Loss: 0.4145(0.4446) Grad: 1.0246  LR: 0.000025  Elapsed: 11m 16s (remain 0m 37s) Max mem: 2963 MB
Epoch: [5/5][415/438] Loss: 0.4077(0.4445) Grad: 0.9949  LR: 0.000025  Elapsed: 11m 18s (remain 0m 35s) Max mem: 2963 MB
Epoch: [5/5][416/438] Loss: 0.4335(0.4444) Grad: 1.1779  LR: 0.000025  Elapsed: 11m 20s (remain 0m 34s) Max mem: 2963 MB
Epoch: [5/5][417/438] Loss: 0.4562(0.4445) Grad: 1.4085  LR: 0.000025  Elapsed: 11m 21s (remain 0m 32s) Max mem: 2963 MB
Epoch: [5/5][418/438] Loss: 0.4548(0.4445) Grad: 1.1287  LR: 0.000025  Elapsed: 11m 23s (remain 0m 30s) Max mem: 2963 MB
Epoch: [5/5][419/438] Loss: 0.4436(0.4445) Grad: 1.2476  LR: 0.000025  Elapsed: 11m 25s (remain 0m 29s) Max mem: 2963 MB
Epoch: [5/5][420/438] Loss: 0.4639(0.4445) Grad: 1.0278  LR: 0.000025  Elapsed: 11m 26s (remain 0m 27s) Max mem: 2963 MB
Epoch: [5/5][421/438] Loss: 0.4352(0.4445) Grad: 1.1621  LR: 0.000025  Elapsed: 11m 28s (remain 0m 26s) Max mem: 2963 MB
Epoch: [5/5][422/438] Loss: 0.42

Epoch 5 - avg_train_loss: 0.4444  avg_val_loss: 0.4547  time: 837s
Epoch 5 - Score: 0.8538
Epoch 5 - Save Best Loss: 0.4547 Model


EVAL: [54/55] Data 0.011 (0.024) Elapsed 2m 2s (remain 0m 0s) Loss: 0.4382(0.4547) 


Score: 0.8538









Epoch: [1/5][0/438] Loss: 0.7517(0.7517) Grad: 3.2656  LR: 0.000100  Elapsed: 0m 3s (remain 21m 54s) Max mem: 2963 MB
Epoch: [1/5][1/438] Loss: 0.7248(0.7382) Grad: 3.0644  LR: 0.000100  Elapsed: 0m 4s (remain 17m 9s) Max mem: 2963 MB
Epoch: [1/5][2/438] Loss: 0.7295(0.7353) Grad: 2.9843  LR: 0.000100  Elapsed: 0m 6s (remain 15m 31s) Max mem: 2963 MB
Epoch: [1/5][3/438] Loss: 0.7313(0.7343) Grad: 2.7769  LR: 0.000100  Elapsed: 0m 8s (remain 14m 45s) Max mem: 2963 MB
Epoch: [1/5][4/438] Loss: 0.7281(0.7331) Grad: 2.6582  LR: 0.000100  Elapsed: 0m 9s (remain 14m 13s) Max mem: 2963 MB
Epoch: [1/5][5/438] Loss: 0.7240(0.7316) Grad: 2.5588  LR: 0.000100  Elapsed: 0m 11s (remain 13m 52s) Max mem: 2963 MB
Epoch: [1/5][6/438] Loss: 0.7258(0.7307) Grad: 2.5547  LR: 0.000100  Elapsed: 0m 13s (remain 13m 41s) Max mem: 2963 MB
Epoch: [1/5][7/438] Loss: 0.7293(0.7306) Grad: 2.5480  LR: 0.000100  Elapsed: 0m 15s (remain 13m 27s) Max mem: 2963 MB
Epoch: [1/5][8/438] Loss: 0.7285(0.7303) Grad: 2.4004 

Epoch: [1/5][69/438] Loss: 0.6815(0.7124) Grad: 1.4180  LR: 0.000100  Elapsed: 1m 58s (remain 10m 25s) Max mem: 2963 MB
Epoch: [1/5][70/438] Loss: 0.6879(0.7121) Grad: 1.5701  LR: 0.000100  Elapsed: 2m 0s (remain 10m 23s) Max mem: 2963 MB
Epoch: [1/5][71/438] Loss: 0.6765(0.7116) Grad: 1.3934  LR: 0.000100  Elapsed: 2m 2s (remain 10m 22s) Max mem: 2963 MB
Epoch: [1/5][72/438] Loss: 0.6566(0.7108) Grad: 1.4251  LR: 0.000100  Elapsed: 2m 4s (remain 10m 20s) Max mem: 2963 MB
Epoch: [1/5][73/438] Loss: 0.6741(0.7104) Grad: 1.3855  LR: 0.000100  Elapsed: 2m 5s (remain 10m 19s) Max mem: 2963 MB
Epoch: [1/5][74/438] Loss: 0.6563(0.7096) Grad: 1.4309  LR: 0.000100  Elapsed: 2m 7s (remain 10m 17s) Max mem: 2963 MB
Epoch: [1/5][75/438] Loss: 0.6540(0.7089) Grad: 1.4139  LR: 0.000100  Elapsed: 2m 9s (remain 10m 16s) Max mem: 2963 MB
Epoch: [1/5][76/438] Loss: 0.6612(0.7083) Grad: 1.3593  LR: 0.000100  Elapsed: 2m 11s (remain 10m 14s) Max mem: 2963 MB
Epoch: [1/5][77/438] Loss: 0.6599(0.7077) Grad

Epoch: [1/5][138/438] Loss: 0.5559(0.6643) Grad: 1.9090  LR: 0.000100  Elapsed: 3m 52s (remain 8m 20s) Max mem: 2963 MB
Epoch: [1/5][139/438] Loss: 0.5957(0.6638) Grad: 2.0856  LR: 0.000100  Elapsed: 3m 54s (remain 8m 18s) Max mem: 2963 MB
Epoch: [1/5][140/438] Loss: 0.5808(0.6632) Grad: 1.4930  LR: 0.000100  Elapsed: 3m 55s (remain 8m 17s) Max mem: 2963 MB
Epoch: [1/5][141/438] Loss: 0.5903(0.6627) Grad: 1.4598  LR: 0.000100  Elapsed: 3m 57s (remain 8m 15s) Max mem: 2963 MB
Epoch: [1/5][142/438] Loss: 0.5694(0.6621) Grad: 1.0238  LR: 0.000100  Elapsed: 3m 59s (remain 8m 13s) Max mem: 2963 MB
Epoch: [1/5][143/438] Loss: 0.5736(0.6615) Grad: 1.4024  LR: 0.000100  Elapsed: 4m 0s (remain 8m 11s) Max mem: 2963 MB
Epoch: [1/5][144/438] Loss: 0.5646(0.6608) Grad: 1.0078  LR: 0.000100  Elapsed: 4m 2s (remain 8m 10s) Max mem: 2963 MB
Epoch: [1/5][145/438] Loss: 0.5579(0.6601) Grad: 1.6889  LR: 0.000100  Elapsed: 4m 4s (remain 8m 8s) Max mem: 2963 MB
Epoch: [1/5][146/438] Loss: 0.5647(0.6594) G

Epoch: [1/5][207/438] Loss: 0.5270(0.6285) Grad: 1.5284  LR: 0.000100  Elapsed: 5m 44s (remain 6m 21s) Max mem: 2963 MB
Epoch: [1/5][208/438] Loss: 0.5291(0.6280) Grad: 1.2468  LR: 0.000100  Elapsed: 5m 46s (remain 6m 19s) Max mem: 2963 MB
Epoch: [1/5][209/438] Loss: 0.5355(0.6276) Grad: 2.1513  LR: 0.000100  Elapsed: 5m 47s (remain 6m 17s) Max mem: 2963 MB
Epoch: [1/5][210/438] Loss: 0.5350(0.6272) Grad: 1.9464  LR: 0.000100  Elapsed: 5m 49s (remain 6m 16s) Max mem: 2963 MB
Epoch: [1/5][211/438] Loss: 0.5422(0.6268) Grad: 2.1499  LR: 0.000100  Elapsed: 5m 51s (remain 6m 14s) Max mem: 2963 MB
Epoch: [1/5][212/438] Loss: 0.5351(0.6263) Grad: 1.7236  LR: 0.000100  Elapsed: 5m 52s (remain 6m 12s) Max mem: 2963 MB
Epoch: [1/5][213/438] Loss: 0.5455(0.6259) Grad: 1.2685  LR: 0.000100  Elapsed: 5m 54s (remain 6m 11s) Max mem: 2963 MB
Epoch: [1/5][214/438] Loss: 0.5428(0.6256) Grad: 1.6221  LR: 0.000100  Elapsed: 5m 56s (remain 6m 9s) Max mem: 2963 MB
Epoch: [1/5][215/438] Loss: 0.5205(0.6251

Epoch: [1/5][276/438] Loss: 0.5064(0.6045) Grad: 1.2594  LR: 0.000100  Elapsed: 7m 37s (remain 4m 26s) Max mem: 2963 MB
Epoch: [1/5][277/438] Loss: 0.5184(0.6042) Grad: 1.7967  LR: 0.000100  Elapsed: 7m 39s (remain 4m 24s) Max mem: 2963 MB
Epoch: [1/5][278/438] Loss: 0.5250(0.6039) Grad: 1.4013  LR: 0.000100  Elapsed: 7m 40s (remain 4m 22s) Max mem: 2963 MB
Epoch: [1/5][279/438] Loss: 0.5459(0.6037) Grad: 1.5121  LR: 0.000100  Elapsed: 7m 42s (remain 4m 21s) Max mem: 2963 MB
Epoch: [1/5][280/438] Loss: 0.5017(0.6033) Grad: 1.4161  LR: 0.000100  Elapsed: 7m 44s (remain 4m 19s) Max mem: 2963 MB
Epoch: [1/5][281/438] Loss: 0.5350(0.6031) Grad: 1.5171  LR: 0.000100  Elapsed: 7m 45s (remain 4m 17s) Max mem: 2963 MB
Epoch: [1/5][282/438] Loss: 0.5434(0.6029) Grad: 2.0955  LR: 0.000100  Elapsed: 7m 47s (remain 4m 16s) Max mem: 2963 MB
Epoch: [1/5][283/438] Loss: 0.5283(0.6026) Grad: 1.8791  LR: 0.000100  Elapsed: 7m 49s (remain 4m 14s) Max mem: 2963 MB
Epoch: [1/5][284/438] Loss: 0.5180(0.602

Epoch: [1/5][345/438] Loss: 0.5112(0.5868) Grad: 0.9707  LR: 0.000100  Elapsed: 9m 29s (remain 2m 31s) Max mem: 2963 MB
Epoch: [1/5][346/438] Loss: 0.5361(0.5867) Grad: 1.2688  LR: 0.000100  Elapsed: 9m 31s (remain 2m 29s) Max mem: 2963 MB
Epoch: [1/5][347/438] Loss: 0.5172(0.5865) Grad: 1.3571  LR: 0.000100  Elapsed: 9m 33s (remain 2m 28s) Max mem: 2963 MB
Epoch: [1/5][348/438] Loss: 0.5111(0.5863) Grad: 1.0691  LR: 0.000100  Elapsed: 9m 34s (remain 2m 26s) Max mem: 2963 MB
Epoch: [1/5][349/438] Loss: 0.5203(0.5861) Grad: 0.7779  LR: 0.000100  Elapsed: 9m 36s (remain 2m 24s) Max mem: 2963 MB
Epoch: [1/5][350/438] Loss: 0.5122(0.5859) Grad: 1.8094  LR: 0.000100  Elapsed: 9m 37s (remain 2m 23s) Max mem: 2963 MB
Epoch: [1/5][351/438] Loss: 0.4982(0.5856) Grad: 1.1894  LR: 0.000100  Elapsed: 9m 39s (remain 2m 21s) Max mem: 2963 MB
Epoch: [1/5][352/438] Loss: 0.5167(0.5854) Grad: 1.3222  LR: 0.000100  Elapsed: 9m 41s (remain 2m 19s) Max mem: 2963 MB
Epoch: [1/5][353/438] Loss: 0.4946(0.585

Epoch: [1/5][414/438] Loss: 0.5204(0.5736) Grad: 2.4769  LR: 0.000100  Elapsed: 11m 28s (remain 0m 38s) Max mem: 2963 MB
Epoch: [1/5][415/438] Loss: 0.4764(0.5734) Grad: 1.6635  LR: 0.000100  Elapsed: 11m 30s (remain 0m 36s) Max mem: 2963 MB
Epoch: [1/5][416/438] Loss: 0.5151(0.5732) Grad: 0.9661  LR: 0.000100  Elapsed: 11m 31s (remain 0m 34s) Max mem: 2963 MB
Epoch: [1/5][417/438] Loss: 0.4896(0.5730) Grad: 1.4641  LR: 0.000100  Elapsed: 11m 33s (remain 0m 33s) Max mem: 2963 MB
Epoch: [1/5][418/438] Loss: 0.4861(0.5728) Grad: 1.6149  LR: 0.000100  Elapsed: 11m 36s (remain 0m 31s) Max mem: 2963 MB
Epoch: [1/5][419/438] Loss: 0.5101(0.5727) Grad: 1.3055  LR: 0.000100  Elapsed: 11m 38s (remain 0m 29s) Max mem: 2963 MB
Epoch: [1/5][420/438] Loss: 0.4893(0.5725) Grad: 0.8463  LR: 0.000100  Elapsed: 11m 40s (remain 0m 28s) Max mem: 2963 MB
Epoch: [1/5][421/438] Loss: 0.4965(0.5723) Grad: 1.0710  LR: 0.000100  Elapsed: 11m 42s (remain 0m 26s) Max mem: 2963 MB
Epoch: [1/5][422/438] Loss: 0.50

Epoch 1 - avg_train_loss: 0.5694  avg_val_loss: 0.8122  time: 847s
Epoch 1 - Score: 0.8282
Epoch 1 - Save Best Score: 0.8282 Model
Epoch 1 - Save Best Loss: 0.8122 Model


EVAL: [54/55] Data 0.008 (0.024) Elapsed 1m 57s (remain 0m 0s) Loss: 0.7903(0.8122) 







Epoch: [2/5][0/438] Loss: 0.4866(0.4866) Grad: 2.2281  LR: 0.000075  Elapsed: 0m 2s (remain 21m 37s) Max mem: 2963 MB
Epoch: [2/5][1/438] Loss: 0.5029(0.4947) Grad: 1.8538  LR: 0.000075  Elapsed: 0m 4s (remain 16m 54s) Max mem: 2963 MB
Epoch: [2/5][2/438] Loss: 0.4911(0.4935) Grad: 1.3002  LR: 0.000075  Elapsed: 0m 6s (remain 15m 23s) Max mem: 2963 MB
Epoch: [2/5][3/438] Loss: 0.4757(0.4891) Grad: 0.9891  LR: 0.000075  Elapsed: 0m 7s (remain 14m 26s) Max mem: 2963 MB
Epoch: [2/5][4/438] Loss: 0.5078(0.4928) Grad: 1.8400  LR: 0.000075  Elapsed: 0m 9s (remain 13m 54s) Max mem: 2963 MB
Epoch: [2/5][5/438] Loss: 0.4889(0.4922) Grad: 1.2030  LR: 0.000075  Elapsed: 0m 11s (remain 13m 36s) Max mem: 2963 MB
Epoch: [2/5][6/438] Loss: 0.4661(0.4884) Grad: 1.1298  LR: 0.000075  Elapsed: 0m 12s (remain 13m 18s) Max mem: 2963 MB
Epoch: [2/5][7/438] Loss: 0.4843(0.4879) Grad: 1.6085  LR: 0.000075  Elapsed: 0m 14s (remain 13m 6s) Max mem: 2963 MB
Epoch: [2/5][8/438] Loss: 0.4795(0.4870) Grad: 0.9980 

Epoch: [2/5][69/438] Loss: 0.4685(0.4861) Grad: 0.9765  LR: 0.000075  Elapsed: 1m 56s (remain 10m 11s) Max mem: 2963 MB
Epoch: [2/5][70/438] Loss: 0.5048(0.4864) Grad: 1.7142  LR: 0.000075  Elapsed: 1m 58s (remain 10m 10s) Max mem: 2963 MB
Epoch: [2/5][71/438] Loss: 0.5003(0.4866) Grad: 1.0999  LR: 0.000075  Elapsed: 1m 59s (remain 10m 8s) Max mem: 2963 MB
Epoch: [2/5][72/438] Loss: 0.4659(0.4863) Grad: 0.8952  LR: 0.000075  Elapsed: 2m 1s (remain 10m 7s) Max mem: 2963 MB
Epoch: [2/5][73/438] Loss: 0.4714(0.4861) Grad: 1.1937  LR: 0.000075  Elapsed: 2m 3s (remain 10m 5s) Max mem: 2963 MB
Epoch: [2/5][74/438] Loss: 0.4686(0.4859) Grad: 1.0520  LR: 0.000075  Elapsed: 2m 4s (remain 10m 4s) Max mem: 2963 MB
Epoch: [2/5][75/438] Loss: 0.4922(0.4860) Grad: 1.2024  LR: 0.000075  Elapsed: 2m 6s (remain 10m 2s) Max mem: 2963 MB
Epoch: [2/5][76/438] Loss: 0.4766(0.4858) Grad: 0.8771  LR: 0.000075  Elapsed: 2m 7s (remain 10m 0s) Max mem: 2963 MB
Epoch: [2/5][77/438] Loss: 0.4559(0.4855) Grad: 0.9

Epoch: [2/5][138/438] Loss: 0.4667(0.4839) Grad: 0.9662  LR: 0.000075  Elapsed: 3m 46s (remain 8m 6s) Max mem: 2963 MB
Epoch: [2/5][139/438] Loss: 0.4504(0.4836) Grad: 1.3744  LR: 0.000075  Elapsed: 3m 47s (remain 8m 5s) Max mem: 2963 MB
Epoch: [2/5][140/438] Loss: 0.4790(0.4836) Grad: 0.7614  LR: 0.000075  Elapsed: 3m 49s (remain 8m 3s) Max mem: 2963 MB
Epoch: [2/5][141/438] Loss: 0.5012(0.4837) Grad: 1.0462  LR: 0.000075  Elapsed: 3m 51s (remain 8m 1s) Max mem: 2963 MB
Epoch: [2/5][142/438] Loss: 0.4765(0.4837) Grad: 0.9101  LR: 0.000075  Elapsed: 3m 52s (remain 7m 59s) Max mem: 2963 MB
Epoch: [2/5][143/438] Loss: 0.4562(0.4835) Grad: 0.9894  LR: 0.000075  Elapsed: 3m 54s (remain 7m 58s) Max mem: 2963 MB
Epoch: [2/5][144/438] Loss: 0.4644(0.4834) Grad: 1.0081  LR: 0.000075  Elapsed: 3m 55s (remain 7m 56s) Max mem: 2963 MB
Epoch: [2/5][145/438] Loss: 0.4842(0.4834) Grad: 0.9617  LR: 0.000075  Elapsed: 3m 57s (remain 7m 54s) Max mem: 2963 MB
Epoch: [2/5][146/438] Loss: 0.4924(0.4834) G

Epoch: [2/5][207/438] Loss: 0.4877(0.4825) Grad: 0.8750  LR: 0.000075  Elapsed: 5m 35s (remain 6m 11s) Max mem: 2963 MB
Epoch: [2/5][208/438] Loss: 0.4624(0.4824) Grad: 1.0497  LR: 0.000075  Elapsed: 5m 37s (remain 6m 9s) Max mem: 2963 MB
Epoch: [2/5][209/438] Loss: 0.5066(0.4825) Grad: 0.9356  LR: 0.000075  Elapsed: 5m 38s (remain 6m 7s) Max mem: 2963 MB
Epoch: [2/5][210/438] Loss: 0.4742(0.4825) Grad: 1.1991  LR: 0.000075  Elapsed: 5m 40s (remain 6m 6s) Max mem: 2963 MB
Epoch: [2/5][211/438] Loss: 0.4658(0.4824) Grad: 1.0650  LR: 0.000075  Elapsed: 5m 42s (remain 6m 4s) Max mem: 2963 MB
Epoch: [2/5][212/438] Loss: 0.4599(0.4823) Grad: 1.0960  LR: 0.000075  Elapsed: 5m 43s (remain 6m 2s) Max mem: 2963 MB
Epoch: [2/5][213/438] Loss: 0.4932(0.4823) Grad: 1.2118  LR: 0.000075  Elapsed: 5m 45s (remain 6m 1s) Max mem: 2963 MB
Epoch: [2/5][214/438] Loss: 0.4746(0.4823) Grad: 0.9708  LR: 0.000075  Elapsed: 5m 46s (remain 5m 59s) Max mem: 2963 MB
Epoch: [2/5][215/438] Loss: 0.4734(0.4822) Gra

Epoch: [2/5][276/438] Loss: 0.4901(0.4802) Grad: 1.3500  LR: 0.000075  Elapsed: 7m 29s (remain 4m 21s) Max mem: 2963 MB
Epoch: [2/5][277/438] Loss: 0.4735(0.4802) Grad: 0.9753  LR: 0.000075  Elapsed: 7m 31s (remain 4m 19s) Max mem: 2963 MB
Epoch: [2/5][278/438] Loss: 0.4446(0.4801) Grad: 0.9976  LR: 0.000075  Elapsed: 7m 32s (remain 4m 17s) Max mem: 2963 MB
Epoch: [2/5][279/438] Loss: 0.4504(0.4799) Grad: 0.9710  LR: 0.000075  Elapsed: 7m 34s (remain 4m 16s) Max mem: 2963 MB
Epoch: [2/5][280/438] Loss: 0.4702(0.4799) Grad: 0.9488  LR: 0.000075  Elapsed: 7m 36s (remain 4m 14s) Max mem: 2963 MB
Epoch: [2/5][281/438] Loss: 0.4844(0.4799) Grad: 1.1785  LR: 0.000075  Elapsed: 7m 38s (remain 4m 13s) Max mem: 2963 MB
Epoch: [2/5][282/438] Loss: 0.4745(0.4799) Grad: 0.8829  LR: 0.000075  Elapsed: 7m 40s (remain 4m 12s) Max mem: 2963 MB
Epoch: [2/5][283/438] Loss: 0.4791(0.4799) Grad: 1.0394  LR: 0.000075  Elapsed: 7m 42s (remain 4m 10s) Max mem: 2963 MB
Epoch: [2/5][284/438] Loss: 0.4603(0.479

Epoch: [2/5][345/438] Loss: 0.4759(0.4779) Grad: 1.1014  LR: 0.000075  Elapsed: 9m 29s (remain 2m 31s) Max mem: 2963 MB
Epoch: [2/5][346/438] Loss: 0.4777(0.4779) Grad: 0.7736  LR: 0.000075  Elapsed: 9m 30s (remain 2m 29s) Max mem: 2963 MB
Epoch: [2/5][347/438] Loss: 0.4581(0.4779) Grad: 0.7840  LR: 0.000075  Elapsed: 9m 32s (remain 2m 28s) Max mem: 2963 MB
Epoch: [2/5][348/438] Loss: 0.4620(0.4778) Grad: 0.8250  LR: 0.000075  Elapsed: 9m 34s (remain 2m 26s) Max mem: 2963 MB
Epoch: [2/5][349/438] Loss: 0.4554(0.4778) Grad: 1.0818  LR: 0.000075  Elapsed: 9m 36s (remain 2m 24s) Max mem: 2963 MB
Epoch: [2/5][350/438] Loss: 0.4565(0.4777) Grad: 0.9184  LR: 0.000075  Elapsed: 9m 37s (remain 2m 23s) Max mem: 2963 MB
Epoch: [2/5][351/438] Loss: 0.4625(0.4777) Grad: 0.8866  LR: 0.000075  Elapsed: 9m 39s (remain 2m 21s) Max mem: 2963 MB
Epoch: [2/5][352/438] Loss: 0.4755(0.4777) Grad: 0.9585  LR: 0.000075  Elapsed: 9m 41s (remain 2m 19s) Max mem: 2963 MB
Epoch: [2/5][353/438] Loss: 0.4553(0.477

Epoch: [2/5][414/438] Loss: 0.4706(0.4760) Grad: 1.2442  LR: 0.000075  Elapsed: 11m 30s (remain 0m 38s) Max mem: 2963 MB
Epoch: [2/5][415/438] Loss: 0.4412(0.4759) Grad: 0.9109  LR: 0.000075  Elapsed: 11m 32s (remain 0m 36s) Max mem: 2963 MB
Epoch: [2/5][416/438] Loss: 0.4686(0.4759) Grad: 1.1455  LR: 0.000075  Elapsed: 11m 34s (remain 0m 34s) Max mem: 2963 MB
Epoch: [2/5][417/438] Loss: 0.4708(0.4759) Grad: 1.5582  LR: 0.000075  Elapsed: 11m 36s (remain 0m 33s) Max mem: 2963 MB
Epoch: [2/5][418/438] Loss: 0.4749(0.4759) Grad: 1.5002  LR: 0.000075  Elapsed: 11m 38s (remain 0m 31s) Max mem: 2963 MB
Epoch: [2/5][419/438] Loss: 0.4509(0.4759) Grad: 0.9316  LR: 0.000075  Elapsed: 11m 41s (remain 0m 30s) Max mem: 2963 MB
Epoch: [2/5][420/438] Loss: 0.4661(0.4758) Grad: 0.8930  LR: 0.000075  Elapsed: 11m 42s (remain 0m 28s) Max mem: 2963 MB
Epoch: [2/5][421/438] Loss: 0.4550(0.4758) Grad: 0.9800  LR: 0.000075  Elapsed: 11m 44s (remain 0m 26s) Max mem: 2963 MB
Epoch: [2/5][422/438] Loss: 0.45

Epoch 2 - avg_train_loss: 0.4754  avg_val_loss: 0.4664  time: 880s
Epoch 2 - Score: 0.8475
Epoch 2 - Save Best Score: 0.8475 Model
Epoch 2 - Save Best Loss: 0.4664 Model


EVAL: [54/55] Data 0.010 (0.038) Elapsed 2m 24s (remain 0m 0s) Loss: 0.4487(0.4664) 







Epoch: [3/5][0/438] Loss: 0.4582(0.4582) Grad: 0.8251  LR: 0.000025  Elapsed: 0m 3s (remain 27m 38s) Max mem: 2963 MB
Epoch: [3/5][1/438] Loss: 0.4700(0.4641) Grad: 1.0306  LR: 0.000025  Elapsed: 0m 5s (remain 20m 28s) Max mem: 2963 MB
Epoch: [3/5][2/438] Loss: 0.4876(0.4719) Grad: 1.1827  LR: 0.000025  Elapsed: 0m 7s (remain 17m 56s) Max mem: 2963 MB
Epoch: [3/5][3/438] Loss: 0.4497(0.4663) Grad: 0.9069  LR: 0.000025  Elapsed: 0m 9s (remain 16m 38s) Max mem: 2963 MB
Epoch: [3/5][4/438] Loss: 0.4488(0.4628) Grad: 0.9486  LR: 0.000025  Elapsed: 0m 10s (remain 15m 51s) Max mem: 2963 MB
Epoch: [3/5][5/438] Loss: 0.4603(0.4624) Grad: 0.9317  LR: 0.000025  Elapsed: 0m 12s (remain 15m 16s) Max mem: 2963 MB
Epoch: [3/5][6/438] Loss: 0.4530(0.4611) Grad: 0.9730  LR: 0.000025  Elapsed: 0m 14s (remain 14m 53s) Max mem: 2963 MB
Epoch: [3/5][7/438] Loss: 0.4529(0.4600) Grad: 0.7562  LR: 0.000025  Elapsed: 0m 16s (remain 14m 34s) Max mem: 2963 MB
Epoch: [3/5][8/438] Loss: 0.4516(0.4591) Grad: 0.923

## Inference

In [15]:
states = []
for fold  in CFG.trn_fold:
    states.append(torch.load(os.path.join(SAVEDIR, f'{CFG.model_name}_fold{fold}_best_score.pth')))

In [16]:
gcs_paths = []
all_files = []
path = "./data1/test/"
n_trial = 0
#print(j)
for dirname, _, filenames in os.walk(path):
    for filename in filenames:
        #print(filename)
        #print(os.path.join(dirname, filename))
        gcs_path = os.path.join(dirname, filename)
        gcs_paths.append(gcs_path)
        print(gcs_path)
        all_files.extend(np.sort(np.array(tf.io.gfile.glob(gcs_path))))
    
print("test_files: ", len(all_files))
all_files = np.array(all_files)

gs://kds-3af0f2c792ef9e6f5d3c87bb80c99a263fc9707106cd595aaf02e3eb
gs://kds-9eb3135b732342c0aec8339381fec9fd19f8a7ad94ca9c31ad51fe2b
test_files:  10


In [17]:
model= CNN1d()
model.to(device)

wave_ids = []
probs_all = []

for fold, state in enumerate(states):
    tqdm.write(f"\n\nFold{fold}")
    
    model.load_state_dict(state['model'])
    model.eval()
    probs = []

    test_loader = TFRecordDataLoader(all_files, batch_size=CFG.val_batch_size, 
                                     shuffle=False, labeled=False)

    for i, d in tqdm(enumerate(test_loader), total=len(test_loader)):
        x = bandpass(d[0], **CFG.bandpass_params)
        x = torch.from_numpy(x).to(device)

        with torch.no_grad():
            y_preds = model(x)
        preds = y_preds.sigmoid().to('cpu').numpy()
        probs.append(preds)

        if fold==0: # same test loader, no need to do this the second time
            wave_ids.append(d[1].astype('U13'))

    probs = np.concatenate(probs)
    probs_all.append(probs)

probs_avg = np.asarray(probs_all).mean(axis=0).flatten()
wave_ids = np.concatenate(wave_ids)

  0%|          | 0/2250 [00:00<?, ?it/s]



Fold0


2260it [06:43,  5.60it/s]
  0%|          | 0/2250 [00:00<?, ?it/s]



Fold1


2260it [06:40,  5.64it/s]
  0%|          | 0/2250 [00:00<?, ?it/s]



Fold2


2260it [06:28,  5.82it/s]
  0%|          | 0/2250 [00:00<?, ?it/s]



Fold3


2260it [06:31,  5.77it/s]


In [18]:
test_df = pd.DataFrame({'id': wave_ids, 'target': probs_avg})
# Save test dataframe to disk
folds = '_'.join([str(s) for s in CFG.trn_fold])
test_df.to_csv(f'{CFG.model_name}_folds_{folds}.csv', index = False)