
- A 1D CNN starter with band-pass filtering. The filter sizes are hard-coded from [https://www.kaggle.com/kit716/grav-wave-detection](https://www.kaggle.com/kit716/grav-wave-detection), which uses the simple architecture from https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.120.141103 
- Adds inference to @hidehisaarai1213's PyTorch starter. The iteration order is changed from Y.Nakama's pipeline: from "iter on loader first then load model" to "load model first then iter the loader".


## Libraries

In [1]:
import os
import time
import math
import random
from pathlib import Path

import numpy as np
import pandas as pd
import scipy as sp
from scipy import signal
import tensorflow as tf  # for reading TFRecord Dataset
import tensorflow_datasets as tfds  # for making tf.data.Dataset to return numpy arrays
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from kaggle_datasets import KaggleDatasets
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold
from tqdm import tqdm

ModuleNotFoundError: No module named 'tensorflow'

In [2]:
# Directory that receives the training log, model checkpoints and the OOF CSV.
SAVEDIR = Path("./")
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## CFG

In [3]:
class CFG:
    """Hyper-parameters and switches shared by every cell of the notebook.

    The scheduler-specific attributes (``factor``/``patience``/``eps`` for
    ReduceLROnPlateau, ``T_0`` for CosineAnnealingWarmRestarts) are defined
    here so that switching ``scheduler`` does not fail with AttributeError.
    """
    debug = False
    print_freq = 1000  # emit a progress line every N batches
    num_workers = 4
    # One of "ReduceLROnPlateau", "CosineAnnealingLR", "CosineAnnealingWarmRestarts".
    scheduler = "CosineAnnealingLR"
    model_name = "1dcnn"  # prefix of the checkpoint file names
    epochs = 5
    # CosineAnnealingLR: epochs needed to anneal from `lr` down to `min_lr`.
    T_max = 3
    # CosineAnnealingWarmRestarts: epochs before the first restart.
    T_0 = 3
    # ReduceLROnPlateau: LR multiplier, epochs to wait, and minimal LR change.
    factor = 0.2
    patience = 4
    eps = 1e-6
    lr = 1e-4
    min_lr = 1e-7
    batch_size = 64
    val_batch_size = 100
    weight_decay = 1e-5
    gradient_accumulation_steps = 1
    max_grad_norm = 1000
    seed = 42
    target_size = 1
    target_col = "target"
    n_fold = 5
    trn_fold = [0, 1, 2, 3]  # [0, 1, 2, 3, 4]
    train = True
    # Keyword arguments forwarded to the band-pass filter (cut-offs in Hz).
    bandpass_params = dict(lf=20, 
                           hf=500)

## Utils

In [4]:
# ====================================================
# Utils
# ====================================================
def get_score(y_true, y_pred):
    """Return the ROC AUC of ``y_pred`` scored against the labels ``y_true``."""
    return roc_auc_score(y_true, y_pred)


def init_logger(log_file=SAVEDIR / 'train.log'):
    """Create the notebook logger that writes bare messages to console and file.

    Args:
        log_file: Path of the log file; defaults to ``SAVEDIR / 'train.log'``.

    Returns:
        The ``logging.Logger`` registered under ``__name__`` at INFO level,
        carrying exactly one stream handler and one file handler.
    """
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    # getLogger() hands back the same object on every call, so re-running this
    # cell would otherwise stack handlers and print each message several times.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()
    for handler in (StreamHandler(), FileHandler(filename=log_file)):
        handler.setFormatter(Formatter("%(message)s"))
        logger.addHandler(handler)
    return logger

# Shared logger used by the training cells (console + SAVEDIR/train.log).
LOGGER = init_logger()


def seed_torch(seed=42):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: Integer seed applied to every generator and exported as
            ``PYTHONHASHSEED``.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True

# Seed all RNGs once, up front, with the configured seed.
seed_torch(seed=CFG.seed)

## TFRecord Loader

This is the heart of this notebook. Instead of using PyTorch's Dataset and DataLoader, here I define a custom loader that reads samples from TFRecords.

FYI, there's a library that does the same thing, but its implementation is not optimized, so it's slower.

https://github.com/vahidk/tfrecord

In [5]:
# Resolve the GCS bucket of each TFRecord dataset, retrying failed lookups.
gcs_paths = []
for i, j in [(0, 4), (5, 9), (10, 14), (15, 19)]:
    path = f"g2net-waveform-tfrecords-train-{i}-{j}"
    n_trial = 0
    while True:
        try:
            gcs_path = KaggleDatasets().get_gcs_path(path)
            gcs_paths.append(gcs_path)
            print(gcs_path)
            break
        except Exception as err:  # not a bare except: Ctrl-C must still interrupt
            if n_trial > 10:
                # Giving up on this dataset shrinks the training set, so say so.
                print(f"WARNING: could not resolve GCS path for {path}: {err!r}")
                break
            n_trial += 1

# Collect the shard files of every resolved bucket, sorted within each bucket.
all_files = []
for path in gcs_paths:
    all_files.extend(np.sort(np.array(tf.io.gfile.glob(path + "/train*.tfrecords"))))

print("train_files: ", len(all_files))
# ndarray so that the KFold index arrays can be used for fancy indexing.
all_files = np.array(all_files)

gs://kds-fe725ba5a3259c712812aed413cfe61fc3827b135988e28e694e515c
gs://kds-ecd1c4515f350d79d5a8d2e9a07df52885278c6cce43ad116c8b03cb
gs://kds-2c0b3b314d8607851f6dddea1e976aab52547b68de5117cf3e76a85a
gs://kds-965e4184655c915b40b37bf7e5d3c8e310dd5751e8a2e1f5bdc8d70d
train_files:  20


In [6]:
def count_data_items(fileids, train=True):
    """
    Estimate the total number of samples held by a list of TFRecord files.

    Every file is assumed to hold a fixed number of samples: 28000 for the
    train split and 22500 for the test split.
    """
    samples_per_file = 22500 if not train else 28000
    return samples_per_file * len(fileids)


# Sentinel passed to tf.data for parallel reads, parallel map calls and prefetch.
AUTO = tf.data.experimental.AUTOTUNE

## Bandpass

Modified from various notebooks and https://www.kaggle.com/c/g2net-gravitational-wave-detection/discussion/261721#1458564

In [7]:
def bandpass(x, lf=20, hf=500, order=8, sr=2048):
    '''
    Apply a Butterworth band-pass filter along the last axis of ``x``, in place.

    Cell 33 of https://www.gw-openscience.org/LVT151012data/LOSC_Event_tutorial_LVT151012.html
    https://scipy-cookbook.readthedocs.io/items/ButterworthBandpass.html

    Args:
        x: NumPy array whose last axis is time, e.g. ``(channels, n)`` or
            ``(batch, channels, n)``. Any number of leading axes is accepted
            and every 1-D slice along the last axis is filtered independently.
        lf: Lower cut-off frequency in Hz.
        hf: Upper cut-off frequency in Hz.
        order: Order of the Butterworth design.
        sr: Sampling rate in Hz.

    Returns:
        ``x`` itself, overwritten with the filtered signal scaled by
        ``sqrt((hf - lf) / (sr / 2))``. The dtype of ``x`` is preserved.
    '''
    sos = signal.butter(order, [lf, hf], btype="bandpass", output="sos", fs=sr)
    normalization = np.sqrt((hf - lf) / (sr / 2))
    # One vectorised call replaces the per-sample / per-channel Python loops and
    # removes the hard-coded channel count. Writing through x[...] keeps the
    # in-place contract and casts the float64 filter output back to x.dtype.
    x[...] = signal.sosfilt(sos, x, axis=-1) * normalization
    return x

In [8]:
def prepare_wave(wave):
    """Decode a serialized waveform into a scaled float32 tensor of shape (3, 4096).

    The raw bytes are read as float64, reshaped to three rows of 4096 values,
    and each row is divided by a fixed constant before the cast to float32.
    """
    decoded = tf.reshape(tf.io.decode_raw(wave, tf.float64), (3, 4096))
    # Fixed per-row divisors are used instead of dividing by each row's maximum.
    row_scale = tf.constant([1.5e-20, 1.5e-20, 0.5e-20], dtype=tf.float64)
    scaled_rows = [decoded[row] / row_scale[row] for row in range(3)]
    return tf.cast(tf.stack(scaled_rows, axis=0), tf.float32)


def read_labeled_tfrecord(example):
    """Parse one serialized labeled example into ``(wave, target, wave_id)``.

    ``wave`` is the output of ``prepare_wave``, ``target`` is a float32 tensor
    of shape ``[1]`` and ``wave_id`` is the raw string feature.
    """
    feature_spec = {
        "wave": tf.io.FixedLenFeature([], tf.string),
        "wave_id": tf.io.FixedLenFeature([], tf.string),
        "target": tf.io.FixedLenFeature([], tf.int64)
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    wave = prepare_wave(parsed["wave"])
    target = tf.reshape(tf.cast(parsed["target"], tf.float32), [1])
    return wave, target, parsed["wave_id"]


def read_unlabeled_tfrecord(example, return_image_id):
    """Parse one serialized unlabeled example into ``(wave, wave_id_or_0)``.

    The second element is the ``wave_id`` string feature when
    ``return_image_id`` is truthy, and the constant ``0`` otherwise.
    """
    feature_spec = {
        "wave": tf.io.FixedLenFeature([], tf.string),
        "wave_id": tf.io.FixedLenFeature([], tf.string)
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    wave = prepare_wave(parsed["wave"])
    if return_image_id:
        return wave, parsed["wave_id"]
    return wave, 0


def get_dataset(files, batch_size=16, repeat=False, cache=False, 
                shuffle=False, labeled=True, return_image_ids=True):
    """Build a batched tf.data pipeline over GZIP TFRecords that yields NumPy.

    Stages are applied in this order: optional cache, optional repeat,
    optional shuffle of the serialized records (buffer of 2048, with
    deterministic ordering disabled), parsing, batching and prefetching.

    Args:
        files: TFRecord file paths.
        batch_size: Number of examples per batch.
        repeat: Repeat the dataset indefinitely.
        cache: Cache the serialized records in memory. Around 15GB RAM is
            needed for a validation set and 50~60GB for a training set.
        shuffle: Shuffle records before parsing.
        labeled: Parse with ``read_labeled_tfrecord`` when truthy, otherwise
            with ``read_unlabeled_tfrecord``.
        return_image_ids: Forwarded to ``read_unlabeled_tfrecord``.

    Returns:
        The iterable produced by ``tfds.as_numpy`` for the final dataset.
    """
    dataset = tf.data.TFRecordDataset(files, num_parallel_reads=AUTO, compression_type="GZIP")

    if cache:
        dataset = dataset.cache()
    if repeat:
        dataset = dataset.repeat()
    if shuffle:
        options = tf.data.Options()
        options.experimental_deterministic = False
        dataset = dataset.shuffle(1024 * 2).with_options(options)

    def _parse_unlabeled(example):
        return read_unlabeled_tfrecord(example, return_image_ids)

    parse_fn = read_labeled_tfrecord if labeled else _parse_unlabeled
    dataset = dataset.map(parse_fn, num_parallel_calls=AUTO)

    return tfds.as_numpy(dataset.batch(batch_size).prefetch(AUTO))

In [9]:
class TFRecordDataLoader:
    """Minimal DataLoader-like wrapper around the TFRecord NumPy pipeline.

    Iterating the loader always starts a fresh pass over the underlying
    dataset; ``len()`` reports the batch count derived from the nominal
    per-file sample count.
    """

    def __init__(self, files, batch_size=32, cache=False, train=True, 
                              repeat=False, shuffle=False, labeled=True, 
                              return_image_ids=True):
        pipeline_kwargs = dict(
            batch_size=batch_size,
            cache=cache,
            repeat=repeat,
            shuffle=shuffle,
            labeled=labeled,
            return_image_ids=return_image_ids,
        )
        self.ds = get_dataset(files, **pipeline_kwargs)

        # `labeled` selects the per-file sample count (train vs. test sizes).
        self.num_examples = count_data_items(files, labeled)

        self.batch_size = batch_size
        self.labeled = labeled
        self.return_image_ids = return_image_ids
        self._iterator = None

    def __iter__(self):
        # Both the first and every later call restart from the beginning.
        self._reset()
        return self._iterator

    def _reset(self):
        """Start a new pass over the dataset."""
        self._iterator = iter(self.ds)

    def __next__(self):
        return next(self._iterator)

    def __len__(self):
        # Ceiling division: a trailing partial batch counts as one batch.
        full_batches, leftover = divmod(self.num_examples, self.batch_size)
        return full_batches + 1 if leftover else full_batches

## MODEL

In [10]:
class CNN1d(nn.Module):
    """1D convolutional classifier for gravitational-wave signals.

    Architecture from https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.120.141103

    Six convolution stages (``cnn1`` .. ``cnn6``) are followed by three fully
    connected stages (``fc1`` .. ``fc3``); the output is one logit per sample.
    ``fc1`` expects 256 * 11 flattened features, which is what the convolution
    stack produces for inputs with 4096 time steps.
    """

    def __init__(self, debug=False):
        super().__init__()

        def conv_stage(in_ch, out_ch, kernel, pool=None):
            # Conv -> (optional pooling) -> BatchNorm -> ELU
            layers = [nn.Conv1d(in_ch, out_ch, kernel_size=kernel)]
            if pool is not None:
                layers.append(pool)
            layers.extend([nn.BatchNorm1d(out_ch), nn.ELU()])
            return nn.Sequential(*layers)

        def dense_stage(in_features, out_features):
            # Linear -> BatchNorm -> Dropout -> ELU
            return nn.Sequential(
                nn.Linear(in_features, out_features),
                nn.BatchNorm1d(out_features),
                nn.Dropout(0.4),
                nn.ELU(),
            )

        self.cnn1 = conv_stage(3, 64, 64)
        self.cnn2 = conv_stage(64, 64, 32, pool=nn.AvgPool1d(kernel_size=8))
        self.cnn3 = conv_stage(64, 128, 32)
        self.cnn4 = conv_stage(128, 128, 16, pool=nn.AvgPool1d(kernel_size=6))
        self.cnn5 = conv_stage(128, 256, 16)
        self.cnn6 = conv_stage(256, 256, 16, pool=nn.MaxPool1d(kernel_size=4))
        self.fc1 = dense_stage(256 * 11, 64)
        self.fc2 = dense_stage(64, 64)
        self.fc3 = nn.Sequential(
            nn.Linear(64, 1),
        )
        self.debug = debug

    def forward(self, x, pos=None):
        """Map a ``(batch, 3, time)`` tensor to ``(batch, 1)`` logits; ``pos`` is unused."""
        for conv in (self.cnn1, self.cnn2, self.cnn3, self.cnn4, self.cnn5, self.cnn6):
            x = conv(x)
        x = x.flatten(start_dim=1)
        for dense in (self.fc1, self.fc2, self.fc3):
            x = dense(x)
        return x


## Helper functions

In [11]:
class AverageMeter(object):
    """Tracks the latest value and the weighted running mean of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the latest value, the running sum, the count and the mean."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


def asMinutes(s):
    """Format a duration in seconds as ``'<minutes>m <seconds>s'``."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Describe elapsed time since ``since`` and the projected time remaining.

    Args:
        since: Start timestamp as returned by ``time.time()``.
        percent: Fraction of the work completed so far; must be non-zero.

    Returns:
        A string of the form ``'<elapsed> (remain <remaining>)'``.
    """
    elapsed = time.time() - since
    projected_total = elapsed / (percent)
    remaining = projected_total - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))


def max_memory_allocated():
    """Return the peak CUDA memory reported by PyTorch, formatted as ``'<n> MB'``."""
    bytes_per_mb = 1024.0 * 1024.0
    peak_mb = torch.cuda.max_memory_allocated() / bytes_per_mb
    return "{:.0f} MB".format(peak_mb)

## Trainer

In [12]:
def train_fn(files, model, criterion, optimizer, epoch, scheduler, device):
    """Train ``model`` for one epoch over the given TFRecord files.

    Args:
        files: TFRecord paths to train on.
        model: Network to optimise; switched to train mode here.
        criterion: Loss taking flattened logits and flattened labels.
        optimizer: Optimizer stepped every ``CFG.gradient_accumulation_steps``
            batches.
        epoch: Zero-based epoch index, used only in the progress line.
        scheduler: LR scheduler, read only to report the current LR.
        device: Device that batches are moved to.

    Returns:
        The sample-weighted mean training loss of the epoch.
    """
    losses = AverageMeter()

    # switch to train mode
    model.train()
    start = time.time()
    # Reported before the first optimizer step when accumulating over >1 batch.
    grad_norm = float("nan")
    # Drop gradients left over from an incomplete accumulation window.
    optimizer.zero_grad()

    train_loader = TFRecordDataLoader(
        files, batch_size=CFG.batch_size, 
        shuffle=True)
    for step, d in enumerate(train_loader):
        x = bandpass(d[0], **CFG.bandpass_params)
        x = torch.from_numpy(x).to(device)
        labels = torch.from_numpy(d[1]).to(device)

        batch_size = labels.size(0)
        y_preds = model(x)
        loss = criterion(y_preds.view(-1), labels.view(-1))
        # record the unscaled loss
        losses.update(loss.item(), batch_size)
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        loss.backward()
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            # Clip once per optimizer step, on the fully accumulated gradient;
            # clipping every micro-batch would rescale partial sums.
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
            optimizer.step()
            optimizer.zero_grad()
        if step % CFG.print_freq == 0:
            print('Epoch: [{0}/{1}][{2}/{3}] '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.6f}  '
                  'Elapsed: {remain:s} '
                  'Max mem: {mem:s}'
                  .format(
                   epoch+1, CFG.epochs, step, len(train_loader),
                   loss=losses,
                   grad_norm=grad_norm,
                   lr=scheduler.get_last_lr()[0],
                   remain=timeSince(start, float(step + 1) / len(train_loader)),
                   mem=max_memory_allocated()))
    return losses.avg


def valid_fn(files, model, criterion, device):
    """Evaluate ``model`` on the given TFRecord files without gradient tracking.

    Args:
        files: TFRecord paths to evaluate on.
        model: Network to evaluate; switched to eval mode here.
        criterion: Loss taking flattened logits and flattened labels.
        device: Device that batches are moved to.

    Returns:
        Tuple ``(mean_loss, predictions, targets, filenames)`` where
        ``predictions`` are sigmoid probabilities flattened to 1-D and the
        other two arrays are in the same order as the predictions.
    """
    data_time = AverageMeter()
    losses = AverageMeter()

    # switch to evaluation mode
    model.eval()
    wave_ids, targets, batch_preds = [], [], []
    start = end = time.time()
    valid_loader = TFRecordDataLoader(
        files, batch_size=CFG.batch_size * 2, shuffle=False)
    for step, d in enumerate(valid_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        targets.extend(d[1].reshape(-1).tolist())
        wave_ids.extend([raw_id.decode("UTF-8") for raw_id in d[2]])
        waves = torch.from_numpy(bandpass(d[0], **CFG.bandpass_params)).to(device)
        labels = torch.from_numpy(d[1]).to(device)

        with torch.no_grad():
            y_preds = model(waves)
            loss = criterion(y_preds.view(-1), labels.view(-1))
        losses.update(loss.item(), labels.size(0))
        batch_preds.append(y_preds.sigmoid().to('cpu').numpy())

        end = time.time()
        if step % CFG.print_freq == 0:
            print('EVAL: [{0}/{1}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(
                   step, len(valid_loader),
                   data_time=data_time, loss=losses,
                   remain=timeSince(start, float(step+1)/len(valid_loader)),
                   ))
    predictions = np.concatenate(batch_preds).reshape(-1)
    return losses.avg, predictions, np.array(targets), np.array(wave_ids)

## Train loop

In [13]:
# ====================================================
# Train loop
# ====================================================
def train_loop(train_tfrecords: np.ndarray, val_tfrecords: np.ndarray, fold: int):
    """Train one fold and return its validation (out-of-fold) predictions.

    A fresh ``CNN1d`` is trained for ``CFG.epochs`` epochs. After each epoch
    the model is validated, and a checkpoint is written whenever the AUC or
    the validation loss improves.

    Args:
        train_tfrecords: TFRecord paths used for training.
        val_tfrecords: TFRecord paths used for validation.
        fold: Fold index, used in log lines and checkpoint file names.

    Returns:
        DataFrame with columns ``target``, ``preds`` and ``id``. ``preds``
        holds the predictions of the epoch with the lowest validation loss.

    Raises:
        ValueError: If ``CFG.scheduler`` is not a supported name, or if
            ``CFG.epochs`` is smaller than 1.
    """
    LOGGER.info(f"========== fold: {fold} training ==========")
    
    # ====================================================
    # scheduler 
    # ====================================================
    def get_scheduler(optimizer):
        if CFG.scheduler=='ReduceLROnPlateau':
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 
                                                             mode='min', 
                                                             factor=CFG.factor, 
                                                             patience=CFG.patience, 
                                                             verbose=True, 
                                                             eps=CFG.eps)
        elif CFG.scheduler=='CosineAnnealingLR':
            scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, 
                                                             T_max=CFG.T_max, 
                                                             eta_min=CFG.min_lr, 
                                                             last_epoch=-1)
        elif CFG.scheduler=='CosineAnnealingWarmRestarts':
            scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 
                                                                       T_0=CFG.T_0, 
                                                                       T_mult=1, 
                                                                       eta_min=CFG.min_lr, 
                                                                       last_epoch=-1)
        else:
            # Fail with a clear message instead of an UnboundLocalError below.
            raise ValueError(f"Unsupported CFG.scheduler: {CFG.scheduler!r}")
        return scheduler

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CNN1d()
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay)
    scheduler = get_scheduler(optimizer)

    # ====================================================
    # loop
    # ====================================================
    criterion = nn.BCEWithLogitsLoss()

    best_score = 0.
    best_loss = np.inf
    # Kept in memory so the result never depends on a checkpoint file that may
    # be missing or left over from an earlier run.
    best_loss_preds = None
    valid_result_df = None
    
    for epoch in range(CFG.epochs):
        print("\n\n")
        start_time = time.time()
        
        # train
        avg_loss = train_fn(train_tfrecords, model, criterion, optimizer, epoch, scheduler, device)

        # eval
        avg_val_loss, preds, targets, files = valid_fn(val_tfrecords, model, criterion, device)
        valid_result_df = pd.DataFrame({"target": targets, "preds": preds, "id": files})
        
        # Only ReduceLROnPlateau needs the monitored metric.
        if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
            scheduler.step(avg_val_loss)
        else:
            scheduler.step()

        # scoring
        score = get_score(targets, preds)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')

        if score > best_score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(), 
                        'preds': preds},
                        SAVEDIR / f'{CFG.model_name}_fold{fold}_best_score.pth')
        
        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            best_loss_preds = preds
            LOGGER.info(f'Epoch {epoch+1} - Save Best Loss: {best_loss:.4f} Model')
            torch.save({'model': model.state_dict(), 
                        'preds': preds},
                        SAVEDIR / f'{CFG.model_name}_fold{fold}_best_loss.pth')

    if valid_result_df is None:
        raise ValueError("CFG.epochs must be at least 1 to produce validation predictions")

    if best_loss_preds is not None:
        # Same array that was written to the best-loss checkpoint.
        valid_result_df["preds"] = best_loss_preds
    else:
        # The validation loss never compared below +inf (for example it was NaN).
        LOGGER.info("No epoch improved the validation loss; returning last-epoch predictions")

    return valid_result_df

In [14]:
def get_result(result_df):
    """Log the ROC AUC of ``result_df['preds']`` against the target column."""
    y_pred = result_df['preds'].values
    y_true = result_df[CFG.target_col].values
    LOGGER.info(f'Score: {get_score(y_true, y_pred):<.4f}')

if CFG.train:
    # Split the TFRecord files (not individual samples) into folds.
    kf = KFold(n_splits=CFG.n_fold, shuffle=True, random_state=CFG.seed)
    folds = list(kf.split(all_files))

    # Collect per-fold frames and concatenate once, rather than growing a
    # DataFrame inside the loop starting from an empty frame.
    oof_frames = []
    for fold in range(CFG.n_fold):
        if fold not in CFG.trn_fold:
            continue
        trn_idx, val_idx = folds[fold]
        train_files = all_files[trn_idx]
        valid_files = all_files[val_idx]
        _oof_df = train_loop(train_files, valid_files, fold)
        oof_frames.append(_oof_df)
        LOGGER.info(f"========== fold: {fold} result ==========")
        get_result(_oof_df)

    if oof_frames:
        oof_df = pd.concat(oof_frames)
        # CV result
        LOGGER.info("========== CV ==========")
        get_result(oof_df)
        # save result
        oof_df.to_csv(SAVEDIR / 'oof_df.csv', index=False)
    else:
        # Nothing was trained, so there is nothing to score or export.
        oof_df = pd.DataFrame()
        LOGGER.info("No fold selected in CFG.trn_fold; skipping CV score and OOF export.")






Epoch: [1/5][0/7000] Loss: 0.7253(0.7253) Grad: 15.4019  LR: 0.000100  Elapsed: 0m 4s (remain 491m 55s) Max mem: 340 MB
Epoch: [1/5][1000/7000] Loss: 0.5553(0.6204) Grad: 2.0464  LR: 0.000100  Elapsed: 2m 58s (remain 17m 52s) Max mem: 359 MB
Epoch: [1/5][2000/7000] Loss: 0.6050(0.5658) Grad: 2.2560  LR: 0.000100  Elapsed: 5m 52s (remain 14m 40s) Max mem: 359 MB
Epoch: [1/5][3000/7000] Loss: 0.4321(0.5385) Grad: 1.8809  LR: 0.000100  Elapsed: 8m 46s (remain 11m 41s) Max mem: 359 MB
Epoch: [1/5][4000/7000] Loss: 0.4675(0.5215) Grad: 1.3852  LR: 0.000100  Elapsed: 11m 39s (remain 8m 44s) Max mem: 359 MB
Epoch: [1/5][5000/7000] Loss: 0.3990(0.5107) Grad: 1.4228  LR: 0.000100  Elapsed: 14m 32s (remain 5m 48s) Max mem: 359 MB
Epoch: [1/5][6000/7000] Loss: 0.5149(0.5029) Grad: 1.3005  LR: 0.000100  Elapsed: 17m 23s (remain 2m 53s) Max mem: 359 MB
EVAL: [0/875] Data 0.681 (0.681) Elapsed 0m 0s (remain 12m 29s) Loss: 0.4207(0.4207) 


Epoch 1 - avg_train_loss: 0.4972  avg_val_loss: 0.4443  time: 1408s
Epoch 1 - Score: 0.8560
Epoch 1 - Save Best Score: 0.8560 Model
Epoch 1 - Save Best Loss: 0.4443 Model





Epoch: [2/5][0/7000] Loss: 0.4543(0.4543) Grad: 1.5554  LR: 0.000075  Elapsed: 0m 2s (remain 333m 1s) Max mem: 359 MB
Epoch: [2/5][1000/7000] Loss: 0.5241(0.4537) Grad: 1.7128  LR: 0.000075  Elapsed: 2m 57s (remain 17m 45s) Max mem: 359 MB
Epoch: [2/5][2000/7000] Loss: 0.3947(0.4513) Grad: 1.1962  LR: 0.000075  Elapsed: 5m 51s (remain 14m 37s) Max mem: 359 MB
Epoch: [2/5][3000/7000] Loss: 0.4975(0.4513) Grad: 1.4686  LR: 0.000075  Elapsed: 8m 46s (remain 11m 42s) Max mem: 359 MB
Epoch: [2/5][4000/7000] Loss: 0.5628(0.4497) Grad: 1.8454  LR: 0.000075  Elapsed: 11m 40s (remain 8m 45s) Max mem: 359 MB
Epoch: [2/5][5000/7000] Loss: 0.4274(0.4488) Grad: 1.0432  LR: 0.000075  Elapsed: 14m 34s (remain 5m 49s) Max mem: 359 MB
Epoch: [2/5][6000/7000] Loss: 0.4218(0.4480) Grad: 1.6685  LR: 0.000075  Elapsed: 17m 29s (remain 2m 54s) Max mem: 359 MB
EVAL: [0/875] Data 0.832 (0.832) Elapsed 0m 1s (remain 14m 35s) Loss: 0.4213(0.4213) 


Epoch 2 - avg_train_loss: 0.4475  avg_val_loss: 0.4611  time: 1396s
Epoch 2 - Score: 0.8616
Epoch 2 - Save Best Score: 0.8616 Model





Epoch: [3/5][0/7000] Loss: 0.4320(0.4320) Grad: 1.5394  LR: 0.000025  Elapsed: 0m 3s (remain 395m 45s) Max mem: 359 MB
Epoch: [3/5][1000/7000] Loss: 0.4152(0.4360) Grad: 0.9497  LR: 0.000025  Elapsed: 2m 56s (remain 17m 35s) Max mem: 359 MB
Epoch: [3/5][2000/7000] Loss: 0.3587(0.4344) Grad: 0.9767  LR: 0.000025  Elapsed: 5m 48s (remain 14m 30s) Max mem: 359 MB
Epoch: [3/5][3000/7000] Loss: 0.3748(0.4347) Grad: 1.1711  LR: 0.000025  Elapsed: 8m 40s (remain 11m 33s) Max mem: 359 MB
Epoch: [3/5][4000/7000] Loss: 0.4182(0.4336) Grad: 1.2432  LR: 0.000025  Elapsed: 11m 32s (remain 8m 39s) Max mem: 359 MB
Epoch: [3/5][5000/7000] Loss: 0.4420(0.4330) Grad: 1.2301  LR: 0.000025  Elapsed: 14m 23s (remain 5m 45s) Max mem: 359 MB
Epoch: [3/5][6000/7000] Loss: 0.4562(0.4324) Grad: 1.2925  LR: 0.000025  Elapsed: 17m 15s (remain 2m 52s) Max mem: 359 MB
EVAL: [0/875] Data 0.758 (0.758) Elapsed 0m 0s (remain 14m 9s) Loss: 0.4078(0.4078) 


Epoch 3 - avg_train_loss: 0.4320  avg_val_loss: 0.4275  time: 1423s
Epoch 3 - Score: 0.8664
Epoch 3 - Save Best Score: 0.8664 Model
Epoch 3 - Save Best Loss: 0.4275 Model





Epoch: [4/5][0/7000] Loss: 0.4236(0.4236) Grad: 1.9376  LR: 0.000000  Elapsed: 0m 3s (remain 413m 53s) Max mem: 359 MB
Epoch: [4/5][1000/7000] Loss: 0.4060(0.4293) Grad: 1.2841  LR: 0.000000  Elapsed: 2m 57s (remain 17m 46s) Max mem: 359 MB
Epoch: [4/5][2000/7000] Loss: 0.3737(0.4281) Grad: 1.7857  LR: 0.000000  Elapsed: 5m 51s (remain 14m 38s) Max mem: 359 MB
Epoch: [4/5][3000/7000] Loss: 0.4562(0.4286) Grad: 2.4528  LR: 0.000000  Elapsed: 8m 44s (remain 11m 38s) Max mem: 359 MB
Epoch: [4/5][4000/7000] Loss: 0.3833(0.4272) Grad: 1.6523  LR: 0.000000  Elapsed: 11m 38s (remain 8m 43s) Max mem: 359 MB
Epoch: [4/5][5000/7000] Loss: 0.5401(0.4268) Grad: 1.4155  LR: 0.000000  Elapsed: 14m 32s (remain 5m 48s) Max mem: 359 MB
Epoch: [4/5][6000/7000] Loss: 0.4837(0.4259) Grad: 1.5287  LR: 0.000000  Elapsed: 17m 25s (remain 2m 54s) Max mem: 359 MB
EVAL: [0/875] Data 0.596 (0.596) Elapsed 0m 0s (remain 14m 4s) Loss: 0.4070(0.4070) 


Epoch 4 - avg_train_loss: 0.4252  avg_val_loss: 0.4323  time: 1384s
Epoch 4 - Score: 0.8669
Epoch 4 - Save Best Score: 0.8669 Model





Epoch: [5/5][0/7000] Loss: 0.3375(0.3375) Grad: 1.2674  LR: 0.000025  Elapsed: 0m 2s (remain 338m 20s) Max mem: 359 MB
Epoch: [5/5][1000/7000] Loss: 0.5645(0.4293) Grad: 1.9623  LR: 0.000025  Elapsed: 2m 57s (remain 17m 43s) Max mem: 359 MB
Epoch: [5/5][2000/7000] Loss: 0.4230(0.4278) Grad: 1.3457  LR: 0.000025  Elapsed: 5m 50s (remain 14m 34s) Max mem: 359 MB
Epoch: [5/5][3000/7000] Loss: 0.4553(0.4282) Grad: 1.3204  LR: 0.000025  Elapsed: 8m 43s (remain 11m 37s) Max mem: 359 MB
Epoch: [5/5][4000/7000] Loss: 0.4547(0.4275) Grad: 1.6034  LR: 0.000025  Elapsed: 11m 37s (remain 8m 43s) Max mem: 359 MB
Epoch: [5/5][5000/7000] Loss: 0.4216(0.4273) Grad: 1.5153  LR: 0.000025  Elapsed: 14m 29s (remain 5m 47s) Max mem: 359 MB
Epoch: [5/5][6000/7000] Loss: 0.4590(0.4268) Grad: 1.8610  LR: 0.000025  Elapsed: 17m 22s (remain 2m 53s) Max mem: 359 MB
EVAL: [0/875] Data 0.700 (0.700) Elapsed 0m 0s (remain 13m 40s) Loss: 0.4023(0.4023) 


Epoch 5 - avg_train_loss: 0.4264  avg_val_loss: 0.4377  time: 1397s
Epoch 5 - Score: 0.8668
Score: 0.8664





Epoch: [1/5][0/7000] Loss: 0.7431(0.7431) Grad: 13.4667  LR: 0.000100  Elapsed: 0m 3s (remain 351m 33s) Max mem: 359 MB
Epoch: [1/5][1000/7000] Loss: 0.5670(0.6283) Grad: 2.3826  LR: 0.000100  Elapsed: 2m 55s (remain 17m 34s) Max mem: 360 MB
Epoch: [1/5][2000/7000] Loss: 0.5016(0.5730) Grad: 1.5860  LR: 0.000100  Elapsed: 5m 49s (remain 14m 33s) Max mem: 360 MB
Epoch: [1/5][3000/7000] Loss: 0.3739(0.5441) Grad: 1.2224  LR: 0.000100  Elapsed: 8m 41s (remain 11m 35s) Max mem: 360 MB
Epoch: [1/5][4000/7000] Loss: 0.5030(0.5262) Grad: 1.2110  LR: 0.000100  Elapsed: 11m 35s (remain 8m 40s) Max mem: 360 MB
Epoch: [1/5][5000/7000] Loss: 0.4874(0.5148) Grad: 1.3197  LR: 0.000100  Elapsed: 14m 26s (remain 5m 46s) Max mem: 360 MB
Epoch: [1/5][6000/7000] Loss: 0.4174(0.5060) Grad: 1.3126  LR: 0.000100  Elapsed: 18m 28s (remain 3m 4s) Max mem: 360 MB
EVAL: [0/875] Data 0.725 (0.725) Elapsed 0m 0s (remain 12m 57s) Loss: 0.4191(0.4191) 


Epoch 1 - avg_train_loss: 0.4999  avg_val_loss: 0.4596  time: 1449s
Epoch 1 - Score: 0.8520
Epoch 1 - Save Best Score: 0.8520 Model
Epoch 1 - Save Best Loss: 0.4596 Model





Epoch: [2/5][0/7000] Loss: 0.5434(0.5434) Grad: 0.7845  LR: 0.000075  Elapsed: 0m 3s (remain 382m 9s) Max mem: 360 MB
Epoch: [2/5][1000/7000] Loss: 0.4162(0.4539) Grad: 1.2662  LR: 0.000075  Elapsed: 2m 55s (remain 17m 32s) Max mem: 360 MB
Epoch: [2/5][2000/7000] Loss: 0.5670(0.4522) Grad: 1.9730  LR: 0.000075  Elapsed: 5m 47s (remain 14m 28s) Max mem: 360 MB
Epoch: [2/5][3000/7000] Loss: 0.4630(0.4511) Grad: 0.8496  LR: 0.000075  Elapsed: 8m 38s (remain 11m 31s) Max mem: 360 MB
Epoch: [2/5][4000/7000] Loss: 0.5224(0.4492) Grad: 2.0929  LR: 0.000075  Elapsed: 11m 31s (remain 8m 38s) Max mem: 360 MB
Epoch: [2/5][5000/7000] Loss: 0.4826(0.4484) Grad: 1.4892  LR: 0.000075  Elapsed: 14m 26s (remain 5m 46s) Max mem: 360 MB
Epoch: [2/5][6000/7000] Loss: 0.4363(0.4471) Grad: 1.3361  LR: 0.000075  Elapsed: 17m 20s (remain 2m 53s) Max mem: 360 MB
EVAL: [0/875] Data 0.653 (0.653) Elapsed 0m 0s (remain 12m 49s) Loss: 0.4184(0.4184) 


Epoch 2 - avg_train_loss: 0.4467  avg_val_loss: 0.4438  time: 1392s
Epoch 2 - Score: 0.8589
Epoch 2 - Save Best Score: 0.8589 Model
Epoch 2 - Save Best Loss: 0.4438 Model





Epoch: [3/5][0/7000] Loss: 0.4104(0.4104) Grad: 1.3862  LR: 0.000025  Elapsed: 0m 2s (remain 346m 35s) Max mem: 360 MB
Epoch: [3/5][1000/7000] Loss: 0.3712(0.4359) Grad: 0.8286  LR: 0.000025  Elapsed: 2m 55s (remain 17m 34s) Max mem: 360 MB
Epoch: [3/5][2000/7000] Loss: 0.3789(0.4345) Grad: 1.4561  LR: 0.000025  Elapsed: 5m 47s (remain 14m 28s) Max mem: 360 MB
Epoch: [3/5][3000/7000] Loss: 0.3859(0.4339) Grad: 1.8068  LR: 0.000025  Elapsed: 8m 39s (remain 11m 32s) Max mem: 360 MB
Epoch: [3/5][4000/7000] Loss: 0.3781(0.4325) Grad: 1.4939  LR: 0.000025  Elapsed: 11m 32s (remain 8m 38s) Max mem: 360 MB
Epoch: [3/5][5000/7000] Loss: 0.3684(0.4320) Grad: 1.3804  LR: 0.000025  Elapsed: 14m 23s (remain 5m 45s) Max mem: 360 MB
Epoch: [3/5][6000/7000] Loss: 0.3960(0.4313) Grad: 1.5108  LR: 0.000025  Elapsed: 17m 16s (remain 2m 52s) Max mem: 360 MB
EVAL: [0/875] Data 0.760 (0.760) Elapsed 0m 0s (remain 13m 20s) Loss: 0.4148(0.4148) 


Epoch 3 - avg_train_loss: 0.4311  avg_val_loss: 0.4431  time: 1375s
Epoch 3 - Score: 0.8646
Epoch 3 - Save Best Score: 0.8646 Model
Epoch 3 - Save Best Loss: 0.4431 Model





Epoch: [4/5][0/7000] Loss: 0.3707(0.3707) Grad: 1.5067  LR: 0.000000  Elapsed: 0m 2s (remain 341m 2s) Max mem: 360 MB
Epoch: [4/5][1000/7000] Loss: 0.4208(0.4302) Grad: 1.6099  LR: 0.000000  Elapsed: 2m 55s (remain 17m 30s) Max mem: 360 MB
Epoch: [4/5][2000/7000] Loss: 0.5475(0.4300) Grad: 1.7094  LR: 0.000000  Elapsed: 5m 47s (remain 14m 27s) Max mem: 360 MB
Epoch: [4/5][3000/7000] Loss: 0.4349(0.4295) Grad: 0.9543  LR: 0.000000  Elapsed: 8m 39s (remain 11m 32s) Max mem: 360 MB
Epoch: [4/5][4000/7000] Loss: 0.3896(0.4279) Grad: 1.4615  LR: 0.000000  Elapsed: 11m 31s (remain 8m 38s) Max mem: 360 MB
Epoch: [4/5][5000/7000] Loss: 0.3497(0.4275) Grad: 1.2440  LR: 0.000000  Elapsed: 14m 24s (remain 5m 45s) Max mem: 360 MB
Epoch: [4/5][6000/7000] Loss: 0.4670(0.4267) Grad: 1.9161  LR: 0.000000  Elapsed: 17m 16s (remain 2m 52s) Max mem: 360 MB
EVAL: [0/875] Data 0.731 (0.731) Elapsed 0m 0s (remain 12m 54s) Loss: 0.4041(0.4041) 


Epoch 4 - avg_train_loss: 0.4260  avg_val_loss: 0.4378  time: 1375s
Epoch 4 - Score: 0.8656
Epoch 4 - Save Best Score: 0.8656 Model
Epoch 4 - Save Best Loss: 0.4378 Model





Epoch: [5/5][0/7000] Loss: 0.3691(0.3691) Grad: 1.3548  LR: 0.000025  Elapsed: 0m 2s (remain 311m 56s) Max mem: 360 MB
Epoch: [5/5][1000/7000] Loss: 0.4505(0.4296) Grad: 2.0098  LR: 0.000025  Elapsed: 2m 55s (remain 17m 31s) Max mem: 360 MB
Epoch: [5/5][2000/7000] Loss: 0.5441(0.4281) Grad: 2.3880  LR: 0.000025  Elapsed: 5m 47s (remain 14m 28s) Max mem: 360 MB
Epoch: [5/5][3000/7000] Loss: 0.3798(0.4282) Grad: 1.0132  LR: 0.000025  Elapsed: 8m 40s (remain 11m 33s) Max mem: 360 MB
Epoch: [5/5][4000/7000] Loss: 0.4023(0.4268) Grad: 2.0093  LR: 0.000025  Elapsed: 11m 33s (remain 8m 39s) Max mem: 360 MB
Epoch: [5/5][5000/7000] Loss: 0.3880(0.4268) Grad: 1.6268  LR: 0.000025  Elapsed: 14m 26s (remain 5m 46s) Max mem: 360 MB
Epoch: [5/5][6000/7000] Loss: 0.4196(0.4264) Grad: 1.3298  LR: 0.000025  Elapsed: 17m 19s (remain 2m 53s) Max mem: 360 MB
EVAL: [0/875] Data 0.703 (0.703) Elapsed 0m 0s (remain 12m 57s) Loss: 0.4103(0.4103) 


Epoch 5 - avg_train_loss: 0.4263  avg_val_loss: 0.4414  time: 1376s
Epoch 5 - Score: 0.8657
Epoch 5 - Save Best Score: 0.8657 Model
Score: 0.8656





Epoch: [1/5][0/7000] Loss: 0.7015(0.7015) Grad: 15.4602  LR: 0.000100  Elapsed: 0m 2s (remain 332m 42s) Max mem: 360 MB
Epoch: [1/5][1000/7000] Loss: 0.5045(0.6115) Grad: 2.1054  LR: 0.000100  Elapsed: 2m 57s (remain 17m 44s) Max mem: 360 MB
Epoch: [1/5][2000/7000] Loss: 0.5485(0.5632) Grad: 2.0410  LR: 0.000100  Elapsed: 5m 50s (remain 14m 36s) Max mem: 360 MB
Epoch: [1/5][3000/7000] Loss: 0.4819(0.5381) Grad: 1.5312  LR: 0.000100  Elapsed: 8m 43s (remain 11m 37s) Max mem: 360 MB
Epoch: [1/5][4000/7000] Loss: 0.4969(0.5227) Grad: 1.1932  LR: 0.000100  Elapsed: 11m 35s (remain 8m 41s) Max mem: 360 MB
Epoch: [1/5][5000/7000] Loss: 0.4762(0.5112) Grad: 0.9735  LR: 0.000100  Elapsed: 14m 27s (remain 5m 46s) Max mem: 360 MB
Epoch: [1/5][6000/7000] Loss: 0.5579(0.5029) Grad: 1.9994  LR: 0.000100  Elapsed: 17m 19s (remain 2m 52s) Max mem: 360 MB
EVAL: [0/875] Data 0.726 (0.726) Elapsed 0m 0s (remain 13m 3s) Loss: 0.5306(0.5306) 


Epoch 1 - avg_train_loss: 0.4967  avg_val_loss: 0.4557  time: 1373s
Epoch 1 - Score: 0.8513
Epoch 1 - Save Best Score: 0.8513 Model
Epoch 1 - Save Best Loss: 0.4557 Model





Epoch: [2/5][0/7000] Loss: 0.3084(0.3084) Grad: 1.0503  LR: 0.000075  Elapsed: 0m 2s (remain 342m 31s) Max mem: 360 MB
Epoch: [2/5][1000/7000] Loss: 0.4772(0.4531) Grad: 1.7629  LR: 0.000075  Elapsed: 2m 55s (remain 17m 30s) Max mem: 360 MB
Epoch: [2/5][2000/7000] Loss: 0.4740(0.4517) Grad: 1.2288  LR: 0.000075  Elapsed: 5m 47s (remain 14m 29s) Max mem: 360 MB
Epoch: [2/5][3000/7000] Loss: 0.3681(0.4503) Grad: 0.9950  LR: 0.000075  Elapsed: 8m 38s (remain 11m 31s) Max mem: 360 MB
Epoch: [2/5][4000/7000] Loss: 0.4025(0.4499) Grad: 1.1412  LR: 0.000075  Elapsed: 11m 30s (remain 8m 37s) Max mem: 360 MB
Epoch: [2/5][5000/7000] Loss: 0.5280(0.4484) Grad: 2.1331  LR: 0.000075  Elapsed: 14m 22s (remain 5m 44s) Max mem: 360 MB
Epoch: [2/5][6000/7000] Loss: 0.4139(0.4470) Grad: 1.2276  LR: 0.000075  Elapsed: 17m 14s (remain 2m 52s) Max mem: 360 MB
EVAL: [0/875] Data 0.842 (0.842) Elapsed 0m 1s (remain 14m 51s) Loss: 0.4978(0.4978) 


Epoch 2 - avg_train_loss: 0.4462  avg_val_loss: 0.4640  time: 1370s
Epoch 2 - Score: 0.8584
Epoch 2 - Save Best Score: 0.8584 Model





Epoch: [3/5][0/7000] Loss: 0.4216(0.4216) Grad: 1.2105  LR: 0.000025  Elapsed: 0m 2s (remain 342m 22s) Max mem: 360 MB
Epoch: [3/5][1000/7000] Loss: 0.4833(0.4330) Grad: 1.6076  LR: 0.000025  Elapsed: 2m 54s (remain 17m 27s) Max mem: 360 MB
Epoch: [3/5][2000/7000] Loss: 0.5100(0.4331) Grad: 2.8132  LR: 0.000025  Elapsed: 5m 45s (remain 14m 23s) Max mem: 360 MB
Epoch: [3/5][3000/7000] Loss: 0.3955(0.4329) Grad: 1.2543  LR: 0.000025  Elapsed: 8m 35s (remain 11m 27s) Max mem: 360 MB
Epoch: [3/5][4000/7000] Loss: 0.3290(0.4334) Grad: 1.2161  LR: 0.000025  Elapsed: 11m 26s (remain 8m 34s) Max mem: 360 MB
Epoch: [3/5][5000/7000] Loss: 0.4336(0.4324) Grad: 1.6552  LR: 0.000025  Elapsed: 14m 17s (remain 5m 42s) Max mem: 360 MB
Epoch: [3/5][6000/7000] Loss: 0.4013(0.4315) Grad: 1.8793  LR: 0.000025  Elapsed: 17m 17s (remain 2m 52s) Max mem: 360 MB
EVAL: [0/875] Data 0.906 (0.906) Elapsed 0m 1s (remain 15m 27s) Loss: 0.4863(0.4863) 


Epoch 3 - avg_train_loss: 0.4309  avg_val_loss: 0.4374  time: 1380s
Epoch 3 - Score: 0.8628
Epoch 3 - Save Best Score: 0.8628 Model
Epoch 3 - Save Best Loss: 0.4374 Model





Epoch: [4/5][0/7000] Loss: 0.4569(0.4569) Grad: 2.0066  LR: 0.000000  Elapsed: 0m 3s (remain 352m 24s) Max mem: 360 MB
Epoch: [4/5][1000/7000] Loss: 0.4055(0.4292) Grad: 1.0587  LR: 0.000000  Elapsed: 2m 54s (remain 17m 24s) Max mem: 360 MB
Epoch: [4/5][2000/7000] Loss: 0.3596(0.4287) Grad: 1.8127  LR: 0.000000  Elapsed: 5m 45s (remain 14m 22s) Max mem: 360 MB
Epoch: [4/5][3000/7000] Loss: 0.4656(0.4291) Grad: 1.5342  LR: 0.000000  Elapsed: 8m 36s (remain 11m 28s) Max mem: 360 MB
Epoch: [4/5][4000/7000] Loss: 0.3970(0.4292) Grad: 1.4709  LR: 0.000000  Elapsed: 11m 26s (remain 8m 34s) Max mem: 360 MB
Epoch: [4/5][5000/7000] Loss: 0.4135(0.4280) Grad: 1.7386  LR: 0.000000  Elapsed: 14m 16s (remain 5m 42s) Max mem: 360 MB
Epoch: [4/5][6000/7000] Loss: 0.2618(0.4267) Grad: 1.1714  LR: 0.000000  Elapsed: 17m 6s (remain 2m 50s) Max mem: 360 MB
EVAL: [0/875] Data 0.686 (0.686) Elapsed 0m 0s (remain 12m 14s) Loss: 0.4983(0.4983) 


Epoch 4 - avg_train_loss: 0.4257  avg_val_loss: 0.4459  time: 1373s
Epoch 4 - Score: 0.8634
Epoch 4 - Save Best Score: 0.8634 Model





Epoch: [5/5][0/7000] Loss: 0.4303(0.4303) Grad: 1.4695  LR: 0.000025  Elapsed: 0m 2s (remain 336m 43s) Max mem: 360 MB
Epoch: [5/5][1000/7000] Loss: 0.6238(0.4274) Grad: 2.7437  LR: 0.000025  Elapsed: 2m 54s (remain 17m 26s) Max mem: 360 MB
Epoch: [5/5][2000/7000] Loss: 0.4637(0.4277) Grad: 1.4663  LR: 0.000025  Elapsed: 5m 46s (remain 14m 24s) Max mem: 360 MB
Epoch: [5/5][3000/7000] Loss: 0.5380(0.4275) Grad: 2.2573  LR: 0.000025  Elapsed: 8m 37s (remain 11m 29s) Max mem: 360 MB
Epoch: [5/5][4000/7000] Loss: 0.5000(0.4281) Grad: 1.6621  LR: 0.000025  Elapsed: 11m 28s (remain 8m 36s) Max mem: 360 MB
Epoch: [5/5][5000/7000] Loss: 0.5602(0.4273) Grad: 2.6144  LR: 0.000025  Elapsed: 14m 19s (remain 5m 43s) Max mem: 360 MB
Epoch: [5/5][6000/7000] Loss: 0.4163(0.4263) Grad: 1.5759  LR: 0.000025  Elapsed: 17m 10s (remain 2m 51s) Max mem: 360 MB
EVAL: [0/875] Data 0.702 (0.702) Elapsed 0m 0s (remain 12m 27s) Loss: 0.4901(0.4901) 


Epoch 5 - avg_train_loss: 0.4257  avg_val_loss: 0.4377  time: 1370s
Epoch 5 - Score: 0.8633
Score: 0.8628





Epoch: [1/5][0/7000] Loss: 0.7124(0.7124) Grad: 12.0316  LR: 0.000100  Elapsed: 0m 2s (remain 338m 7s) Max mem: 360 MB
Epoch: [1/5][1000/7000] Loss: 0.5828(0.5979) Grad: 2.5982  LR: 0.000100  Elapsed: 2m 54s (remain 17m 25s) Max mem: 360 MB
Epoch: [1/5][2000/7000] Loss: 0.3974(0.5532) Grad: 1.3003  LR: 0.000100  Elapsed: 5m 46s (remain 14m 25s) Max mem: 360 MB
Epoch: [1/5][3000/7000] Loss: 0.5314(0.5292) Grad: 1.4170  LR: 0.000100  Elapsed: 8m 36s (remain 11m 28s) Max mem: 360 MB
Epoch: [1/5][4000/7000] Loss: 0.4502(0.5152) Grad: 1.3957  LR: 0.000100  Elapsed: 11m 28s (remain 8m 35s) Max mem: 360 MB
Epoch: [1/5][5000/7000] Loss: 0.4080(0.5058) Grad: 0.9684  LR: 0.000100  Elapsed: 14m 18s (remain 5m 43s) Max mem: 360 MB
Epoch: [1/5][6000/7000] Loss: 0.4133(0.4981) Grad: 1.3636  LR: 0.000100  Elapsed: 17m 9s (remain 2m 51s) Max mem: 360 MB
EVAL: [0/875] Data 0.894 (0.894) Elapsed 0m 1s (remain 15m 16s) Loss: 0.4784(0.4784) 


Epoch 1 - avg_train_loss: 0.4932  avg_val_loss: 0.4685  time: 1379s
Epoch 1 - Score: 0.8555
Epoch 1 - Save Best Score: 0.8555 Model
Epoch 1 - Save Best Loss: 0.4685 Model





Epoch: [2/5][0/7000] Loss: 0.4856(0.4856) Grad: 1.6071  LR: 0.000075  Elapsed: 0m 3s (remain 372m 57s) Max mem: 360 MB
Epoch: [2/5][1000/7000] Loss: 0.4699(0.4532) Grad: 1.2263  LR: 0.000075  Elapsed: 2m 55s (remain 17m 29s) Max mem: 360 MB
Epoch: [2/5][2000/7000] Loss: 0.5427(0.4521) Grad: 1.9811  LR: 0.000075  Elapsed: 5m 46s (remain 14m 24s) Max mem: 360 MB
Epoch: [2/5][3000/7000] Loss: 0.4607(0.4507) Grad: 1.4607  LR: 0.000075  Elapsed: 8m 35s (remain 11m 27s) Max mem: 360 MB
Epoch: [2/5][4000/7000] Loss: 0.4444(0.4496) Grad: 1.4444  LR: 0.000075  Elapsed: 11m 26s (remain 8m 34s) Max mem: 360 MB
Epoch: [2/5][5000/7000] Loss: 0.4533(0.4487) Grad: 0.9315  LR: 0.000075  Elapsed: 14m 18s (remain 5m 43s) Max mem: 360 MB
Epoch: [2/5][6000/7000] Loss: 0.5151(0.4473) Grad: 1.0166  LR: 0.000075  Elapsed: 17m 9s (remain 2m 51s) Max mem: 360 MB
EVAL: [0/875] Data 0.764 (0.764) Elapsed 0m 1s (remain 14m 57s) Loss: 0.4442(0.4442) 


Epoch 2 - avg_train_loss: 0.4470  avg_val_loss: 0.4403  time: 1397s
Epoch 2 - Score: 0.8609
Epoch 2 - Save Best Score: 0.8609 Model
Epoch 2 - Save Best Loss: 0.4403 Model





Epoch: [3/5][0/7000] Loss: 0.5015(0.5015) Grad: 2.5163  LR: 0.000025  Elapsed: 0m 3s (remain 359m 23s) Max mem: 360 MB
Epoch: [3/5][1000/7000] Loss: 0.4084(0.4352) Grad: 1.6139  LR: 0.000025  Elapsed: 2m 59s (remain 17m 55s) Max mem: 360 MB
Epoch: [3/5][2000/7000] Loss: 0.4614(0.4342) Grad: 1.6815  LR: 0.000025  Elapsed: 5m 54s (remain 14m 45s) Max mem: 360 MB
Epoch: [3/5][3000/7000] Loss: 0.4740(0.4338) Grad: 1.4107  LR: 0.000025  Elapsed: 8m 49s (remain 11m 45s) Max mem: 360 MB
Epoch: [3/5][4000/7000] Loss: 0.5007(0.4333) Grad: 1.7480  LR: 0.000025  Elapsed: 11m 45s (remain 8m 48s) Max mem: 360 MB
Epoch: [3/5][5000/7000] Loss: 0.4307(0.4328) Grad: 1.4282  LR: 0.000025  Elapsed: 14m 40s (remain 5m 51s) Max mem: 360 MB
Epoch: [3/5][6000/7000] Loss: 0.4405(0.4316) Grad: 1.6904  LR: 0.000025  Elapsed: 17m 34s (remain 2m 55s) Max mem: 360 MB
EVAL: [0/875] Data 0.663 (0.663) Elapsed 0m 0s (remain 12m 25s) Loss: 0.4389(0.4389) 


Epoch 3 - avg_train_loss: 0.4315  avg_val_loss: 0.4352  time: 1412s
Epoch 3 - Score: 0.8657
Epoch 3 - Save Best Score: 0.8657 Model
Epoch 3 - Save Best Loss: 0.4352 Model





Epoch: [4/5][0/7000] Loss: 0.4954(0.4954) Grad: 2.7415  LR: 0.000000  Elapsed: 0m 2s (remain 338m 15s) Max mem: 360 MB
Epoch: [4/5][1000/7000] Loss: 0.4396(0.4290) Grad: 1.6756  LR: 0.000000  Elapsed: 2m 56s (remain 17m 40s) Max mem: 360 MB
Epoch: [4/5][2000/7000] Loss: 0.4510(0.4287) Grad: 1.3065  LR: 0.000000  Elapsed: 5m 51s (remain 14m 39s) Max mem: 360 MB
Epoch: [4/5][3000/7000] Loss: 0.4706(0.4281) Grad: 2.8171  LR: 0.000000  Elapsed: 8m 46s (remain 11m 41s) Max mem: 360 MB
Epoch: [4/5][4000/7000] Loss: 0.4061(0.4278) Grad: 2.5705  LR: 0.000000  Elapsed: 11m 41s (remain 8m 45s) Max mem: 360 MB
Epoch: [4/5][5000/7000] Loss: 0.4839(0.4274) Grad: 1.7287  LR: 0.000000  Elapsed: 14m 35s (remain 5m 49s) Max mem: 360 MB
Epoch: [4/5][6000/7000] Loss: 0.4142(0.4262) Grad: 1.6384  LR: 0.000000  Elapsed: 17m 30s (remain 2m 54s) Max mem: 360 MB
EVAL: [0/875] Data 0.685 (0.685) Elapsed 0m 0s (remain 14m 6s) Loss: 0.4415(0.4415) 


Epoch 4 - avg_train_loss: 0.4257  avg_val_loss: 0.4388  time: 1409s
Epoch 4 - Score: 0.8661
Epoch 4 - Save Best Score: 0.8661 Model





Epoch: [5/5][0/7000] Loss: 0.4041(0.4041) Grad: 1.9349  LR: 0.000025  Elapsed: 0m 3s (remain 427m 25s) Max mem: 360 MB
Epoch: [5/5][1000/7000] Loss: 0.4642(0.4284) Grad: 1.8024  LR: 0.000025  Elapsed: 2m 58s (remain 17m 51s) Max mem: 360 MB
Epoch: [5/5][2000/7000] Loss: 0.4045(0.4285) Grad: 1.2149  LR: 0.000025  Elapsed: 5m 52s (remain 14m 41s) Max mem: 360 MB
Epoch: [5/5][3000/7000] Loss: 0.5134(0.4282) Grad: 2.0348  LR: 0.000025  Elapsed: 8m 48s (remain 11m 44s) Max mem: 360 MB
Epoch: [5/5][4000/7000] Loss: 0.4078(0.4278) Grad: 1.2354  LR: 0.000025  Elapsed: 11m 43s (remain 8m 47s) Max mem: 360 MB
Epoch: [5/5][5000/7000] Loss: 0.5160(0.4275) Grad: 2.5174  LR: 0.000025  Elapsed: 14m 37s (remain 5m 50s) Max mem: 360 MB
Epoch: [5/5][6000/7000] Loss: 0.3974(0.4266) Grad: 1.6143  LR: 0.000025  Elapsed: 17m 32s (remain 2m 55s) Max mem: 360 MB
EVAL: [0/875] Data 0.722 (0.722) Elapsed 0m 0s (remain 12m 59s) Loss: 0.4270(0.4270) 


Epoch 5 - avg_train_loss: 0.4268  avg_val_loss: 0.4312  time: 1412s
Epoch 5 - Score: 0.8664
Epoch 5 - Save Best Score: 0.8664 Model
Epoch 5 - Save Best Loss: 0.4312 Model
Score: 0.8664
Score: 0.8651


## Inference

In [15]:
# Load the best-score checkpoint saved for every fold listed in CFG.trn_fold.
# Each loaded object is later indexed with ['model'] to obtain the weights.
states = []
for fold in CFG.trn_fold:
    ckpt_path = os.path.join(SAVEDIR, f'{CFG.model_name}_fold{fold}_best_score.pth')
    # map_location=device: checkpoints written during GPU training would
    # otherwise fail to deserialize when this cell runs on a CPU-only session
    # (`device` falls back to "cpu" when CUDA is unavailable).
    states.append(torch.load(ckpt_path, map_location=device))

In [16]:
# Resolve the GCS location of both test TFRecord datasets (shards 0-4 and 5-9).
# The lookup is retried because it can fail transiently.
gcs_paths = []
for i, j in [(0, 4), (5, 9)]:
    path = f"g2net-waveform-tfrecords-test-{i}-{j}"
    n_trial = 0
    while True:
        try:
            gcs_path = KaggleDatasets().get_gcs_path(path)
        except Exception as err:
            # `Exception` instead of a bare `except:` so that KeyboardInterrupt /
            # SystemExit still stop the cell instead of being retried.
            if n_trial > 10:
                # Still best-effort (no raise), but no longer silent: a missing
                # dataset means predictions would cover only part of the test set.
                print(f"WARNING: giving up on GCS path for '{path}': {err!r}")
                break
            n_trial += 1
            continue
        # Success path kept outside the `try` so errors from these lines are
        # never mistaken for a failed lookup.
        gcs_paths.append(gcs_path)
        print(gcs_path)
        break

# Collect every test shard; files are sorted within each dataset so the
# resulting order is deterministic.
all_files = []
for path in gcs_paths:
    all_files.extend(np.sort(np.array(tf.io.gfile.glob(path + "/test*.tfrecords"))))

print("test_files: ", len(all_files))
all_files = np.array(all_files)

gs://kds-3af0f2c792ef9e6f5d3c87bb80c99a263fc9707106cd595aaf02e3eb
gs://kds-9eb3135b732342c0aec8339381fec9fd19f8a7ad94ca9c31ad51fe2b
test_files:  10


In [17]:
# Fold-ensemble inference: one network instance is reused, each fold's weights
# are loaded in turn, and the per-fold probabilities are averaged at the end.
model = CNN1d().to(device)

wave_ids, probs_all = [], []

for fold, state in enumerate(states):
    tqdm.write(f"\n\nFold{fold}")

    # Swap in this fold's weights and switch to evaluation mode.
    model.load_state_dict(state['model'])
    model.eval()

    # A fresh, unshuffled loader per fold, so every fold iterates the files in
    # the same order and the ids collected on the first pass line up with all
    # folds' predictions.
    test_loader = TFRecordDataLoader(
        all_files, batch_size=CFG.val_batch_size, shuffle=False, labeled=False
    )

    fold_probs = []
    for batch in tqdm(test_loader, total=len(test_loader)):
        # batch[0] is band-pass filtered and fed to the model; batch[1] is used
        # as the ids (presumably waveforms and their string ids — confirm
        # against TFRecordDataLoader).
        signals = torch.from_numpy(bandpass(batch[0], **CFG.bandpass_params)).to(device)

        with torch.no_grad():
            logits = model(signals)
        fold_probs.append(logits.sigmoid().to('cpu').numpy())

        # Ids are identical on every pass, so record them only once.
        if fold == 0:
            wave_ids.append(batch[1].astype('U13'))

    probs_all.append(np.concatenate(fold_probs))

# Mean over the fold axis, flattened to one probability per test sample.
probs_avg = np.asarray(probs_all).mean(axis=0).flatten()
wave_ids = np.concatenate(wave_ids)

  0%|          | 0/2250 [00:00<?, ?it/s]



Fold0


2260it [06:43,  5.60it/s]
  0%|          | 0/2250 [00:00<?, ?it/s]



Fold1


2260it [06:40,  5.64it/s]
  0%|          | 0/2250 [00:00<?, ?it/s]



Fold2


2260it [06:28,  5.82it/s]
  0%|          | 0/2250 [00:00<?, ?it/s]



Fold3


2260it [06:31,  5.77it/s]


In [18]:
# Tag for the output filename listing the folds that were ensembled, e.g. "0_1_2_3".
folds = "_".join(map(str, CFG.trn_fold))
# Submission table: one row per test id with its fold-averaged probability.
test_df = pd.DataFrame({"id": wave_ids, "target": probs_avg})
# Write the submission CSV to the working directory, without the index column.
test_df.to_csv(f"{CFG.model_name}_folds_{folds}.csv", index=False)