In [1]:
import secrets

import easydict
import matplotlib.pyplot as plt
import torch
from torch.cuda.amp import autocast, GradScaler
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
from tqdm import tqdm
import wandb

2024-06-23 18:05:40.989208: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-23 18:05:40.989333: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-23 18:05:41.116050: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
import os
from kaggle_secrets import UserSecretsClient

In [3]:
# Get Wandb API key from Kaggle Secrets
user_secrets = UserSecretsClient()
wandb_api_key = user_secrets.get_secret("wandb_api_key")


In [4]:
# Set Wandb API key as an environment variable
os.environ["WANDB_API_KEY"] = wandb_api_key

In [5]:
# Initialize wandb
wandb.login()  # This will now use the API key from the environment variable
wandb.init(project="lstmvae-moving-mnist", entity="ryukijano")

[34m[1mwandb[0m: Currently logged in as: [33mryukijano[0m ([33mhack-the-thong[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Currently logged in as: [33mryukijano[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: wandb version 0.17.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade
[34m[1mwandb[0m: Tracking run with wandb version 0.17.0
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20240623_180554-hgyt6xei[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mtrim-rain-5[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/ryukijano/lstmvae-moving-mnist[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/ryukijano/lstmvae-moving-mnist/runs/hgyt6xei[0m


In [6]:


import torch
from torch import nn
from torch.nn import functional as F


class Encoder(nn.Module):
    def __init__(self, input_size=4096, hidden_size=1024, num_layers=2):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            bidirectional=False,
        )

    def forward(self, x):
        # x: tensor of shape (batch_size, seq_length, hidden_size)
        outputs, (hidden, cell) = self.lstm(x)
        return (hidden, cell)


class Decoder(nn.Module):
    def __init__(
        self, input_size=4096, hidden_size=1024, output_size=4096, num_layers=2
    ):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            bidirectional=False,
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        # x: tensor of shape (batch_size, seq_length, hidden_size)
        output, (hidden, cell) = self.lstm(x, hidden)
        prediction = self.fc(output)
        return prediction, (hidden, cell)


class LSTMVAE(nn.Module):
    """LSTM-based Variational Auto Encoder"""

    def __init__(
        self, input_size, hidden_size, latent_size, device=torch.device("cuda")
    ):
        """
        input_size: int, batch_size x sequence_length x input_dim
        hidden_size: int, output size of LSTM AE
        latent_size: int, latent z-layer size
        num_lstm_layer: int, number of layers in LSTM
        """
        super(LSTMVAE, self).__init__()
        self.device = device

        # dimensions
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.latent_size = latent_size
        self.num_layers = 1

        # lstm ae
        self.lstm_enc = Encoder(
            input_size=input_size, hidden_size=hidden_size, num_layers=self.num_layers
        )
        self.lstm_dec = Decoder(
            input_size=latent_size,
            output_size=input_size,
            hidden_size=hidden_size,
            num_layers=self.num_layers,
        )

        self.fc21 = nn.Linear(self.hidden_size, self.latent_size)
        self.fc22 = nn.Linear(self.hidden_size, self.latent_size)
        self.fc3 = nn.Linear(self.latent_size, self.hidden_size)

    def reparametize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        noise = torch.randn_like(std).to(self.device)

        z = mu + noise * std
        return z

    def forward(self, x):
        batch_size, seq_len, feature_dim = x.shape

        # encode input space to hidden space
        enc_hidden = self.lstm_enc(x)
        enc_h = enc_hidden[0].view(self.num_layers, batch_size, self.hidden_size).to(self.device)
        enc_c = enc_hidden[1].view(self.num_layers, batch_size, self.hidden_size).to(self.device)

        # extract latent variable z(hidden space to latent space)
        mean = self.fc21(enc_h[-1])
        logvar = self.fc22(enc_h[-1])
        z = self.reparametize(mean, logvar)  # batch_size x latent_size

        # initialize hidden state as inputs
        h_ = self.fc3(z).view(self.num_layers, batch_size, self.hidden_size)
        c_ = torch.zeros_like(h_)
        
        # decode latent space to input space
        z = z.unsqueeze(1).repeat(1, seq_len, 1)
        z = z.view(batch_size, seq_len, self.latent_size).to(self.device)

        # initialize hidden state
        hidden = (h_.contiguous(), c_.contiguous())
        reconstruct_output, hidden = self.lstm_dec(z, hidden)

        x_hat = reconstruct_output

        # calculate vae loss
        losses = self.loss_function(x_hat, x, mean, logvar)
        m_loss, recon_loss, kld_loss = losses["loss"], losses["Reconstruction_Loss"], losses["KLD"]

        return m_loss, x_hat, (recon_loss, kld_loss)

    def loss_function(self, *args, **kwargs) -> dict:
        """
        Computes the VAE loss function.
        KL(N(\mu, \sigma), N(0, 1)) = \log \frac{1}{\sigma} + \frac{\sigma^2 + \mu^2}{2} - \frac{1}{2}
        :param args:
        :param kwargs:
        :return:
        """
        recons = args[0]
        input = args[1]
        mu = args[2]
        log_var = args[3]

        kld_weight = 0.00025  # Account for the minibatch samples from the dataset
        recons_loss = F.mse_loss(recons, input)

        kld_loss = torch.mean(
            -0.5 * torch.sum(1 + log_var - mu**2 - log_var.exp(), dim=1), dim=0
        )

        loss = recons_loss + kld_weight * kld_loss
        return {
            "loss": loss,
            "Reconstruction_Loss": recons_loss.detach(),
            "KLD": -kld_loss.detach(),
        }


class LSTMAE(nn.Module):
    """LSTM-based Auto Encoder"""

    def __init__(self, input_size, hidden_size, latent_size, device=torch.device("cuda")):
        """
        input_size: int, batch_size x sequence_length x input_dim
        hidden_size: int, output size of LSTM AE
        latent_size: int, latent z-layer size
        num_lstm_layer: int, number of layers in LSTM
        """
        super(LSTMAE, self).__init__()
        self.device = device

        # dimensions
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.latent_size = latent_size

        # lstm ae
        self.lstm_enc = Encoder(
            input_size=input_size,
            hidden_size=hidden_size,
        )
        self.lstm_dec = Decoder(
            input_size=input_size,
            output_size=input_size,
            hidden_size=hidden_size,
        )

        self.criterion = nn.MSELoss()

    def forward(self, x):
        batch_size, seq_len, feature_dim = x.shape

        enc_hidden = self.lstm_enc(x)

        temp_input = torch.zeros((batch_size, seq_len, feature_dim), dtype=torch.float).to(
            self.device
        )
        hidden = enc_hidden
        reconstruct_output, hidden = self.lstm_dec(temp_input, hidden)
        reconstruct_loss = self.criterion(reconstruct_output, x)

        return reconstruct_loss, reconstruct_output, (0, 0)


In [7]:
# import torch
# from torch import nn
# from torch.nn import functional as F

# class ConvLSTMCell(nn.Module):
#     def __init__(self, input_channels, hidden_channels, kernel_size):
#         super(ConvLSTMCell, self).__init__()
        
#         self.input_channels = input_channels
#         self.hidden_channels = hidden_channels
#         self.kernel_size = kernel_size
#         self.padding = kernel_size // 2
        
#         self.conv = nn.Conv2d(
#             in_channels=self.input_channels + self.hidden_channels,
#             out_channels=4 * self.hidden_channels,
#             kernel_size=self.kernel_size,
#             padding=self.padding,
#             bias=True
#         )

#     def forward(self, x, h, c):
#         combined = torch.cat([x, h], dim=1)
#         combined_conv = self.conv(combined)
#         cc_i, cc_f, cc_o, cc_g = torch.split(combined_conv, self.hidden_channels, dim=1)
#         i = torch.sigmoid(cc_i)
#         f = torch.sigmoid(cc_f)
#         o = torch.sigmoid(cc_o)
#         g = torch.tanh(cc_g)
        
#         c_next = f * c + i * g
#         h_next = o * torch.tanh(c_next)
        
#         return h_next, c_next

# class ConvLSTMEncoder(nn.Module):
#     def __init__(self, input_channels, hidden_channels, kernel_size):
#         super(ConvLSTMEncoder, self).__init__()
#         self.convlstm = ConvLSTMCell(input_channels, hidden_channels, kernel_size)
    
#     def forward(self, x):
#         batch_size, seq_len, channels, height, width = x.size()
#         h = torch.zeros(batch_size, self.convlstm.hidden_channels, height, width).to(x.device)
#         c = torch.zeros(batch_size, self.convlstm.hidden_channels, height, width).to(x.device)
        
#         for t in range(seq_len):
#             h, c = self.convlstm(x[:, t, :, :, :], h, c)
        
#         return h, c

# class ConvLSTMDecoder(nn.Module):
#     def __init__(self, input_channels, hidden_channels, output_channels, kernel_size):
#         super(ConvLSTMDecoder, self).__init__()
#         self.convlstm = ConvLSTMCell(input_channels, hidden_channels, kernel_size)
#         self.conv_out = nn.Conv2d(hidden_channels, output_channels, kernel_size=3, padding=1)
    
#     def forward(self, x, h, c, seq_len):
#         outputs = []
        
#         for _ in range(seq_len):
#             h, c = self.convlstm(x, h, c)
#             output = self.conv_out(h)
#             outputs.append(output)
#             x = output
        
#         return torch.stack(outputs, dim=1)

# class CONVLSTMVAE(nn.Module):
#     def __init__(self, input_channels, hidden_channels, latent_size, kernel_size=3):
#         super(CONVLSTMVAE, self).__init__()
        
#         self.encoder = ConvLSTMEncoder(input_channels, hidden_channels, kernel_size)
#         self.decoder = ConvLSTMDecoder(input_channels, hidden_channels, input_channels, kernel_size)
        
#         self.fc_mu = nn.Linear(hidden_channels * 64 * 64, latent_size)  # Assuming 64x64 spatial dimensions
#         self.fc_logvar = nn.Linear(hidden_channels * 64 * 64, latent_size)
#         self.fc_decode = nn.Linear(latent_size, hidden_channels * 64 * 64)
        
#     def reparameterize(self, mu, logvar):
#         std = torch.exp(0.5 * logvar)
#         eps = torch.randn_like(std)
#         return mu + eps * std
    
#     def forward(self, x):
#         batch_size, seq_len, channels, height, width = x.size()
        
#         # Encode
#         h, c = self.encoder(x)
#         h_flat = h.view(batch_size, -1)
        
#         # VAE bottleneck
#         mu = self.fc_mu(h_flat)
#         logvar = self.fc_logvar(h_flat)
#         z = self.reparameterize(mu, logvar)
        
#         # Decode
#         h_decoded = self.fc_decode(z).view(batch_size, -1, height, width)
#         c_decoded = torch.zeros_like(h_decoded)
#         x_decoded = torch.zeros(batch_size, channels, height, width).to(x.device)
        
#         output = self.decoder(x_decoded, h_decoded, c_decoded, seq_len)
        
#         return output, mu, logvar
    
#     def loss_function(self, recon_x, x, mu, logvar):
#         BCE = F.binary_cross_entropy(recon_x, x, reduction='sum')
#         KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
#         return BCE + KLD

# # Example usage
# input_channels = 3  # For RGB videos
# hidden_channels = 64
# latent_size = 128
# model = CONVLSTMVAE(input_channels, hidden_channels, latent_size)

# # Assuming input shape: (batch_size, sequence_length, channels, height, width)
# sample_input = torch.randn(16, 10, 3, 64, 64)
# output, mu, logvar = model(sample_input)

# print(f"Input shape: {sample_input.shape}")
# print(f"Output shape: {output.shape}")
# print(f"Mu shape: {mu.shape}")
# print(f"Logvar shape: {logvar.shape}")

In [8]:
from __future__ import print_function

import codecs
import errno
import os
import os.path

import numpy as np
import torch
import torch.utils.data as data
from PIL import Image


class MovingMNIST(data.Dataset):
    """`MovingMNIST <http://www.cs.toronto.edu/~nitish/unsupervised_video/>`_ Dataset.

    Args:
        root (string): Root directory of dataset where ``processed/training.pt``
            and  ``processed/test.pt`` exist.
        train (bool, optional): If True, creates dataset from ``training.pt``,
            otherwise from ``test.pt``.
        split (int, optional): Train/test split size. Number defines how many samples
            belong to test set.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
        transform (callable, optional): A function/transform that takes in an PIL image
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in an PIL
            image and returns a transformed version. E.g, ``transforms.RandomCrop``
    """

    urls = ["https://github.com/tychovdo/MovingMNIST/raw/master/mnist_test_seq.npy.gz"]
    raw_folder = "raw"
    processed_folder = "processed"
    training_file = "moving_mnist_train.pt"
    test_file = "moving_mnist_test.pt"

    def __init__(
        self,
        root,
        train=True,
        split=1000,
        transform=None,
        target_transform=None,
        download=False,
    ):
        self.root = os.path.expanduser(root)
        self.transform = transform
        self.target_transform = target_transform
        self.split = split
        self.train = train  # training set or test set

        if download:
            self.download()

        if not self._check_exists():
            raise RuntimeError(
                "Dataset not found." + " You can use download=True to download it"
            )

        if self.train:
            self.train_data = torch.load(
                os.path.join(self.root, self.processed_folder, self.training_file)
            )
        else:
            self.test_data = torch.load(
                os.path.join(self.root, self.processed_folder, self.test_file)
            )

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (seq, target) where sampled sequences are splitted into a seq
                    and target part
        """

        # need to iterate over time
        def _transform_time(data):
            new_data = None
            for i in range(data.size(0)):
                img = Image.fromarray(data[i].numpy(), mode="L")
                new_data = (
                    self.transform(img)
                    if new_data is None
                    else torch.cat([self.transform(img), new_data], dim=0)
                )
            return new_data

        if self.train:
            seq, target = self.train_data[index, :10], self.train_data[index, 10:]
        else:
            seq, target = self.test_data[index, :10], self.test_data[index, 10:]

        if self.transform is not None:
            seq = _transform_time(seq)
        if self.target_transform is not None:
            target = _transform_time(target)

        return seq, target

    def __len__(self):
        if self.train:
            return len(self.train_data)
        else:
            return len(self.test_data)

    def _check_exists(self):
        return os.path.exists(
            os.path.join(self.root, self.processed_folder, self.training_file)
        ) and os.path.exists(
            os.path.join(self.root, self.processed_folder, self.test_file)
        )

    def download(self):
        """Download the Moving MNIST data if it doesn't exist in processed_folder already."""
        import gzip

        from six.moves import urllib

        if self._check_exists():
            return

        # download files
        try:
            os.makedirs(os.path.join(self.root, self.raw_folder))
            os.makedirs(os.path.join(self.root, self.processed_folder))
        except OSError as e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise

        for url in self.urls:
            print("Downloading " + url)
            data = urllib.request.urlopen(url)
            filename = url.rpartition("/")[2]
            file_path = os.path.join(self.root, self.raw_folder, filename)
            with open(file_path, "wb") as f:
                f.write(data.read())
            with open(file_path.replace(".gz", ""), "wb") as out_f, gzip.GzipFile(
                file_path
            ) as zip_f:
                out_f.write(zip_f.read())
            os.unlink(file_path)

        # process and save as torch files
        print("Processing...")

        training_set = torch.from_numpy(
            np.load(
                os.path.join(self.root, self.raw_folder, "mnist_test_seq.npy")
            ).swapaxes(0, 1)[: -self.split]
        )
        test_set = torch.from_numpy(
            np.load(
                os.path.join(self.root, self.raw_folder, "mnist_test_seq.npy")
            ).swapaxes(0, 1)[-self.split :]
        )

        with open(
            os.path.join(self.root, self.processed_folder, self.training_file), "wb"
        ) as f:
            torch.save(training_set, f)
        with open(
            os.path.join(self.root, self.processed_folder, self.test_file), "wb"
        ) as f:
            torch.save(test_set, f)

        print("Done!")

    def __repr__(self):
        fmt_str = "Dataset " + self.__class__.__name__ + "\n"
        fmt_str += "    Number of datapoints: {}\n".format(self.__len__())
        tmp = "train" if self.train is True else "test"
        fmt_str += "    Train/test: {}\n".format(tmp)
        fmt_str += "    Root Location: {}\n".format(self.root)
        tmp = "    Transforms (if any): "
        fmt_str += "{0}{1}\n".format(
            tmp, self.transform.__repr__().replace("\n", "\n" + " " * len(tmp))
        )
        tmp = "    Target Transforms (if any): "
        fmt_str += "{0}{1}".format(
            tmp, self.target_transform.__repr__().replace("\n", "\n" + " " * len(tmp))
        )
        return fmt_str


In [9]:


import secrets

import easydict
import matplotlib.pyplot as plt
import torch
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
from tqdm import tqdm

writer = SummaryWriter()

## visualization
def imshow(past_data, title="MovingMNIST"):
    num_img = len(past_data)
    fig = plt.figure(figsize=(4 * num_img, 4))

    for idx in range(1, num_img + 1):
        ax = fig.add_subplot(1, num_img + 1, idx)
        ax.imshow(past_data[idx - 1])
    plt.suptitle(title, fontsize=30)
    plt.savefig(f"{title}")
    plt.close()

def visualize_reconstructions(model, test_loader, device, epoch):
    model.eval()
    with torch.no_grad():
        for i, batch_data in enumerate(test_loader):
            future_data, past_data = batch_data
            batch_size = past_data.size(0)
            example_size = past_data.size(1)
            image_size = past_data.size(2), past_data.size(3)
            past_data = past_data.view(batch_size, example_size, -1).float().to(device)
            
            _, recon_x, _ = model(past_data)
            
            if i == 0:
                n_examples = min(10, batch_size)
                examples = past_data[:n_examples].cpu().view(n_examples, example_size, image_size[0], -1)
                recon_examples = recon_x[:n_examples].cpu().view(n_examples, example_size, image_size[0], -1)

                fig, axes = plt.subplots(2, n_examples, figsize=(20, 4))
                for j in range(n_examples):
                    axes[0, j].imshow(examples[j, 0], cmap='gray')
                    axes[0, j].axis('off')
                    axes[1, j].imshow(recon_examples[j, 0], cmap='gray')
                    axes[1, j].axis('off')
                plt.suptitle(f"Epoch {epoch}: Original (top) vs Reconstructed (bottom)")
                plt.savefig(f"reconstruction_epoch_{epoch}.png")
                wandb.log({"reconstructions": wandb.Image(plt)})
                plt.close()
                break

def train(args, model, train_loader, test_loader):
    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    
    #creating a GradScaler for automatic mixed precision
    scaler = GradScaler()

    ## interation setup
    epochs = tqdm(range(args.max_iter // len(train_loader) + 1))

    ## training
    count = 0
    for epoch in epochs:
        model.train()
        optimizer.zero_grad()
        train_iterator = tqdm(
            enumerate(train_loader), total=len(train_loader), desc="training"
        )

        for i, batch_data in train_iterator:

            if count > args.max_iter:
                return model
            count += 1

            future_data, past_data = batch_data

            ## reshape
            batch_size = past_data.size(0)
            example_size = past_data.size(1)
            image_size = past_data.size(2), past_data.size(3)
            past_data = (
                past_data.view(batch_size, example_size, -1).float().to(args.device)
            )

            #casting operations to mixed precision
            with autocast():
                mloss, recon_x, info = model(past_data)
            
            #scaling the loss for better gradient flow
            scaler.scale(mloss.mean()).backward()
            
            #unscales the gradients before optimization
            scaler.step(optimizer)
            
            #Update the GradScaler state
            scaler.update()


            train_iterator.set_postfix({"train_loss": float(mloss.mean())})
        writer.add_scalar("train_loss", float(mloss.mean()), epoch)
        wandb.log({"train_loss": float(mloss.mean())})
        
        model.eval()
        eval_loss = 0
        test_iterator = tqdm(
            enumerate(test_loader), total=len(test_loader), desc="testing"
        )

        with torch.no_grad():
            for i, batch_data in test_iterator:
                future_data, past_data = batch_data

                ## reshape
                batch_size = past_data.size(0)
                example_size = past_data.size(1)
                past_data = (
                    past_data.view(batch_size, example_size, -1).float().to(args.device)
                )

                #using autocast for validation
                with autocast():
                    mloss, recon_x, info = model(past_data)

                eval_loss += mloss.mean().item()

                test_iterator.set_postfix({"eval_loss": float(mloss.mean())})

        eval_loss = eval_loss / len(test_loader)
        writer.add_scalar("eval_loss", float(eval_loss), epoch)
        wandb.log({"eval_loss": float(eval_loss)})
        print("Evaluation Score : [{}]".format(eval_loss))

        # Visualize reconstructions every 10 epochs
        if epoch % 1 == 0:
            visualize_reconstructions(model, test_loader, args.device, epoch)

    return model


if __name__ == "__main__":

    # training dataset
    train_set = MovingMNIST(
        root=".data/mnist",
        train=True,
        download=True,
        transform=transforms.ToTensor(),
        target_transform=transforms.ToTensor(),
    )

    # test dataset
    test_set = MovingMNIST(
        root=".data/mnist",
        train=False,
        download=True,
        transform=transforms.ToTensor(),
        target_transform=transforms.ToTensor(),
    )

    args = easydict.EasyDict(
        {
            "batch_size": 512,
            "device": torch.device("cuda")
            if torch.cuda.is_available()
            else torch.device("cpu"),
            "input_size": 4096,
            "hidden_size": 2048,
            "latent_size": 1024,
            "learning_rate": 0.001,
            "max_iter": 1000,
        }
    )

    batch_size = args.batch_size
    input_size = args.input_size
    hidden_size = args.hidden_size
    latent_size = args.latent_size

    # define LSTM-based VAE model
    model = LSTMVAE(input_size, hidden_size, latent_size, device=args.device)
    model.to(args.device)

    # convert to format of data loader
    train_loader = torch.utils.data.DataLoader(
        dataset=train_set, batch_size=args.batch_size, shuffle=True
    )
    test_loader = torch.utils.data.DataLoader(
        dataset=test_set, batch_size=args.batch_size, shuffle=False
    )

    # training
    trained_model = train(args, model, train_loader, test_loader)

    # save model
    id_ = secrets.token_hex(nbytes=4)
    model_path = f"lstmvae{id_}.model"
    torch.save(trained_model.state_dict(), f"lstmvae{id_}.model")
    wandb.save(model_path)

    # load model
    model_to_load = LSTMVAE(input_size, hidden_size, latent_size, device=args.device)
    model_to_load.to(args.device)
    model_to_load.load_state_dict(torch.load(f"lstmvae{id_}.model"))
    model_to_load.eval()

    # show results
    ## past_data, future_data -> shape: (10,10)
    future_data, past_data = train_set[0]

    ## reshape
    example_size = past_data.size(0)
    image_size = past_data.size(1), past_data.size(2)
    past_data = past_data.view(example_size, -1).float().to(args.device)
    _, recon_data, info = model_to_load(past_data.unsqueeze(0))

    nhw_orig = past_data.view(example_size, image_size[0], -1).cpu()
    nhw_recon = (
        recon_data.squeeze(0)
        .view(example_size, image_size[0], -1)
        .detach()
        .cpu()
        .numpy()
    )

    imshow(nhw_orig, title=f"final_input{id_}")
    imshow(nhw_recon, title=f"final_output{id_}")
    plt.show()
    wandb.finish()

Downloading https://github.com/tychovdo/MovingMNIST/raw/master/mnist_test_seq.npy.gz
Processing...
Done!


  0%|          | 0/56 [00:00<?, ?it/s]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:02<?, ?it/s, train_loss=0.0528][A
training:   6%|▌         | 1/18 [00:02<00:40,  2.36s/it, train_loss=0.0528][A
training:   6%|▌         | 1/18 [00:04<00:40,  2.36s/it, train_loss=0.0506][A
training:  11%|█         | 2/18 [00:04<00:34,  2.13s/it, train_loss=0.0506][A
training:  11%|█         | 2/18 [00:06<00:34,  2.13s/it, train_loss=0.0478][A
training:  17%|█▋        | 3/18 [00:06<00:30,  2.01s/it, train_loss=0.0478][A
training:  17%|█▋        | 3/18 [00:07<00:30,  2.01s/it, train_loss=0.0468][A
training:  22%|██▏       | 4/18 [00:07<00:26,  1.92s/it, train_loss=0.0468][A
training:  22%|██▏       | 4/18 [00:09<00:26,  1.92s/it, train_loss=0.0442][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.90s/it, train_loss=0.0442][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.90s/it, train_loss=0.0435][A
training:  33%|███▎      | 6/18 [00:11<00:22,  1

Evaluation Score : [0.03954252786934376]


  2%|▏         | 1/56 [00:38<35:06, 38.30s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0394][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.84s/it, train_loss=0.0394][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.84s/it, train_loss=0.039] [A
training:  11%|█         | 2/18 [00:03<00:29,  1.83s/it, train_loss=0.039][A
training:  11%|█         | 2/18 [00:05<00:29,  1.83s/it, train_loss=0.0392][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.86s/it, train_loss=0.0392][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.86s/it, train_loss=0.0394][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.83s/it, train_loss=0.0394][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.83s/it, train_loss=0.0395][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.83s/it, train_loss=0.0395][A
training:  28%|██▊       | 5/18 [00:11<00:23,  1.83s/it, train_loss=0.0389][A
training:  33%|███▎      | 6/18 [00:11<00

Evaluation Score : [0.03764459304511547]


  4%|▎         | 2/56 [01:16<34:16, 38.08s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0378][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.83s/it, train_loss=0.0378][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.83s/it, train_loss=0.0371][A
training:  11%|█         | 2/18 [00:03<00:29,  1.83s/it, train_loss=0.0371][A
training:  11%|█         | 2/18 [00:05<00:29,  1.83s/it, train_loss=0.0377][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.83s/it, train_loss=0.0377][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.83s/it, train_loss=0.0377][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.81s/it, train_loss=0.0377][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.81s/it, train_loss=0.0372][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.81s/it, train_loss=0.0372][A
training:  28%|██▊       | 5/18 [00:10<00:23,  1.81s/it, train_loss=0.0376][A
training:  33%|███▎      | 6/18 [00:10<0

Evaluation Score : [0.03705732896924019]


  5%|▌         | 3/56 [01:53<33:29, 37.92s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0371][A
training:   6%|▌         | 1/18 [00:01<00:30,  1.79s/it, train_loss=0.0371][A
training:   6%|▌         | 1/18 [00:03<00:30,  1.79s/it, train_loss=0.0372][A
training:  11%|█         | 2/18 [00:03<00:28,  1.79s/it, train_loss=0.0372][A
training:  11%|█         | 2/18 [00:05<00:28,  1.79s/it, train_loss=0.0368][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.83s/it, train_loss=0.0368][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.83s/it, train_loss=0.0368][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.81s/it, train_loss=0.0368][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.81s/it, train_loss=0.0365][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.80s/it, train_loss=0.0365][A
training:  28%|██▊       | 5/18 [00:10<00:23,  1.80s/it, train_loss=0.0367][A
training:  33%|███▎      | 6/18 [00:10<0

Evaluation Score : [0.036653004586696625]


  7%|▋         | 4/56 [02:31<32:51, 37.90s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0367][A
training:   6%|▌         | 1/18 [00:01<00:30,  1.81s/it, train_loss=0.0367][A
training:   6%|▌         | 1/18 [00:03<00:30,  1.81s/it, train_loss=0.0367][A
training:  11%|█         | 2/18 [00:03<00:28,  1.79s/it, train_loss=0.0367][A
training:  11%|█         | 2/18 [00:05<00:28,  1.79s/it, train_loss=0.0363][A
training:  17%|█▋        | 3/18 [00:05<00:26,  1.80s/it, train_loss=0.0363][A
training:  17%|█▋        | 3/18 [00:07<00:26,  1.80s/it, train_loss=0.0366][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.81s/it, train_loss=0.0366][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.81s/it, train_loss=0.0364][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.82s/it, train_loss=0.0364][A
training:  28%|██▊       | 5/18 [00:10<00:23,  1.82s/it, train_loss=0.0368][A
training:  33%|███▎      | 6/18 [00:10<0

Evaluation Score : [0.03627078793942928]


  9%|▉         | 5/56 [03:09<32:08, 37.82s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0368][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.85s/it, train_loss=0.0368][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.85s/it, train_loss=0.0364][A
training:  11%|█         | 2/18 [00:03<00:29,  1.82s/it, train_loss=0.0364][A
training:  11%|█         | 2/18 [00:05<00:29,  1.82s/it, train_loss=0.0359][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.81s/it, train_loss=0.0359][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.81s/it, train_loss=0.0362][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.81s/it, train_loss=0.0362][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.81s/it, train_loss=0.0358][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.83s/it, train_loss=0.0358][A
training:  28%|██▊       | 5/18 [00:11<00:23,  1.83s/it, train_loss=0.0363][A
training:  33%|███▎      | 6/18 [00:11<0

Evaluation Score : [0.036012567579746246]


 11%|█         | 6/56 [03:47<31:33, 37.87s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0366][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.88s/it, train_loss=0.0366][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.88s/it, train_loss=0.0363][A
training:  11%|█         | 2/18 [00:03<00:29,  1.87s/it, train_loss=0.0363][A
training:  11%|█         | 2/18 [00:05<00:29,  1.87s/it, train_loss=0.0358][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.84s/it, train_loss=0.0358][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.84s/it, train_loss=0.0357][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.82s/it, train_loss=0.0357][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.82s/it, train_loss=0.0359][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.82s/it, train_loss=0.0359][A
training:  28%|██▊       | 5/18 [00:10<00:23,  1.82s/it, train_loss=0.0357][A
training:  33%|███▎      | 6/18 [00:10<0

Evaluation Score : [0.03589075058698654]


 12%|█▎        | 7/56 [04:25<30:55, 37.86s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0357][A
training:   6%|▌         | 1/18 [00:01<00:30,  1.80s/it, train_loss=0.0357][A
training:   6%|▌         | 1/18 [00:03<00:30,  1.80s/it, train_loss=0.0361][A
training:  11%|█         | 2/18 [00:03<00:28,  1.81s/it, train_loss=0.0361][A
training:  11%|█         | 2/18 [00:05<00:28,  1.81s/it, train_loss=0.0359][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.81s/it, train_loss=0.0359][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.81s/it, train_loss=0.0354][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.80s/it, train_loss=0.0354][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.80s/it, train_loss=0.0357][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.80s/it, train_loss=0.0357][A
training:  28%|██▊       | 5/18 [00:10<00:23,  1.80s/it, train_loss=0.0364][A
training:  33%|███▎      | 6/18 [00:10<0

Evaluation Score : [0.035823965445160866]


 14%|█▍        | 8/56 [05:02<30:13, 37.77s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0362][A
training:   6%|▌         | 1/18 [00:01<00:30,  1.80s/it, train_loss=0.0362][A
training:   6%|▌         | 1/18 [00:03<00:30,  1.80s/it, train_loss=0.0358][A
training:  11%|█         | 2/18 [00:03<00:28,  1.78s/it, train_loss=0.0358][A
training:  11%|█         | 2/18 [00:05<00:28,  1.78s/it, train_loss=0.0358][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.82s/it, train_loss=0.0358][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.82s/it, train_loss=0.036] [A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.81s/it, train_loss=0.036][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.81s/it, train_loss=0.0351][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.82s/it, train_loss=0.0351][A
training:  28%|██▊       | 5/18 [00:10<00:23,  1.82s/it, train_loss=0.0357][A
training:  33%|███▎      | 6/18 [00:10<00

Evaluation Score : [0.03566562011837959]


 16%|█▌        | 9/56 [05:40<29:35, 37.77s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0357][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.84s/it, train_loss=0.0357][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.84s/it, train_loss=0.036] [A
training:  11%|█         | 2/18 [00:03<00:29,  1.83s/it, train_loss=0.036][A
training:  11%|█         | 2/18 [00:05<00:29,  1.83s/it, train_loss=0.0354][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.84s/it, train_loss=0.0354][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.84s/it, train_loss=0.0354][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.82s/it, train_loss=0.0354][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.82s/it, train_loss=0.0356][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.84s/it, train_loss=0.0356][A
training:  28%|██▊       | 5/18 [00:10<00:23,  1.84s/it, train_loss=0.0359][A
training:  33%|███▎      | 6/18 [00:10<00

Evaluation Score : [0.03567342460155487]


 18%|█▊        | 10/56 [06:18<28:57, 37.77s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0355][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.86s/it, train_loss=0.0355][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.86s/it, train_loss=0.0355][A
training:  11%|█         | 2/18 [00:03<00:29,  1.83s/it, train_loss=0.0355][A
training:  11%|█         | 2/18 [00:05<00:29,  1.83s/it, train_loss=0.0359][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.83s/it, train_loss=0.0359][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.83s/it, train_loss=0.0354][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.82s/it, train_loss=0.0354][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.82s/it, train_loss=0.0361][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.91s/it, train_loss=0.0361][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.91s/it, train_loss=0.0353][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.03561978414654732]


 20%|█▉        | 11/56 [06:56<28:28, 37.97s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:02<?, ?it/s, train_loss=0.0358][A
training:   6%|▌         | 1/18 [00:02<00:35,  2.10s/it, train_loss=0.0358][A
training:   6%|▌         | 1/18 [00:03<00:35,  2.10s/it, train_loss=0.0351][A
training:  11%|█         | 2/18 [00:03<00:31,  1.94s/it, train_loss=0.0351][A
training:  11%|█         | 2/18 [00:05<00:31,  1.94s/it, train_loss=0.0354][A
training:  17%|█▋        | 3/18 [00:05<00:28,  1.91s/it, train_loss=0.0354][A
training:  17%|█▋        | 3/18 [00:07<00:28,  1.91s/it, train_loss=0.0352][A
training:  22%|██▏       | 4/18 [00:07<00:26,  1.87s/it, train_loss=0.0352][A
training:  22%|██▏       | 4/18 [00:09<00:26,  1.87s/it, train_loss=0.0358][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.87s/it, train_loss=0.0358][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.87s/it, train_loss=0.0355][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.03553598187863827]


 21%|██▏       | 12/56 [07:35<27:58, 38.14s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0354][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.84s/it, train_loss=0.0354][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.84s/it, train_loss=0.0356][A
training:  11%|█         | 2/18 [00:03<00:29,  1.85s/it, train_loss=0.0356][A
training:  11%|█         | 2/18 [00:05<00:29,  1.85s/it, train_loss=0.0357][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.86s/it, train_loss=0.0357][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.86s/it, train_loss=0.0355][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.85s/it, train_loss=0.0355][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.85s/it, train_loss=0.0351][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.85s/it, train_loss=0.0351][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.85s/it, train_loss=0.0349][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.035524748265743256]


 23%|██▎       | 13/56 [08:13<27:24, 38.25s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0354][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.87s/it, train_loss=0.0354][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.87s/it, train_loss=0.0356][A
training:  11%|█         | 2/18 [00:03<00:29,  1.86s/it, train_loss=0.0356][A
training:  11%|█         | 2/18 [00:05<00:29,  1.86s/it, train_loss=0.0354][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.86s/it, train_loss=0.0354][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.86s/it, train_loss=0.0351][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.84s/it, train_loss=0.0351][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.84s/it, train_loss=0.0352][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.83s/it, train_loss=0.0352][A
training:  28%|██▊       | 5/18 [00:11<00:23,  1.83s/it, train_loss=0.0359][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.035446323454380035]


 25%|██▌       | 14/56 [08:51<26:43, 38.17s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.035][A
training:   6%|▌         | 1/18 [00:01<00:30,  1.82s/it, train_loss=0.035][A
training:   6%|▌         | 1/18 [00:03<00:30,  1.82s/it, train_loss=0.0355][A
training:  11%|█         | 2/18 [00:03<00:29,  1.82s/it, train_loss=0.0355][A
training:  11%|█         | 2/18 [00:05<00:29,  1.82s/it, train_loss=0.0357][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.83s/it, train_loss=0.0357][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.83s/it, train_loss=0.0355][A
training:  22%|██▏       | 4/18 [00:07<00:26,  1.86s/it, train_loss=0.0355][A
training:  22%|██▏       | 4/18 [00:09<00:26,  1.86s/it, train_loss=0.0351][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.86s/it, train_loss=0.0351][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.86s/it, train_loss=0.0359][A
training:  33%|███▎      | 6/18 [00:11<00

Evaluation Score : [0.035229237750172615]


 27%|██▋       | 15/56 [09:29<26:02, 38.12s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0354][A
training:   6%|▌         | 1/18 [00:01<00:32,  1.88s/it, train_loss=0.0354][A
training:   6%|▌         | 1/18 [00:03<00:32,  1.88s/it, train_loss=0.0355][A
training:  11%|█         | 2/18 [00:03<00:29,  1.84s/it, train_loss=0.0355][A
training:  11%|█         | 2/18 [00:05<00:29,  1.84s/it, train_loss=0.0349][A
training:  17%|█▋        | 3/18 [00:05<00:28,  1.90s/it, train_loss=0.0349][A
training:  17%|█▋        | 3/18 [00:07<00:28,  1.90s/it, train_loss=0.0349][A
training:  22%|██▏       | 4/18 [00:07<00:26,  1.87s/it, train_loss=0.0349][A
training:  22%|██▏       | 4/18 [00:09<00:26,  1.87s/it, train_loss=0.0351][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.86s/it, train_loss=0.0351][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.86s/it, train_loss=0.0354][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.03520158305764198]


 29%|██▊       | 16/56 [10:08<25:28, 38.21s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0347][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.84s/it, train_loss=0.0347][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.84s/it, train_loss=0.0348][A
training:  11%|█         | 2/18 [00:03<00:29,  1.82s/it, train_loss=0.0348][A
training:  11%|█         | 2/18 [00:05<00:29,  1.82s/it, train_loss=0.0347][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.85s/it, train_loss=0.0347][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.85s/it, train_loss=0.035] [A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.83s/it, train_loss=0.035][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.83s/it, train_loss=0.0358][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.84s/it, train_loss=0.0358][A
training:  28%|██▊       | 5/18 [00:11<00:23,  1.84s/it, train_loss=0.0354][A
training:  33%|███▎      | 6/18 [00:11<0

Evaluation Score : [0.03519012965261936]


 30%|███       | 17/56 [10:47<24:58, 38.41s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.035][A
training:   6%|▌         | 1/18 [00:01<00:33,  1.96s/it, train_loss=0.035][A
training:   6%|▌         | 1/18 [00:03<00:33,  1.96s/it, train_loss=0.0348][A
training:  11%|█         | 2/18 [00:03<00:30,  1.90s/it, train_loss=0.0348][A
training:  11%|█         | 2/18 [00:05<00:30,  1.90s/it, train_loss=0.0353][A
training:  17%|█▋        | 3/18 [00:05<00:28,  1.89s/it, train_loss=0.0353][A
training:  17%|█▋        | 3/18 [00:07<00:28,  1.89s/it, train_loss=0.035] [A
training:  22%|██▏       | 4/18 [00:07<00:26,  1.88s/it, train_loss=0.035][A
training:  22%|██▏       | 4/18 [00:09<00:26,  1.88s/it, train_loss=0.0351][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.88s/it, train_loss=0.0351][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.88s/it, train_loss=0.0353][A
training:  33%|███▎      | 6/18 [00:11<00:

Evaluation Score : [0.03510313108563423]


 32%|███▏      | 18/56 [11:25<24:23, 38.51s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0353][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.84s/it, train_loss=0.0353][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.84s/it, train_loss=0.0344][A
training:  11%|█         | 2/18 [00:03<00:29,  1.87s/it, train_loss=0.0344][A
training:  11%|█         | 2/18 [00:05<00:29,  1.87s/it, train_loss=0.0352][A
training:  17%|█▋        | 3/18 [00:05<00:28,  1.87s/it, train_loss=0.0352][A
training:  17%|█▋        | 3/18 [00:07<00:28,  1.87s/it, train_loss=0.0357][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.85s/it, train_loss=0.0357][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.85s/it, train_loss=0.0352][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.85s/it, train_loss=0.0352][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.85s/it, train_loss=0.0351][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.03502357006072998]


 34%|███▍      | 19/56 [12:04<23:44, 38.51s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0348][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.86s/it, train_loss=0.0348][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.86s/it, train_loss=0.0349][A
training:  11%|█         | 2/18 [00:03<00:30,  1.93s/it, train_loss=0.0349][A
training:  11%|█         | 2/18 [00:05<00:30,  1.93s/it, train_loss=0.0347][A
training:  17%|█▋        | 3/18 [00:05<00:28,  1.91s/it, train_loss=0.0347][A
training:  17%|█▋        | 3/18 [00:07<00:28,  1.91s/it, train_loss=0.0348][A
training:  22%|██▏       | 4/18 [00:07<00:27,  1.93s/it, train_loss=0.0348][A
training:  22%|██▏       | 4/18 [00:09<00:27,  1.93s/it, train_loss=0.0349][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.91s/it, train_loss=0.0349][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.91s/it, train_loss=0.0351][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.0348779633641243]


 36%|███▌      | 20/56 [12:43<23:10, 38.62s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0348][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.86s/it, train_loss=0.0348][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.86s/it, train_loss=0.0352][A
training:  11%|█         | 2/18 [00:03<00:29,  1.85s/it, train_loss=0.0352][A
training:  11%|█         | 2/18 [00:05<00:29,  1.85s/it, train_loss=0.0354][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.86s/it, train_loss=0.0354][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.86s/it, train_loss=0.0348][A
training:  22%|██▏       | 4/18 [00:07<00:26,  1.88s/it, train_loss=0.0348][A
training:  22%|██▏       | 4/18 [00:09<00:26,  1.88s/it, train_loss=0.0346][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.87s/it, train_loss=0.0346][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.87s/it, train_loss=0.0347][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.034815436229109764]


 38%|███▊      | 21/56 [13:21<22:31, 38.62s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0349][A
training:   6%|▌         | 1/18 [00:01<00:32,  1.90s/it, train_loss=0.0349][A
training:   6%|▌         | 1/18 [00:03<00:32,  1.90s/it, train_loss=0.0353][A
training:  11%|█         | 2/18 [00:03<00:29,  1.87s/it, train_loss=0.0353][A
training:  11%|█         | 2/18 [00:05<00:29,  1.87s/it, train_loss=0.0341][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.85s/it, train_loss=0.0341][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.85s/it, train_loss=0.0343][A
training:  22%|██▏       | 4/18 [00:07<00:26,  1.86s/it, train_loss=0.0343][A
training:  22%|██▏       | 4/18 [00:09<00:26,  1.86s/it, train_loss=0.0346][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.85s/it, train_loss=0.0346][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.85s/it, train_loss=0.0345][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.0348187405616045]


 39%|███▉      | 22/56 [14:00<21:50, 38.54s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0347][A
training:   6%|▌         | 1/18 [00:01<00:32,  1.89s/it, train_loss=0.0347][A
training:   6%|▌         | 1/18 [00:03<00:32,  1.89s/it, train_loss=0.0344][A
training:  11%|█         | 2/18 [00:03<00:29,  1.84s/it, train_loss=0.0344][A
training:  11%|█         | 2/18 [00:05<00:29,  1.84s/it, train_loss=0.0347][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.83s/it, train_loss=0.0347][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.83s/it, train_loss=0.0347][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.85s/it, train_loss=0.0347][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.85s/it, train_loss=0.0344][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.86s/it, train_loss=0.0344][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.86s/it, train_loss=0.0347][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.034749699756503105]


 41%|████      | 23/56 [14:38<21:11, 38.52s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.035][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.86s/it, train_loss=0.035][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.86s/it, train_loss=0.0348][A
training:  11%|█         | 2/18 [00:03<00:29,  1.83s/it, train_loss=0.0348][A
training:  11%|█         | 2/18 [00:05<00:29,  1.83s/it, train_loss=0.0343][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.85s/it, train_loss=0.0343][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.85s/it, train_loss=0.0341][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.85s/it, train_loss=0.0341][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.85s/it, train_loss=0.0347][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.92s/it, train_loss=0.0347][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.92s/it, train_loss=0.0345][A
training:  33%|███▎      | 6/18 [00:11<00

Evaluation Score : [0.034514689818024635]


 43%|████▎     | 24/56 [15:17<20:37, 38.67s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:02<?, ?it/s, train_loss=0.0345][A
training:   6%|▌         | 1/18 [00:02<00:36,  2.12s/it, train_loss=0.0345][A
training:   6%|▌         | 1/18 [00:03<00:36,  2.12s/it, train_loss=0.0343][A
training:  11%|█         | 2/18 [00:04<00:31,  1.98s/it, train_loss=0.0343][A
training:  11%|█         | 2/18 [00:05<00:31,  1.98s/it, train_loss=0.0343][A
training:  17%|█▋        | 3/18 [00:05<00:28,  1.93s/it, train_loss=0.0343][A
training:  17%|█▋        | 3/18 [00:07<00:28,  1.93s/it, train_loss=0.0341][A
training:  22%|██▏       | 4/18 [00:07<00:26,  1.89s/it, train_loss=0.0341][A
training:  22%|██▏       | 4/18 [00:09<00:26,  1.89s/it, train_loss=0.0346][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.89s/it, train_loss=0.0346][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.89s/it, train_loss=0.0345][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.03449227847158909]


 45%|████▍     | 25/56 [15:56<20:01, 38.76s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0344][A
training:   6%|▌         | 1/18 [00:01<00:32,  1.90s/it, train_loss=0.0344][A
training:   6%|▌         | 1/18 [00:03<00:32,  1.90s/it, train_loss=0.0343][A
training:  11%|█         | 2/18 [00:03<00:29,  1.85s/it, train_loss=0.0343][A
training:  11%|█         | 2/18 [00:05<00:29,  1.85s/it, train_loss=0.0345][A
training:  17%|█▋        | 3/18 [00:05<00:28,  1.87s/it, train_loss=0.0345][A
training:  17%|█▋        | 3/18 [00:07<00:28,  1.87s/it, train_loss=0.0347][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.85s/it, train_loss=0.0347][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.85s/it, train_loss=0.0345][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.85s/it, train_loss=0.0345][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.85s/it, train_loss=0.0347][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.03440648131072521]


 46%|████▋     | 26/56 [16:35<19:20, 38.70s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0346][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.88s/it, train_loss=0.0346][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.88s/it, train_loss=0.0337][A
training:  11%|█         | 2/18 [00:03<00:30,  1.89s/it, train_loss=0.0337][A
training:  11%|█         | 2/18 [00:05<00:30,  1.89s/it, train_loss=0.0345][A
training:  17%|█▋        | 3/18 [00:05<00:28,  1.89s/it, train_loss=0.0345][A
training:  17%|█▋        | 3/18 [00:07<00:28,  1.89s/it, train_loss=0.0341][A
training:  22%|██▏       | 4/18 [00:07<00:26,  1.87s/it, train_loss=0.0341][A
training:  22%|██▏       | 4/18 [00:09<00:26,  1.87s/it, train_loss=0.0339][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.88s/it, train_loss=0.0339][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.88s/it, train_loss=0.0346][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.034380875527858734]


 48%|████▊     | 27/56 [17:14<18:43, 38.73s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0343][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.88s/it, train_loss=0.0343][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.88s/it, train_loss=0.0342][A
training:  11%|█         | 2/18 [00:03<00:29,  1.85s/it, train_loss=0.0342][A
training:  11%|█         | 2/18 [00:05<00:29,  1.85s/it, train_loss=0.0345][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.86s/it, train_loss=0.0345][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.86s/it, train_loss=0.0343][A
training:  22%|██▏       | 4/18 [00:07<00:26,  1.90s/it, train_loss=0.0343][A
training:  22%|██▏       | 4/18 [00:09<00:26,  1.90s/it, train_loss=0.0342][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.88s/it, train_loss=0.0342][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.88s/it, train_loss=0.0339][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.03433830477297306]


 50%|█████     | 28/56 [17:53<18:06, 38.79s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0345][A
training:   6%|▌         | 1/18 [00:01<00:32,  1.89s/it, train_loss=0.0345][A
training:   6%|▌         | 1/18 [00:03<00:32,  1.89s/it, train_loss=0.0342][A
training:  11%|█         | 2/18 [00:03<00:31,  1.97s/it, train_loss=0.0342][A
training:  11%|█         | 2/18 [00:05<00:31,  1.97s/it, train_loss=0.0339][A
training:  17%|█▋        | 3/18 [00:05<00:29,  1.95s/it, train_loss=0.0339][A
training:  17%|█▋        | 3/18 [00:07<00:29,  1.95s/it, train_loss=0.0341][A
training:  22%|██▏       | 4/18 [00:07<00:27,  1.93s/it, train_loss=0.0341][A
training:  22%|██▏       | 4/18 [00:09<00:27,  1.93s/it, train_loss=0.0342][A
training:  28%|██▊       | 5/18 [00:09<00:25,  1.94s/it, train_loss=0.0342][A
training:  28%|██▊       | 5/18 [00:11<00:25,  1.94s/it, train_loss=0.0346][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.03429035097360611]


 52%|█████▏    | 29/56 [18:32<17:31, 38.93s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0339][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.87s/it, train_loss=0.0339][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.87s/it, train_loss=0.0343][A
training:  11%|█         | 2/18 [00:03<00:30,  1.88s/it, train_loss=0.0343][A
training:  11%|█         | 2/18 [00:05<00:30,  1.88s/it, train_loss=0.0341][A
training:  17%|█▋        | 3/18 [00:05<00:28,  1.87s/it, train_loss=0.0341][A
training:  17%|█▋        | 3/18 [00:07<00:28,  1.87s/it, train_loss=0.0342][A
training:  22%|██▏       | 4/18 [00:07<00:26,  1.86s/it, train_loss=0.0342][A
training:  22%|██▏       | 4/18 [00:09<00:26,  1.86s/it, train_loss=0.0343][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.87s/it, train_loss=0.0343][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.87s/it, train_loss=0.034] [A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.03413352929055691]


 54%|█████▎    | 30/56 [19:11<16:54, 39.00s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0342][A
training:   6%|▌         | 1/18 [00:01<00:32,  1.90s/it, train_loss=0.0342][A
training:   6%|▌         | 1/18 [00:03<00:32,  1.90s/it, train_loss=0.0339][A
training:  11%|█         | 2/18 [00:03<00:29,  1.83s/it, train_loss=0.0339][A
training:  11%|█         | 2/18 [00:05<00:29,  1.83s/it, train_loss=0.034] [A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.80s/it, train_loss=0.034][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.80s/it, train_loss=0.034][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.83s/it, train_loss=0.034][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.83s/it, train_loss=0.0342][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.83s/it, train_loss=0.0342][A
training:  28%|██▊       | 5/18 [00:10<00:23,  1.83s/it, train_loss=0.0341][A
training:  33%|███▎      | 6/18 [00:10<00:

Evaluation Score : [0.03412444517016411]


 55%|█████▌    | 31/56 [19:49<16:07, 38.69s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0341][A
training:   6%|▌         | 1/18 [00:01<00:30,  1.82s/it, train_loss=0.0341][A
training:   6%|▌         | 1/18 [00:03<00:30,  1.82s/it, train_loss=0.0339][A
training:  11%|█         | 2/18 [00:03<00:29,  1.83s/it, train_loss=0.0339][A
training:  11%|█         | 2/18 [00:05<00:29,  1.83s/it, train_loss=0.034] [A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.83s/it, train_loss=0.034][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.83s/it, train_loss=0.0332][A
training:  22%|██▏       | 4/18 [00:07<00:26,  1.86s/it, train_loss=0.0332][A
training:  22%|██▏       | 4/18 [00:09<00:26,  1.86s/it, train_loss=0.0346][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.84s/it, train_loss=0.0346][A
training:  28%|██▊       | 5/18 [00:11<00:23,  1.84s/it, train_loss=0.0343][A
training:  33%|███▎      | 6/18 [00:11<0

Evaluation Score : [0.03413606993854046]


 57%|█████▋    | 32/56 [20:26<15:20, 38.36s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:02<?, ?it/s, train_loss=0.0338][A
training:   6%|▌         | 1/18 [00:02<00:34,  2.00s/it, train_loss=0.0338][A
training:   6%|▌         | 1/18 [00:03<00:34,  2.00s/it, train_loss=0.0343][A
training:  11%|█         | 2/18 [00:03<00:29,  1.86s/it, train_loss=0.0343][A
training:  11%|█         | 2/18 [00:05<00:29,  1.86s/it, train_loss=0.0337][A
training:  17%|█▋        | 3/18 [00:05<00:28,  1.91s/it, train_loss=0.0337][A
training:  17%|█▋        | 3/18 [00:07<00:28,  1.91s/it, train_loss=0.0339][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.85s/it, train_loss=0.0339][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.85s/it, train_loss=0.0341][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.85s/it, train_loss=0.0341][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.85s/it, train_loss=0.0341][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.034095410257577896]


 59%|█████▉    | 33/56 [21:05<14:45, 38.48s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.034][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.87s/it, train_loss=0.034][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.87s/it, train_loss=0.0335][A
training:  11%|█         | 2/18 [00:03<00:29,  1.85s/it, train_loss=0.0335][A
training:  11%|█         | 2/18 [00:05<00:29,  1.85s/it, train_loss=0.0343][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.83s/it, train_loss=0.0343][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.83s/it, train_loss=0.0338][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.82s/it, train_loss=0.0338][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.82s/it, train_loss=0.0334][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.81s/it, train_loss=0.0334][A
training:  28%|██▊       | 5/18 [00:10<00:23,  1.81s/it, train_loss=0.034] [A
training:  33%|███▎      | 6/18 [00:10<00

Evaluation Score : [0.03405788168311119]


 61%|██████    | 34/56 [21:43<14:01, 38.24s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0331][A
training:   6%|▌         | 1/18 [00:01<00:30,  1.80s/it, train_loss=0.0331][A
training:   6%|▌         | 1/18 [00:03<00:30,  1.80s/it, train_loss=0.0341][A
training:  11%|█         | 2/18 [00:03<00:28,  1.78s/it, train_loss=0.0341][A
training:  11%|█         | 2/18 [00:05<00:28,  1.78s/it, train_loss=0.034] [A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.80s/it, train_loss=0.034][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.80s/it, train_loss=0.0335][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.79s/it, train_loss=0.0335][A
training:  22%|██▏       | 4/18 [00:08<00:25,  1.79s/it, train_loss=0.0338][A
training:  28%|██▊       | 5/18 [00:08<00:23,  1.79s/it, train_loss=0.0338][A
training:  28%|██▊       | 5/18 [00:10<00:23,  1.79s/it, train_loss=0.0338][A
training:  33%|███▎      | 6/18 [00:10<0

Evaluation Score : [0.034056851640343666]


 62%|██████▎   | 35/56 [22:21<13:19, 38.06s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0343][A
training:   6%|▌         | 1/18 [00:01<00:30,  1.77s/it, train_loss=0.0343][A
training:   6%|▌         | 1/18 [00:03<00:30,  1.77s/it, train_loss=0.0336][A
training:  11%|█         | 2/18 [00:03<00:28,  1.79s/it, train_loss=0.0336][A
training:  11%|█         | 2/18 [00:05<00:28,  1.79s/it, train_loss=0.0339][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.81s/it, train_loss=0.0339][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.81s/it, train_loss=0.0333][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.81s/it, train_loss=0.0333][A
training:  22%|██▏       | 4/18 [00:08<00:25,  1.81s/it, train_loss=0.0338][A
training:  28%|██▊       | 5/18 [00:08<00:23,  1.80s/it, train_loss=0.0338][A
training:  28%|██▊       | 5/18 [00:10<00:23,  1.80s/it, train_loss=0.0337][A
training:  33%|███▎      | 6/18 [00:10<

Evaluation Score : [0.03396954573690891]


 64%|██████▍   | 36/56 [22:58<12:37, 37.90s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0338][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.83s/it, train_loss=0.0338][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.83s/it, train_loss=0.034] [A
training:  11%|█         | 2/18 [00:03<00:29,  1.84s/it, train_loss=0.034][A
training:  11%|█         | 2/18 [00:05<00:29,  1.84s/it, train_loss=0.0334][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.83s/it, train_loss=0.0334][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.83s/it, train_loss=0.0335][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.81s/it, train_loss=0.0335][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.81s/it, train_loss=0.034] [A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.81s/it, train_loss=0.034][A
training:  28%|██▊       | 5/18 [00:11<00:23,  1.81s/it, train_loss=0.0342][A
training:  33%|███▎      | 6/18 [00:11<00

Evaluation Score : [0.034054212272167206]


 66%|██████▌   | 37/56 [23:36<11:59, 37.85s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0337][A
training:   6%|▌         | 1/18 [00:01<00:30,  1.82s/it, train_loss=0.0337][A
training:   6%|▌         | 1/18 [00:03<00:30,  1.82s/it, train_loss=0.0339][A
training:  11%|█         | 2/18 [00:03<00:29,  1.82s/it, train_loss=0.0339][A
training:  11%|█         | 2/18 [00:05<00:29,  1.82s/it, train_loss=0.0337][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.84s/it, train_loss=0.0337][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.84s/it, train_loss=0.0338][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.82s/it, train_loss=0.0338][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.82s/it, train_loss=0.0338][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.81s/it, train_loss=0.0338][A
training:  28%|██▊       | 5/18 [00:10<00:23,  1.81s/it, train_loss=0.0338][A
training:  33%|███▎      | 6/18 [00:10<

Evaluation Score : [0.03399459831416607]


 68%|██████▊   | 38/56 [24:14<11:20, 37.80s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0337][A
training:   6%|▌         | 1/18 [00:01<00:30,  1.79s/it, train_loss=0.0337][A
training:   6%|▌         | 1/18 [00:03<00:30,  1.79s/it, train_loss=0.0336][A
training:  11%|█         | 2/18 [00:03<00:29,  1.82s/it, train_loss=0.0336][A
training:  11%|█         | 2/18 [00:05<00:29,  1.82s/it, train_loss=0.0336][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.86s/it, train_loss=0.0336][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.86s/it, train_loss=0.0334][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.83s/it, train_loss=0.0334][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.83s/it, train_loss=0.0335][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.82s/it, train_loss=0.0335][A
training:  28%|██▊       | 5/18 [00:10<00:23,  1.82s/it, train_loss=0.034] [A
training:  33%|███▎      | 6/18 [00:10<

Evaluation Score : [0.03397636488080025]


 70%|██████▉   | 39/56 [24:52<10:43, 37.87s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0335][A
training:   6%|▌         | 1/18 [00:01<00:30,  1.81s/it, train_loss=0.0335][A
training:   6%|▌         | 1/18 [00:03<00:30,  1.81s/it, train_loss=0.0343][A
training:  11%|█         | 2/18 [00:03<00:29,  1.83s/it, train_loss=0.0343][A
training:  11%|█         | 2/18 [00:05<00:29,  1.83s/it, train_loss=0.0339][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.84s/it, train_loss=0.0339][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.84s/it, train_loss=0.0342][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.84s/it, train_loss=0.0342][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.84s/it, train_loss=0.0337][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.83s/it, train_loss=0.0337][A
training:  28%|██▊       | 5/18 [00:10<00:23,  1.83s/it, train_loss=0.0339][A
training:  33%|███▎      | 6/18 [00:10<

Evaluation Score : [0.033964915201067924]


 71%|███████▏  | 40/56 [25:29<10:05, 37.86s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0336][A
training:   6%|▌         | 1/18 [00:01<00:30,  1.79s/it, train_loss=0.0336][A
training:   6%|▌         | 1/18 [00:03<00:30,  1.79s/it, train_loss=0.0339][A
training:  11%|█         | 2/18 [00:03<00:28,  1.77s/it, train_loss=0.0339][A
training:  11%|█         | 2/18 [00:05<00:28,  1.77s/it, train_loss=0.0338][A
training:  17%|█▋        | 3/18 [00:05<00:26,  1.77s/it, train_loss=0.0338][A
training:  17%|█▋        | 3/18 [00:07<00:26,  1.77s/it, train_loss=0.0341][A
training:  22%|██▏       | 4/18 [00:07<00:24,  1.78s/it, train_loss=0.0341][A
training:  22%|██▏       | 4/18 [00:09<00:24,  1.78s/it, train_loss=0.0338][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.82s/it, train_loss=0.0338][A
training:  28%|██▊       | 5/18 [00:10<00:23,  1.82s/it, train_loss=0.0337][A
training:  33%|███▎      | 6/18 [00:10<

Evaluation Score : [0.03379441238939762]


 73%|███████▎  | 41/56 [26:07<09:27, 37.85s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0335][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.84s/it, train_loss=0.0335][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.84s/it, train_loss=0.0337][A
training:  11%|█         | 2/18 [00:03<00:28,  1.81s/it, train_loss=0.0337][A
training:  11%|█         | 2/18 [00:05<00:28,  1.81s/it, train_loss=0.0337][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.83s/it, train_loss=0.0337][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.83s/it, train_loss=0.0342][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.81s/it, train_loss=0.0342][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.81s/it, train_loss=0.0334][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.84s/it, train_loss=0.0334][A
training:  28%|██▊       | 5/18 [00:10<00:23,  1.84s/it, train_loss=0.0335][A
training:  33%|███▎      | 6/18 [00:10<

Evaluation Score : [0.03380509838461876]


 75%|███████▌  | 42/56 [26:45<08:49, 37.79s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0343][A
training:   6%|▌         | 1/18 [00:01<00:30,  1.81s/it, train_loss=0.0343][A
training:   6%|▌         | 1/18 [00:03<00:30,  1.81s/it, train_loss=0.0342][A
training:  11%|█         | 2/18 [00:03<00:30,  1.90s/it, train_loss=0.0342][A
training:  11%|█         | 2/18 [00:05<00:30,  1.90s/it, train_loss=0.0336][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.86s/it, train_loss=0.0336][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.86s/it, train_loss=0.0337][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.84s/it, train_loss=0.0337][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.84s/it, train_loss=0.0337][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.84s/it, train_loss=0.0337][A
training:  28%|██▊       | 5/18 [00:11<00:23,  1.84s/it, train_loss=0.0339][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.03381691314280033]


 77%|███████▋  | 43/56 [27:23<08:11, 37.84s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0337][A
training:   6%|▌         | 1/18 [00:01<00:30,  1.80s/it, train_loss=0.0337][A
training:   6%|▌         | 1/18 [00:03<00:30,  1.80s/it, train_loss=0.0338][A
training:  11%|█         | 2/18 [00:03<00:28,  1.81s/it, train_loss=0.0338][A
training:  11%|█         | 2/18 [00:05<00:28,  1.81s/it, train_loss=0.0336][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.86s/it, train_loss=0.0336][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.86s/it, train_loss=0.0335][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.83s/it, train_loss=0.0335][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.83s/it, train_loss=0.0335][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.82s/it, train_loss=0.0335][A
training:  28%|██▊       | 5/18 [00:10<00:23,  1.82s/it, train_loss=0.0337][A
training:  33%|███▎      | 6/18 [00:10<

Evaluation Score : [0.03378703631460667]


 79%|███████▊  | 44/56 [28:02<07:37, 38.12s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0337][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.86s/it, train_loss=0.0337][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.86s/it, train_loss=0.0336][A
training:  11%|█         | 2/18 [00:03<00:29,  1.85s/it, train_loss=0.0336][A
training:  11%|█         | 2/18 [00:05<00:29,  1.85s/it, train_loss=0.0333][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.83s/it, train_loss=0.0333][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.83s/it, train_loss=0.0333][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.84s/it, train_loss=0.0333][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.84s/it, train_loss=0.0338][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.84s/it, train_loss=0.0338][A
training:  28%|██▊       | 5/18 [00:11<00:23,  1.84s/it, train_loss=0.0333][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.033769963309168816]


 80%|████████  | 45/56 [28:40<06:59, 38.13s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.033][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.86s/it, train_loss=0.033][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.86s/it, train_loss=0.0336][A
training:  11%|█         | 2/18 [00:03<00:29,  1.84s/it, train_loss=0.0336][A
training:  11%|█         | 2/18 [00:05<00:29,  1.84s/it, train_loss=0.0335][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.82s/it, train_loss=0.0335][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.82s/it, train_loss=0.034] [A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.81s/it, train_loss=0.034][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.81s/it, train_loss=0.0336][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.83s/it, train_loss=0.0336][A
training:  28%|██▊       | 5/18 [00:11<00:23,  1.83s/it, train_loss=0.0338][A
training:  33%|███▎      | 6/18 [00:11<00:

Evaluation Score : [0.033778032287955284]


 82%|████████▏ | 46/56 [29:18<06:20, 38.08s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0334][A
training:   6%|▌         | 1/18 [00:01<00:30,  1.82s/it, train_loss=0.0334][A
training:   6%|▌         | 1/18 [00:03<00:30,  1.82s/it, train_loss=0.0338][A
training:  11%|█         | 2/18 [00:03<00:29,  1.82s/it, train_loss=0.0338][A
training:  11%|█         | 2/18 [00:05<00:29,  1.82s/it, train_loss=0.0332][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.82s/it, train_loss=0.0332][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.82s/it, train_loss=0.0336][A
training:  22%|██▏       | 4/18 [00:07<00:26,  1.90s/it, train_loss=0.0336][A
training:  22%|██▏       | 4/18 [00:09<00:26,  1.90s/it, train_loss=0.0337][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.86s/it, train_loss=0.0337][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.86s/it, train_loss=0.0335][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.0336117148399353]


 84%|████████▍ | 47/56 [29:56<05:42, 38.09s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.034][A
training:   6%|▌         | 1/18 [00:02<00:33,  2.00s/it, train_loss=0.034][A
training:   6%|▌         | 1/18 [00:03<00:33,  2.00s/it, train_loss=0.0336][A
training:  11%|█         | 2/18 [00:03<00:30,  1.88s/it, train_loss=0.0336][A
training:  11%|█         | 2/18 [00:05<00:30,  1.88s/it, train_loss=0.0334][A
training:  17%|█▋        | 3/18 [00:05<00:28,  1.87s/it, train_loss=0.0334][A
training:  17%|█▋        | 3/18 [00:07<00:28,  1.87s/it, train_loss=0.0338][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.85s/it, train_loss=0.0338][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.85s/it, train_loss=0.0332][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.84s/it, train_loss=0.0332][A
training:  28%|██▊       | 5/18 [00:11<00:23,  1.84s/it, train_loss=0.0334][A
training:  33%|███▎      | 6/18 [00:11<00

Evaluation Score : [0.033625489100813866]


 86%|████████▌ | 48/56 [30:34<05:04, 38.05s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0334][A
training:   6%|▌         | 1/18 [00:01<00:30,  1.82s/it, train_loss=0.0334][A
training:   6%|▌         | 1/18 [00:03<00:30,  1.82s/it, train_loss=0.0334][A
training:  11%|█         | 2/18 [00:03<00:29,  1.87s/it, train_loss=0.0334][A
training:  11%|█         | 2/18 [00:05<00:29,  1.87s/it, train_loss=0.0333][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.87s/it, train_loss=0.0333][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.87s/it, train_loss=0.0335][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.85s/it, train_loss=0.0335][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.85s/it, train_loss=0.0337][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.84s/it, train_loss=0.0337][A
training:  28%|██▊       | 5/18 [00:11<00:23,  1.84s/it, train_loss=0.0334][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.03356829285621643]


 88%|████████▊ | 49/56 [31:12<04:27, 38.16s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0339][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.84s/it, train_loss=0.0339][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.84s/it, train_loss=0.033] [A
training:  11%|█         | 2/18 [00:03<00:29,  1.84s/it, train_loss=0.033][A
training:  11%|█         | 2/18 [00:05<00:29,  1.84s/it, train_loss=0.0333][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.85s/it, train_loss=0.0333][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.85s/it, train_loss=0.0336][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.84s/it, train_loss=0.0336][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.84s/it, train_loss=0.0337][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.84s/it, train_loss=0.0337][A
training:  28%|██▊       | 5/18 [00:10<00:23,  1.84s/it, train_loss=0.0336][A
training:  33%|███▎      | 6/18 [00:10<0

Evaluation Score : [0.03365252912044525]


 89%|████████▉ | 50/56 [31:51<03:49, 38.23s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0333][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.84s/it, train_loss=0.0333][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.84s/it, train_loss=0.0337][A
training:  11%|█         | 2/18 [00:03<00:29,  1.83s/it, train_loss=0.0337][A
training:  11%|█         | 2/18 [00:05<00:29,  1.83s/it, train_loss=0.0327][A
training:  17%|█▋        | 3/18 [00:05<00:28,  1.91s/it, train_loss=0.0327][A
training:  17%|█▋        | 3/18 [00:07<00:28,  1.91s/it, train_loss=0.0334][A
training:  22%|██▏       | 4/18 [00:07<00:26,  1.90s/it, train_loss=0.0334][A
training:  22%|██▏       | 4/18 [00:09<00:26,  1.90s/it, train_loss=0.0335][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.89s/it, train_loss=0.0335][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.89s/it, train_loss=0.0338][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.03369782119989395]


 91%|█████████ | 51/56 [32:29<03:11, 38.30s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0335][A
training:   6%|▌         | 1/18 [00:01<00:30,  1.81s/it, train_loss=0.0335][A
training:   6%|▌         | 1/18 [00:03<00:30,  1.81s/it, train_loss=0.0337][A
training:  11%|█         | 2/18 [00:03<00:30,  1.88s/it, train_loss=0.0337][A
training:  11%|█         | 2/18 [00:05<00:30,  1.88s/it, train_loss=0.0339][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.86s/it, train_loss=0.0339][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.86s/it, train_loss=0.0329][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.83s/it, train_loss=0.0329][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.83s/it, train_loss=0.0336][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.84s/it, train_loss=0.0336][A
training:  28%|██▊       | 5/18 [00:11<00:23,  1.84s/it, train_loss=0.033] [A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.033544110134243965]


 93%|█████████▎| 52/56 [33:07<02:32, 38.24s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0333][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.85s/it, train_loss=0.0333][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.85s/it, train_loss=0.0335][A
training:  11%|█         | 2/18 [00:03<00:29,  1.83s/it, train_loss=0.0335][A
training:  11%|█         | 2/18 [00:05<00:29,  1.83s/it, train_loss=0.0336][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.83s/it, train_loss=0.0336][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.83s/it, train_loss=0.033] [A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.82s/it, train_loss=0.033][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.82s/it, train_loss=0.0333][A
training:  28%|██▊       | 5/18 [00:09<00:23,  1.84s/it, train_loss=0.0333][A
training:  28%|██▊       | 5/18 [00:11<00:23,  1.84s/it, train_loss=0.0332][A
training:  33%|███▎      | 6/18 [00:11<0

Evaluation Score : [0.033643221482634544]


 95%|█████████▍| 53/56 [33:46<01:54, 38.28s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0331][A
training:   6%|▌         | 1/18 [00:01<00:31,  1.84s/it, train_loss=0.0331][A
training:   6%|▌         | 1/18 [00:03<00:31,  1.84s/it, train_loss=0.0332][A
training:  11%|█         | 2/18 [00:03<00:29,  1.87s/it, train_loss=0.0332][A
training:  11%|█         | 2/18 [00:05<00:29,  1.87s/it, train_loss=0.0343][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.86s/it, train_loss=0.0343][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.86s/it, train_loss=0.0339][A
training:  22%|██▏       | 4/18 [00:07<00:25,  1.85s/it, train_loss=0.0339][A
training:  22%|██▏       | 4/18 [00:09<00:25,  1.85s/it, train_loss=0.0341][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.85s/it, train_loss=0.0341][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.85s/it, train_loss=0.0333][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.03365586884319782]


 96%|█████████▋| 54/56 [34:24<01:16, 38.35s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0331][A
training:   6%|▌         | 1/18 [00:01<00:30,  1.81s/it, train_loss=0.0331][A
training:   6%|▌         | 1/18 [00:03<00:30,  1.81s/it, train_loss=0.0334][A
training:  11%|█         | 2/18 [00:03<00:28,  1.80s/it, train_loss=0.0334][A
training:  11%|█         | 2/18 [00:05<00:28,  1.80s/it, train_loss=0.0336][A
training:  17%|█▋        | 3/18 [00:05<00:27,  1.81s/it, train_loss=0.0336][A
training:  17%|█▋        | 3/18 [00:07<00:27,  1.81s/it, train_loss=0.0335][A
training:  22%|██▏       | 4/18 [00:07<00:26,  1.89s/it, train_loss=0.0335][A
training:  22%|██▏       | 4/18 [00:09<00:26,  1.89s/it, train_loss=0.0337][A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.88s/it, train_loss=0.0337][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.88s/it, train_loss=0.0331][A
training:  33%|███▎      | 6/18 [00:11<

Evaluation Score : [0.033516500145196915]


 98%|█████████▊| 55/56 [35:02<00:38, 38.38s/it]
training:   0%|          | 0/18 [00:00<?, ?it/s][A
training:   0%|          | 0/18 [00:01<?, ?it/s, train_loss=0.0335][A
training:   6%|▌         | 1/18 [00:01<00:32,  1.93s/it, train_loss=0.0335][A
training:   6%|▌         | 1/18 [00:03<00:32,  1.93s/it, train_loss=0.033] [A
training:  11%|█         | 2/18 [00:03<00:29,  1.87s/it, train_loss=0.033][A
training:  11%|█         | 2/18 [00:05<00:29,  1.87s/it, train_loss=0.0333][A
training:  17%|█▋        | 3/18 [00:05<00:28,  1.87s/it, train_loss=0.0333][A
training:  17%|█▋        | 3/18 [00:07<00:28,  1.87s/it, train_loss=0.0337][A
training:  22%|██▏       | 4/18 [00:07<00:26,  1.89s/it, train_loss=0.0337][A
training:  22%|██▏       | 4/18 [00:09<00:26,  1.89s/it, train_loss=0.033] [A
training:  28%|██▊       | 5/18 [00:09<00:24,  1.90s/it, train_loss=0.033][A
training:  28%|██▊       | 5/18 [00:11<00:24,  1.90s/it, train_loss=0.0332][A
training:  33%|███▎      | 6/18 [00:11<00