<a href="https://colab.research.google.com/github/R12942159/NTU_DLCV/blob/Hw2/p1_ModelA_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
import os
import torch
import numpy as np
import pandas as pd
from torch import nn
from PIL import Image
from tqdm import tqdm
import torch.nn.functional as F
import torchvision.transforms as tr
from torchvision.utils import save_image, make_grid

#### Download the MNIST-M data

In [2]:
# Colab-only step: mount Google Drive at /content/drive so that the files under
# /content/drive/MyDrive used by the following cells are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install gsutil
!gsutil cp /content/drive/MyDrive/NTU_DLCV/Hw2/hw2_data.zip /content/hw2_data.zip

In [None]:
!unzip /content/hw2_data.zip

In [4]:
# Read both split listings; every CSV row becomes one plain Python list.
train_csv, val_csv = [
    pd.read_csv(csv_path).values.tolist()
    for csv_path in (
        '/content/hw2_data/digits/mnistm/train.csv',
        '/content/hw2_data/digits/mnistm/val.csv',
    )
]

In [7]:
# Total number of rows across the train and validation listings.
sum(len(rows) for rows in (train_csv, val_csv))

56000

#### Select the compute device (CUDA GPU if available)

In [8]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using: {device}")

Using: cuda


#### Construct a Dataset

In [9]:
class MnistDataset(torch.utils.data.Dataset):
    """Image/label dataset assembled from two lists of CSV rows.

    Every row is indexed as ``row[0]`` (file name, joined onto ``join_path``)
    and ``row[1]`` (label). Rows of ``train_csv`` come first, followed by the
    rows of ``test_csv``.
    """

    def __init__(self, train_csv: list, test_csv: list, join_path: str, transform) -> None:
        self.transform = transform

        # Flatten both splits into one row list, train rows ahead of test rows.
        rows = [row for split in (train_csv, test_csv) for row in split]
        self.img_paths = [os.path.join(join_path, row[0]) for row in rows]
        self.img_labels = [row[1] for row in rows]
        assert len(self.img_paths) == len(self.img_labels)

    def __len__(self):
        """Number of samples (train and test rows combined)."""
        return len(self.img_paths)

    def __getitem__(self, idx) -> (torch.Tensor, int):
        """Load image ``idx`` as RGB, apply ``transform`` and return it with its label."""
        image = Image.open(self.img_paths[idx]).convert('RGB')
        return self.transform(image), self.img_labels[idx]

In [10]:
BATCH_SIZE = 256

# Per-channel statistics handed to Normalize. They match the widely used
# ImageNet values; presumably they were not recomputed for MNIST-M.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Train and validation rows are merged into one dataset (see MnistDataset).
dataset = MnistDataset(
    train_csv,
    val_csv,
    join_path='/content/hw2_data/digits/mnistm/data',
    transform=tr.Compose([tr.ToTensor(), tr.Normalize(mean=mean, std=std)]),
)
dataset_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)



#### Build UNet Model

In [11]:
class ConvBlock(nn.Module):
    """Two stacked 3x3 conv -> GroupNorm -> GELU stages with an optional residual sum.

    With ``residual=True`` the output is ``(shortcut + conv2_out) / 1.414``,
    where the shortcut is the block input when the channel counts match and
    the first stage's output otherwise. With ``residual=False`` the second
    stage's output is returned unchanged.
    """

    def __init__(self, in_channels: int, out_channels: int, residual: bool=False) -> None:
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.residual = residual

        self.conv1 = self._stage(in_channels, out_channels)
        self.conv2 = self._stage(out_channels, out_channels)

    @staticmethod
    def _stage(n_in: int, n_out: int) -> nn.Sequential:
        """One padded 3x3 convolution followed by single-group GroupNorm and GELU."""
        return nn.Sequential(
            nn.Conv2d(n_in, n_out, kernel_size=3, stride=1, padding=1),
            nn.GroupNorm(1, n_out),
            nn.GELU(),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        first = self.conv1(x)
        second = self.conv2(first)
        if not self.residual:
            return second

        # The raw input can only be added when its channel count is unchanged.
        shortcut = x if self.in_channels == self.out_channels else first
        return (shortcut + second) / 1.414


class Encoder_set(nn.Module):
    """Encoder stage: a (non-residual) ConvBlock followed by 2x2 max pooling."""

    def __init__(self, in_channels, out_channels) -> None:
        super().__init__()
        stages = [
            ConvBlock(in_channels, out_channels),
            nn.MaxPool2d(2),
        ]
        self.encoder_set = nn.Sequential(*stages)

    def forward(self, x):
        """Run the input through the conv block and the pooling layer."""
        return self.encoder_set(x)


class Decoder_set(nn.Module):
    """Decoder stage: 2x transposed-conv upsampling followed by two ConvBlocks.

    ``in_channels`` is the channel count *after* the input and the skip
    connection have been concatenated along dim 1.
    """

    def __init__(self, in_channels, out_channels) -> None:
        super().__init__()
        stages = [
            nn.ConvTranspose2d(in_channels, out_channels, 2, 2),
            ConvBlock(out_channels, out_channels),
            ConvBlock(out_channels, out_channels),
        ]
        self.decoder_set = nn.Sequential(*stages)

    def forward(self, x, skip_connect):
        """Concatenate ``x`` with ``skip_connect`` on the channel axis and decode."""
        merged = torch.cat((x, skip_connect), 1)
        return self.decoder_set(merged)


class EmbeddingFC(nn.Module):
    """Small MLP (Linear -> GELU -> Linear) that embeds ``input_dim``-sized vectors."""

    def __init__(self, input_dim, output_dim) -> None:
        super().__init__()
        self.input_dim = input_dim
        layers = [
            nn.Linear(input_dim, output_dim),
            nn.GELU(),
            nn.Linear(output_dim, output_dim),
        ]
        self.embeddingfc = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` to ``(-1, input_dim)`` and return its embedding."""
        flat = x.view(-1, self.input_dim)
        return self.embeddingfc(flat)

class UNet(nn.Module):
    """Small conditional U-Net that predicts noise from an image, a label and a timestep.

    Two encoder stages feed a pooled bottleneck vector; two decoder stages
    rebuild the feature map, each modulated by a context (label) embedding and
    a time embedding, with skip connections from the encoder.

    NOTE(review): ``AvgPool2d(7)`` and the 7x7 transposed conv look sized for
    28x28 inputs (two 2x poolings give 7x7) - confirm before using other sizes.
    """

    def __init__(self, in_channels, n_features, n_classes) -> None:
        super().__init__()
        self.in_channels = in_channels
        self.n_features = n_features
        self.n_classes = n_classes

        narrow = 1 * n_features
        wide = 2 * n_features

        self.init_conv = ConvBlock(in_channels, n_features, residual=True)

        self.encoder1 = Encoder_set(n_features, narrow)
        self.encoder2 = Encoder_set(n_features, wide)

        # Collapses the deepest feature map with a 7x7 average pool.
        self.to_vec = nn.Sequential(
            nn.AvgPool2d(7),
            nn.GELU(),
        )

        self.timeembedding1 = EmbeddingFC(1, wide)
        self.timeembedding2 = EmbeddingFC(1, narrow)

        self.contextembedding1 = EmbeddingFC(n_classes, wide)
        self.contextembedding2 = EmbeddingFC(n_classes, narrow)

        self.decoder0 = nn.Sequential(
            nn.ConvTranspose2d(wide, wide, 7, 7),
            nn.GroupNorm(8, wide),
            nn.ReLU(),
        )
        self.decoder1 = Decoder_set(4 * n_features, n_features)
        self.decoder2 = Decoder_set(2 * n_features, n_features)
        self.out = nn.Sequential(
            nn.Conv2d(2 * n_features, n_features, 3, 1, 1),
            nn.GroupNorm(8, n_features),
            nn.ReLU(),
            nn.Conv2d(n_features, self.in_channels, 3, 1, 1),
        )

    def forward(self, x, c, t, context_mask):
        """Predict the noise in ``x``.

        Args:
            x: (noisy) image batch.
            c: integer class labels, one per sample.
            t: timestep values, reshaped to ``(-1, 1)`` by the time embeddings.
            context_mask: per-sample flag; 1 blocks the label for that sample.
        """
        feats = self.init_conv(x)
        down1 = self.encoder1(feats)
        down2 = self.encoder2(down1)
        bottleneck = self.to_vec(down2)

        # Labels -> one-hot float vectors.
        c = nn.functional.one_hot(c, num_classes=self.n_classes).type(torch.float)

        # Gate the one-hot rows: mask == 1 gives a factor of 0 (label removed),
        # mask == 0 gives a factor of -1 (label kept, sign flipped).
        gate = context_mask[:, None].repeat(1, self.n_classes)
        gate = -1 * (1 - gate)
        c *= gate

        # Label and time embeddings, shaped to broadcast over the spatial dims.
        wide_shape = (-1, self.n_features * 2, 1, 1)
        narrow_shape = (-1, self.n_features * 1, 1, 1)
        c_wide = self.contextembedding1(c).view(*wide_shape)
        t_wide = self.timeembedding1(t).view(*wide_shape)
        c_narrow = self.contextembedding2(c).view(*narrow_shape)
        t_narrow = self.timeembedding2(t).view(*narrow_shape)

        up0 = self.decoder0(bottleneck)
        up1 = self.decoder1(c_wide * up0 + t_wide, down2)
        up2 = self.decoder2(c_narrow * up1 + t_narrow, down1)
        return self.out(torch.cat((up2, feats), 1))


#### Denoising Diffusion Probabilistic Models

$$
\begin{aligned}
\beta_t &= \beta_1 + (\beta_2 - \beta_1)\,\frac{t}{T}, \qquad t = 0, 1, \dots, T \\
\alpha_t &= 1 - \beta_t \\
\bar{\alpha}_t &= \prod_{s=0}^{t}(1 - \beta_s) = \prod_{s=0}^{t}\alpha_s
\end{aligned}
$$

In [None]:
def ddpm_schedules(beta1, beta2, T):
    """Pre-compute the linear-beta diffusion schedule for steps 0..T.

    ``beta_t`` rises linearly from ``beta1`` (t = 0) to ``beta2`` (t = T).
    Every returned tensor is 1-D with ``T + 1`` float32 entries, indexed by t.

    Returns:
        dict with keys ``alpha_t``, ``oneover_sqrta``, ``sqrt_beta_t``,
        ``alphabar_t``, ``sqrtab``, ``sqrtmab`` and ``mab_over_sqrtmab``.

    Raises:
        AssertionError: unless ``beta1 < beta2 < 1.0``.
    """
    assert beta1 < beta2 < 1.0

    steps = torch.arange(0, T + 1, dtype=torch.float32)
    beta_t = (beta2 - beta1) * steps / T + beta1
    alpha_t = 1 - beta_t
    alphabar_t = torch.cumprod(alpha_t, dim=0)  # running product of alpha
    sqrtmab = torch.sqrt(1 - alphabar_t)

    return {
        "alpha_t": alpha_t,
        "oneover_sqrta": 1 / torch.sqrt(alpha_t),
        "sqrt_beta_t": torch.sqrt(beta_t),
        "alphabar_t": alphabar_t,
        "sqrtab": torch.sqrt(alphabar_t),
        "sqrtmab": sqrtmab,
        "mab_over_sqrtmab": (1 - alpha_t) / sqrtmab,
    }


"alpha_t": $\alpha_t$
"oneover_sqrta": $\frac{1}{\sqrt{\alpha_t}}$
"sqrt_beta_t": $\sqrt{\beta_t}$
"alphabar_t": $\bar{\alpha_t}$
"sqrtab": $\sqrt{\bar{\alpha_t}}$
"sqrtmab": $\sqrt{1-\bar{\alpha_t}}$
"mab_over_sqrtmab": $\frac{(1-\alpha_t)}{\sqrt{1-\bar{\alpha_t}}}$


In [69]:
class DDPM(nn.Module):
    """Diffusion training/sampling wrapper around a noise-prediction model.

    The wrapped ``model`` is called as ``model(x, c, t, context_mask)``.
    Training randomly drops the label with probability ``drop_prob``; sampling
    combines a label-conditioned and a label-dropped prediction
    (classifier-free guidance).

    Args:
        model: noise-prediction network; it is moved to ``device``.
        beta_start: first value of the linear beta schedule.
        beta_end: last value of the linear beta schedule.
        noise_step: number of diffusion steps T.
        device: device the wrapped model is moved to on construction.
        drop_prob: probability of masking the label during training.
    """

    def __init__(self, model, beta_start: float=1e-4, beta_end: float=0.02, noise_step: int=1000, device: str='cuda', drop_prob: float=0.1) -> None:
        super().__init__()
        self.model = model.to(device)
        self.noise_step = noise_step
        self.device = device
        self.drop_prob = drop_prob
        self.mse_loss = nn.MSELoss()

        # Register the schedule tensors as buffers so that they follow the
        # module across .to(...) calls and are stored in the state_dict.
        for k, v in ddpm_schedules(beta_start, beta_end, noise_step).items():
            self.register_buffer(k, v)

    def forward(self, x, t):
        """Return the training loss for one batch.

        Args:
            x: clean image batch, e.g. (batch_size, 3, 28, 28).
            t: class labels for ``x``. Despite the name this is NOT the
                timestep; it is forwarded to the model as its context ``c``.

        Returns:
            Scalar MSE between the injected noise and the model's prediction.
        """
        # Timestep index per sample, uniform over {1, ..., noise_step}.
        # Created on x's device so it always matches the data and the buffers.
        _ts = torch.randint(1, self.noise_step + 1, (x.shape[0],), device=x.device)
        noise = torch.randn_like(x)  # eps ~ N(0, I)

        # Forward diffusion: x_t = sqrt(alphabar_t) * x + sqrt(1 - alphabar_t) * eps
        x_t = (
            self.sqrtab[_ts, None, None, None] * x
            + self.sqrtmab[_ts, None, None, None] * noise
        )

        # 1 = drop the label for that sample, drawn with probability drop_prob.
        context_mask = torch.bernoulli(
            torch.full(t.shape, float(self.drop_prob), device=x.device)
        )

        # The model receives the timestep scaled by 1 / noise_step.
        return self.mse_loss(noise, self.model(x_t, t, _ts / self.noise_step, context_mask))

    @torch.no_grad()
    def sample(self, n_sample, size, device, guide_w = 0.0):
        """Generate ``n_sample`` images by running the reverse process.

        Labels cycle through ``0 .. n_classes - 1`` (``n_classes`` is read from
        the wrapped model, defaulting to 10), so ``n_sample`` does not have to
        be a multiple of the number of classes.

        Args:
            n_sample: number of images to generate.
            size: shape of one image, e.g. ``(3, 28, 28)``.
            device: device to sample on.
            guide_w: guidance weight; the prediction used is
                ``(1 + guide_w) * eps_conditioned - guide_w * eps_unconditioned``.

        Returns:
            ``(x_i, x_i_store)``: the final samples as a tensor, and a numpy
            array of intermediate snapshots (taken at the first step, at every
            step divisible by 20, and at the last 7 steps).
        """
        n_classes = getattr(self.model, "n_classes", 10)

        x_i = torch.randn(n_sample, *size, device=device)  # x_T ~ N(0, I)
        # One label per sample, cycling through the classes.
        c_i = torch.arange(n_sample, device=device) % n_classes

        # Batch is doubled: first half keeps the label (mask 0), second half
        # drops it (mask 1).
        context_mask = torch.zeros_like(c_i)
        c_i = c_i.repeat(2)
        context_mask = context_mask.repeat(2)
        context_mask[n_sample:] = 1

        # Snapshots of intermediate steps, in case they are to be plotted.
        x_i_store = []

        for i in range(self.noise_step, 0, -1):
            t_is = torch.full((2 * n_sample, 1, 1, 1), i / self.noise_step, device=device)
            x_in = x_i.repeat(2, 1, 1, 1)

            # No fresh noise is added on the final step.
            noise = torch.randn(n_sample, *size, device=device) if i > 1 else 0

            # Split the two halves and combine them with the guidance weight.
            predicted_noise = self.model(x_in, c_i, t_is, context_mask)
            epsilon1 = predicted_noise[:n_sample]
            epsilon2 = predicted_noise[n_sample:]
            predicted_noise = (1 + guide_w) * epsilon1 - guide_w * epsilon2

            x_i = (
                self.oneover_sqrta[i] * (x_i - predicted_noise * self.mab_over_sqrtmab[i])
                + self.sqrt_beta_t[i] * noise
            )
            if i % 20 == 0 or i == self.noise_step or i < 8:
                x_i_store.append(x_i.detach().cpu().numpy())

        x_i_store = np.array(x_i_store)
        return x_i, x_i_store

#### Training process

In [70]:
def modling(dataloader, ddpm, optimizer):
    """Train ``ddpm`` for one epoch; on every 5th epoch also sample and checkpoint.

    Relies on notebook-level globals that must be defined before the call:
    ``epoch`` (0-based index of the current epoch), ``EPOCHS``, ``lr`` and
    ``device``.

    Args:
        dataloader: yields ``(images, labels)`` batches.
        ddpm: the DDPM module; ``ddpm(images, labels)`` returns the loss.
        optimizer: optimizer over ``ddpm``'s parameters. The learning rate of
            its first parameter group is overwritten with a linear decay.

    Returns:
        The exponential moving average of the batch losses for this epoch, or
        ``None`` if ``dataloader`` produced no batches.
    """
    ddpm.train()
    # Linear decay from lr (first epoch) towards 0 (after the last epoch).
    optimizer.param_groups[0]['lr'] = lr * (1 - epoch / EPOCHS)
    loss_ema = None

    progress = tqdm(dataloader, leave=False)
    for x, t in progress:
        x, t = x.to(device, non_blocking=True), t.to(device, non_blocking=True)

        optimizer.zero_grad()
        loss = ddpm(x, t)
        loss.backward()

        batch_loss = loss.item()
        loss_ema = batch_loss if loss_ema is None else 0.95 * loss_ema + 0.05 * batch_loss
        progress.set_description(f"loss: {loss_ema:.4f}")

        optimizer.step()

    ddpm.eval()
    # Sampling runs noise_step reverse steps per guidance weight, so it is only
    # done on the epochs whose images are actually saved.
    if epoch % 5 == 0:
        img_dir = '/content/drive/MyDrive/NTU_DLCV/Hw2/p1_img'
        ckpt_dir = '/content/drive/MyDrive/NTU_DLCV/Hw2/p1_ckpt'
        # Saving fails if the target directories do not exist yet.
        os.makedirs(img_dir, exist_ok=True)
        os.makedirs(ckpt_dir, exist_ok=True)

        with torch.no_grad():
            n_samples = 30
            for w in [0, 0.5, 2]:
                x_gen, _ = ddpm.sample(n_samples, (3, 28, 28), device, guide_w=w)
                # NOTE(review): `-x + 1` inverts the values but does not undo the
                # mean/std normalisation applied to the training images - confirm
                # that this is the intended visualisation.
                grid = make_grid(x_gen*(-1) + 1, nrow=3)
                save_image(grid, f'{img_dir}/epoch{epoch+1}_w{w:.1f}.png')

        torch.save(ddpm.state_dict(), f'{ckpt_dir}/epoch{epoch+1}.pth')

    return loss_ema

In [72]:
EPOCHS = 100

n_feature = 128 # try 256 maybe better

Unet = UNet(in_channels=3, n_features=n_feature, n_classes=10)
# Pass the detected device explicitly: DDPM's `device` argument defaults to
# 'cuda', which would make construction fail when the notebook fell back to CPU.
ddpm = DDPM(model=Unet, device=device)
ddpm.to(device)

lr = 1e-4
optimizer = torch.optim.Adam(ddpm.parameters(), lr=lr)

# `epoch`, `EPOCHS` and `lr` are read as globals inside modling().
for epoch in tqdm(range(EPOCHS)):
    modling(dataset_loader, ddpm, optimizer)

  0%|          | 0/100 [00:00<?, ?it/s]

epoch1



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<03:57,  1.09s/it][A
  1%|          | 2/219 [00:01<02:36,  1.38it/s][A
  1%|▏         | 3/219 [00:02<02:12,  1.63it/s][A
  2%|▏         | 4/219 [00:02<01:59,  1.80it/s][A
  2%|▏         | 5/219 [00:02<01:52,  1.91it/s][A
  3%|▎         | 6/219 [00:03<01:47,  1.98it/s][A
  3%|▎         | 7/219 [00:03<01:44,  2.02it/s][A
  4%|▎         | 8/219 [00:04<01:43,  2.05it/s][A
  4%|▍         | 9/219 [00:04<01:42,  2.06it/s][A
  5%|▍         | 10/219 [00:05<01:41,  2.05it/s][A
  5%|▌         | 11/219 [00:05<01:41,  2.05it/s][A
  5%|▌         | 12/219 [00:06<01:40,  2.06it/s][A
  6%|▌         | 13/219 [00:06<01:38,  2.08it/s][A
  6%|▋         | 14/219 [00:07<01:38,  2.08it/s][A
  7%|▋         | 15/219 [00:07<01:37,  2.08it/s][A
  7%|▋         | 16/219 [00:08<01:37,  2.08it/s][A
  8%|▊         | 17/219 [00:08<01:37,  2.08it/s][A
  8%|▊         | 18/219 [00:09<01:36,  2.08it/s][A
  9%|▊         | 19/219 [00:0

epoch2



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<04:55,  1.35s/it][A
  1%|          | 2/219 [00:01<03:05,  1.17it/s][A
  1%|▏         | 3/219 [00:02<02:28,  1.45it/s][A
  2%|▏         | 4/219 [00:02<02:10,  1.65it/s][A
  2%|▏         | 5/219 [00:03<01:58,  1.80it/s][A
  3%|▎         | 6/219 [00:03<01:52,  1.90it/s][A
  3%|▎         | 7/219 [00:04<01:47,  1.97it/s][A
  4%|▎         | 8/219 [00:04<01:44,  2.03it/s][A
  4%|▍         | 9/219 [00:05<01:41,  2.06it/s][A
  5%|▍         | 10/219 [00:05<01:40,  2.09it/s][A
  5%|▌         | 11/219 [00:06<01:39,  2.09it/s][A
  5%|▌         | 12/219 [00:06<01:38,  2.10it/s][A
  6%|▌         | 13/219 [00:07<01:37,  2.11it/s][A
  6%|▋         | 14/219 [00:07<01:36,  2.12it/s][A
  7%|▋         | 15/219 [00:07<01:36,  2.12it/s][A
  7%|▋         | 16/219 [00:08<01:35,  2.14it/s][A
  8%|▊         | 17/219 [00:08<01:34,  2.13it/s][A
  8%|▊         | 18/219 [00:09<01:33,  2.14it/s][A
  9%|▊         | 19/219 [00:0

epoch3



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<04:57,  1.37s/it][A
  1%|          | 2/219 [00:01<03:06,  1.16it/s][A
  1%|▏         | 3/219 [00:02<02:29,  1.45it/s][A
  2%|▏         | 4/219 [00:02<02:10,  1.65it/s][A
  2%|▏         | 5/219 [00:03<01:59,  1.79it/s][A
  3%|▎         | 6/219 [00:03<01:52,  1.89it/s][A
  3%|▎         | 7/219 [00:04<01:49,  1.94it/s][A
  4%|▎         | 8/219 [00:04<01:45,  2.00it/s][A
  4%|▍         | 9/219 [00:05<01:42,  2.05it/s][A
  5%|▍         | 10/219 [00:05<01:40,  2.08it/s][A
  5%|▌         | 11/219 [00:06<01:38,  2.10it/s][A
  5%|▌         | 12/219 [00:06<01:37,  2.11it/s][A
  6%|▌         | 13/219 [00:07<01:37,  2.12it/s][A
  6%|▋         | 14/219 [00:07<01:36,  2.13it/s][A
  7%|▋         | 15/219 [00:07<01:35,  2.13it/s][A
  7%|▋         | 16/219 [00:08<01:34,  2.14it/s][A
  8%|▊         | 17/219 [00:08<01:34,  2.15it/s][A
  8%|▊         | 18/219 [00:09<01:33,  2.15it/s][A
  9%|▊         | 19/219 [00:0

epoch4



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<04:50,  1.33s/it][A
  1%|          | 2/219 [00:01<03:02,  1.19it/s][A
  1%|▏         | 3/219 [00:02<02:25,  1.48it/s][A
  2%|▏         | 4/219 [00:02<02:07,  1.68it/s][A
  2%|▏         | 5/219 [00:03<01:58,  1.81it/s][A
  3%|▎         | 6/219 [00:03<01:51,  1.91it/s][A
  3%|▎         | 7/219 [00:04<01:47,  1.98it/s][A
  4%|▎         | 8/219 [00:04<01:44,  2.03it/s][A
  4%|▍         | 9/219 [00:05<01:42,  2.05it/s][A
  5%|▍         | 10/219 [00:05<01:40,  2.08it/s][A
  5%|▌         | 11/219 [00:06<01:39,  2.10it/s][A
  5%|▌         | 12/219 [00:06<01:37,  2.11it/s][A
  6%|▌         | 13/219 [00:07<01:36,  2.12it/s][A
  6%|▋         | 14/219 [00:07<01:36,  2.13it/s][A
  7%|▋         | 15/219 [00:07<01:35,  2.13it/s][A
  7%|▋         | 16/219 [00:08<01:34,  2.14it/s][A
  8%|▊         | 17/219 [00:08<01:34,  2.14it/s][A
  8%|▊         | 18/219 [00:09<01:33,  2.14it/s][A
  9%|▊         | 19/219 [00:0

epoch5



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<03:56,  1.08s/it][A
  1%|          | 2/219 [00:01<02:39,  1.36it/s][A
  1%|▏         | 3/219 [00:02<02:11,  1.64it/s][A
  2%|▏         | 4/219 [00:02<01:59,  1.80it/s][A
  2%|▏         | 5/219 [00:02<01:51,  1.91it/s][A
  3%|▎         | 6/219 [00:03<01:47,  1.99it/s][A
  3%|▎         | 7/219 [00:03<01:44,  2.03it/s][A
  4%|▎         | 8/219 [00:04<01:41,  2.07it/s][A
  4%|▍         | 9/219 [00:04<01:39,  2.10it/s][A
  5%|▍         | 10/219 [00:05<01:38,  2.12it/s][A
  5%|▌         | 11/219 [00:05<01:37,  2.13it/s][A
  5%|▌         | 12/219 [00:06<01:37,  2.13it/s][A
  6%|▌         | 13/219 [00:06<01:36,  2.14it/s][A
  6%|▋         | 14/219 [00:07<01:36,  2.13it/s][A
  7%|▋         | 15/219 [00:07<01:36,  2.12it/s][A
  7%|▋         | 16/219 [00:08<01:35,  2.12it/s][A
  8%|▊         | 17/219 [00:08<01:34,  2.14it/s][A
  8%|▊         | 18/219 [00:09<01:33,  2.15it/s][A
  9%|▊         | 19/219 [00:0

epoch6



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<03:47,  1.04s/it][A
  1%|          | 2/219 [00:01<02:34,  1.40it/s][A
  1%|▏         | 3/219 [00:01<02:09,  1.67it/s][A
  2%|▏         | 4/219 [00:02<01:57,  1.83it/s][A
  2%|▏         | 5/219 [00:02<01:50,  1.94it/s][A
  3%|▎         | 6/219 [00:03<01:45,  2.02it/s][A
  3%|▎         | 7/219 [00:03<01:42,  2.06it/s][A
  4%|▎         | 8/219 [00:04<01:40,  2.09it/s][A
  4%|▍         | 9/219 [00:04<01:39,  2.11it/s][A
  5%|▍         | 10/219 [00:05<01:38,  2.12it/s][A
  5%|▌         | 11/219 [00:05<01:37,  2.13it/s][A
  5%|▌         | 12/219 [00:06<01:37,  2.13it/s][A
  6%|▌         | 13/219 [00:06<01:36,  2.14it/s][A
  6%|▋         | 14/219 [00:07<01:35,  2.14it/s][A
  7%|▋         | 15/219 [00:07<01:35,  2.15it/s][A
  7%|▋         | 16/219 [00:08<01:34,  2.14it/s][A
  8%|▊         | 17/219 [00:08<01:34,  2.15it/s][A
  8%|▊         | 18/219 [00:08<01:33,  2.15it/s][A
  9%|▊         | 19/219 [00:0

epoch7



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<03:48,  1.05s/it][A
  1%|          | 2/219 [00:01<02:37,  1.38it/s][A
  1%|▏         | 3/219 [00:02<02:11,  1.64it/s][A
  2%|▏         | 4/219 [00:02<01:58,  1.81it/s][A
  2%|▏         | 5/219 [00:02<01:51,  1.91it/s][A
  3%|▎         | 6/219 [00:03<01:47,  1.99it/s][A
  3%|▎         | 7/219 [00:03<01:44,  2.02it/s][A
  4%|▎         | 8/219 [00:04<01:42,  2.06it/s][A
  4%|▍         | 9/219 [00:04<01:41,  2.07it/s][A
  5%|▍         | 10/219 [00:05<01:40,  2.09it/s][A
  5%|▌         | 11/219 [00:05<01:39,  2.10it/s][A
  5%|▌         | 12/219 [00:06<01:37,  2.12it/s][A
  6%|▌         | 13/219 [00:06<01:37,  2.12it/s][A
  6%|▋         | 14/219 [00:07<01:36,  2.12it/s][A
  7%|▋         | 15/219 [00:07<01:35,  2.13it/s][A
  7%|▋         | 16/219 [00:08<01:35,  2.13it/s][A
  8%|▊         | 17/219 [00:08<01:34,  2.14it/s][A
  8%|▊         | 18/219 [00:09<01:34,  2.12it/s][A
  9%|▊         | 19/219 [00:0

epoch8



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<03:58,  1.10s/it][A
  1%|          | 2/219 [00:01<02:39,  1.36it/s][A
  1%|▏         | 3/219 [00:02<02:12,  1.63it/s][A
  2%|▏         | 4/219 [00:02<01:58,  1.81it/s][A
  2%|▏         | 5/219 [00:02<01:51,  1.92it/s][A
  3%|▎         | 6/219 [00:03<01:47,  1.99it/s][A
  3%|▎         | 7/219 [00:03<01:44,  2.03it/s][A
  4%|▎         | 8/219 [00:04<01:41,  2.07it/s][A
  4%|▍         | 9/219 [00:04<01:40,  2.09it/s][A
  5%|▍         | 10/219 [00:05<01:38,  2.12it/s][A
  5%|▌         | 11/219 [00:05<01:38,  2.12it/s][A
  5%|▌         | 12/219 [00:06<01:37,  2.13it/s][A
  6%|▌         | 13/219 [00:06<01:37,  2.12it/s][A
  6%|▋         | 14/219 [00:07<01:36,  2.13it/s][A
  7%|▋         | 15/219 [00:07<01:35,  2.14it/s][A
  7%|▋         | 16/219 [00:08<01:35,  2.13it/s][A
  8%|▊         | 17/219 [00:08<01:34,  2.13it/s][A
  8%|▊         | 18/219 [00:09<01:34,  2.12it/s][A
  9%|▊         | 19/219 [00:0

epoch9



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<03:46,  1.04s/it][A
  1%|          | 2/219 [00:01<02:34,  1.40it/s][A
  1%|▏         | 3/219 [00:01<02:09,  1.67it/s][A
  2%|▏         | 4/219 [00:02<01:57,  1.83it/s][A
  2%|▏         | 5/219 [00:02<01:50,  1.93it/s][A
  3%|▎         | 6/219 [00:03<01:46,  2.00it/s][A
  3%|▎         | 7/219 [00:03<01:43,  2.04it/s][A
  4%|▎         | 8/219 [00:04<01:41,  2.08it/s][A
  4%|▍         | 9/219 [00:04<01:40,  2.10it/s][A
  5%|▍         | 10/219 [00:05<01:39,  2.11it/s][A
  5%|▌         | 11/219 [00:05<01:37,  2.13it/s][A
  5%|▌         | 12/219 [00:06<01:37,  2.13it/s][A
  6%|▌         | 13/219 [00:06<01:36,  2.13it/s][A
  6%|▋         | 14/219 [00:07<01:35,  2.14it/s][A
  7%|▋         | 15/219 [00:07<01:35,  2.14it/s][A
  7%|▋         | 16/219 [00:08<01:34,  2.14it/s][A
  8%|▊         | 17/219 [00:08<01:34,  2.14it/s][A
  8%|▊         | 18/219 [00:08<01:34,  2.12it/s][A
  9%|▊         | 19/219 [00:0

epoch10



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<03:57,  1.09s/it][A
  1%|          | 2/219 [00:01<02:40,  1.35it/s][A
  1%|▏         | 3/219 [00:02<02:13,  1.61it/s][A
  2%|▏         | 4/219 [00:02<01:59,  1.79it/s][A
  2%|▏         | 5/219 [00:02<01:52,  1.90it/s][A
  3%|▎         | 6/219 [00:03<01:48,  1.97it/s][A
  3%|▎         | 7/219 [00:03<01:44,  2.02it/s][A
  4%|▎         | 8/219 [00:04<01:42,  2.06it/s][A
  4%|▍         | 9/219 [00:04<01:41,  2.07it/s][A
  5%|▍         | 10/219 [00:05<01:40,  2.09it/s][A
  5%|▌         | 11/219 [00:05<01:39,  2.10it/s][A
  5%|▌         | 12/219 [00:06<01:38,  2.10it/s][A
  6%|▌         | 13/219 [00:06<01:38,  2.09it/s][A
  6%|▋         | 14/219 [00:07<01:37,  2.10it/s][A
  7%|▋         | 15/219 [00:07<01:36,  2.12it/s][A
  7%|▋         | 16/219 [00:08<01:36,  2.10it/s][A
  8%|▊         | 17/219 [00:08<01:36,  2.10it/s][A
  8%|▊         | 18/219 [00:09<01:35,  2.11it/s][A
  9%|▊         | 19/219 [00:0

epoch11



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:00<03:29,  1.04it/s][A
  1%|          | 2/219 [00:01<02:28,  1.46it/s][A
  1%|▏         | 3/219 [00:01<02:05,  1.72it/s][A
  2%|▏         | 4/219 [00:02<01:55,  1.87it/s][A
  2%|▏         | 5/219 [00:02<01:49,  1.95it/s][A
  3%|▎         | 6/219 [00:03<01:45,  2.02it/s][A
  3%|▎         | 7/219 [00:03<01:43,  2.06it/s][A
  4%|▎         | 8/219 [00:04<01:40,  2.09it/s][A
  4%|▍         | 9/219 [00:04<01:39,  2.10it/s][A
  5%|▍         | 10/219 [00:05<01:39,  2.09it/s][A
  5%|▌         | 11/219 [00:05<01:39,  2.10it/s][A
  5%|▌         | 12/219 [00:06<01:38,  2.10it/s][A
  6%|▌         | 13/219 [00:06<01:38,  2.10it/s][A
  6%|▋         | 14/219 [00:07<01:37,  2.10it/s][A
  7%|▋         | 15/219 [00:07<01:36,  2.11it/s][A
  7%|▋         | 16/219 [00:08<01:36,  2.10it/s][A
  8%|▊         | 17/219 [00:08<01:36,  2.08it/s][A
  8%|▊         | 18/219 [00:09<01:36,  2.08it/s][A
  9%|▊         | 19/219 [00:0

epoch12



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<03:46,  1.04s/it][A
  1%|          | 2/219 [00:01<02:36,  1.38it/s][A
  1%|▏         | 3/219 [00:02<02:10,  1.66it/s][A
  2%|▏         | 4/219 [00:02<01:58,  1.82it/s][A
  2%|▏         | 5/219 [00:02<01:51,  1.91it/s][A
  3%|▎         | 6/219 [00:03<01:47,  1.98it/s][A
  3%|▎         | 7/219 [00:03<01:44,  2.02it/s][A
  4%|▎         | 8/219 [00:04<01:42,  2.06it/s][A
  4%|▍         | 9/219 [00:04<01:41,  2.08it/s][A
  5%|▍         | 10/219 [00:05<01:40,  2.08it/s][A
  5%|▌         | 11/219 [00:05<01:40,  2.08it/s][A
  5%|▌         | 12/219 [00:06<01:39,  2.08it/s][A
  6%|▌         | 13/219 [00:06<01:38,  2.09it/s][A
  6%|▋         | 14/219 [00:07<01:38,  2.07it/s][A
  7%|▋         | 15/219 [00:07<01:37,  2.10it/s][A
  7%|▋         | 16/219 [00:08<01:36,  2.11it/s][A
  8%|▊         | 17/219 [00:08<01:36,  2.10it/s][A
  8%|▊         | 18/219 [00:09<01:35,  2.11it/s][A
  9%|▊         | 19/219 [00:0

epoch13



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<04:09,  1.15s/it][A
  1%|          | 2/219 [00:01<02:46,  1.30it/s][A
  1%|▏         | 3/219 [00:02<02:18,  1.56it/s][A
  2%|▏         | 4/219 [00:02<02:03,  1.74it/s][A
  2%|▏         | 5/219 [00:03<01:54,  1.86it/s][A
  3%|▎         | 6/219 [00:03<01:48,  1.96it/s][A
  3%|▎         | 7/219 [00:04<01:45,  2.01it/s][A
  4%|▎         | 8/219 [00:04<01:42,  2.06it/s][A
  4%|▍         | 9/219 [00:04<01:41,  2.08it/s][A
  5%|▍         | 10/219 [00:05<01:39,  2.10it/s][A
  5%|▌         | 11/219 [00:05<01:38,  2.11it/s][A
  5%|▌         | 12/219 [00:06<01:37,  2.13it/s][A
  6%|▌         | 13/219 [00:06<01:36,  2.13it/s][A
  6%|▋         | 14/219 [00:07<01:35,  2.14it/s][A
  7%|▋         | 15/219 [00:07<01:34,  2.15it/s][A
  7%|▋         | 16/219 [00:08<01:35,  2.14it/s][A
  8%|▊         | 17/219 [00:08<01:34,  2.14it/s][A
  8%|▊         | 18/219 [00:09<01:34,  2.14it/s][A
  9%|▊         | 19/219 [00:0

epoch14



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<05:05,  1.40s/it][A
  1%|          | 2/219 [00:01<03:08,  1.15it/s][A
  1%|▏         | 3/219 [00:02<02:29,  1.44it/s][A
  2%|▏         | 4/219 [00:02<02:10,  1.65it/s][A
  2%|▏         | 5/219 [00:03<01:58,  1.80it/s][A
  3%|▎         | 6/219 [00:03<01:51,  1.90it/s][A
  3%|▎         | 7/219 [00:04<01:47,  1.97it/s][A
  4%|▎         | 8/219 [00:04<01:44,  2.01it/s][A
  4%|▍         | 9/219 [00:05<01:42,  2.04it/s][A
  5%|▍         | 10/219 [00:05<01:41,  2.05it/s][A
  5%|▌         | 11/219 [00:06<01:40,  2.08it/s][A
  5%|▌         | 12/219 [00:06<01:38,  2.10it/s][A
  6%|▌         | 13/219 [00:07<01:37,  2.11it/s][A
  6%|▋         | 14/219 [00:07<01:37,  2.10it/s][A
  7%|▋         | 15/219 [00:08<01:36,  2.11it/s][A
  7%|▋         | 16/219 [00:08<01:36,  2.10it/s][A
  8%|▊         | 17/219 [00:08<01:35,  2.12it/s][A
  8%|▊         | 18/219 [00:09<01:34,  2.13it/s][A
  9%|▊         | 19/219 [00:0

epoch15



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<03:41,  1.01s/it][A
  1%|          | 2/219 [00:01<02:32,  1.42it/s][A
  1%|▏         | 3/219 [00:01<02:08,  1.68it/s][A
  2%|▏         | 4/219 [00:02<01:57,  1.83it/s][A
  2%|▏         | 5/219 [00:02<01:50,  1.93it/s][A
  3%|▎         | 6/219 [00:03<01:47,  1.98it/s][A
  3%|▎         | 7/219 [00:03<01:44,  2.03it/s][A
  4%|▎         | 8/219 [00:04<01:42,  2.06it/s][A
  4%|▍         | 9/219 [00:04<01:40,  2.09it/s][A
  5%|▍         | 10/219 [00:05<01:39,  2.11it/s][A
  5%|▌         | 11/219 [00:05<01:38,  2.10it/s][A
  5%|▌         | 12/219 [00:06<01:38,  2.11it/s][A
  6%|▌         | 13/219 [00:06<01:37,  2.12it/s][A
  6%|▋         | 14/219 [00:07<01:36,  2.13it/s][A
  7%|▋         | 15/219 [00:07<01:35,  2.14it/s][A
  7%|▋         | 16/219 [00:08<01:34,  2.14it/s][A
  8%|▊         | 17/219 [00:08<01:34,  2.14it/s][A
  8%|▊         | 18/219 [00:08<01:33,  2.14it/s][A
  9%|▊         | 19/219 [00:0

epoch16



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:00<03:05,  1.18it/s][A
  1%|          | 2/219 [00:01<02:19,  1.56it/s][A
  1%|▏         | 3/219 [00:01<02:01,  1.78it/s][A
  2%|▏         | 4/219 [00:02<01:53,  1.90it/s][A
  2%|▏         | 5/219 [00:02<01:48,  1.98it/s][A
  3%|▎         | 6/219 [00:03<01:45,  2.03it/s][A
  3%|▎         | 7/219 [00:03<01:42,  2.07it/s][A
  4%|▎         | 8/219 [00:04<01:41,  2.09it/s][A
  4%|▍         | 9/219 [00:04<01:40,  2.10it/s][A
  5%|▍         | 10/219 [00:05<01:38,  2.11it/s][A
  5%|▌         | 11/219 [00:05<01:37,  2.12it/s][A
  5%|▌         | 12/219 [00:06<01:36,  2.14it/s][A
  6%|▌         | 13/219 [00:06<01:36,  2.13it/s][A
  6%|▋         | 14/219 [00:06<01:35,  2.14it/s][A
  7%|▋         | 15/219 [00:07<01:34,  2.15it/s][A
  7%|▋         | 16/219 [00:07<01:34,  2.14it/s][A
  8%|▊         | 17/219 [00:08<01:34,  2.13it/s][A
  8%|▊         | 18/219 [00:08<01:34,  2.12it/s][A
  9%|▊         | 19/219 [00:0

epoch17



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<03:37,  1.00it/s][A
  1%|          | 2/219 [00:01<02:31,  1.44it/s][A
  1%|▏         | 3/219 [00:01<02:08,  1.68it/s][A
  2%|▏         | 4/219 [00:02<01:57,  1.84it/s][A
  2%|▏         | 5/219 [00:02<01:50,  1.93it/s][A
  3%|▎         | 6/219 [00:03<01:46,  2.00it/s][A
  3%|▎         | 7/219 [00:03<01:43,  2.05it/s][A
  4%|▎         | 8/219 [00:04<01:42,  2.06it/s][A
  4%|▍         | 9/219 [00:04<01:41,  2.07it/s][A
  5%|▍         | 10/219 [00:05<01:40,  2.08it/s][A
  5%|▌         | 11/219 [00:05<01:39,  2.10it/s][A
  5%|▌         | 12/219 [00:06<01:39,  2.09it/s][A
  6%|▌         | 13/219 [00:06<01:38,  2.10it/s][A
  6%|▋         | 14/219 [00:07<01:37,  2.09it/s][A
  7%|▋         | 15/219 [00:07<01:37,  2.08it/s][A
  7%|▋         | 16/219 [00:08<01:37,  2.08it/s][A
  8%|▊         | 17/219 [00:08<01:36,  2.09it/s][A
  8%|▊         | 18/219 [00:09<01:36,  2.09it/s][A
  9%|▊         | 19/219 [00:0

epoch18



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:00<03:30,  1.04it/s][A
  1%|          | 2/219 [00:01<02:28,  1.46it/s][A
  1%|▏         | 3/219 [00:01<02:06,  1.70it/s][A
  2%|▏         | 4/219 [00:02<01:56,  1.85it/s][A
  2%|▏         | 5/219 [00:02<01:50,  1.94it/s][A
  3%|▎         | 6/219 [00:03<01:47,  1.99it/s][A
  3%|▎         | 7/219 [00:03<01:43,  2.04it/s][A
  4%|▎         | 8/219 [00:04<01:43,  2.04it/s][A
  4%|▍         | 9/219 [00:04<01:41,  2.06it/s][A
  5%|▍         | 10/219 [00:05<01:39,  2.09it/s][A
  5%|▌         | 11/219 [00:05<01:39,  2.10it/s][A
  5%|▌         | 12/219 [00:06<01:38,  2.10it/s][A
  6%|▌         | 13/219 [00:06<01:37,  2.12it/s][A
  6%|▋         | 14/219 [00:07<01:36,  2.13it/s][A
  7%|▋         | 15/219 [00:07<01:35,  2.13it/s][A
  7%|▋         | 16/219 [00:08<01:35,  2.13it/s][A
  8%|▊         | 17/219 [00:08<01:35,  2.11it/s][A
  8%|▊         | 18/219 [00:09<01:34,  2.12it/s][A
  9%|▊         | 19/219 [00:0

epoch19



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<03:49,  1.05s/it][A
  1%|          | 2/219 [00:01<02:37,  1.38it/s][A
  1%|▏         | 3/219 [00:02<02:12,  1.63it/s][A
  2%|▏         | 4/219 [00:02<01:59,  1.80it/s][A
  2%|▏         | 5/219 [00:02<01:52,  1.91it/s][A
  3%|▎         | 6/219 [00:03<01:47,  1.99it/s][A
  3%|▎         | 7/219 [00:03<01:43,  2.04it/s][A
  4%|▎         | 8/219 [00:04<01:41,  2.08it/s][A
  4%|▍         | 9/219 [00:04<01:40,  2.10it/s][A
  5%|▍         | 10/219 [00:05<01:39,  2.10it/s][A
  5%|▌         | 11/219 [00:05<01:38,  2.11it/s][A
  5%|▌         | 12/219 [00:06<01:38,  2.11it/s][A
  6%|▌         | 13/219 [00:06<01:37,  2.11it/s][A
  6%|▋         | 14/219 [00:07<01:36,  2.12it/s][A
  7%|▋         | 15/219 [00:07<01:36,  2.12it/s][A
  7%|▋         | 16/219 [00:08<01:37,  2.09it/s][A
  8%|▊         | 17/219 [00:08<01:36,  2.08it/s][A
  8%|▊         | 18/219 [00:09<01:35,  2.10it/s][A
  9%|▊         | 19/219 [00:0

epoch20



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<03:46,  1.04s/it][A
  1%|          | 2/219 [00:01<02:34,  1.41it/s][A
  1%|▏         | 3/219 [00:01<02:09,  1.67it/s][A
  2%|▏         | 4/219 [00:02<01:58,  1.82it/s][A
  2%|▏         | 5/219 [00:02<01:51,  1.92it/s][A
  3%|▎         | 6/219 [00:03<01:47,  1.98it/s][A
  3%|▎         | 7/219 [00:03<01:44,  2.03it/s][A
  4%|▎         | 8/219 [00:04<01:42,  2.06it/s][A
  4%|▍         | 9/219 [00:04<01:40,  2.08it/s][A
  5%|▍         | 10/219 [00:05<01:39,  2.10it/s][A
  5%|▌         | 11/219 [00:05<01:38,  2.12it/s][A
  5%|▌         | 12/219 [00:06<01:37,  2.13it/s][A
  6%|▌         | 13/219 [00:06<01:37,  2.11it/s][A
  6%|▋         | 14/219 [00:07<01:37,  2.11it/s][A
  7%|▋         | 15/219 [00:07<01:36,  2.12it/s][A
  7%|▋         | 16/219 [00:08<01:35,  2.12it/s][A
  8%|▊         | 17/219 [00:08<01:35,  2.12it/s][A
  8%|▊         | 18/219 [00:09<01:34,  2.13it/s][A
  9%|▊         | 19/219 [00:0

epoch21



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<03:46,  1.04s/it][A
  1%|          | 2/219 [00:01<02:34,  1.41it/s][A
  1%|▏         | 3/219 [00:01<02:09,  1.67it/s][A
  2%|▏         | 4/219 [00:02<01:57,  1.82it/s][A
  2%|▏         | 5/219 [00:02<01:51,  1.92it/s][A
  3%|▎         | 6/219 [00:03<01:47,  1.99it/s][A
  3%|▎         | 7/219 [00:03<01:44,  2.03it/s][A
  4%|▎         | 8/219 [00:04<01:42,  2.07it/s][A
  4%|▍         | 9/219 [00:04<01:41,  2.07it/s][A
  5%|▍         | 10/219 [00:05<01:40,  2.08it/s][A
  5%|▌         | 11/219 [00:05<01:39,  2.09it/s][A
  5%|▌         | 12/219 [00:06<01:39,  2.09it/s][A
  6%|▌         | 13/219 [00:06<01:37,  2.11it/s][A
  6%|▋         | 14/219 [00:07<01:36,  2.13it/s][A
  7%|▋         | 15/219 [00:07<01:36,  2.12it/s][A
  7%|▋         | 16/219 [00:08<01:35,  2.13it/s][A
  8%|▊         | 17/219 [00:08<01:34,  2.14it/s][A
  8%|▊         | 18/219 [00:09<01:33,  2.14it/s][A
  9%|▊         | 19/219 [00:0

epoch22



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<05:15,  1.45s/it][A
  1%|          | 2/219 [00:01<03:13,  1.12it/s][A
  1%|▏         | 3/219 [00:02<02:31,  1.42it/s][A
  2%|▏         | 4/219 [00:02<02:12,  1.62it/s][A
  2%|▏         | 5/219 [00:03<02:02,  1.75it/s][A
  3%|▎         | 6/219 [00:03<01:54,  1.86it/s][A
  3%|▎         | 7/219 [00:04<01:48,  1.96it/s][A
  4%|▎         | 8/219 [00:04<01:45,  1.99it/s][A
  4%|▍         | 9/219 [00:05<01:43,  2.04it/s][A
  5%|▍         | 10/219 [00:05<01:41,  2.07it/s][A
  5%|▌         | 11/219 [00:06<01:40,  2.07it/s][A
  5%|▌         | 12/219 [00:06<01:38,  2.10it/s][A
  6%|▌         | 13/219 [00:07<01:37,  2.11it/s][A
  6%|▋         | 14/219 [00:07<01:37,  2.10it/s][A
  7%|▋         | 15/219 [00:08<01:36,  2.10it/s][A
  7%|▋         | 16/219 [00:08<01:36,  2.11it/s][A
  8%|▊         | 17/219 [00:09<01:35,  2.13it/s][A
  8%|▊         | 18/219 [00:09<01:34,  2.13it/s][A
  9%|▊         | 19/219 [00:0

epoch23



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<05:29,  1.51s/it][A
  1%|          | 2/219 [00:01<03:16,  1.10it/s][A
  1%|▏         | 3/219 [00:02<02:33,  1.41it/s][A
  2%|▏         | 4/219 [00:02<02:12,  1.63it/s][A
  2%|▏         | 5/219 [00:03<02:00,  1.77it/s][A
  3%|▎         | 6/219 [00:03<01:53,  1.87it/s][A
  3%|▎         | 7/219 [00:04<01:48,  1.95it/s][A
  4%|▎         | 8/219 [00:04<01:45,  2.00it/s][A
  4%|▍         | 9/219 [00:05<01:42,  2.04it/s][A
  5%|▍         | 10/219 [00:05<01:41,  2.05it/s][A
  5%|▌         | 11/219 [00:06<01:40,  2.07it/s][A
  5%|▌         | 12/219 [00:06<01:38,  2.10it/s][A
  6%|▌         | 13/219 [00:07<01:37,  2.12it/s][A
  6%|▋         | 14/219 [00:07<01:36,  2.12it/s][A
  7%|▋         | 15/219 [00:08<01:36,  2.12it/s][A
  7%|▋         | 16/219 [00:08<01:35,  2.13it/s][A
  8%|▊         | 17/219 [00:09<01:34,  2.14it/s][A
  8%|▊         | 18/219 [00:09<01:33,  2.14it/s][A
  9%|▊         | 19/219 [00:0

epoch24



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<03:42,  1.02s/it][A
  1%|          | 2/219 [00:01<02:34,  1.41it/s][A
  1%|▏         | 3/219 [00:01<02:09,  1.67it/s][A
  2%|▏         | 4/219 [00:02<01:58,  1.81it/s][A
  2%|▏         | 5/219 [00:02<01:52,  1.90it/s][A
  3%|▎         | 6/219 [00:03<01:47,  1.99it/s][A
  3%|▎         | 7/219 [00:03<01:44,  2.04it/s][A
  4%|▎         | 8/219 [00:04<01:42,  2.06it/s][A
  4%|▍         | 9/219 [00:04<01:41,  2.07it/s][A
  5%|▍         | 10/219 [00:05<01:39,  2.09it/s][A
  5%|▌         | 11/219 [00:05<01:38,  2.10it/s][A
  5%|▌         | 12/219 [00:06<01:38,  2.10it/s][A
  6%|▌         | 13/219 [00:06<01:38,  2.08it/s][A
  6%|▋         | 14/219 [00:07<01:38,  2.09it/s][A
  7%|▋         | 15/219 [00:07<01:36,  2.12it/s][A
  7%|▋         | 16/219 [00:08<01:35,  2.12it/s][A
  8%|▊         | 17/219 [00:08<01:35,  2.12it/s][A
  8%|▊         | 18/219 [00:09<01:35,  2.12it/s][A
  9%|▊         | 19/219 [00:0

epoch25



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<03:52,  1.07s/it][A
  1%|          | 2/219 [00:01<02:38,  1.37it/s][A
  1%|▏         | 3/219 [00:02<02:11,  1.64it/s][A
  2%|▏         | 4/219 [00:02<01:59,  1.81it/s][A
  2%|▏         | 5/219 [00:02<01:52,  1.90it/s][A
  3%|▎         | 6/219 [00:03<01:47,  1.98it/s][A
  3%|▎         | 7/219 [00:03<01:44,  2.03it/s][A
  4%|▎         | 8/219 [00:04<01:43,  2.04it/s][A
  4%|▍         | 9/219 [00:04<01:41,  2.07it/s][A
  5%|▍         | 10/219 [00:05<01:40,  2.09it/s][A
  5%|▌         | 11/219 [00:05<01:39,  2.09it/s][A
  5%|▌         | 12/219 [00:06<01:38,  2.10it/s][A
  6%|▌         | 13/219 [00:06<01:38,  2.10it/s][A
  6%|▋         | 14/219 [00:07<01:37,  2.10it/s][A
  7%|▋         | 15/219 [00:07<01:37,  2.10it/s][A
  7%|▋         | 16/219 [00:08<01:36,  2.11it/s][A
  8%|▊         | 17/219 [00:08<01:35,  2.12it/s][A
  8%|▊         | 18/219 [00:09<01:34,  2.13it/s][A
  9%|▊         | 19/219 [00:0

epoch26



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<03:53,  1.07s/it][A
  1%|          | 2/219 [00:01<02:38,  1.37it/s][A
  1%|▏         | 3/219 [00:02<02:11,  1.65it/s][A
  2%|▏         | 4/219 [00:02<01:58,  1.81it/s][A
  2%|▏         | 5/219 [00:02<01:51,  1.91it/s][A
  3%|▎         | 6/219 [00:03<01:47,  1.98it/s][A
  3%|▎         | 7/219 [00:03<01:44,  2.04it/s][A
  4%|▎         | 8/219 [00:04<01:41,  2.07it/s][A
  4%|▍         | 9/219 [00:04<01:40,  2.09it/s][A
  5%|▍         | 10/219 [00:05<01:39,  2.09it/s][A
  5%|▌         | 11/219 [00:05<01:38,  2.11it/s][A
  5%|▌         | 12/219 [00:06<01:37,  2.13it/s][A
  6%|▌         | 13/219 [00:06<01:37,  2.12it/s][A
  6%|▋         | 14/219 [00:07<01:36,  2.12it/s][A
  7%|▋         | 15/219 [00:07<01:36,  2.11it/s][A
  7%|▋         | 16/219 [00:08<01:36,  2.09it/s][A
  8%|▊         | 17/219 [00:08<01:36,  2.10it/s][A
  8%|▊         | 18/219 [00:09<01:35,  2.10it/s][A
  9%|▊         | 19/219 [00:0

epoch27



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<03:39,  1.00s/it][A
  1%|          | 2/219 [00:01<02:32,  1.42it/s][A
  1%|▏         | 3/219 [00:01<02:08,  1.68it/s][A
  2%|▏         | 4/219 [00:02<01:56,  1.84it/s][A
  2%|▏         | 5/219 [00:02<01:50,  1.94it/s][A
  3%|▎         | 6/219 [00:03<01:46,  2.00it/s][A
  3%|▎         | 7/219 [00:03<01:44,  2.03it/s][A
  4%|▎         | 8/219 [00:04<01:42,  2.06it/s][A
  4%|▍         | 9/219 [00:04<01:40,  2.08it/s][A
  5%|▍         | 10/219 [00:05<01:39,  2.09it/s][A
  5%|▌         | 11/219 [00:05<01:38,  2.12it/s][A
  5%|▌         | 12/219 [00:06<01:38,  2.10it/s][A
  6%|▌         | 13/219 [00:06<01:38,  2.10it/s][A
  6%|▋         | 14/219 [00:07<01:37,  2.10it/s][A
  7%|▋         | 15/219 [00:07<01:36,  2.10it/s][A
  7%|▋         | 16/219 [00:08<01:36,  2.10it/s][A
  8%|▊         | 17/219 [00:08<01:35,  2.11it/s][A
  8%|▊         | 18/219 [00:09<01:34,  2.12it/s][A
  9%|▊         | 19/219 [00:0

epoch28



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<05:16,  1.45s/it][A
  1%|          | 2/219 [00:01<03:13,  1.12it/s][A
  1%|▏         | 3/219 [00:02<02:33,  1.41it/s][A
  2%|▏         | 4/219 [00:02<02:11,  1.63it/s][A
  2%|▏         | 5/219 [00:03<01:59,  1.79it/s][A
  3%|▎         | 6/219 [00:03<01:52,  1.89it/s][A
  3%|▎         | 7/219 [00:04<01:48,  1.95it/s][A
  4%|▎         | 8/219 [00:04<01:45,  2.00it/s][A
  4%|▍         | 9/219 [00:05<01:43,  2.03it/s][A
  5%|▍         | 10/219 [00:05<01:41,  2.05it/s][A
  5%|▌         | 11/219 [00:06<01:40,  2.06it/s][A
  5%|▌         | 12/219 [00:06<01:39,  2.08it/s][A
  6%|▌         | 13/219 [00:07<01:38,  2.10it/s][A
  6%|▋         | 14/219 [00:07<01:36,  2.12it/s][A
  7%|▋         | 15/219 [00:08<01:36,  2.11it/s][A
  7%|▋         | 16/219 [00:08<01:35,  2.12it/s][A
  8%|▊         | 17/219 [00:09<01:35,  2.12it/s][A
  8%|▊         | 18/219 [00:09<01:34,  2.13it/s][A
  9%|▊         | 19/219 [00:0

epoch29



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:01<05:34,  1.54s/it][A
  1%|          | 2/219 [00:02<03:20,  1.08it/s][A
  1%|▏         | 3/219 [00:02<02:36,  1.38it/s][A
  2%|▏         | 4/219 [00:03<02:15,  1.58it/s][A
  2%|▏         | 5/219 [00:03<02:03,  1.74it/s][A
  3%|▎         | 6/219 [00:03<01:54,  1.86it/s][A
  3%|▎         | 7/219 [00:04<01:49,  1.94it/s][A
  4%|▎         | 8/219 [00:04<01:45,  2.00it/s][A
  4%|▍         | 9/219 [00:05<01:42,  2.05it/s][A
  5%|▍         | 10/219 [00:05<01:40,  2.08it/s][A
  5%|▌         | 11/219 [00:06<01:39,  2.10it/s][A
  5%|▌         | 12/219 [00:06<01:37,  2.12it/s][A
  6%|▌         | 13/219 [00:07<01:37,  2.12it/s][A
  6%|▋         | 14/219 [00:07<01:37,  2.10it/s][A
  7%|▋         | 15/219 [00:08<01:36,  2.11it/s][A
  7%|▋         | 16/219 [00:08<01:36,  2.10it/s][A
  8%|▊         | 17/219 [00:09<01:35,  2.12it/s][A
  8%|▊         | 18/219 [00:09<01:34,  2.13it/s][A
  9%|▊         | 19/219 [00:1

epoch30



  0%|          | 0/219 [00:00<?, ?it/s][A
  0%|          | 1/219 [00:00<03:34,  1.02it/s][A
  1%|          | 2/219 [00:01<02:30,  1.44it/s][A
  1%|▏         | 3/219 [00:01<02:06,  1.70it/s][A
  2%|▏         | 4/219 [00:02<01:55,  1.86it/s][A
  2%|▏         | 5/219 [00:02<01:50,  1.94it/s][A
  3%|▎         | 6/219 [00:03<01:45,  2.01it/s][A
  3%|▎         | 7/219 [00:03<01:43,  2.05it/s][A
  4%|▎         | 8/219 [00:04<01:41,  2.08it/s][A
  4%|▍         | 9/219 [00:04<01:40,  2.08it/s][A
  5%|▍         | 10/219 [00:05<01:39,  2.10it/s][A
  5%|▌         | 11/219 [00:05<01:38,  2.11it/s][A
  5%|▌         | 12/219 [00:06<01:38,  2.10it/s][A
  6%|▌         | 13/219 [00:06<01:38,  2.10it/s][A
  6%|▋         | 14/219 [00:07<01:37,  2.10it/s][A
  7%|▋         | 15/219 [00:07<01:36,  2.10it/s][A
  7%|▋         | 16/219 [00:08<01:36,  2.09it/s][A
  8%|▊         | 17/219 [00:08<01:36,  2.08it/s][A
  8%|▊         | 18/219 [00:09<01:35,  2.10it/s][A
  9%|▊         | 19/219 [00:0

KeyboardInterrupt: ignored