## Imports

In [1]:
# !pip install kaggle

In [2]:
# from google.colab import files
# files.upload()

In [3]:
# !mkdir ~/.kaggle
# !cp kaggle.json ~/.kaggle/
# !chmod 600 ~/.kaggle/kaggle.json

In [4]:
  # !kaggle datasets download -d mihailchirobocea/ffhq-64-train-50k

In [5]:
# !unzip /content/ffhq-64-train-50k.zip

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms 
from torch.utils.data import DataLoader
from torch import optim
from torch.cuda.amp import autocast, GradScaler
import copy
import numpy as np
import math
from PIL import Image
from matplotlib import pyplot as plt
from tqdm.notebook import tqdm
import os

try:
    import cPickle as pickle
except ModuleNotFoundError:
    import pickle

## Utils

In [7]:
def plot_images(images):
    """Display a batch of images side by side in one matplotlib figure.

    The batch is moved to the CPU, its items are concatenated along their last
    axis into a single strip, and the strip is permuted with (1, 2, 0) before
    being handed to `imshow`.
    """
    plt.figure(figsize=(4, 4))
    strip = torch.cat(list(images.cpu()), dim=-1)
    plt.imshow(strip.permute(1, 2, 0))
    plt.show()


def save_images(images, path, **kwargs):
    """Tile `images` into a single grid and write it to `path`.

    Extra keyword arguments are forwarded unchanged to
    `torchvision.utils.make_grid`.
    """
    grid = torchvision.utils.make_grid(images, **kwargs)
    pixels = grid.permute(1, 2, 0).cpu().numpy()
    Image.fromarray(pixels).save(path)

def conditional_resize(img, target_size):
    """Resize `img` only when its dimensions fall outside the accepted range.

    * If the smallest entry of `img.size` is below `target_size`, the image is
      passed through `transforms.Resize(target_size)`.
    * Otherwise, if the largest entry is at least `2 * target_size`, it is
      passed through `transforms.Resize(2 * target_size)`.
    * Otherwise the image is returned untouched.
    """
    shortest, longest = min(img.size), max(img.size)

    if shortest < target_size:
        return transforms.Resize(target_size)(img)

    if longest >= 2 * target_size:
        return transforms.Resize(2 * target_size)(img)

    return img
    
def get_data(args):
    """Build the shuffled training DataLoader described by `args`.

    Reads `args.image_size`, `args.dataset_path` and `args.batch_size`.
    Images are conditionally resized, randomly cropped to `image_size`,
    randomly flipped on both axes and normalized with mean/std 0.5 per channel.
    """
    def _resize(img):
        return conditional_resize(img, args.image_size)

    augmentations = [
        transforms.Lambda(_resize),
        transforms.RandomCrop(args.image_size),
        transforms.RandomHorizontalFlip(0.5),
        transforms.RandomVerticalFlip(0.5),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
    dataset = torchvision.datasets.ImageFolder(
        args.dataset_path,
        transform=transforms.Compose(augmentations),
    )
    return DataLoader(dataset, batch_size=args.batch_size, shuffle=True)

## Learning Rate Scheduler

In [8]:
from math import cos, pi, floor, sin

from torch.optim import lr_scheduler

def anneal_linear(start, end, proportion):
    """Linearly interpolate from `start` (proportion 0) to `end` (proportion 1)."""
    span = end - start
    return start + proportion * span


def anneal_cos(start, end, proportion):
    """Cosine-shaped interpolation from `start` (proportion 0) to `end` (proportion 1)."""
    half_span = (start - end) / 2
    return end + half_span * (cos(pi * proportion) + 1)


class Phase:
    """One segment of a schedule, annealed from `start` to `end` over `n_iter` steps.

    Args:
        start: value at the beginning of the phase.
        end: value reached once `n_iter` steps have been taken.
        n_iter: number of steps in the phase; may be 0 for an empty phase.
        anneal_fn: callable `(start, end, proportion) -> value`.
    """

    def __init__(self, start, end, n_iter, anneal_fn):
        self.start, self.end = start, end
        self.n_iter = n_iter
        self.anneal_fn = anneal_fn
        self.n = 0

    def step(self):
        """Advance by one step and return the annealed value for the new position."""
        self.n += 1

        # An empty phase (e.g. a warm-up of int(n_iter * proportion) == 0 steps)
        # is already finished: report its end value instead of dividing by zero.
        if self.n_iter == 0:
            return self.end

        return self.anneal_fn(self.start, self.end, self.n / self.n_iter)

    def reset(self):
        """Rewind the phase to its first step."""
        self.n = 0

    @property
    def is_done(self):
        """True once at least `n_iter` steps have been taken."""
        return self.n >= self.n_iter


class CycleScheduler:
    """Two-phase cyclic schedule for learning rate and (optionally) momentum.

    The first phase lasts `int(n_iter * warmup_proportion)` steps and moves the
    learning rate from `lr_max / divider` up to `lr_max`; the second phase takes
    the remaining steps and moves it down to `lr_max / divider / 1e4`.  When
    `momentum` is a `(high, low)` pair it is annealed in the opposite direction.
    After both phases finish, every phase is reset and the cycle starts over.
    """

    def __init__(
        self,
        optimizer,
        lr_max,
        n_iter,
        momentum=(0.95, 0.85),
        divider=25,
        warmup_proportion=0.1,
        phase=('linear', 'cos'),
    ):
        self.optimizer = optimizer
        self.momentum = momentum
        self.phase = 0  # index of the currently active phase

        warmup_iters = int(n_iter * warmup_proportion)
        decay_iters = n_iter - warmup_iters
        lr_min = lr_max / divider

        anneal_fns = {'linear': anneal_linear, 'cos': anneal_cos}
        warmup_fn = anneal_fns[phase[0]]
        decay_fn = anneal_fns[phase[1]]

        self.lr_phase = [
            Phase(lr_min, lr_max, warmup_iters, warmup_fn),
            Phase(lr_max, lr_min / 1e4, decay_iters, decay_fn),
        ]

        if momentum is None:
            self.momentum_phase = []
        else:
            mom_high, mom_low = momentum
            self.momentum_phase = [
                Phase(mom_high, mom_low, warmup_iters, warmup_fn),
                Phase(mom_low, mom_high, decay_iters, decay_fn),
            ]

    def step(self):
        """Advance one iteration, write the new values into the optimizer, and return `(lr, momentum)`."""
        active = self.phase
        use_momentum = self.momentum is not None

        lr = self.lr_phase[active].step()
        momentum = self.momentum_phase[active].step() if use_momentum else None

        for group in self.optimizer.param_groups:
            group['lr'] = lr

            if not use_momentum:
                continue

            # Adam-style optimizers keep momentum as the first beta.
            if 'betas' in group:
                group['betas'] = (momentum, group['betas'][1])
            else:
                group['momentum'] = momentum

        if self.lr_phase[active].is_done:
            self.phase += 1

        # Past the last phase: rewind everything and begin a new cycle.
        if self.phase >= len(self.lr_phase):
            for finished in (*self.lr_phase, *self.momentum_phase):
                finished.reset()
            self.phase = 0

        return lr, momentum

## Modules

In [9]:
class EMA():
    """Exponential moving average of a model's weights.

    Args:
        beta: decay factor; the averaged weight becomes
            `old * beta + (1 - beta) * new` on every update.
        step: number of updates already performed (used when resuming).
    """

    def __init__(self, beta, step = 0):
        self.beta = beta
        self.step = step

    def update_model_average(self, ema_model, current_model):
        """Blend `current_model` into `ema_model` in place.

        Parameters are averaged with `update_average`.  Buffers (for example
        BatchNorm running statistics) are not returned by `parameters()`, so
        they are copied verbatim from the current model; otherwise they would
        stay frozen at whatever the last `reset_parameters` call copied.
        """
        for current_params, ema_params in zip(current_model.parameters(), ema_model.parameters()):
            old_weight, up_weight = ema_params.data, current_params.data
            ema_params.data = self.update_average(old_weight, up_weight)

        for current_buffer, ema_buffer in zip(current_model.buffers(), ema_model.buffers()):
            ema_buffer.data.copy_(current_buffer.data)

    def update_average(self, old, new):
        """Return the EMA of `old` and `new`; `new` alone when there is no `old`."""
        if old is None:
            return new
        return old * self.beta + (1 - self.beta) * new

    def step_ema(self, ema_model, model, step_start_ema=2000):
        """Perform one EMA step.

        For the first `step_start_ema` calls the EMA model is simply kept
        identical to `model`; afterwards it is updated by averaging.
        """
        if self.step < step_start_ema:
            self.reset_parameters(ema_model, model)
            self.step += 1
            return
        self.update_model_average(ema_model, model)
        self.step += 1

    def reset_parameters(self, ema_model, model):
        """Overwrite the EMA model's full state (parameters and buffers) with `model`'s."""
        ema_model.load_state_dict(model.state_dict())
        
        

class HeadAttention(nn.Module):
    """Multi-head self-attention over the spatial positions of a feature map.

    The input is viewed as `(-1, channels, h * w)`, attended with 4 heads plus a
    residual connection, passed through a residual feed-forward block, and
    viewed back to `(-1, channels, h, w)`.
    """

    def __init__(self, channels):
        super().__init__()
        self.channels = channels

        self.mha = nn.MultiheadAttention(channels, 4, batch_first=True)
        self.ln = nn.LayerNorm([channels])
        self.ff_self = nn.Sequential(
            nn.LayerNorm([channels]),
            nn.Linear(channels, channels),
            nn.GELU(),
            nn.Linear(channels, channels),
        )

    def forward(self, x):
        height, width = x.shape[-2:]

        # Flatten the spatial grid into a token sequence: (batch, h*w, channels).
        tokens = x.view(-1, self.channels, height * width).transpose(1, 2)

        normed = self.ln(tokens)
        attended, _ = self.mha(normed, normed, normed)
        attended = attended + tokens
        attended = self.ff_self(attended) + attended

        # Back to the (batch, channels, h, w) layout.
        return attended.transpose(2, 1).view(-1, self.channels, height, width)
    


class SkipAttention(nn.Module):
    """Gate a skip connection `x` with a single-channel mask computed from `g` and `x`.

    Both inputs are projected to `f_out` channels with 1x1 convolutions, summed,
    passed through ReLU and reduced to a sigmoid mask that multiplies `x`.
    """

    def __init__(self, f_in_g, f_in_x, f_out):
        super().__init__()

        def projection(in_channels, out_channels):
            # 1x1 convolution followed by batch normalization.
            return [
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0),
                nn.BatchNorm2d(out_channels),
            ]

        self.w_g = nn.Sequential(*projection(f_in_g, f_out))
        self.w_x = nn.Sequential(*projection(f_in_x, f_out))

        self.relu = nn.ReLU(inplace=True)

        self.psi = nn.Sequential(*projection(f_out, 1), nn.Sigmoid())

    def forward(self, g, x):
        gate_features = self.w_g(g)
        skip_features = self.w_x(x)
        mask = self.psi(self.relu(gate_features + skip_features))
        return mask * x
    
    
    
class DoubleConv(nn.Module):
    """Two 3x3 convolutions with GroupNorm, optionally residual and time-conditioned.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels of the output feature map.
        mid_channels: channels between the two convolutions (defaults to `out_channels`).
        residual: when True the output is `gelu(x + double_conv(x))`.
        emb_dim: size of the embedding accepted by `forward(..., t=...)`.
    """

    def __init__(self, in_channels, out_channels, mid_channels=None, residual=False, emb_dim=512):
        super().__init__()
        self.residual = residual
        mid_channels = mid_channels or out_channels

        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.GroupNorm(1, mid_channels),
            nn.GELU(),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.GroupNorm(1, out_channels),
        )

        self.emb_layer = nn.Linear(emb_dim, out_channels)

    def forward(self, x, t = None):
        features = self.double_conv(x)
        if self.residual:
            features = F.gelu(x + features)

        if t is None:
            return features

        # Project the embedding to one value per channel and tile it over the map.
        height, width = features.shape[-2:]
        emb = self.emb_layer(t)[:, :, None, None].repeat(1, 1, height, width)
        return features + emb
    
        
class Down(nn.Module):
    """Encoder stage: strided-conv downsampling, two DoubleConvs, time embedding, attention."""

    def __init__(self, in_channels, out_channels, emb_dim=512):
        super().__init__()

        # Stride-2 convolution halves the spatial resolution.
        self.down = nn.Conv2d(in_channels, in_channels, 4, stride=2, padding=1)

        self.conv1 = DoubleConv(in_channels, in_channels, residual=True)
        self.conv2 = DoubleConv(in_channels, out_channels)

        self.emb_layer = nn.Linear(emb_dim, out_channels)

        self.head_attention = HeadAttention(out_channels)

    def forward(self, x, t):
        features = self.conv2(self.conv1(self.down(x)))

        height, width = features.shape[-2:]
        emb = self.emb_layer(t)[:, :, None, None].repeat(1, 1, height, width)

        return self.head_attention(features + emb)


class Up(nn.Module):
    """Decoder stage: (optionally gated) skip concat, two DoubleConvs, time embedding, attention, upsampling."""

    def __init__(self, in_channels, out_channels, gated_attention, emb_dim=512):
        super().__init__()

        self.gated_attention = gated_attention

        half = in_channels // 2
        self.attention = SkipAttention(half, half, half)

        self.conv1 = DoubleConv(in_channels, in_channels, residual=True)
        self.conv2 = DoubleConv(in_channels, out_channels)

        # Stride-2 transposed convolution doubles the spatial resolution.
        self.up = nn.ConvTranspose2d(out_channels, out_channels, 4, stride=2, padding=1)

        self.emb_layer = nn.Linear(emb_dim, out_channels)

        self.head_attention = HeadAttention(out_channels)

    def forward(self, x, skip_x, t):
        if self.gated_attention:
            skip_x = self.attention(g=x, x=skip_x)

        merged = torch.cat([skip_x, x], dim=1)
        merged = self.conv2(self.conv1(merged))

        height, width = merged.shape[-2:]
        emb = self.emb_layer(t)[:, :, None, None].repeat(1, 1, height, width)

        attended = self.head_attention(merged + emb)
        return self.up(attended)
    
    
class In(nn.Module):
    """Input stem: 1x1 convolution to `out_channels`, a residual DoubleConv, then the time embedding."""

    def __init__(self, out_channels, in_channels = 3, emb_dim=512):
        super().__init__()

        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False),
            nn.GroupNorm(1, out_channels),
            nn.GELU(),
        )
        self.conv2 = DoubleConv(out_channels, out_channels, residual=True)

        self.emb_layer = nn.Linear(emb_dim, out_channels)

    def forward(self, x, t):
        features = self.conv2(self.conv1(x))

        height, width = features.shape[-2:]
        emb = self.emb_layer(t)[:, :, None, None].repeat(1, 1, height, width)

        return features + emb
    
    
class Out(nn.Module):
    """Output head: (optionally gated) skip concat, residual DoubleConv, 1x1 convolution to `out_channels`.

    `emb_dim` is accepted for signature parity with the other stages but is not used.
    """

    def __init__(self, in_channels, gated_attention, out_channels = 3, emb_dim=512):
        super().__init__()

        self.gated_attention = gated_attention

        half = in_channels // 2
        self.attention = SkipAttention(half, half, half)

        self.conv1 = nn.Sequential(
            DoubleConv(in_channels, in_channels, residual=True),
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=True),
        )

    def forward(self, x, skip_x):
        if self.gated_attention:
            skip_x = self.attention(g=x, x=skip_x)

        merged = torch.cat([skip_x, x], dim=1)
        return self.conv1(merged)
    
    
class UNet(nn.Module):
    """Time-conditioned U-Net used as the noise predictor.

    Args:
        device: stored on the instance as `self.device`; kept for backward
            compatibility, the forward pass itself follows the input's device.
        gated_attention: forwarded to every `Up` stage and to `Out` to enable
            `SkipAttention` on the skip connections.
        c_in, c_out: accepted but not used by this implementation.
        time_dim: size of the sinusoidal timestep encoding and its MLP.
    """

    def __init__(self, device, gated_attention = False, c_in=3, c_out=3, time_dim=512):
        super().__init__()

        self.channels = [128 * k for k in range(1,5)]
        self.device = device
        self.time_dim = time_dim
        self.time_embed = nn.Sequential(
            nn.Linear(time_dim, time_dim),
            nn.GELU(),
            nn.Linear(time_dim, time_dim),
            nn.GELU(),
        )

        self.inc = In(self.channels[0])

        self.down1 = Down(self.channels[0], self.channels[1])
        self.down2 = Down(self.channels[1], self.channels[2])
        self.down3 = Down(self.channels[2], self.channels[3])

        self.bot1 = DoubleConv(self.channels[3], self.channels[3], residual = True)
        self.bot2 = DoubleConv(self.channels[3], self.channels[3], residual = True)
        self.bot3 = DoubleConv(self.channels[3], self.channels[3], residual = True)
        self.bot4 = DoubleConv(self.channels[3], self.channels[3], residual = True)
        self.bot5 = DoubleConv(self.channels[3], self.channels[3], residual = True)

        self.up1 = Up(2*self.channels[3], self.channels[2], gated_attention)
        self.up2 = Up(2*self.channels[2], self.channels[1], gated_attention)
        self.up3 = Up(2*self.channels[1], self.channels[0], gated_attention)

        self.outc = Out(2*self.channels[0], gated_attention)

    def pos_encoding(self, t, channels):
        """Sinusoidal encoding of `t` (shape `(batch, 1)`) into `channels` features.

        The first half of the output holds sines, the second half cosines.
        The frequencies are created on `t.device` rather than `self.device`, so
        the encoding keeps working when the module has been moved with `.to()`
        or is replicated across GPUs by `nn.DataParallel`.
        """
        inv_freq = 1.0 / (10000 ** (torch.arange(0, channels, 2, device=t.device).float() / channels))
        angles = t * inv_freq  # (batch, 1) * (channels / 2,) broadcasts to (batch, channels / 2)
        return torch.cat([torch.sin(angles), torch.cos(angles)], dim=-1)

    def forward(self, x, t):
        """Predict the noise in `x` given integer timesteps `t` of shape `(batch,)`."""
        t = t.unsqueeze(-1).type(torch.float)
        t = self.pos_encoding(t, self.time_dim)
        t = self.time_embed(t)

        # Encoder: keep every stage's output for the skip connections.
        x1 = self.inc(x, t)
        x2 = self.down1(x1, t)
        x3 = self.down2(x2, t)
        x4 = self.down3(x3, t)

        # Bottleneck: five residual blocks, each re-injecting the time embedding.
        x5 = x4
        for block in (self.bot1, self.bot2, self.bot3, self.bot4, self.bot5):
            x5 = block(x5, t = t)

        # Decoder.
        x = self.up1(x5, x4, t)
        x = self.up2(x, x3, t)
        x = self.up3(x, x2, t)
        x = self.outc(x, x1)

        return x

## Diffusion

In [10]:
class Diffusion:
    """Gaussian diffusion helper: noise schedule, forward noising and reverse sampling.

    Reads `generate_img_size`, `noise_steps`, `device` and `schedule` from
    `config` and precomputes `beta`, `alpha = 1 - beta` and the cumulative
    product `alpha_hat` on `config.device`.
    """

    def __init__(self, config):
        self.img_size = config.generate_img_size
        self.noise_steps = config.noise_steps
        self.device = config.device

        self.beta = self.prepare_noise_schedule(schedule=config.schedule, noise_steps=self.noise_steps).to(self.device)
        self.alpha = 1. - self.beta
        self.alpha_hat = torch.cumprod(self.alpha, dim=0)

    @staticmethod
    def prepare_noise_schedule(schedule, noise_steps, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3):
        """Return the 1-D tensor of `noise_steps` betas for the named schedule.

        Args:
            schedule: "linear" or "cosine".
            noise_steps: number of diffusion steps.
            linear_start, linear_end: endpoints of the linear schedule.
            cosine_s: offset used by the cosine schedule.

        Raises:
            ValueError: if `schedule` is not a supported name.
        """
        if schedule == "linear":
            return torch.linspace(linear_start, linear_end, noise_steps)

        # https://github.com/lucidrains/denoising-diffusion-pytorch/blob/main/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py
        if schedule == "cosine":
            timesteps = (torch.arange(noise_steps + 1) / noise_steps + cosine_s)
            alphas = timesteps / (1 + cosine_s) * math.pi / 2
            alphas = torch.cos(alphas).pow(2)
            alphas = alphas / alphas[0]
            betas = 1 - alphas[1:] / alphas[:-1]
            return betas.clamp(max=0.999)

        raise ValueError(f"Unknown noise schedule {schedule!r}; expected 'linear' or 'cosine'")

    def noise_images(self, x, t):
        """Noise `x` to timesteps `t`; returns `(noised_x, noise)`.

        `t` indexes `alpha_hat` and must hold one timestep per item of `x`.
        """
        sqrt_alpha_hat = torch.sqrt(self.alpha_hat[t])[:, None, None, None]
        sqrt_one_minus_alpha_hat = torch.sqrt(1 - self.alpha_hat[t])[:, None, None, None]
        eps = torch.randn_like(x)
        return sqrt_alpha_hat * x + sqrt_one_minus_alpha_hat * eps, eps

    def sample_timesteps(self, n):
        """Draw `n` random integer timesteps in `[1, noise_steps)` (CPU tensor)."""
        return torch.randint(low=1, high=self.noise_steps, size=(n,))

    @torch.no_grad()
    def sample(self, title, model, n, epoch):
        """Generate `n` images with `model`, save and plot them, and return them as uint8.

        The grid is written to `sample{epoch}_{title}.jpg`.  The model is put in
        eval mode for sampling and restored to whatever mode it was in before
        the call (an EMA model kept in eval mode stays in eval mode).
        """
        was_training = model.training
        model.eval()

        try:
            x = torch.randn((n, 3, self.img_size, self.img_size)).to(self.device)
            for i in reversed(range(1, self.noise_steps)):
                t = (torch.ones(n) * i).long().to(self.device)
                predicted_noise = model(x, t)
                alpha = self.alpha[t][:, None, None, None]
                alpha_hat = self.alpha_hat[t][:, None, None, None]
                beta = self.beta[t][:, None, None, None]
                # No fresh noise is added on the final denoising step.
                if i > 1:
                    noise = torch.randn_like(x)
                else:
                    noise = torch.zeros_like(x)
                x = 1 / torch.sqrt(alpha) * (x - ((1 - alpha) / (torch.sqrt(1 - alpha_hat))) * predicted_noise) + torch.sqrt(beta) * noise

            # Map from [-1, 1] to [0, 255].
            x = (x.clamp(-1, 1) + 1) / 2
            x = (x * 255).type(torch.uint8)

            save_images(x, f"sample{epoch}_{title}.jpg")
            plot_images(x)
        finally:
            model.train(was_training)

        return x


def _unwrap_model(model):
    """Return the wrapped module when `model` is an nn.DataParallel, else `model` itself."""
    return model.module if isinstance(model, nn.DataParallel) else model


def train(config, path = None, scheduler_path = None):
    """Train the diffusion UNet, optionally resuming from a checkpoint.

    Args:
        config: ModelConfig-like object.  When resuming, its attributes are
            overwritten by the config stored in the checkpoint, except
            `image_size` and `device`, which keep the caller's values.
        path: optional checkpoint file written by a previous run of this function.
        scheduler_path: optional pickle of a CycleScheduler to resume from
            (only used when `config.sched == "cycle"`).

    Side effects:
        Every 5th epoch writes `my_diff_ffhq_v5_e{epoch}.pth` and, when the
        cycle scheduler is active, `diff_v5_ffhq_sch_e{epoch}.pkl` to the
        current working directory.
    """
    dataloader = get_data(config)
    print("loaded")

    model = UNet(config.device, config.gated_attention).to(config.device)

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    optimizer = optim.AdamW(model.parameters(), lr=config.lr)
    mse = nn.MSELoss()

    scheduler = None
    if config.sched == "cycle" and scheduler_path is None:
        print("sch")
        scheduler = CycleScheduler(
            optimizer,
            config.lr,
            n_iter= len(dataloader) * config.epochs,
        )

    losses = []
    start_epoch = 0

    if path is not None:
        # map_location lets a checkpoint saved on another device be loaded here.
        checkpoint = torch.load(path, map_location=config.device)

        # Checkpoints store the bare module's weights (no "module." prefix), so
        # they must be loaded into the module inside any DataParallel wrapper.
        _unwrap_model(model).load_state_dict(checkpoint['model_state_dict'])

        config_dict = checkpoint["config"]
        # image_size and device describe the current run, not the saved one.
        image_size = config.image_size
        device = config.device
        config.__dict__.update(config_dict)
        config.image_size = image_size
        config.device = device

        start_epoch = checkpoint['epoch']
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        losses = checkpoint['loss']

        steps_ema = len(dataloader)*start_epoch
        ema = EMA(0.995, steps_ema)
        ema_model = copy.deepcopy(model).eval().requires_grad_(False)
        _unwrap_model(ema_model).load_state_dict(checkpoint['ema_model_state_dict'])

        del checkpoint
        del config_dict

    else:
        ema = EMA(0.995)
        ema_model = copy.deepcopy(model).eval().requires_grad_(False)

    # Make sure a scheduler exists whenever the (possibly checkpoint-updated)
    # config asks for one; previously these cases ended in a NameError.
    if config.sched == "cycle" and scheduler is None:
        if scheduler_path is not None:
            # NOTE: pickle.load can execute arbitrary code; only load scheduler
            # files produced by this notebook.
            with open(scheduler_path, "rb") as file:
                scheduler = pickle.load(file)
            scheduler.optimizer = optimizer
        else:
            print("sch")
            scheduler = CycleScheduler(
                optimizer,
                config.lr,
                n_iter= len(dataloader) * config.epochs,
            )

    diffusion = Diffusion(config)

    if config.mix_precision:
        scaler = GradScaler()

    # NOTE(review): the upper bound is hard-coded; config.epochs only affects
    # the scheduler length above. Kept as-is to preserve the run length.
    for epoch in range(start_epoch+1, 101):

        epoch_loss = []

        model.train()

        for k, (images, _) in enumerate(tqdm(dataloader)):

            optimizer.zero_grad()

            images = images.to(config.device)
            t = diffusion.sample_timesteps(images.shape[0]).to(config.device)

            if config.mix_precision:
                with autocast():
                    x_t, noise = diffusion.noise_images(images, t)
                    predicted_noise = model(x_t, t)
                    loss = mse(noise, predicted_noise)
                scaler.scale(loss).backward()
            else:
                x_t, noise = diffusion.noise_images(images, t)
                predicted_noise = model(x_t, t)
                loss = mse(noise, predicted_noise)
                loss.backward()

            # The scheduler writes the learning rate used by the upcoming optimizer step.
            if config.sched == "cycle":
                scheduler.step()

            if config.mix_precision:
                scaler.step(optimizer)
                scaler.update()
            else:
                optimizer.step()

            ema.step_ema(ema_model, model)

            if k%100 == 0:
                print(f"e{epoch}  |  b{k}  |  MSE {loss.item()}  |  Lr {optimizer.param_groups[0]['lr']}  |  t {t}")

            epoch_loss.append(loss.item())

        epoch_loss = np.array(epoch_loss).mean()
        print(f"Epoch {epoch} loss: {epoch_loss}")
        losses.append(epoch_loss)

        if epoch%5 == 0:

            checkpoint_path = f'my_diff_ffhq_v5_e{str(epoch)}.pth'

            # Always save the bare module's weights so the checkpoint does not
            # depend on whether DataParallel was in use.
            torch.save({
                    'epoch': epoch,
                    'model_state_dict': _unwrap_model(model).state_dict(),
                    'ema_model_state_dict': _unwrap_model(ema_model).state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': losses,
                    'config': config.__dict__
                    }, checkpoint_path)

            if config.sched == "cycle":
                with open(f"diff_v5_ffhq_sch_e{str(epoch)}.pkl", "wb") as file:
                    pickle.dump(scheduler, file, -1)


    
class ModelConfig:
    """Plain container for the training, model and data settings.

    Every constructor argument is stored unchanged as an attribute of the same
    name.  The default `device` is CUDA when available and CPU otherwise,
    resolved once when the class is defined.
    """

    def __init__(
        self,
        batch_size=16,
        image_size=64,
        epochs=1000,
        lr=1e-4,
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        mix_precision = True,
        gated_attention = True,
        schedule = 'linear',
        noise_steps = 4000,
        generate_img_size = 64,
        sched = None,
        dataset_path = "/kaggle/input/abstract-37k/Abstract-jpg/Train",
    ):
        # Optimisation settings.
        self.batch_size = batch_size
        self.image_size = image_size
        self.epochs = epochs
        self.lr = lr

        # Runtime / model switches.
        self.device = device
        self.mix_precision = mix_precision
        self.gated_attention = gated_attention

        # Diffusion process.
        self.schedule = schedule
        self.noise_steps = noise_steps
        self.generate_img_size = generate_img_size

        # Learning-rate scheduler name (None disables it) and data location.
        self.sched = sched
        self.dataset_path = dataset_path

def launch(path = None, sched_path = None):
    """Start training with a default `ModelConfig`.

    `path` and `sched_path` are handed to `train` as the checkpoint and
    scheduler-pickle locations.
    """
    train(ModelConfig(), path, sched_path)

In [11]:
# Sanity check: build the gated-attention UNet on the CPU and print its parameter count.
net = UNet(device="cpu", gated_attention=True)
print(sum(p.numel() for p in net.parameters()))

96854159


In [12]:
!nvidia-smi

Sat Jun  3 07:26:05 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.161.03   Driver Version: 470.161.03   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   41C    P0    27W / 250W |      2MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

In [13]:
# Resume training from the epoch-60 checkpoint; no scheduler pickle is passed.
launch("/kaggle/input/transfer-learning-imagenv5-32-to-64/my_diff_ffhq_v5_e60.pth")

loaded


  0%|          | 0/1563 [00:00<?, ?it/s]

e61  |  b0  |  MSE 0.007896794937551022  |  Lr 0.0001  |  t tensor([ 753, 3636, 3222, 2634, 1880, 2211, 3497,  636, 1731,  120, 2033, 3366,
        3929, 3238, 1161, 2947], device='cuda:0')
e61  |  b100  |  MSE 0.0018035774119198322  |  Lr 0.0001  |  t tensor([2343, 2935, 1460, 2475, 1373, 3966,  323, 2333, 2608,  568, 3626, 3675,
        1774, 3983, 3836, 1649], device='cuda:0')
e61  |  b200  |  MSE 0.0016344188479706645  |  Lr 0.0001  |  t tensor([ 946, 3225, 2335, 1647, 2398, 3445, 1156, 1631, 1189,  989, 3712, 3379,
        1371, 3194, 1837, 3383], device='cuda:0')
e61  |  b300  |  MSE 0.0039538429118692875  |  Lr 0.0001  |  t tensor([2033, 3683, 3609, 3224, 2348, 2345, 3500, 2928, 2155, 3439,  437,  131,
        3821,  751, 3325, 3718], device='cuda:0')
e61  |  b400  |  MSE 0.00271174730733037  |  Lr 0.0001  |  t tensor([3099, 1316, 1023, 3270, 2112, 1770, 1214, 3219, 3667, 2908,  357, 2524,
        3801, 3311, 2533, 3181], device='cuda:0')
e61  |  b500  |  MSE 0.00446809688583016



e61  |  b1300  |  MSE 0.003912542946636677  |  Lr 0.0001  |  t tensor([2517,  618, 1333, 2788,  407,  283, 1369, 3165, 2013, 3431, 2730, 2363,
        1045, 1667,  709,  967], device='cuda:0')
e61  |  b1400  |  MSE 0.005728320684283972  |  Lr 0.0001  |  t tensor([2779, 2967, 3097, 1195,  345,  499, 1070, 1628, 2754, 2540, 3949, 3079,
         601, 1434, 1883,  502], device='cuda:0')
e61  |  b1500  |  MSE 0.000882578722666949  |  Lr 0.0001  |  t tensor([3098, 2998,  963, 1883, 3530, 3446,  470, 1681, 1295, 3581, 1592, 2306,
        3195, 3106, 2275, 3408], device='cuda:0')
Epoch 61 loss: 0.00927799785570334


  0%|          | 0/1563 [00:00<?, ?it/s]

e62  |  b0  |  MSE 0.01255347765982151  |  Lr 0.0001  |  t tensor([1437,  160,  719, 1531, 1884, 1003,  682,  297, 3524, 2695,   14,  227,
         707, 3678,  594, 2730], device='cuda:0')
e62  |  b100  |  MSE 0.0027005556039512157  |  Lr 0.0001  |  t tensor([2254, 1020, 1013, 3401, 3209, 2248, 2729,  784, 1250, 2146, 1691,  940,
        3626, 2505, 1355,  696], device='cuda:0')
e62  |  b200  |  MSE 0.009241020306944847  |  Lr 0.0001  |  t tensor([2418,  616, 3249,  150, 1899, 2043, 3929, 1580, 1833, 3387, 2192, 3297,
        2205,  349, 1642, 2603], device='cuda:0')
e62  |  b300  |  MSE 0.006329736672341824  |  Lr 0.0001  |  t tensor([3863, 3467, 1661, 2906, 3408, 3662, 3650, 1079, 2285,  575, 2345, 3840,
        3690, 2241, 2336,  227], device='cuda:0')
e62  |  b400  |  MSE 0.011751811020076275  |  Lr 0.0001  |  t tensor([ 692, 1446, 1325, 2977, 1444, 2883,  938, 1121, 1477, 2248,   53,  117,
        2150,  691,  602, 2657], device='cuda:0')
e62  |  b500  |  MSE 0.003659893525764346 

  0%|          | 0/1563 [00:00<?, ?it/s]

e63  |  b0  |  MSE 0.010279141366481781  |  Lr 0.0001  |  t tensor([3907,   64, 1445, 3636, 2293, 3802, 2889,  668, 3780, 1295, 2349,  759,
        1936,  940, 3086,  791], device='cuda:0')
e63  |  b100  |  MSE 0.0042689889669418335  |  Lr 0.0001  |  t tensor([2397, 1020,  940,  657, 3490, 2983, 3650, 2298, 3822,  739, 1615, 3411,
        3862, 3046, 1983,  984], device='cuda:0')
e63  |  b200  |  MSE 0.01271836832165718  |  Lr 0.0001  |  t tensor([1220, 3402, 1419, 2552,  376, 3803, 2215, 2518, 1277, 3006, 1845,  137,
        1536, 1321, 1531,  715], device='cuda:0')
e63  |  b300  |  MSE 0.010700351558625698  |  Lr 0.0001  |  t tensor([2681, 3302,  646, 3351, 1809, 3332, 3087,  529, 2840, 2837,  127, 3216,
        3959,  405, 1732, 3192], device='cuda:0')
e63  |  b400  |  MSE 0.007902582176029682  |  Lr 0.0001  |  t tensor([ 359, 3618, 1683, 1635, 3602, 1790, 2068, 1953,  646,  349, 2066,  427,
        1260, 1120, 3994,  846], device='cuda:0')
e63  |  b500  |  MSE 0.029775602743029594 

  0%|          | 0/1563 [00:00<?, ?it/s]

e64  |  b0  |  MSE 0.0018628414254635572  |  Lr 0.0001  |  t tensor([1072, 3767, 2139, 3403,  975, 3148, 1942, 1208,  781, 3430, 2889, 1590,
        3790, 3570, 1189, 3743], device='cuda:0')
e64  |  b100  |  MSE 0.010384950786828995  |  Lr 0.0001  |  t tensor([  77,  512, 1648, 1544, 1587,  991, 3660, 2387, 2882,  527, 1961, 1082,
        2118, 2548, 1761, 2868], device='cuda:0')
e64  |  b200  |  MSE 0.015464944764971733  |  Lr 0.0001  |  t tensor([2050, 1933,  582,   38, 1770, 2688, 1908,  480, 2246, 1675, 2540, 2319,
        1265,  889,  767, 2985], device='cuda:0')
e64  |  b300  |  MSE 0.010835211724042892  |  Lr 0.0001  |  t tensor([1432, 3951, 2093, 2272, 2986,  685,  479,  349,  492,  695,  900,  274,
        3504, 3514, 2068, 2736], device='cuda:0')
e64  |  b400  |  MSE 0.00898386538028717  |  Lr 0.0001  |  t tensor([3722, 3801, 2582,  991, 2287,  316, 2548,  780, 3931,  219, 3380,  519,
        1457, 1841,  242, 1356], device='cuda:0')
e64  |  b500  |  MSE 0.016377169638872147 

  0%|          | 0/1563 [00:00<?, ?it/s]

e65  |  b0  |  MSE 0.015175691805779934  |  Lr 0.0001  |  t tensor([3513, 1621, 1554,   62,  518,  276, 1721,  698, 3829,  262, 3716, 1523,
        3458, 2927,  285, 3240], device='cuda:0')
e65  |  b100  |  MSE 0.008394381031394005  |  Lr 0.0001  |  t tensor([1680, 1405, 2687, 2496, 3329, 1610,  678,  903, 2191, 3959,  100, 3586,
        3502, 3363,  607,  451], device='cuda:0')
e65  |  b200  |  MSE 0.006707099732011557  |  Lr 0.0001  |  t tensor([ 535, 3443,   89, 2919, 1062, 3623,  749, 3883, 3035,  671, 3363, 3079,
          89,   90, 3756, 1546], device='cuda:0')
e65  |  b300  |  MSE 0.004784927703440189  |  Lr 0.0001  |  t tensor([2264, 2635, 2278,  116, 1880, 2993, 3384, 2879, 1469, 3058, 1713,  604,
        1410, 3887, 1483,  257], device='cuda:0')
e65  |  b400  |  MSE 0.01470252312719822  |  Lr 0.0001  |  t tensor([3844, 2919, 2996,  136,  613, 3991,  422, 2031,  697, 1444, 3217, 1404,
         854,  312,   98, 2349], device='cuda:0')
e65  |  b500  |  MSE 0.02025887370109558  |

  0%|          | 0/1563 [00:00<?, ?it/s]

e66  |  b0  |  MSE 0.0014525478472933173  |  Lr 0.0001  |  t tensor([1431, 3625, 1230, 2418, 2088, 1526,  973, 1076, 1900, 1485, 3276, 1659,
         654, 2037, 3655, 1768], device='cuda:0')
e66  |  b100  |  MSE 0.000825114082545042  |  Lr 0.0001  |  t tensor([3180, 1709, 3276, 2588, 2801,  701, 2066, 1001, 3933, 1659, 3273, 3220,
        2066, 3048, 1404, 1765], device='cuda:0')
e66  |  b200  |  MSE 0.0048126839101314545  |  Lr 0.0001  |  t tensor([3143, 2236, 3439,  804, 3146, 3428, 2319,  855, 2028,  639, 3139,  540,
        3683, 3408, 2088, 1865], device='cuda:0')
e66  |  b300  |  MSE 0.03828620910644531  |  Lr 0.0001  |  t tensor([1072,  763, 1804, 2749,  169, 1052,  963, 1658, 2743, 2038,   15, 2805,
         456, 3254,  893, 2781], device='cuda:0')
e66  |  b400  |  MSE 0.0044965073466300964  |  Lr 0.0001  |  t tensor([3242, 1188, 3894, 3154, 3268, 1735, 1102, 1706, 1133, 2528, 1988, 1604,
         479, 3941, 2140, 3572], device='cuda:0')
e66  |  b500  |  MSE 0.00977821089327335

  0%|          | 0/1563 [00:00<?, ?it/s]

e67  |  b0  |  MSE 0.016644176095724106  |  Lr 0.0001  |  t tensor([2754, 2354,  504,  510, 2017,  350,  558, 2277, 1905,  631, 3526, 2768,
        3847,  156, 3520, 3565], device='cuda:0')
e67  |  b100  |  MSE 0.015881285071372986  |  Lr 0.0001  |  t tensor([ 830, 1760,  715, 1972, 3901, 1514,   85, 3739, 1474, 3576, 2864,   83,
        2430, 2312,  784, 3510], device='cuda:0')
e67  |  b200  |  MSE 0.004221536219120026  |  Lr 0.0001  |  t tensor([3271, 1932, 3435, 2162, 1527,  818, 2840,  120, 3478, 3475, 1609,  393,
        1011, 3312, 3221,  821], device='cuda:0')
e67  |  b300  |  MSE 0.018904294818639755  |  Lr 0.0001  |  t tensor([3076,  388, 1535, 1967, 1847, 2168, 2782, 3116, 1070, 1279,  118, 1114,
         341, 2716, 2752, 2424], device='cuda:0')
e67  |  b400  |  MSE 0.008449625223875046  |  Lr 0.0001  |  t tensor([1749, 3227,  465,  251, 3677, 2189, 1438, 3781, 2663, 1167,  692,  362,
        1786, 1376, 2730, 2473], device='cuda:0')
e67  |  b500  |  MSE 0.0019600531086325645

  0%|          | 0/1563 [00:00<?, ?it/s]

e68  |  b0  |  MSE 0.011317690834403038  |  Lr 0.0001  |  t tensor([2420,  277, 1995, 2519, 3364, 2029, 2246, 3955,  183, 1356,  141, 3217,
        3934, 1857, 1966, 3172], device='cuda:0')
e68  |  b100  |  MSE 0.0032770985271781683  |  Lr 0.0001  |  t tensor([2805, 3765, 2752, 2091,  359, 3480, 1943, 2782, 3901, 3856, 3283, 2350,
        3979, 2373,  194, 3862], device='cuda:0')
e68  |  b200  |  MSE 0.003499856684356928  |  Lr 0.0001  |  t tensor([ 559, 2936,  977, 2657, 2760, 1625, 3690, 3346,  995, 1323, 2305,  934,
         891,  549, 1335, 1185], device='cuda:0')
e68  |  b300  |  MSE 0.005621639546006918  |  Lr 0.0001  |  t tensor([3393,   77,  278, 3013, 3225, 2623, 1897, 1781, 3263, 3985, 2837,  389,
        3695, 2811, 2106,  274], device='cuda:0')
e68  |  b400  |  MSE 0.009593024849891663  |  Lr 0.0001  |  t tensor([3534, 1846, 2507, 2346, 1495, 1919,  401, 1010, 3831,  475, 3234,  852,
         274,  184, 2112, 2100], device='cuda:0')
e68  |  b500  |  MSE 0.008741249330341816

  0%|          | 0/1563 [00:00<?, ?it/s]

e69  |  b0  |  MSE 0.0020608173217624426  |  Lr 0.0001  |  t tensor([2163, 3332, 3831, 3778, 2256,  446, 3440, 1915, 3196,  432, 3299, 3023,
        1771, 3506, 2674, 2706], device='cuda:0')
e69  |  b100  |  MSE 0.029128756374120712  |  Lr 0.0001  |  t tensor([  37,  684,  160, 1892,  896, 3638, 2956,  835,  211, 1834, 3257,  443,
         833,   57, 3470, 3177], device='cuda:0')
e69  |  b200  |  MSE 0.007136043161153793  |  Lr 0.0001  |  t tensor([2034, 1131, 1832, 1829,  998, 3867, 2956, 1538, 1613,  244, 3004, 2488,
        3495,  356,  142, 3013], device='cuda:0')
e69  |  b300  |  MSE 0.010333264246582985  |  Lr 0.0001  |  t tensor([2926, 2476,  175, 2807, 3283, 1386, 1327, 3393, 1815,  315, 2904, 2601,
        2246, 3778, 1577, 2975], device='cuda:0')
e69  |  b400  |  MSE 0.005810359492897987  |  Lr 0.0001  |  t tensor([ 315, 3334,  550, 3562, 2171, 1379,  680, 3820, 2722, 3546, 3024, 1817,
        1820, 3067, 2219, 3659], device='cuda:0')
e69  |  b500  |  MSE 0.002177073154598474

  0%|          | 0/1563 [00:00<?, ?it/s]

e70  |  b0  |  MSE 0.0036910693161189556  |  Lr 0.0001  |  t tensor([2254, 1813, 3609, 2170, 3161, 3944, 1361,  859, 1606,  414,  688, 2862,
        2668, 3669,  655, 1287], device='cuda:0')
e70  |  b100  |  MSE 0.002375383861362934  |  Lr 0.0001  |  t tensor([2796, 2139, 3363, 1171, 1361, 1666,  492, 3238, 2579, 1019,  776,  428,
        1855, 2794, 3192, 3680], device='cuda:0')
e70  |  b200  |  MSE 0.00830244179815054  |  Lr 0.0001  |  t tensor([ 303, 1267, 2405, 3611, 1740,  911,  917,  422,  231,  712, 3229, 1447,
        1414, 1037, 3705, 2236], device='cuda:0')
e70  |  b300  |  MSE 0.009824501350522041  |  Lr 0.0001  |  t tensor([3116,  275, 3252, 2532,  253, 3456, 1042, 3491, 3611, 3054, 1201, 1578,
         946,  113, 2040, 3371], device='cuda:0')
e70  |  b400  |  MSE 0.014201484620571136  |  Lr 0.0001  |  t tensor([3809,  340, 2935, 1681, 1915, 3702,  388,  245, 3809,   67, 3543, 2219,
        2823, 2812, 3628, 1607], device='cuda:0')
e70  |  b500  |  MSE 0.009861210361123085 

  0%|          | 0/1563 [00:00<?, ?it/s]

e71  |  b0  |  MSE 0.004110411275178194  |  Lr 0.0001  |  t tensor([3543, 1757, 2755,  465,  419, 3369, 1985,  321, 1874, 3060, 2731, 1901,
        1110, 3438, 1209, 1286], device='cuda:0')
e71  |  b100  |  MSE 0.003251889254897833  |  Lr 0.0001  |  t tensor([2493, 3749, 2003,  321, 2518, 3405,  483,  251, 2015, 2833,  614, 2577,
        3392,  904, 1910, 2271], device='cuda:0')
e71  |  b200  |  MSE 0.024728357791900635  |  Lr 0.0001  |  t tensor([2597,  381, 2250, 2330, 3180, 3795,  270, 3893, 1481,  337, 3018,  586,
         766,  215, 1159,   14], device='cuda:0')
e71  |  b300  |  MSE 0.020701337605714798  |  Lr 0.0001  |  t tensor([3984,   11, 1612, 3712,   14,  535, 2005,  351, 1204, 2749, 2756,  289,
        1671, 3272, 3039, 2938], device='cuda:0')
e71  |  b400  |  MSE 0.009615420363843441  |  Lr 0.0001  |  t tensor([ 302, 3858,  441, 1250, 2383,  124, 1875, 1945, 1189, 3568, 2981, 3894,
         450, 3365, 3514, 3736], device='cuda:0')
e71  |  b500  |  MSE 0.007410803344100714 

  0%|          | 0/1563 [00:00<?, ?it/s]

e72  |  b0  |  MSE 0.004842184949666262  |  Lr 0.0001  |  t tensor([1708,  417, 3714,  552, 1844,  817, 1821,  511, 2984, 1527,  146, 2776,
        3628,  657, 3610, 1087], device='cuda:0')
e72  |  b100  |  MSE 0.004431618377566338  |  Lr 0.0001  |  t tensor([ 268, 3537, 1745, 2131,  516,  556, 1947, 2636,  821, 1333, 2554, 3463,
        1346,  254, 1247,  169], device='cuda:0')
e72  |  b200  |  MSE 0.026340564712882042  |  Lr 0.0001  |  t tensor([3214, 3899, 1865, 3774,   13,  800, 1392,   92, 1047, 2972,  649, 3173,
         465, 3092, 1353, 1845], device='cuda:0')
e72  |  b300  |  MSE 0.021116267889738083  |  Lr 0.0001  |  t tensor([2971,   82, 2281,  882, 3681, 3460, 3454,   76, 1706,  204, 1193,  770,
        3179, 3351, 2488,  694], device='cuda:0')
e72  |  b400  |  MSE 0.001758788013830781  |  Lr 0.0001  |  t tensor([2230, 3896,  664, 2590, 3527,  437, 1654, 3979, 3724, 1681, 2858, 1853,
        1205,  577, 2507, 2917], device='cuda:0')
e72  |  b500  |  MSE 0.012798432260751724 

  0%|          | 0/1563 [00:00<?, ?it/s]

e73  |  b0  |  MSE 0.008144961670041084  |  Lr 0.0001  |  t tensor([2580, 1331, 2140, 1684, 3218, 3176, 1211, 2838, 2184, 1795,  282, 2066,
         885, 2044, 1199, 2131], device='cuda:0')
e73  |  b100  |  MSE 0.003952345810830593  |  Lr 0.0001  |  t tensor([1303, 1873, 3493, 1494, 3608,  827, 1078, 3591,  665, 2832, 3599,  307,
         872, 3428, 2860, 2220], device='cuda:0')
e73  |  b200  |  MSE 0.006090778857469559  |  Lr 0.0001  |  t tensor([2366, 3536,  469, 1496, 3017, 1265,  472,  950,  979, 3468, 2357,  476,
        3160, 3010, 1497,  695], device='cuda:0')
e73  |  b300  |  MSE 0.004912906792014837  |  Lr 0.0001  |  t tensor([ 366,   72, 1871, 3361,  652, 3260, 2932, 1070, 1848, 1252, 3556, 1474,
         675, 3527,  169, 2627], device='cuda:0')
e73  |  b400  |  MSE 0.0034018405713140965  |  Lr 0.0001  |  t tensor([1661, 1951, 3467, 2810, 2040, 3313, 1358, 1320,  346, 2443, 3007, 1166,
        1388, 1708, 3338, 1384], device='cuda:0')
e73  |  b500  |  MSE 0.014093656092882156

  0%|          | 0/1563 [00:00<?, ?it/s]

e74  |  b0  |  MSE 0.011967701837420464  |  Lr 0.0001  |  t tensor([ 331,  135,  437, 3624,  715, 1034,  840, 3753,  133, 3924,  955, 1726,
        1629, 1630, 2182,  709], device='cuda:0')
e74  |  b100  |  MSE 0.0027417684905231  |  Lr 0.0001  |  t tensor([ 614, 1843,  508,  964, 1944, 1825, 1055, 2868, 2782, 1956, 3161, 1364,
        3547,  204,  752, 3549], device='cuda:0')
e74  |  b200  |  MSE 0.01355113834142685  |  Lr 0.0001  |  t tensor([ 585, 3084, 3702, 3411, 2290, 3558, 2456, 1805, 1473, 3816,  596,  834,
          60,  274, 3087, 3125], device='cuda:0')
e74  |  b300  |  MSE 0.007713520433753729  |  Lr 0.0001  |  t tensor([ 727,  219,  472, 3196, 2553, 1714,  927,  978, 3887,  970, 3043, 3686,
         523, 3165,  965,  550], device='cuda:0')
e74  |  b400  |  MSE 0.014678491279482841  |  Lr 0.0001  |  t tensor([3292, 3250, 2805, 3473, 3154, 1034,  333, 3407, 1301, 2703, 3687,   81,
        2052,  160, 1974, 2423], device='cuda:0')
e74  |  b500  |  MSE 0.01280561089515686  |  

  0%|          | 0/1563 [00:00<?, ?it/s]

e75  |  b0  |  MSE 0.0015210327692329884  |  Lr 0.0001  |  t tensor([1210, 2509, 2750, 1241, 3714, 3838, 3276, 1991, 1342, 1684, 3298, 3967,
        3319, 2779, 2631,  381], device='cuda:0')
e75  |  b100  |  MSE 0.02140127122402191  |  Lr 0.0001  |  t tensor([1593, 3067,  234, 3665, 3992, 3343,   52, 1375, 1454,   21,  231,  144,
          72, 3950, 2869, 3616], device='cuda:0')
e75  |  b200  |  MSE 0.009897314012050629  |  Lr 0.0001  |  t tensor([  20, 1131, 1387, 2337, 2955, 3671, 2875,  824, 3328, 1703, 3050,  551,
         853, 3119,  694,  164], device='cuda:0')
e75  |  b300  |  MSE 0.020513411611318588  |  Lr 0.0001  |  t tensor([1401,  495,   64, 1162,  220, 3245, 1362, 3903, 2549, 1395, 3758, 3127,
         984, 2600, 2851,  229], device='cuda:0')
e75  |  b400  |  MSE 0.001016087830066681  |  Lr 0.0001  |  t tensor([3165, 3945, 3098, 1340, 1962, 3714, 1587, 3254, 3099, 1745, 1589,  634,
        2707, 2944, 1183, 1856], device='cuda:0')
e75  |  b500  |  MSE 0.002456966321915388 

  0%|          | 0/1563 [00:00<?, ?it/s]

e76  |  b0  |  MSE 0.0018898805137723684  |  Lr 0.0001  |  t tensor([3275, 2454, 2598, 3268, 2606,  222,  825, 1455, 1182, 1394, 2291, 2689,
        2405, 2211, 3147, 1083], device='cuda:0')
e76  |  b100  |  MSE 0.02118457481265068  |  Lr 0.0001  |  t tensor([3479, 1815, 2528, 1564, 1384, 2439,  142,  939, 2279,  879,   35,  876,
        1796,  752, 3809,   99], device='cuda:0')
e76  |  b200  |  MSE 0.011812656186521053  |  Lr 0.0001  |  t tensor([ 582, 2631, 1127, 2913, 1957,  342,  499, 2717, 3943, 2043, 1519, 3909,
        2768,  156, 2941,  607], device='cuda:0')
e76  |  b300  |  MSE 0.01266107615083456  |  Lr 0.0001  |  t tensor([2770, 3405, 1888, 2344, 2723,  235,   59, 2234, 2418,  248,  980,  339,
         955, 3414, 2115,  244], device='cuda:0')
e76  |  b400  |  MSE 0.008253112435340881  |  Lr 0.0001  |  t tensor([1374,   17, 2831, 1826, 1267, 3136, 3980, 3364, 2874,  194,  990, 3045,
        1111, 3843, 1078, 2978], device='cuda:0')
e76  |  b500  |  MSE 0.0065626539289951324 

  0%|          | 0/1563 [00:00<?, ?it/s]

e77  |  b0  |  MSE 0.009546270594000816  |  Lr 0.0001  |  t tensor([ 450,  159, 3559,  519,   68,  415, 3997, 1116, 2718, 1685, 1247, 3135,
        1001, 1577, 1016, 1502], device='cuda:0')
e77  |  b100  |  MSE 0.02056252583861351  |  Lr 0.0001  |  t tensor([2371, 2537, 1567, 1641,  401,  573, 2016, 3475, 1870,  124,   42, 1160,
        2979, 2889, 1458, 1978], device='cuda:0')
e77  |  b200  |  MSE 0.005297699477523565  |  Lr 0.0001  |  t tensor([1862,  314, 3852, 1651,  508, 2876, 3372, 3909, 2332, 3689,  719, 1891,
        3782, 2596,   85,  214], device='cuda:0')
e77  |  b300  |  MSE 0.002473661210387945  |  Lr 0.0001  |  t tensor([1398,  613, 2872, 3888,  353, 3524, 1905, 1159, 3191, 3675, 3498, 3171,
        1650, 3023, 2514,  944], device='cuda:0')
e77  |  b400  |  MSE 0.012215232476592064  |  Lr 0.0001  |  t tensor([3431,  373, 1010, 2935, 1100,  119,  342, 1163, 2046, 2313,  348, 1484,
        3511, 1295,  487,  664], device='cuda:0')
e77  |  b500  |  MSE 0.0036872215569019318 

  0%|          | 0/1563 [00:00<?, ?it/s]

e78  |  b0  |  MSE 0.044630229473114014  |  Lr 0.0001  |  t tensor([ 222, 1124, 3673, 1972, 3359, 1295, 1621, 1017, 1243, 2787,   57, 1186,
        1074,   51, 2865,  883], device='cuda:0')
e78  |  b100  |  MSE 0.007916289381682873  |  Lr 0.0001  |  t tensor([2274, 2614, 1928, 3580,  710, 2664,  390,  947, 3285, 3680,  558,  171,
        3884,  949, 3313, 2435], device='cuda:0')
e78  |  b200  |  MSE 0.0018546230858191848  |  Lr 0.0001  |  t tensor([2362, 1875, 1960, 3583, 1226, 2998,  616, 2399, 1677, 3440, 3191, 3217,
        2207, 3689,  649, 3905], device='cuda:0')
e78  |  b300  |  MSE 0.002009500516578555  |  Lr 0.0001  |  t tensor([3925, 1327, 1135, 1031, 2388, 2208, 1865, 1907, 1208, 1979, 3608, 3397,
         383, 3385, 3166,  400], device='cuda:0')
e78  |  b400  |  MSE 0.004768741317093372  |  Lr 0.0001  |  t tensor([2090, 3516, 3079, 1053, 3162, 3656, 3527, 1461, 2716,  665,   41, 3159,
        1128, 1006,  560, 1145], device='cuda:0')
e78  |  b500  |  MSE 0.006402001716196537

  0%|          | 0/1563 [00:00<?, ?it/s]

e79  |  b0  |  MSE 0.007332965731620789  |  Lr 0.0001  |  t tensor([1519, 2257,  203, 2169,  389,  574,  449,  337,  187, 3655,  260, 3084,
        1479, 1401,  735, 2798], device='cuda:0')
e79  |  b100  |  MSE 0.009101773612201214  |  Lr 0.0001  |  t tensor([1317, 3065, 3342,  470, 3393, 1401, 2802,  345,  283,   76, 3855, 2314,
        3172, 3460,  501, 3224], device='cuda:0')
e79  |  b200  |  MSE 0.010883135721087456  |  Lr 0.0001  |  t tensor([1907, 1206,  656, 3765, 2311, 2270,  172, 2313,   96, 2745, 3854, 2736,
        2109, 1714,  995, 3548], device='cuda:0')
e79  |  b300  |  MSE 0.0178228672593832  |  Lr 0.0001  |  t tensor([2020, 3270,  666, 3700,  360, 1743, 2636, 3732,  728,    4, 3508, 2186,
        1166, 1012, 2634, 2312], device='cuda:0')
e79  |  b400  |  MSE 0.017980050295591354  |  Lr 0.0001  |  t tensor([ 187, 3374, 3844, 2308,  284,  558,  662, 2480, 1630,  696, 1669,   57,
        1408,   93, 2352, 3815], device='cuda:0')
e79  |  b500  |  MSE 0.007060730829834938  |

  0%|          | 0/1563 [00:00<?, ?it/s]

e80  |  b0  |  MSE 0.005771049298346043  |  Lr 0.0001  |  t tensor([1628, 2346,  676, 2564, 2103, 3888,  156, 1922, 2490, 1271, 3075, 1395,
        2828, 1600, 2510,  306], device='cuda:0')
e80  |  b100  |  MSE 0.0030494919046759605  |  Lr 0.0001  |  t tensor([1844, 1464, 2482,  631, 1532, 3410, 2234, 2033, 2466,  452, 2167,  446,
        1110, 3289, 2256, 3486], device='cuda:0')
e80  |  b200  |  MSE 0.005899528972804546  |  Lr 0.0001  |  t tensor([3501, 3092,  220, 2513, 2451, 1510, 1880, 3403,  406, 2503,  541,  300,
        1483, 1993, 2628,  515], device='cuda:0')
e80  |  b300  |  MSE 0.01601448468863964  |  Lr 0.0001  |  t tensor([ 173, 2215, 1261,  677, 2414, 3112,  212,  897, 3571,  478, 3737, 1856,
        2938,  247, 2317, 3112], device='cuda:0')
e80  |  b400  |  MSE 0.00286100129596889  |  Lr 0.0001  |  t tensor([3239, 2705, 3163,  762, 1901, 2065, 1631, 2974,  756, 3140,  720, 3748,
        1898, 3671, 3327, 1596], device='cuda:0')
e80  |  b500  |  MSE 0.022330395877361298  

  0%|          | 0/1563 [00:00<?, ?it/s]

e81  |  b0  |  MSE 0.007377504836767912  |  Lr 0.0001  |  t tensor([ 601,  679, 3784, 2375, 2440, 1680,  924, 1418,  243,  431, 2652, 1002,
        2951, 1771, 3329, 3727], device='cuda:0')
e81  |  b100  |  MSE 0.0011081609409302473  |  Lr 0.0001  |  t tensor([2942, 2444, 3430, 1341,  177, 2452, 1074,  556, 3204, 3510,  780, 2225,
        2645, 2780, 3799, 3967], device='cuda:0')
e81  |  b200  |  MSE 0.011382218450307846  |  Lr 0.0001  |  t tensor([2772, 3745, 3273,  869, 2705,   82, 2756, 1983, 1190, 1628, 3971, 1723,
        1405,  748,  537, 3408], device='cuda:0')
e81  |  b300  |  MSE 0.002840514061972499  |  Lr 0.0001  |  t tensor([1112, 1288, 1486, 3030, 1694, 1035,  345, 2581, 2477, 2522, 2165,  374,
        2031, 2846, 3607,  411], device='cuda:0')
e81  |  b400  |  MSE 0.013781117275357246  |  Lr 0.0001  |  t tensor([3738, 3009, 3423, 2137, 1517,  280, 2334, 3218,  769, 1808, 1523, 1642,
          68,  998, 3313, 3483], device='cuda:0')
e81  |  b500  |  MSE 0.00830017775297165 

  0%|          | 0/1563 [00:00<?, ?it/s]

e82  |  b0  |  MSE 0.0029584281146526337  |  Lr 0.0001  |  t tensor([1474, 2500, 3249, 3800, 1418, 1887, 2920, 1166, 2705, 3714, 1150, 3857,
        2236,  498, 2861,  331], device='cuda:0')
e82  |  b100  |  MSE 0.003578615840524435  |  Lr 0.0001  |  t tensor([3329,  668, 2226, 1096, 1532, 2490, 3499, 1390, 3445, 1960, 2522, 1620,
        3484, 2345, 1518,  530], device='cuda:0')
e82  |  b200  |  MSE 0.009611701592803001  |  Lr 0.0001  |  t tensor([ 201, 2324, 2968,  429, 3531, 1121,  344,  327,  762, 1979, 3893,  481,
        3038, 1166, 1843, 2849], device='cuda:0')
e82  |  b300  |  MSE 0.005109175108373165  |  Lr 0.0001  |  t tensor([3094,  493, 3113,  622, 1468, 3412,  176, 3809, 2158, 1028, 2485, 3444,
        2914,  710,   82, 2832], device='cuda:0')
e82  |  b400  |  MSE 0.00025102129438892007  |  Lr 0.0001  |  t tensor([3338, 3560, 2864, 3623, 2230, 3985, 2702, 2715, 3306, 1898, 3314, 1188,
        3694, 1551, 2342, 2434], device='cuda:0')
e82  |  b500  |  MSE 0.0089057153090834

  0%|          | 0/1563 [00:00<?, ?it/s]

e83  |  b0  |  MSE 0.03338342532515526  |  Lr 0.0001  |  t tensor([3845, 3602,  137,  996, 3308,  829, 2036, 2894,  594,  642, 3672,   42,
        2423, 3578,   11,   79], device='cuda:0')
e83  |  b100  |  MSE 0.0018181758932769299  |  Lr 0.0001  |  t tensor([1747, 2338, 1753, 3333, 2655,  110, 3851, 3468, 3090, 1970, 2768,  721,
        1375, 3624, 3643,  695], device='cuda:0')
e83  |  b200  |  MSE 0.000842287321574986  |  Lr 0.0001  |  t tensor([3474, 2238,  987, 2019, 3229, 2950,  443, 1607, 1556, 1691, 3094, 1060,
        1434, 3300, 2861, 3286], device='cuda:0')
e83  |  b300  |  MSE 0.0065669286996126175  |  Lr 0.0001  |  t tensor([1533,  482, 1431, 2142,  686, 2308,   50, 3220,  346, 1304,  952,  188,
        2476, 2697, 3087, 1039], device='cuda:0')
e83  |  b400  |  MSE 0.004109418019652367  |  Lr 0.0001  |  t tensor([3229,  407, 2820, 2756, 2988, 3107, 2543, 3935, 3665, 3377,  268, 3412,
        3148, 1084, 3359, 3876], device='cuda:0')
e83  |  b500  |  MSE 0.005133121274411678

  0%|          | 0/1563 [00:00<?, ?it/s]

e84  |  b0  |  MSE 0.02166840061545372  |  Lr 0.0001  |  t tensor([ 909, 1507, 3361,  136, 3291, 2840,  193, 2858, 2127, 1657, 2567, 3974,
        1395,  401,  818,  498], device='cuda:0')
e84  |  b100  |  MSE 0.0064058806747198105  |  Lr 0.0001  |  t tensor([1711, 1909,  565,  203, 1761, 3711, 3068, 1661, 3949,   10, 2388,  766,
        1154, 1858, 1790, 1617], device='cuda:0')
e84  |  b200  |  MSE 0.0013576308265328407  |  Lr 0.0001  |  t tensor([1533, 3919, 2960, 3884,  940, 3743, 2716, 3396,  634, 2917, 1899, 3206,
         978, 2981, 2311, 1579], device='cuda:0')
e84  |  b300  |  MSE 0.0022735577076673508  |  Lr 0.0001  |  t tensor([ 517, 2221, 2694, 3200, 2672, 1397,  877, 1615, 3268, 3936, 2474, 2461,
        3203, 2464, 1624, 3568], device='cuda:0')
e84  |  b400  |  MSE 0.004210805520415306  |  Lr 0.0001  |  t tensor([3030, 2054,  130, 2298,  338, 3740, 3613,  264, 1486, 2472, 3462, 3439,
        3550, 2670, 3107, 3571], device='cuda:0')
e84  |  b500  |  MSE 0.00895901490002870

  0%|          | 0/1563 [00:00<?, ?it/s]

e85  |  b0  |  MSE 0.006354810204356909  |  Lr 0.0001  |  t tensor([3418, 3439, 3785, 3320,  541, 1410,  168, 1872, 1520, 2289, 2900,  637,
        3808, 2849, 2184,   81], device='cuda:0')
e85  |  b100  |  MSE 0.00208780774846673  |  Lr 0.0001  |  t tensor([1218, 1278, 1778, 1039, 1523, 3732, 1773, 1171,  336, 3973, 3805,  573,
        1096, 2658, 2511, 1529], device='cuda:0')
e85  |  b200  |  MSE 0.010267859324812889  |  Lr 0.0001  |  t tensor([2301, 1021, 2547, 3487, 3984,  321, 3766, 3944, 1344, 2658, 3383, 2091,
         167, 1630, 3139, 2438], device='cuda:0')
e85  |  b300  |  MSE 0.002850446617230773  |  Lr 0.0001  |  t tensor([1753,  992, 1974,  846,  784, 1506,  726, 2144, 1236, 2700, 2506, 3537,
        2640, 3927, 2649, 3318], device='cuda:0')
e85  |  b400  |  MSE 0.006840936839580536  |  Lr 0.0001  |  t tensor([2260,  535, 2616,  444, 1187, 1142, 3907, 2823, 3975, 2079, 2472, 1157,
        1509, 3080, 3401, 3733], device='cuda:0')
e85  |  b500  |  MSE 0.008165635168552399  

  0%|          | 0/1563 [00:00<?, ?it/s]

e86  |  b0  |  MSE 0.007000168785452843  |  Lr 0.0001  |  t tensor([ 535,  482, 3896,  842, 2212, 3553, 1344, 1497, 2925, 1311, 3942, 2045,
        1796,  160, 1475, 2212], device='cuda:0')
e86  |  b100  |  MSE 0.0075984736904501915  |  Lr 0.0001  |  t tensor([ 339,  270, 1108, 3590, 3153,  539, 3918, 3989, 1200, 1916,  824, 1942,
        2132, 1497, 3630, 2563], device='cuda:0')
e86  |  b200  |  MSE 0.0032478095963597298  |  Lr 0.0001  |  t tensor([2211, 3075, 1736, 3963, 2767,  778, 1605, 3250, 1479, 2372,  341,  606,
        2863, 1154,  827, 1232], device='cuda:0')
e86  |  b300  |  MSE 0.009447479620575905  |  Lr 0.0001  |  t tensor([2961,  742, 2143, 2856, 2955, 3961,  358, 3541, 1201, 1411,  490, 3570,
        3732, 1002,  825,  396], device='cuda:0')
e86  |  b400  |  MSE 0.007451603189110756  |  Lr 0.0001  |  t tensor([ 339, 3424, 3372, 3015, 3852,  235, 3254,  871,  653, 1424, 3763, 3021,
        2765, 2879, 3524, 3076], device='cuda:0')
e86  |  b500  |  MSE 0.00229194457642734

  0%|          | 0/1563 [00:00<?, ?it/s]

e87  |  b0  |  MSE 0.005875870585441589  |  Lr 0.0001  |  t tensor([ 801, 2477,  188, 3688, 1246, 1795,   27, 1685, 3361,  842, 3536,  751,
        2217, 2486, 3519, 1099], device='cuda:0')
e87  |  b100  |  MSE 0.012404399923980236  |  Lr 0.0001  |  t tensor([ 145,  153, 2703,  341, 2215,  295, 1292, 2347, 1526, 2328, 3219, 1936,
         754, 1038, 3345, 1562], device='cuda:0')
e87  |  b200  |  MSE 0.005418609827756882  |  Lr 0.0001  |  t tensor([3176, 2496, 1483,  782,  772, 2169,  104, 2973, 2885,  842,  677, 2607,
        1904, 3893,  374, 2539], device='cuda:0')
e87  |  b300  |  MSE 0.02573990635573864  |  Lr 0.0001  |  t tensor([2160, 1472, 1472, 3753, 2899, 1718, 1901, 2183, 2237, 2854, 2317,   25,
        3225, 2123,  827, 2523], device='cuda:0')
e87  |  b400  |  MSE 0.030564017593860626  |  Lr 0.0001  |  t tensor([ 128,  202, 1441, 1634, 2509, 3353, 3147,  508,   16, 1446, 2937, 1781,
        2241, 2968, 2256, 3577], device='cuda:0')
e87  |  b500  |  MSE 0.012762396596372128  

  0%|          | 0/1563 [00:00<?, ?it/s]

e88  |  b0  |  MSE 0.006290986202657223  |  Lr 0.0001  |  t tensor([3519, 3171,  310,  907, 2974, 1720, 3083, 3454, 2977,  253, 3209, 2299,
        2182, 3323, 2420,  584], device='cuda:0')
e88  |  b100  |  MSE 0.0029861945658922195  |  Lr 0.0001  |  t tensor([1850, 1430, 1474, 1789, 1493, 1548, 1349, 1647,  909, 3050, 3372,  566,
        1626, 3151, 2346,  246], device='cuda:0')
e88  |  b200  |  MSE 0.006150887347757816  |  Lr 0.0001  |  t tensor([1445, 3744,  776, 2041, 2214, 2178, 3077, 1268,  202, 1590, 1280, 3092,
         786, 3489,  302, 1342], device='cuda:0')
e88  |  b300  |  MSE 0.0019267717143520713  |  Lr 0.0001  |  t tensor([ 928, 1803, 3728, 2538, 1170,  423, 2918,  953, 1151, 2188, 2020, 2016,
         961, 3853, 1442, 2706], device='cuda:0')
e88  |  b400  |  MSE 0.0057305870577692986  |  Lr 0.0001  |  t tensor([2376,  560, 3522, 2866, 3850, 1719,   16,  299, 3200, 2735, 2579, 3065,
        1769, 3928, 1149, 2080], device='cuda:0')
e88  |  b500  |  MSE 0.0040253857150673

  0%|          | 0/1563 [00:00<?, ?it/s]

e89  |  b0  |  MSE 0.004079979844391346  |  Lr 0.0001  |  t tensor([1859, 2097,  292,  643, 2028, 3970,  421, 1565, 2779, 1405, 2483, 1780,
        3906, 1871, 3100, 2508], device='cuda:0')
e89  |  b100  |  MSE 0.0038055027835071087  |  Lr 0.0001  |  t tensor([2493, 1894, 1229, 2531, 1549, 2369, 2729, 3456, 1222, 3145,  770, 1203,
         743, 3082,  543,  282], device='cuda:0')
e89  |  b200  |  MSE 0.002104851184412837  |  Lr 0.0001  |  t tensor([1286, 2961, 1733,  429,  443, 3812, 2416, 1008, 3245, 3376, 2882, 3828,
        1235, 3001, 1329, 1982], device='cuda:0')
e89  |  b300  |  MSE 0.0022865296341478825  |  Lr 0.0001  |  t tensor([2951, 3644, 2046,  388,  283, 3466, 3656, 2185,  739, 2720, 3380, 2085,
        2131, 1291, 3020, 3345], device='cuda:0')
e89  |  b400  |  MSE 0.04388146847486496  |  Lr 0.0001  |  t tensor([1826, 3247, 2235,  225, 3588, 3323,    2,  352, 2366, 1787, 2610,   18,
         774, 3201, 2977, 3498], device='cuda:0')
e89  |  b500  |  MSE 0.000671580317430198

  0%|          | 0/1563 [00:00<?, ?it/s]

e90  |  b0  |  MSE 0.02461874857544899  |  Lr 0.0001  |  t tensor([ 744, 1810, 1675,  528,   62, 2713, 1222, 3949, 1928, 2839, 2582, 2867,
        1358,  141, 1783, 2018], device='cuda:0')
e90  |  b100  |  MSE 0.00920550525188446  |  Lr 0.0001  |  t tensor([2954, 2312, 3683,  846, 1993,  738, 3339, 3020, 2982, 1405, 2968, 1338,
        2457, 2428,   63, 1885], device='cuda:0')
e90  |  b200  |  MSE 0.006081167608499527  |  Lr 0.0001  |  t tensor([3449, 1789, 1047, 3097, 1942, 3811, 2713,  406, 3707,  440, 1978, 1720,
         403, 1264, 2097,  286], device='cuda:0')
e90  |  b300  |  MSE 0.0009933309629559517  |  Lr 0.0001  |  t tensor([2469, 3980, 1142, 2786, 1275, 1950, 2207, 2513,  594, 1746, 2949, 2599,
        1142, 2627, 2691, 3762], device='cuda:0')
e90  |  b400  |  MSE 0.016989953815937042  |  Lr 0.0001  |  t tensor([1363, 2353, 2537, 3901, 3561, 2624,   39,  165, 3236, 1771, 2966, 1123,
        2515,  302, 1891, 2877], device='cuda:0')
e90  |  b500  |  MSE 0.003408792894333601  

  0%|          | 0/1563 [00:00<?, ?it/s]

e91  |  b0  |  MSE 0.005918923765420914  |  Lr 0.0001  |  t tensor([1638, 3399, 3250, 1596, 3224, 1732, 3048,  702, 1883,  699,  197, 3417,
        1141, 2926,  428, 3199], device='cuda:0')
e91  |  b100  |  MSE 0.0035700525622814894  |  Lr 0.0001  |  t tensor([ 591, 1418, 3982,  601,  718, 3954, 3483,  682, 2104, 2609, 2257, 2893,
        2284, 3834,  283, 3113], device='cuda:0')
e91  |  b200  |  MSE 0.036886077374219894  |  Lr 0.0001  |  t tensor([3045, 2968,  385, 3895, 2997,    3, 2417, 3485, 3100,  502, 2329, 1408,
        1617,   18, 2922, 2519], device='cuda:0')
e91  |  b300  |  MSE 0.002930180635303259  |  Lr 0.0001  |  t tensor([3806, 3619, 1785, 2421, 2868, 3379,  492, 1153, 3638, 1269, 2789,  763,
        2546, 2709, 3055, 3810], device='cuda:0')
e91  |  b400  |  MSE 0.011441554874181747  |  Lr 0.0001  |  t tensor([ 401, 1720,  147, 1447, 3916, 1772, 2019, 1379,  914,  950, 1940, 2408,
        3246,  231, 1801, 3164], device='cuda:0')
e91  |  b500  |  MSE 0.006417015567421913

  0%|          | 0/1563 [00:00<?, ?it/s]

e92  |  b0  |  MSE 0.004496071487665176  |  Lr 0.0001  |  t tensor([3571, 1863,  332,  684,  875, 3699, 1258,  758,  159, 2154, 1137, 3003,
        2651, 1817, 2213, 2741], device='cuda:0')
e92  |  b100  |  MSE 0.003243108280003071  |  Lr 0.0001  |  t tensor([3604, 3279, 2159, 1799, 2482, 2229, 3777, 2688, 1635,  860,  160, 3699,
        3506, 3852, 2420, 3734], device='cuda:0')
e92  |  b200  |  MSE 0.02440624311566353  |  Lr 0.0001  |  t tensor([3947,  999, 2491, 1007, 3188, 2057,  424,   93, 2154,   30, 1266,  260,
        1964, 2375, 1416, 2761], device='cuda:0')
e92  |  b300  |  MSE 0.003881265176460147  |  Lr 0.0001  |  t tensor([ 769,  917, 2823, 1740, 3193,  948, 1909, 1846, 3203, 3364,   44, 1807,
        3171, 3588,  806, 3435], device='cuda:0')
e92  |  b400  |  MSE 0.007900964468717575  |  Lr 0.0001  |  t tensor([2799,  244, 2301, 1677, 1665, 3064, 1705,  557,  619,  990,  868, 3424,
        1238, 2255,  456, 3556], device='cuda:0')
e92  |  b500  |  MSE 0.006769093684852123  

  0%|          | 0/1563 [00:00<?, ?it/s]

e93  |  b0  |  MSE 0.003605425823479891  |  Lr 0.0001  |  t tensor([2798, 2605, 3689,  251, 1742, 2341, 1903, 3004,  332, 1956,  531, 3276,
         660, 3419, 2442, 3332], device='cuda:0')
e93  |  b100  |  MSE 0.008546467870473862  |  Lr 0.0001  |  t tensor([3712, 1774, 3096, 1587, 2435, 1069, 1208, 1839, 2236, 3023, 2519,  685,
         517, 2615, 2837,   50], device='cuda:0')
e93  |  b200  |  MSE 0.0038458541966974735  |  Lr 0.0001  |  t tensor([ 381, 2211,  654, 1546, 3691, 1464, 2315,  585, 2734, 1029, 3134, 2824,
        3657, 2778, 3825, 2526], device='cuda:0')
e93  |  b300  |  MSE 0.008935585618019104  |  Lr 0.0001  |  t tensor([ 700, 3468,  322,  980,  660,  301,  704, 2499, 1478, 2058,  200, 1651,
         154,  294, 1867, 2345], device='cuda:0')
e93  |  b400  |  MSE 0.017265893518924713  |  Lr 0.0001  |  t tensor([3147, 3224,  453,  104,  257, 2772, 1213, 3223, 2480, 1712, 3741, 3846,
         374,  929, 1597, 3899], device='cuda:0')
e93  |  b500  |  MSE 0.007575224153697491

  0%|          | 0/1563 [00:00<?, ?it/s]

e94  |  b0  |  MSE 0.008865360170602798  |  Lr 0.0001  |  t tensor([3003, 3098, 1159, 3070,  387,  894, 1933, 3262, 1900, 3413, 1548, 3538,
         229, 3212, 3779,   35], device='cuda:0')
e94  |  b100  |  MSE 0.0013072158908471465  |  Lr 0.0001  |  t tensor([3553, 3115, 3282, 2567, 1043, 1461, 2872, 3145, 1808,  362, 2640, 2528,
        2151, 3783, 2426,  816], device='cuda:0')
e94  |  b200  |  MSE 0.006951179355382919  |  Lr 0.0001  |  t tensor([1204,  871,  258, 1842, 2452, 2324, 1149,  557, 3325, 3158, 1267, 1908,
        2209, 3358, 3851, 2971], device='cuda:0')
e94  |  b300  |  MSE 0.004255197010934353  |  Lr 0.0001  |  t tensor([ 711,  370, 1545, 3311,  575, 2472, 3442,  801, 2839, 3286,  835, 3948,
        2204,  561, 2082, 1336], device='cuda:0')
e94  |  b400  |  MSE 0.005329688545316458  |  Lr 0.0001  |  t tensor([3944, 1637, 1490, 2498, 1549,  571, 2976, 1546, 1008,  468, 2346,  402,
        3800, 1480, 1190, 1127], device='cuda:0')
e94  |  b500  |  MSE 0.013109346851706505

  0%|          | 0/1563 [00:00<?, ?it/s]

e95  |  b0  |  MSE 0.006365802139043808  |  Lr 0.0001  |  t tensor([3282, 3954, 1414, 2937,  777, 3127, 3626, 2341, 2701,  386,  802,  725,
        1562, 1598, 3477,  298], device='cuda:0')
e95  |  b100  |  MSE 0.004973389208316803  |  Lr 0.0001  |  t tensor([1804, 2198,  192, 3718, 2777, 3634, 2267, 1713, 3921, 2464, 2127, 2545,
         342, 1283, 1753, 2546], device='cuda:0')
e95  |  b200  |  MSE 0.012833843939006329  |  Lr 0.0001  |  t tensor([2296, 1060, 1455,  543, 1141, 1068, 3245, 2265, 1233, 2612, 1930, 2808,
         985, 2211,   22, 3901], device='cuda:0')
e95  |  b300  |  MSE 0.004625878296792507  |  Lr 0.0001  |  t tensor([1462, 1242, 1346, 2531, 3802, 1481, 2305, 1348,  361, 2903, 3444,  745,
        1188, 2105, 2364, 3780], device='cuda:0')
e95  |  b400  |  MSE 0.013693375512957573  |  Lr 0.0001  |  t tensor([ 581,  141,  420, 2994,  356,  971,  323,  730, 1444, 3041, 2922, 3677,
        2400, 1399,  437, 3458], device='cuda:0')
e95  |  b500  |  MSE 0.00831291452050209  

  0%|          | 0/1563 [00:00<?, ?it/s]

e96  |  b0  |  MSE 0.0037240139208734035  |  Lr 0.0001  |  t tensor([3027, 2656, 1347, 3915,   58, 1296, 3951,  805,  740, 3382,  542,  927,
         560, 3926, 3087,  677], device='cuda:0')
e96  |  b100  |  MSE 0.0008148247725330293  |  Lr 0.0001  |  t tensor([3624, 1969, 1882, 2719,  835, 3407, 1711, 1730, 3035, 1677, 1875, 2105,
        2953, 2256, 1672, 1294], device='cuda:0')
e96  |  b200  |  MSE 0.005672580562531948  |  Lr 0.0001  |  t tensor([2346, 1093,  513, 3659,  609, 2495,  336,  887, 2553, 3449, 3717,  404,
        2686,  732,  723, 3371], device='cuda:0')
e96  |  b300  |  MSE 0.011558289639651775  |  Lr 0.0001  |  t tensor([1737,  273, 1675, 1251, 3349, 2098, 1202,  122, 1145,  899,  126, 3870,
         923,  311, 2426, 2088], device='cuda:0')
e96  |  b400  |  MSE 0.029237043112516403  |  Lr 0.0001  |  t tensor([ 587, 2952, 2066, 2094, 1354, 2326,  121, 2064,   51, 3375, 2495,  542,
        3074,  653, 2525, 1563], device='cuda:0')
e96  |  b500  |  MSE 0.00321779446676373

  0%|          | 0/1563 [00:00<?, ?it/s]

e97  |  b0  |  MSE 0.006377407815307379  |  Lr 0.0001  |  t tensor([1084,  893, 3718, 1027,  356, 1043,  745, 3199,  214, 1501, 3868, 1148,
         242, 3624, 2765, 2405], device='cuda:0')
e97  |  b100  |  MSE 0.008478634059429169  |  Lr 0.0001  |  t tensor([2619,  329, 3279, 2077, 2560, 3310, 3824, 3437,   52, 1969, 2017,  498,
        2925, 1851, 1175, 2361], device='cuda:0')
e97  |  b200  |  MSE 0.01244016271084547  |  Lr 0.0001  |  t tensor([3938, 1148, 2892,  806, 1376,  426,   41, 3266, 3986, 1900, 3497,  723,
        2164, 3475, 1096, 3198], device='cuda:0')
e97  |  b300  |  MSE 0.005363688804209232  |  Lr 0.0001  |  t tensor([2029, 2844, 1076, 3766, 1458, 2386,  108, 2930, 3875, 3942, 2909, 1403,
        2191, 1826, 2272, 2005], device='cuda:0')
e97  |  b400  |  MSE 0.014290621504187584  |  Lr 0.0001  |  t tensor([2454, 3782, 3932, 3588, 2904,  118, 3708, 3033,  935, 3316, 2349,   53,
         834, 2556, 3270, 2009], device='cuda:0')
e97  |  b500  |  MSE 0.008765826001763344  

  0%|          | 0/1563 [00:00<?, ?it/s]

e98  |  b0  |  MSE 0.007064541801810265  |  Lr 0.0001  |  t tensor([3042, 2681,   38,  685, 1898,  541, 3556, 3320, 3813, 3912, 1486,  691,
        1868,  181, 3480, 3569], device='cuda:0')
e98  |  b100  |  MSE 0.008807527832686901  |  Lr 0.0001  |  t tensor([ 428,  718,  342, 1440, 1444, 2846,  724, 3500, 3231,  882, 2013,  872,
        3683, 2195, 3730, 2359], device='cuda:0')
e98  |  b200  |  MSE 0.008840812370181084  |  Lr 0.0001  |  t tensor([2091,   27, 2723, 2110,  554, 1094,  400, 2103,  837, 1805, 2060,  393,
         661, 2137, 3987, 1944], device='cuda:0')
e98  |  b300  |  MSE 0.02037571370601654  |  Lr 0.0001  |  t tensor([ 429,  535,  337,   59, 2984,  851, 2655, 2904,  939,  184, 2184, 1877,
        2354, 2154, 1742, 1466], device='cuda:0')
e98  |  b400  |  MSE 0.008523145690560341  |  Lr 0.0001  |  t tensor([2603, 3707,  958, 1451, 1638,   43, 2244, 3648, 3560, 2680, 2795, 1279,
        1642, 1980, 1178, 3001], device='cuda:0')
e98  |  b500  |  MSE 0.013119678013026714  

  0%|          | 0/1563 [00:00<?, ?it/s]

e99  |  b0  |  MSE 0.025882786139845848  |  Lr 0.0001  |  t tensor([3678,  361, 3577,    5, 1406, 1297, 3983, 3330, 3929,  264, 1948, 3044,
        2979, 3636, 3817,  459], device='cuda:0')
e99  |  b100  |  MSE 0.009701849892735481  |  Lr 0.0001  |  t tensor([ 478, 2676, 3734, 2458, 3907, 3495, 3850,  903,   91,  653,  586, 1872,
        3623, 3091, 2571, 1908], device='cuda:0')
e99  |  b200  |  MSE 0.0030473018996417522  |  Lr 0.0001  |  t tensor([ 667, 2298, 1877, 1896, 1456, 3117,  468, 2579, 2611, 3361, 1360, 1565,
        3963, 1316, 2606, 1186], device='cuda:0')
e99  |  b300  |  MSE 0.009228190407156944  |  Lr 0.0001  |  t tensor([3695, 2265,  777, 3966, 3327, 1084, 1578, 2700,   64, 3297,  885, 1405,
        3261, 3482, 1675,  734], device='cuda:0')
e99  |  b400  |  MSE 0.004205795936286449  |  Lr 0.0001  |  t tensor([1817, 3333, 1195, 1272, 2908, 2853, 3935,  541, 3018, 1000, 2343, 1812,
        3593,  891,  762, 1906], device='cuda:0')
e99  |  b500  |  MSE 0.003315946087241173

  0%|          | 0/1563 [00:00<?, ?it/s]

e100  |  b0  |  MSE 0.029378741979599  |  Lr 0.0001  |  t tensor([2738, 1478,    9, 1279,  495, 1287, 3470, 2265, 2381, 3336, 3912,   74,
        1564, 2191, 2896,  353], device='cuda:0')
e100  |  b100  |  MSE 0.0019313896773383021  |  Lr 0.0001  |  t tensor([2547,  333, 1515,  579,  939, 1537, 1625, 3585, 2237, 1523, 2916, 2200,
        3435, 2845, 2790, 2223], device='cuda:0')
e100  |  b200  |  MSE 0.041563086211681366  |  Lr 0.0001  |  t tensor([2598,  518, 3477, 1225,   69,  404,   75, 3493, 1606, 3931, 3221,   18,
        3022, 2059,   10,   66], device='cuda:0')
e100  |  b300  |  MSE 0.017618104815483093  |  Lr 0.0001  |  t tensor([2132, 2456, 1848,  693, 2328,   38, 2606, 2990,  238, 1677, 1270, 2113,
        2986,   12, 2635, 1099], device='cuda:0')
e100  |  b400  |  MSE 0.001118199434131384  |  Lr 0.0001  |  t tensor([1535, 2078, 2558, 2932, 1058, 2007, 2832,  754, 2528, 3831,  640, 2708,
         737, 2535, 3419,  761], device='cuda:0')
e100  |  b500  |  MSE 0.004221659153699