In [1]:
import copy
import random
from functools import wraps

import torch
from torch import nn
import torch.nn.functional as F
import torch.distributed as dist

from tqdm import tqdm
from torchvision import transforms as T

# Loss fn in BYOL paper
def loss_fn(x, y):
    x = F.normalize(x, dim=-1, p=2)
    y = F.normalize(y, dim=-1, p=2)
    return 2 - 2 * (x * y).sum(dim=-1)

In [2]:
resnet_path = './style-resnet-ds-norm.pt'
learner_path = './style-learner-net-ds-norm.pt'

In [3]:
class RandomApply(nn.Module):
    def __init__(self, fn, p):
        super().__init__()
        self.fn = fn
        self.p = p
    def forward(self, x):
        if random.random() > self.p:
            return x
        return self.fn(x)

In [4]:
def default(val, def_val):
    return def_val if val is None else val

def flatten(t):
    return t.reshape(t.shape[0], -1)

def singleton(cache_key):
    def inner_fn(fn):
        @wraps(fn)
        def wrapper(self, *args, **kwargs):
            instance = getattr(self, cache_key)
            if instance is not None:
                return instance

            instance = fn(self, *args, **kwargs)
            setattr(self, cache_key, instance)
            return instance
        return wrapper
    return inner_fn

def get_module_device(module):
    return next(module.parameters()).device

def set_requires_grad(model, val):
    for p in model.parameters():
        p.requires_grad = val

def MaybeSyncBatchnorm(is_distributed = None):
    is_distributed = default(is_distributed, dist.is_initialized() and dist.get_world_size() > 1)
    return nn.SyncBatchNorm if is_distributed else nn.BatchNorm1d

# loss fn

# augmentation utils

class RandomApply(nn.Module):
    def __init__(self, fn, p):
        super().__init__()
        self.fn = fn
        self.p = p
    def forward(self, x):
        if random.random() > self.p:
            return x
        return self.fn(x)

# exponential moving average

class EMA():
    def __init__(self, beta):
        super().__init__()
        self.beta = beta

    def update_average(self, old, new):
        if old is None:
            return new
        return old * self.beta + (1 - self.beta) * new

def update_moving_average(ema_updater, ma_model, current_model):
    for current_params, ma_params in zip(current_model.parameters(), ma_model.parameters()):
        old_weight, up_weight = ma_params.data, current_params.data
        ma_params.data = ema_updater.update_average(old_weight, up_weight)

# MLP class for projector and predictor

def MLP(dim, projection_size, hidden_size=4096, sync_batchnorm=None):
    return nn.Sequential(
        nn.Linear(dim, hidden_size),
        MaybeSyncBatchnorm(sync_batchnorm)(hidden_size),
        nn.ReLU(inplace=True),
        nn.Linear(hidden_size, projection_size)
    )

def SimSiamMLP(dim, projection_size, hidden_size=4096, sync_batchnorm=None):
    return nn.Sequential(
        nn.Linear(dim, hidden_size, bias=False),
        MaybeSyncBatchnorm(sync_batchnorm)(hidden_size),
        nn.ReLU(inplace=True),
        nn.Linear(hidden_size, hidden_size, bias=False),
        MaybeSyncBatchnorm(sync_batchnorm)(hidden_size),
        nn.ReLU(inplace=True),
        nn.Linear(hidden_size, projection_size, bias=False),
        MaybeSyncBatchnorm(sync_batchnorm)(projection_size, affine=False)
    )

# a wrapper class for the base neural network
# will manage the interception of the hidden layer output
# and pipe it into the projecter and predictor nets

class NetWrapper(nn.Module):
    def __init__(self, net, projection_size, projection_hidden_size, layer = -2, use_simsiam_mlp = False, sync_batchnorm = None):
        super().__init__()
        self.net = net
        self.layer = layer

        self.projector = None
        self.projection_size = projection_size
        self.projection_hidden_size = projection_hidden_size

        self.use_simsiam_mlp = use_simsiam_mlp
        self.sync_batchnorm = sync_batchnorm

        self.hidden = {}
        self.hook_registered = False

    def _find_layer(self):
        if type(self.layer) == str:
            modules = dict([*self.net.named_modules()])
            return modules.get(self.layer, None)
        elif type(self.layer) == int:
            children = [*self.net.children()]
            return children[self.layer]
        return None

    def _hook(self, _, input, output):
        device = input[0].device
        self.hidden[device] = flatten(output)

    def _register_hook(self):
        layer = self._find_layer()
        assert layer is not None, f'hidden layer ({self.layer}) not found'
        handle = layer.register_forward_hook(self._hook)
        self.hook_registered = True

    @singleton('projector')
    def _get_projector(self, hidden):
        _, dim = hidden.shape
        create_mlp_fn = MLP if not self.use_simsiam_mlp else SimSiamMLP
        projector = create_mlp_fn(dim, self.projection_size, self.projection_hidden_size, sync_batchnorm = self.sync_batchnorm)
        return projector.to(hidden)

    def get_representation(self, x):
        if self.layer == -1:
            return self.net(x)

        if not self.hook_registered:
            self._register_hook()

        self.hidden.clear()
        _ = self.net(x)
        hidden = self.hidden[x.device]
        self.hidden.clear()

        assert hidden is not None, f'hidden layer {self.layer} never emitted an output'
        return hidden

    def forward(self, x, return_projection = True):
        representation = self.get_representation(x)

        if not return_projection:
            return representation

        projector = self._get_projector(representation)
        projection = projector(representation)
        return projection, representation


In [5]:
image_size = 256
DEFAULT_AUG = torch.nn.Sequential(
            RandomApply(
                T.ColorJitter(0.8, 0.8, 0.8, 0.2),
                p = 0.3
            ),
            T.RandomGrayscale(p=0.2),
            T.RandomHorizontalFlip(),
            RandomApply(
                T.GaussianBlur((3, 3), (1.0, 2.0)),
                p = 0.2
            ),
            T.RandomResizedCrop((image_size, image_size)),
            T.Normalize(
                mean=torch.tensor([0.485, 0.456, 0.406]),
                std=torch.tensor([0.229, 0.224, 0.225])),
        )

In [6]:
def loss_fn(x1, x2, temperature=0.7):
    # Normalize input embeddings
    x1_normalized = F.normalize(x1, dim=-1)
    x2_normalized = F.normalize(x2, dim=-1)
    
    # Calculate cosine similarity matrix
    similarity_matrix = torch.matmul(x1_normalized, x2_normalized.T)
    
    # Calculate logits
    logits = similarity_matrix / temperature
    
    # Calculate diagonal terms for positive samples
    diag_terms = torch.diag(logits)
    
    # Calculate numerator (positive term)
    numerator = torch.exp(diag_terms / temperature)
    
    # Calculate denominator (positive and negative terms)
    denominator = torch.sum(torch.exp(logits), dim=1) + torch.exp(diag_terms / temperature)
    
    # Calculate loss
    loss = -torch.mean(torch.log(numerator / denominator))
    
    return loss


In [7]:
class BYOL(nn.Module):
    def __init__(
        self,
        net,
        image_size,
        hidden_layer = -2,
        projection_size = 256,
        projection_hidden_size = 4096,
        augment_fn = None,
        augment_fn2 = None,
        moving_average_decay = 0.99,
        use_momentum = True,
        sync_batchnorm = None
    ):
        super().__init__()
        self.net = net

        # default SimCLR augmentation

        DEFAULT_AUG = torch.nn.Sequential(
            RandomApply(
                T.ColorJitter(0.8, 0.8, 0.8, 0.2),
                p = 0.3
            ),
            T.RandomGrayscale(p=0.2),
            T.RandomHorizontalFlip(),
            RandomApply(
                T.GaussianBlur((3, 3), (1.0, 2.0)),
                p = 0.2
            ),
            T.RandomResizedCrop((image_size, image_size)),
            T.Normalize(
                mean=torch.tensor([0.485, 0.456, 0.406]),
                std=torch.tensor([0.229, 0.224, 0.225])),
        )

        self.augment1 = default(augment_fn, DEFAULT_AUG)
        self.augment2 = default(augment_fn2, self.augment1)

        self.online_encoder = NetWrapper(
            net,
            projection_size,
            projection_hidden_size,
            layer = hidden_layer,
            use_simsiam_mlp = not use_momentum,
            sync_batchnorm = sync_batchnorm
        )

        self.use_momentum = use_momentum
        self.target_encoder = None
        self.target_ema_updater = EMA(moving_average_decay)

        self.online_predictor = MLP(projection_size, projection_size, projection_hidden_size)

        # get device of network and make wrapper same device
        device = get_module_device(net)
        self.to(device)

        # send a mock image tensor to instantiate singleton parameters
        self.forward(torch.randn(2, 3, image_size, image_size, device=device) )

    @singleton('target_encoder')
    def _get_target_encoder(self):
        target_encoder = copy.deepcopy(self.online_encoder)
        set_requires_grad(target_encoder, False)
        return target_encoder

    def reset_moving_average(self):
        del self.target_encoder
        self.target_encoder = None

    def update_moving_average(self):
        assert self.use_momentum, 'you do not need to update the moving average, since you have turned off momentum for the target encoder'
        assert self.target_encoder is not None, 'target encoder has not been created yet'
        update_moving_average(self.target_ema_updater, self.target_encoder, self.online_encoder)

    def forward(
        self,
        x,
        # y,
        return_embedding = False,
        return_projection = True
    ):
        assert not (self.training and x.shape[0] == 1), 'you must have greater than 1 sample when training, due to the batchnorm in the projection layer'

        if return_embedding:
            return self.online_encoder(x, return_projection = return_projection)

        image_one, image_two = self.augment1(x), self.augment2(x)
        # print("Image shapes:", image_one.shape, image_two.shape)
        images = torch.cat((image_one, image_two), dim = 0)
        # labels = torch.cat((), dim=0)
        # print(images.shape)
        online_projections, _ = self.online_encoder(images)
        online_predictions = self.online_predictor(online_projections)
        # print("Online Projections = online_encoder(aug_1 + aug_2) shapes:", online_projections.shape, _.shape, online_predictions.shape)
        online_pred_one, online_pred_two = online_predictions.chunk(2, dim = 0)
        # print("Pred (chunked) shapes:", online_pred_one.shape, online_pred_two.shape)
        with torch.no_grad():
            target_encoder = self._get_target_encoder() if self.use_momentum else self.online_encoder

            target_projections, _ = target_encoder(images)
            target_projections = target_projections.detach()

            target_proj_one, target_proj_two = target_projections.chunk(2, dim = 0)

        loss_one = loss_fn(online_pred_one, target_proj_two.detach())
        loss_two = loss_fn(online_pred_two, target_proj_one.detach())

        loss = loss_one + loss_two
        return loss.mean()

In [9]:
import torchvision

img_path = "dataset/style_final"

# def normalize

data_transform = T.Compose([
    T.Resize((256, 256)),
    T.ToTensor(),
    T.Lambda(lambda img: img / 255.0)
    # T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_data = torchvision.datasets.ImageFolder(root=img_path, transform=data_transform)
# test_data = torchvision.datasets.ImageFolder(root=img_path+"test", transform=data_transform)

In [10]:
BATCH_SIZE = 64

train_dl =  torch.utils.data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
# test_dl =  torch.utils.data.DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)

In [11]:
resnet = torchvision.models.resnet50(pretrained=True)
device = 'cuda'
resnet = resnet.to(device)

learner = BYOL(
    resnet,
    image_size = 256,
    hidden_layer = 'avgpool',
    use_momentum=False
)

opt = torch.optim.Adam(learner.parameters(), lr=3e-4)



In [12]:
opt = torch.optim.Adam(learner.parameters(), lr=2e-5)

In [13]:
training = True
# resnet.load_state_dict(torch.load(resnet_path))
# learner.load_state_dict(torch.load(learner_path))

In [None]:
epochs = 500
for epoch in range(epochs):
    # print("Starting Epoch: {}/{}".format(_, 50))
    for images, labels in tqdm(train_dl, desc=f'Epoch {epoch}/{epochs}', leave=True):
        images = images.to(device)
        loss = learner(images)
        opt.zero_grad()
        loss.backward()
        opt.step()
        # learner.update_moving_average() # update moving average of target encoder
    # save your improved network
    print("Epoch {}/{} - Loss: {}".format(epoch, epochs, loss.item()))
    torch.save(resnet.state_dict(), resnet_path)
    torch.save(learner.state_dict(), learner_path)

Epoch 0/500: 100%|██████████| 1/1 [00:12<00:00, 12.90s/it]


Epoch 0/500 - Loss: 5.897350311279297


Epoch 1/500: 100%|██████████| 1/1 [00:09<00:00,  9.90s/it]


Epoch 1/500 - Loss: 5.9697418212890625


Epoch 2/500: 100%|██████████| 1/1 [00:10<00:00, 10.35s/it]


Epoch 2/500 - Loss: 5.890384674072266


Epoch 3/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 3/500 - Loss: 5.90400505065918


Epoch 4/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 4/500 - Loss: 5.817604064941406


Epoch 5/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 5/500 - Loss: 5.825617790222168


Epoch 6/500: 100%|██████████| 1/1 [00:09<00:00,  9.82s/it]


Epoch 6/500 - Loss: 5.666055679321289


Epoch 7/500: 100%|██████████| 1/1 [00:09<00:00,  9.97s/it]


Epoch 7/500 - Loss: 5.78070068359375


Epoch 8/500: 100%|██████████| 1/1 [00:09<00:00,  9.52s/it]


Epoch 8/500 - Loss: 5.750113487243652


Epoch 9/500: 100%|██████████| 1/1 [00:09<00:00,  9.54s/it]


Epoch 9/500 - Loss: 5.5328779220581055


Epoch 10/500: 100%|██████████| 1/1 [00:09<00:00,  9.79s/it]


Epoch 10/500 - Loss: 5.711633205413818


Epoch 11/500: 100%|██████████| 1/1 [00:09<00:00,  9.95s/it]


Epoch 11/500 - Loss: 5.572642803192139


Epoch 12/500: 100%|██████████| 1/1 [00:09<00:00,  9.90s/it]


Epoch 12/500 - Loss: 5.665275573730469


Epoch 13/500: 100%|██████████| 1/1 [00:09<00:00,  9.81s/it]


Epoch 13/500 - Loss: 5.780118942260742


Epoch 14/500: 100%|██████████| 1/1 [00:09<00:00,  9.90s/it]


Epoch 14/500 - Loss: 5.614511966705322


Epoch 15/500: 100%|██████████| 1/1 [00:09<00:00,  9.94s/it]


Epoch 15/500 - Loss: 5.553437232971191


Epoch 16/500: 100%|██████████| 1/1 [00:09<00:00,  9.83s/it]


Epoch 16/500 - Loss: 5.72652530670166


Epoch 17/500: 100%|██████████| 1/1 [00:09<00:00,  9.84s/it]


Epoch 17/500 - Loss: 5.249677658081055


Epoch 18/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 18/500 - Loss: 5.766371250152588


Epoch 19/500: 100%|██████████| 1/1 [00:09<00:00,  9.79s/it]


Epoch 19/500 - Loss: 5.615584373474121


Epoch 20/500: 100%|██████████| 1/1 [00:09<00:00,  9.90s/it]


Epoch 20/500 - Loss: 5.254729270935059


Epoch 21/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 21/500 - Loss: 5.446163654327393


Epoch 22/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 22/500 - Loss: 5.180924892425537


Epoch 23/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 23/500 - Loss: 5.246274948120117


Epoch 24/500: 100%|██████████| 1/1 [00:09<00:00,  9.56s/it]


Epoch 24/500 - Loss: 5.490667343139648


Epoch 25/500: 100%|██████████| 1/1 [00:09<00:00,  9.56s/it]


Epoch 25/500 - Loss: 5.048002243041992


Epoch 26/500: 100%|██████████| 1/1 [00:09<00:00,  9.62s/it]


Epoch 26/500 - Loss: 5.262448310852051


Epoch 27/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 27/500 - Loss: 5.73847770690918


Epoch 28/500: 100%|██████████| 1/1 [00:09<00:00,  9.56s/it]


Epoch 28/500 - Loss: 5.808350563049316


Epoch 29/500: 100%|██████████| 1/1 [00:09<00:00,  9.99s/it]


Epoch 29/500 - Loss: 5.421507835388184


Epoch 30/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 30/500 - Loss: 4.736999988555908


Epoch 31/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 31/500 - Loss: 4.711628437042236


Epoch 32/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 32/500 - Loss: 4.973506927490234


Epoch 33/500: 100%|██████████| 1/1 [00:10<00:00, 10.07s/it]


Epoch 33/500 - Loss: 5.610576629638672


Epoch 34/500: 100%|██████████| 1/1 [00:09<00:00,  9.98s/it]


Epoch 34/500 - Loss: 5.365024089813232


Epoch 35/500: 100%|██████████| 1/1 [00:10<00:00, 10.00s/it]


Epoch 35/500 - Loss: 4.405601501464844


Epoch 36/500: 100%|██████████| 1/1 [00:09<00:00,  9.95s/it]


Epoch 36/500 - Loss: 4.666004657745361


Epoch 37/500: 100%|██████████| 1/1 [00:09<00:00,  9.81s/it]


Epoch 37/500 - Loss: 4.960057735443115


Epoch 38/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 38/500 - Loss: 4.419883728027344


Epoch 39/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 39/500 - Loss: 5.721592903137207


Epoch 40/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 40/500 - Loss: 4.629874229431152


Epoch 41/500: 100%|██████████| 1/1 [00:09<00:00,  9.78s/it]


Epoch 41/500 - Loss: 4.744990348815918


Epoch 42/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 42/500 - Loss: 4.884401798248291


Epoch 43/500: 100%|██████████| 1/1 [00:09<00:00,  9.74s/it]


Epoch 43/500 - Loss: 5.604958534240723


Epoch 44/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 44/500 - Loss: 4.773942947387695


Epoch 45/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 45/500 - Loss: 4.190783500671387


Epoch 46/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 46/500 - Loss: 4.798724174499512


Epoch 47/500: 100%|██████████| 1/1 [00:09<00:00,  9.74s/it]


Epoch 47/500 - Loss: 5.036530494689941


Epoch 48/500: 100%|██████████| 1/1 [00:09<00:00,  9.74s/it]


Epoch 48/500 - Loss: 5.40147066116333


Epoch 49/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 49/500 - Loss: 4.654935836791992


Epoch 50/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 50/500 - Loss: 4.783891677856445


Epoch 51/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 51/500 - Loss: 4.220571041107178


Epoch 52/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 52/500 - Loss: 4.102947235107422


Epoch 53/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 53/500 - Loss: 3.7926416397094727


Epoch 54/500: 100%|██████████| 1/1 [00:09<00:00,  9.75s/it]


Epoch 54/500 - Loss: 4.285870552062988


Epoch 55/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 55/500 - Loss: 3.929166793823242


Epoch 56/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 56/500 - Loss: 3.702747106552124


Epoch 57/500: 100%|██████████| 1/1 [00:09<00:00,  9.80s/it]


Epoch 57/500 - Loss: 5.386216163635254


Epoch 58/500: 100%|██████████| 1/1 [00:09<00:00,  9.70s/it]


Epoch 58/500 - Loss: 4.291421890258789


Epoch 59/500: 100%|██████████| 1/1 [00:09<00:00,  9.68s/it]


Epoch 59/500 - Loss: 4.369606018066406


Epoch 60/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 60/500 - Loss: 5.285954475402832


Epoch 61/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 61/500 - Loss: 4.890412330627441


Epoch 62/500: 100%|██████████| 1/1 [00:09<00:00,  9.70s/it]


Epoch 62/500 - Loss: 3.6153738498687744


Epoch 63/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 63/500 - Loss: 4.512798309326172


Epoch 64/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 64/500 - Loss: 4.360828399658203


Epoch 65/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 65/500 - Loss: 4.3358259201049805


Epoch 66/500: 100%|██████████| 1/1 [00:09<00:00,  9.62s/it]


Epoch 66/500 - Loss: 3.5651378631591797


Epoch 67/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 67/500 - Loss: 4.227756023406982


Epoch 68/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 68/500 - Loss: 4.592677116394043


Epoch 69/500: 100%|██████████| 1/1 [00:09<00:00,  9.62s/it]


Epoch 69/500 - Loss: 3.4437999725341797


Epoch 70/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 70/500 - Loss: 3.671192169189453


Epoch 71/500: 100%|██████████| 1/1 [00:09<00:00,  9.88s/it]


Epoch 71/500 - Loss: 3.5477662086486816


Epoch 72/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 72/500 - Loss: 4.094664573669434


Epoch 73/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 73/500 - Loss: 3.5678293704986572


Epoch 74/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 74/500 - Loss: 4.336552619934082


Epoch 75/500: 100%|██████████| 1/1 [00:09<00:00,  9.68s/it]


Epoch 75/500 - Loss: 4.161468982696533


Epoch 76/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 76/500 - Loss: 4.4081621170043945


Epoch 77/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 77/500 - Loss: 4.0129804611206055


Epoch 78/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 78/500 - Loss: 3.775698661804199


Epoch 79/500: 100%|██████████| 1/1 [00:09<00:00,  9.68s/it]


Epoch 79/500 - Loss: 3.4749865531921387


Epoch 80/500: 100%|██████████| 1/1 [00:09<00:00,  9.70s/it]


Epoch 80/500 - Loss: 3.3478024005889893


Epoch 81/500: 100%|██████████| 1/1 [00:09<00:00,  9.74s/it]


Epoch 81/500 - Loss: 3.597289562225342


Epoch 82/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 82/500 - Loss: 3.970592975616455


Epoch 83/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 83/500 - Loss: 3.289597511291504


Epoch 84/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 84/500 - Loss: 3.5427770614624023


Epoch 85/500: 100%|██████████| 1/1 [00:09<00:00,  9.68s/it]


Epoch 85/500 - Loss: 3.646167278289795


Epoch 86/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 86/500 - Loss: 3.371422290802002


Epoch 87/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 87/500 - Loss: 3.638678550720215


Epoch 88/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 88/500 - Loss: 4.41029167175293


Epoch 89/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 89/500 - Loss: 4.458765506744385


Epoch 90/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 90/500 - Loss: 3.8736371994018555


Epoch 91/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 91/500 - Loss: 3.354750156402588


Epoch 92/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 92/500 - Loss: 3.534824848175049


Epoch 93/500: 100%|██████████| 1/1 [00:09<00:00,  9.68s/it]


Epoch 93/500 - Loss: 3.7498743534088135


Epoch 94/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 94/500 - Loss: 3.2104380130767822


Epoch 95/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 95/500 - Loss: 3.2165439128875732


Epoch 96/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 96/500 - Loss: 3.168447971343994


Epoch 97/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 97/500 - Loss: 3.7331533432006836


Epoch 98/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 98/500 - Loss: 3.1108031272888184


Epoch 99/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 99/500 - Loss: 3.5143792629241943


Epoch 100/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 100/500 - Loss: 3.115452766418457


Epoch 101/500: 100%|██████████| 1/1 [00:09<00:00,  9.70s/it]


Epoch 101/500 - Loss: 4.5650129318237305


Epoch 102/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 102/500 - Loss: 3.3309895992279053


Epoch 103/500: 100%|██████████| 1/1 [00:09<00:00,  9.68s/it]


Epoch 103/500 - Loss: 3.1584978103637695


Epoch 104/500: 100%|██████████| 1/1 [00:09<00:00,  9.68s/it]


Epoch 104/500 - Loss: 3.7407517433166504


Epoch 105/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 105/500 - Loss: 3.0752012729644775


Epoch 106/500: 100%|██████████| 1/1 [00:09<00:00,  9.68s/it]


Epoch 106/500 - Loss: 4.532215118408203


Epoch 107/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 107/500 - Loss: 3.4260053634643555


Epoch 108/500: 100%|██████████| 1/1 [00:09<00:00,  9.68s/it]


Epoch 108/500 - Loss: 3.066845417022705


Epoch 109/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 109/500 - Loss: 3.2337236404418945


Epoch 110/500: 100%|██████████| 1/1 [00:09<00:00,  9.79s/it]


Epoch 110/500 - Loss: 3.275339126586914


Epoch 111/500: 100%|██████████| 1/1 [00:09<00:00,  9.74s/it]


Epoch 111/500 - Loss: 3.0832180976867676


Epoch 112/500: 100%|██████████| 1/1 [00:09<00:00,  9.74s/it]


Epoch 112/500 - Loss: 3.396979331970215


Epoch 113/500: 100%|██████████| 1/1 [00:09<00:00,  9.75s/it]


Epoch 113/500 - Loss: 3.381363868713379


Epoch 114/500: 100%|██████████| 1/1 [00:09<00:00,  9.70s/it]


Epoch 114/500 - Loss: 4.29885196685791


Epoch 115/500: 100%|██████████| 1/1 [00:09<00:00,  9.70s/it]


Epoch 115/500 - Loss: 3.010899543762207


Epoch 116/500: 100%|██████████| 1/1 [00:09<00:00,  9.70s/it]


Epoch 116/500 - Loss: 3.2213268280029297


Epoch 117/500: 100%|██████████| 1/1 [00:09<00:00,  9.70s/it]


Epoch 117/500 - Loss: 3.137953281402588


Epoch 118/500: 100%|██████████| 1/1 [00:09<00:00,  9.70s/it]


Epoch 118/500 - Loss: 3.2586557865142822


Epoch 119/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 119/500 - Loss: 3.6821908950805664


Epoch 120/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 120/500 - Loss: 2.95184326171875


Epoch 121/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 121/500 - Loss: 3.7314205169677734


Epoch 122/500: 100%|██████████| 1/1 [00:09<00:00,  9.76s/it]


Epoch 122/500 - Loss: 3.0219931602478027


Epoch 123/500: 100%|██████████| 1/1 [00:09<00:00,  9.68s/it]


Epoch 123/500 - Loss: 3.6016721725463867


Epoch 124/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 124/500 - Loss: 3.1130776405334473


Epoch 125/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 125/500 - Loss: 3.194413185119629


Epoch 126/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 126/500 - Loss: 3.7457799911499023


Epoch 127/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 127/500 - Loss: 3.414876699447632


Epoch 128/500: 100%|██████████| 1/1 [00:09<00:00,  9.78s/it]


Epoch 128/500 - Loss: 3.423476457595825


Epoch 129/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 129/500 - Loss: 3.1979894638061523


Epoch 130/500: 100%|██████████| 1/1 [00:09<00:00,  9.75s/it]


Epoch 130/500 - Loss: 3.8397531509399414


Epoch 131/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 131/500 - Loss: 2.91129732131958


Epoch 132/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 132/500 - Loss: 3.0947577953338623


Epoch 133/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 133/500 - Loss: 3.159127712249756


Epoch 134/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 134/500 - Loss: 3.3236420154571533


Epoch 135/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 135/500 - Loss: 3.371744394302368


Epoch 136/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 136/500 - Loss: 3.0669989585876465


Epoch 137/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 137/500 - Loss: 2.9847278594970703


Epoch 138/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 138/500 - Loss: 3.362494468688965


Epoch 139/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 139/500 - Loss: 3.521240711212158


Epoch 140/500: 100%|██████████| 1/1 [00:09<00:00,  9.70s/it]


Epoch 140/500 - Loss: 3.2125024795532227


Epoch 141/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 141/500 - Loss: 3.0280115604400635


Epoch 142/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 142/500 - Loss: 3.0478997230529785


Epoch 143/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 143/500 - Loss: 3.0670108795166016


Epoch 144/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 144/500 - Loss: 3.089048385620117


Epoch 145/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 145/500 - Loss: 3.1389479637145996


Epoch 146/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 146/500 - Loss: 4.563903331756592


Epoch 147/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 147/500 - Loss: 3.6677560806274414


Epoch 148/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 148/500 - Loss: 3.9598240852355957


Epoch 149/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 149/500 - Loss: 3.2550249099731445


Epoch 150/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 150/500 - Loss: 3.4098358154296875


Epoch 151/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 151/500 - Loss: 3.018202304840088


Epoch 152/500: 100%|██████████| 1/1 [00:09<00:00,  9.62s/it]


Epoch 152/500 - Loss: 3.831040859222412


Epoch 153/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 153/500 - Loss: 3.259152412414551


Epoch 154/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 154/500 - Loss: 4.118176460266113


Epoch 155/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 155/500 - Loss: 2.85762357711792


Epoch 156/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 156/500 - Loss: 2.988875389099121


Epoch 157/500: 100%|██████████| 1/1 [00:09<00:00,  9.62s/it]


Epoch 157/500 - Loss: 2.862454414367676


Epoch 158/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 158/500 - Loss: 3.3703863620758057


Epoch 159/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 159/500 - Loss: 3.006621837615967


Epoch 160/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 160/500 - Loss: 3.2009782791137695


Epoch 161/500: 100%|██████████| 1/1 [00:09<00:00,  9.68s/it]


Epoch 161/500 - Loss: 3.2109878063201904


Epoch 162/500: 100%|██████████| 1/1 [00:09<00:00,  9.68s/it]


Epoch 162/500 - Loss: 3.387211799621582


Epoch 163/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 163/500 - Loss: 2.8422489166259766


Epoch 164/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 164/500 - Loss: 3.6161742210388184


Epoch 165/500: 100%|██████████| 1/1 [00:09<00:00,  9.68s/it]


Epoch 165/500 - Loss: 2.8604109287261963


Epoch 166/500: 100%|██████████| 1/1 [00:09<00:00,  9.78s/it]


Epoch 166/500 - Loss: 3.5932822227478027


Epoch 167/500: 100%|██████████| 1/1 [00:09<00:00,  9.70s/it]


Epoch 167/500 - Loss: 2.9482409954071045


Epoch 168/500: 100%|██████████| 1/1 [00:09<00:00,  9.70s/it]


Epoch 168/500 - Loss: 3.4854955673217773


Epoch 169/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 169/500 - Loss: 3.2746777534484863


Epoch 170/500: 100%|██████████| 1/1 [00:09<00:00,  9.74s/it]


Epoch 170/500 - Loss: 2.8674824237823486


Epoch 171/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 171/500 - Loss: 3.2630844116210938


Epoch 172/500: 100%|██████████| 1/1 [00:09<00:00,  9.75s/it]


Epoch 172/500 - Loss: 2.859719753265381


Epoch 173/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 173/500 - Loss: 3.1111650466918945


Epoch 174/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 174/500 - Loss: 3.147412061691284


Epoch 175/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 175/500 - Loss: 2.9185752868652344


Epoch 176/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 176/500 - Loss: 3.6147961616516113


Epoch 177/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 177/500 - Loss: 3.1198573112487793


Epoch 178/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 178/500 - Loss: 3.12180233001709


Epoch 179/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 179/500 - Loss: 3.038843870162964


Epoch 180/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 180/500 - Loss: 2.8110926151275635


Epoch 181/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 181/500 - Loss: 3.07033634185791


Epoch 182/500: 100%|██████████| 1/1 [00:09<00:00,  9.68s/it]


Epoch 182/500 - Loss: 3.178279399871826


Epoch 183/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 183/500 - Loss: 3.3109633922576904


Epoch 184/500: 100%|██████████| 1/1 [00:09<00:00,  9.62s/it]


Epoch 184/500 - Loss: 2.911869525909424


Epoch 185/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 185/500 - Loss: 2.952613115310669


Epoch 186/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 186/500 - Loss: 2.7879018783569336


Epoch 187/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 187/500 - Loss: 3.6056365966796875


Epoch 188/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 188/500 - Loss: 2.927908420562744


Epoch 189/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 189/500 - Loss: 3.0578343868255615


Epoch 190/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 190/500 - Loss: 2.939296007156372


Epoch 191/500: 100%|██████████| 1/1 [00:09<00:00,  9.82s/it]


Epoch 191/500 - Loss: 3.1932272911071777


Epoch 192/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 192/500 - Loss: 3.534574031829834


Epoch 193/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 193/500 - Loss: 3.2695260047912598


Epoch 194/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 194/500 - Loss: 2.9679884910583496


Epoch 195/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 195/500 - Loss: 2.865597724914551


Epoch 196/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 196/500 - Loss: 2.779805898666382


Epoch 197/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 197/500 - Loss: 2.8661699295043945


Epoch 198/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 198/500 - Loss: 3.0624642372131348


Epoch 199/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 199/500 - Loss: 2.9131131172180176


Epoch 200/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 200/500 - Loss: 2.9371156692504883


Epoch 201/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 201/500 - Loss: 2.9265594482421875


Epoch 202/500: 100%|██████████| 1/1 [00:09<00:00,  9.68s/it]


Epoch 202/500 - Loss: 3.471264600753784


Epoch 203/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 203/500 - Loss: 2.8170528411865234


Epoch 204/500: 100%|██████████| 1/1 [00:09<00:00,  9.61s/it]


Epoch 204/500 - Loss: 2.9345688819885254


Epoch 205/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 205/500 - Loss: 3.364992380142212


Epoch 206/500: 100%|██████████| 1/1 [00:09<00:00,  9.61s/it]


Epoch 206/500 - Loss: 2.924801826477051


Epoch 207/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 207/500 - Loss: 2.834195137023926


Epoch 208/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 208/500 - Loss: 2.797559976577759


Epoch 209/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 209/500 - Loss: 3.239504337310791


Epoch 210/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 210/500 - Loss: 2.947770118713379


Epoch 211/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 211/500 - Loss: 2.770526170730591


Epoch 212/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 212/500 - Loss: 2.7613272666931152


Epoch 213/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 213/500 - Loss: 2.7940361499786377


Epoch 214/500: 100%|██████████| 1/1 [00:09<00:00,  9.74s/it]


Epoch 214/500 - Loss: 2.9512171745300293


Epoch 215/500: 100%|██████████| 1/1 [00:09<00:00,  9.74s/it]


Epoch 215/500 - Loss: 2.995709180831909


Epoch 216/500: 100%|██████████| 1/1 [00:09<00:00,  9.74s/it]


Epoch 216/500 - Loss: 2.7699623107910156


Epoch 217/500: 100%|██████████| 1/1 [00:09<00:00,  9.75s/it]


Epoch 217/500 - Loss: 3.2260403633117676


Epoch 218/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 218/500 - Loss: 2.836799144744873


Epoch 219/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 219/500 - Loss: 4.095697402954102


Epoch 220/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 220/500 - Loss: 2.8424928188323975


Epoch 221/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 221/500 - Loss: 3.107403039932251


Epoch 222/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 222/500 - Loss: 3.2654314041137695


Epoch 223/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 223/500 - Loss: 3.051654815673828


Epoch 224/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 224/500 - Loss: 3.2776951789855957


Epoch 225/500: 100%|██████████| 1/1 [00:09<00:00,  9.68s/it]


Epoch 225/500 - Loss: 2.8402137756347656


Epoch 226/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 226/500 - Loss: 2.9417128562927246


Epoch 227/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 227/500 - Loss: 2.779813766479492


Epoch 228/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 228/500 - Loss: 3.1517269611358643


Epoch 229/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 229/500 - Loss: 3.3782033920288086


Epoch 230/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 230/500 - Loss: 3.519777297973633


Epoch 231/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 231/500 - Loss: 2.8965697288513184


Epoch 232/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 232/500 - Loss: 2.7408089637756348


Epoch 233/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 233/500 - Loss: 3.108736991882324


Epoch 234/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 234/500 - Loss: 2.8105521202087402


Epoch 235/500: 100%|██████████| 1/1 [00:10<00:00, 10.60s/it]


Epoch 235/500 - Loss: 2.7899205684661865


Epoch 236/500: 100%|██████████| 1/1 [00:10<00:00, 10.63s/it]


Epoch 236/500 - Loss: 3.4795122146606445


Epoch 237/500: 100%|██████████| 1/1 [00:10<00:00, 10.45s/it]


Epoch 237/500 - Loss: 2.958056926727295


Epoch 238/500: 100%|██████████| 1/1 [00:09<00:00,  9.98s/it]


Epoch 238/500 - Loss: 3.0232577323913574


Epoch 239/500: 100%|██████████| 1/1 [00:10<00:00, 10.02s/it]


Epoch 239/500 - Loss: 3.9339261054992676


Epoch 240/500: 100%|██████████| 1/1 [00:10<00:00, 10.05s/it]


Epoch 240/500 - Loss: 3.5483973026275635


Epoch 241/500: 100%|██████████| 1/1 [00:10<00:00, 10.14s/it]


Epoch 241/500 - Loss: 2.842510223388672


Epoch 242/500: 100%|██████████| 1/1 [00:09<00:00,  9.92s/it]


Epoch 242/500 - Loss: 3.046619176864624


Epoch 243/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 243/500 - Loss: 2.8215999603271484


Epoch 244/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 244/500 - Loss: 2.9949426651000977


Epoch 245/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 245/500 - Loss: 2.859191417694092


Epoch 246/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 246/500 - Loss: 2.859365940093994


Epoch 247/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 247/500 - Loss: 2.7487902641296387


Epoch 248/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 248/500 - Loss: 3.1994781494140625


Epoch 249/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 249/500 - Loss: 2.793325662612915


Epoch 250/500: 100%|██████████| 1/1 [00:09<00:00,  9.80s/it]


Epoch 250/500 - Loss: 3.83504056930542


Epoch 251/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 251/500 - Loss: 2.848682403564453


Epoch 252/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 252/500 - Loss: 2.935999870300293


Epoch 253/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 253/500 - Loss: 2.804577350616455


Epoch 254/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 254/500 - Loss: 2.9140820503234863


Epoch 255/500: 100%|██████████| 1/1 [00:09<00:00,  9.76s/it]


Epoch 255/500 - Loss: 2.79299259185791


Epoch 256/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 256/500 - Loss: 3.2025742530822754


Epoch 257/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 257/500 - Loss: 3.0113067626953125


Epoch 258/500: 100%|██████████| 1/1 [00:09<00:00,  9.74s/it]


Epoch 258/500 - Loss: 3.8314766883850098


Epoch 259/500: 100%|██████████| 1/1 [00:09<00:00,  9.75s/it]


Epoch 259/500 - Loss: 3.2535791397094727


Epoch 260/500: 100%|██████████| 1/1 [00:10<00:00, 10.59s/it]


Epoch 260/500 - Loss: 3.747516632080078


Epoch 261/500: 100%|██████████| 1/1 [00:09<00:00,  9.68s/it]


Epoch 261/500 - Loss: 3.565950393676758


Epoch 262/500: 100%|██████████| 1/1 [00:10<00:00, 10.19s/it]


Epoch 262/500 - Loss: 2.7243123054504395


Epoch 263/500: 100%|██████████| 1/1 [00:10<00:00, 10.53s/it]


Epoch 263/500 - Loss: 2.9999442100524902


Epoch 264/500: 100%|██████████| 1/1 [00:10<00:00, 10.22s/it]


Epoch 264/500 - Loss: 3.0087740421295166


Epoch 265/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 265/500 - Loss: 3.42783784866333


Epoch 266/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 266/500 - Loss: 2.9311089515686035


Epoch 267/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 267/500 - Loss: 2.7390100955963135


Epoch 268/500: 100%|██████████| 1/1 [00:10<00:00, 10.36s/it]


Epoch 268/500 - Loss: 2.9586548805236816


Epoch 269/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 269/500 - Loss: 3.5356199741363525


Epoch 270/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 270/500 - Loss: 3.1699767112731934


Epoch 271/500: 100%|██████████| 1/1 [00:10<00:00, 10.54s/it]


Epoch 271/500 - Loss: 3.0467967987060547


Epoch 272/500: 100%|██████████| 1/1 [00:09<00:00,  9.60s/it]


Epoch 272/500 - Loss: 2.8464150428771973


Epoch 273/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 273/500 - Loss: 2.8488831520080566


Epoch 274/500: 100%|██████████| 1/1 [00:09<00:00,  9.62s/it]


Epoch 274/500 - Loss: 2.749969005584717


Epoch 275/500: 100%|██████████| 1/1 [00:09<00:00,  9.62s/it]


Epoch 275/500 - Loss: 2.9370639324188232


Epoch 276/500: 100%|██████████| 1/1 [00:09<00:00,  9.62s/it]


Epoch 276/500 - Loss: 2.8179261684417725


Epoch 277/500: 100%|██████████| 1/1 [00:09<00:00,  9.61s/it]


Epoch 277/500 - Loss: 2.7600975036621094


Epoch 278/500: 100%|██████████| 1/1 [00:09<00:00,  9.58s/it]


Epoch 278/500 - Loss: 2.9930338859558105


Epoch 279/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 279/500 - Loss: 2.937972068786621


Epoch 280/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 280/500 - Loss: 2.721849203109741


Epoch 281/500: 100%|██████████| 1/1 [00:09<00:00,  9.57s/it]


Epoch 281/500 - Loss: 3.353121042251587


Epoch 282/500: 100%|██████████| 1/1 [00:10<00:00, 10.21s/it]


Epoch 282/500 - Loss: 2.7276759147644043


Epoch 283/500: 100%|██████████| 1/1 [00:10<00:00, 10.39s/it]


Epoch 283/500 - Loss: 2.8155112266540527


Epoch 284/500: 100%|██████████| 1/1 [00:10<00:00, 10.42s/it]


Epoch 284/500 - Loss: 2.8239450454711914


Epoch 285/500: 100%|██████████| 1/1 [00:09<00:00,  9.80s/it]


Epoch 285/500 - Loss: 2.869354248046875


Epoch 286/500: 100%|██████████| 1/1 [00:09<00:00,  9.60s/it]


Epoch 286/500 - Loss: 3.2782232761383057


Epoch 287/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 287/500 - Loss: 2.835653781890869


Epoch 288/500: 100%|██████████| 1/1 [00:10<00:00, 10.39s/it]


Epoch 288/500 - Loss: 2.8645706176757812


Epoch 289/500: 100%|██████████| 1/1 [00:10<00:00, 10.59s/it]


Epoch 289/500 - Loss: 3.7594757080078125


Epoch 290/500: 100%|██████████| 1/1 [00:09<00:00,  9.89s/it]


Epoch 290/500 - Loss: 2.8371965885162354


Epoch 291/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 291/500 - Loss: 3.1241257190704346


Epoch 292/500: 100%|██████████| 1/1 [00:09<00:00,  9.98s/it]


Epoch 292/500 - Loss: 3.463249921798706


Epoch 293/500: 100%|██████████| 1/1 [00:10<00:00, 10.46s/it]


Epoch 293/500 - Loss: 2.7593905925750732


Epoch 294/500: 100%|██████████| 1/1 [00:10<00:00, 10.54s/it]


Epoch 294/500 - Loss: 3.3252172470092773


Epoch 295/500: 100%|██████████| 1/1 [00:10<00:00, 10.53s/it]


Epoch 295/500 - Loss: 2.9252572059631348


Epoch 296/500: 100%|██████████| 1/1 [00:09<00:00,  9.79s/it]


Epoch 296/500 - Loss: 2.8322715759277344


Epoch 297/500: 100%|██████████| 1/1 [00:09<00:00,  9.62s/it]


Epoch 297/500 - Loss: 2.957634925842285


Epoch 298/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 298/500 - Loss: 3.445585250854492


Epoch 299/500: 100%|██████████| 1/1 [00:09<00:00,  9.75s/it]


Epoch 299/500 - Loss: 2.970898151397705


Epoch 300/500: 100%|██████████| 1/1 [00:09<00:00,  9.76s/it]


Epoch 300/500 - Loss: 2.712404251098633


Epoch 301/500: 100%|██████████| 1/1 [00:09<00:00,  9.75s/it]


Epoch 301/500 - Loss: 2.9052510261535645


Epoch 302/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 302/500 - Loss: 2.9869933128356934


Epoch 303/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 303/500 - Loss: 2.757075071334839


Epoch 304/500: 100%|██████████| 1/1 [00:09<00:00,  9.76s/it]


Epoch 304/500 - Loss: 3.574089527130127


Epoch 305/500: 100%|██████████| 1/1 [00:09<00:00,  9.70s/it]


Epoch 305/500 - Loss: 2.7887349128723145


Epoch 306/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 306/500 - Loss: 2.7231640815734863


Epoch 307/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 307/500 - Loss: 2.786111354827881


Epoch 308/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 308/500 - Loss: 2.732832670211792


Epoch 309/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 309/500 - Loss: 2.7832770347595215


Epoch 310/500: 100%|██████████| 1/1 [00:09<00:00,  9.74s/it]


Epoch 310/500 - Loss: 2.7644591331481934


Epoch 311/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 311/500 - Loss: 2.7333431243896484


Epoch 312/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 312/500 - Loss: 3.292661190032959


Epoch 313/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 313/500 - Loss: 2.753662109375


Epoch 314/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 314/500 - Loss: 2.738039970397949


Epoch 315/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 315/500 - Loss: 2.7615256309509277


Epoch 316/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 316/500 - Loss: 2.7087202072143555


Epoch 317/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 317/500 - Loss: 2.773414134979248


Epoch 318/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 318/500 - Loss: 2.958008050918579


Epoch 319/500: 100%|██████████| 1/1 [00:09<00:00,  9.62s/it]


Epoch 319/500 - Loss: 2.7106285095214844


Epoch 320/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 320/500 - Loss: 2.7522568702697754


Epoch 321/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 321/500 - Loss: 2.746800422668457


Epoch 322/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 322/500 - Loss: 2.8380932807922363


Epoch 323/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 323/500 - Loss: 3.2350497245788574


Epoch 324/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 324/500 - Loss: 3.210728645324707


Epoch 325/500: 100%|██████████| 1/1 [00:09<00:00,  9.62s/it]


Epoch 325/500 - Loss: 2.764833688735962


Epoch 326/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 326/500 - Loss: 2.7731881141662598


Epoch 327/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 327/500 - Loss: 2.8166663646698


Epoch 328/500: 100%|██████████| 1/1 [00:09<00:00,  9.70s/it]


Epoch 328/500 - Loss: 2.8624279499053955


Epoch 329/500: 100%|██████████| 1/1 [00:09<00:00,  9.70s/it]


Epoch 329/500 - Loss: 3.173020362854004


Epoch 330/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 330/500 - Loss: 2.695984125137329


Epoch 331/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 331/500 - Loss: 2.7019808292388916


Epoch 332/500: 100%|██████████| 1/1 [00:10<00:00, 10.32s/it]


Epoch 332/500 - Loss: 2.8023433685302734


Epoch 333/500: 100%|██████████| 1/1 [00:10<00:00, 10.55s/it]


Epoch 333/500 - Loss: 2.7044901847839355


Epoch 334/500: 100%|██████████| 1/1 [00:10<00:00, 10.61s/it]


Epoch 334/500 - Loss: 2.7501392364501953


Epoch 335/500: 100%|██████████| 1/1 [00:10<00:00, 10.10s/it]


Epoch 335/500 - Loss: 2.746845245361328


Epoch 336/500: 100%|██████████| 1/1 [00:10<00:00, 10.04s/it]


Epoch 336/500 - Loss: 2.7118096351623535


Epoch 337/500: 100%|██████████| 1/1 [00:10<00:00, 10.04s/it]


Epoch 337/500 - Loss: 2.7516894340515137


Epoch 338/500: 100%|██████████| 1/1 [00:10<00:00, 10.03s/it]


Epoch 338/500 - Loss: 2.8745603561401367


Epoch 339/500: 100%|██████████| 1/1 [00:09<00:00,  9.93s/it]


Epoch 339/500 - Loss: 2.905771493911743


Epoch 340/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 340/500 - Loss: 2.7963364124298096


Epoch 341/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 341/500 - Loss: 2.8522064685821533


Epoch 342/500: 100%|██████████| 1/1 [00:09<00:00,  9.62s/it]


Epoch 342/500 - Loss: 2.8590455055236816


Epoch 343/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 343/500 - Loss: 2.9323549270629883


Epoch 344/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 344/500 - Loss: 2.8010001182556152


Epoch 345/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 345/500 - Loss: 2.7467808723449707


Epoch 346/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 346/500 - Loss: 2.8421967029571533


Epoch 347/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 347/500 - Loss: 2.7927770614624023


Epoch 348/500: 100%|██████████| 1/1 [00:09<00:00,  9.62s/it]


Epoch 348/500 - Loss: 2.9889533519744873


Epoch 349/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 349/500 - Loss: 2.736271858215332


Epoch 350/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 350/500 - Loss: 2.994462490081787


Epoch 351/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 351/500 - Loss: 2.84773325920105


Epoch 352/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 352/500 - Loss: 2.773735523223877


Epoch 353/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 353/500 - Loss: 2.7815799713134766


Epoch 354/500: 100%|██████████| 1/1 [00:09<00:00,  9.70s/it]


Epoch 354/500 - Loss: 2.7589364051818848


Epoch 355/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 355/500 - Loss: 2.806964874267578


Epoch 356/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 356/500 - Loss: 2.7004570960998535


Epoch 357/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 357/500 - Loss: 3.3669490814208984


Epoch 358/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 358/500 - Loss: 2.927474021911621


Epoch 359/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 359/500 - Loss: 3.100041151046753


Epoch 360/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 360/500 - Loss: 2.70493221282959


Epoch 361/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 361/500 - Loss: 2.7127959728240967


Epoch 362/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 362/500 - Loss: 2.811323404312134


Epoch 363/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 363/500 - Loss: 2.90725040435791


Epoch 364/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 364/500 - Loss: 3.5131583213806152


Epoch 365/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 365/500 - Loss: 3.2297821044921875


Epoch 366/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 366/500 - Loss: 3.0509274005889893


Epoch 367/500: 100%|██████████| 1/1 [00:09<00:00,  9.62s/it]


Epoch 367/500 - Loss: 2.7400496006011963


Epoch 368/500: 100%|██████████| 1/1 [00:09<00:00,  9.62s/it]


Epoch 368/500 - Loss: 2.9628498554229736


Epoch 369/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 369/500 - Loss: 2.7226133346557617


Epoch 370/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 370/500 - Loss: 2.8328707218170166


Epoch 371/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 371/500 - Loss: 2.685241222381592


Epoch 372/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 372/500 - Loss: 2.7314391136169434


Epoch 373/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 373/500 - Loss: 2.827564239501953


Epoch 374/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 374/500 - Loss: 2.7716898918151855


Epoch 375/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 375/500 - Loss: 2.76267147064209


Epoch 376/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 376/500 - Loss: 2.6872658729553223


Epoch 377/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 377/500 - Loss: 3.5444555282592773


Epoch 378/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 378/500 - Loss: 2.7195630073547363


Epoch 379/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 379/500 - Loss: 3.1394996643066406


Epoch 380/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 380/500 - Loss: 3.27344012260437


Epoch 381/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 381/500 - Loss: 2.7526111602783203


Epoch 382/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 382/500 - Loss: 2.685624599456787


Epoch 383/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 383/500 - Loss: 2.7252731323242188


Epoch 384/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 384/500 - Loss: 2.698246479034424


Epoch 385/500: 100%|██████████| 1/1 [00:09<00:00,  9.80s/it]


Epoch 385/500 - Loss: 2.789682388305664


Epoch 386/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 386/500 - Loss: 2.8724279403686523


Epoch 387/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 387/500 - Loss: 3.0121452808380127


Epoch 388/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 388/500 - Loss: 2.7608633041381836


Epoch 389/500: 100%|██████████| 1/1 [00:09<00:00,  9.75s/it]


Epoch 389/500 - Loss: 3.206364631652832


Epoch 390/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 390/500 - Loss: 2.7211475372314453


Epoch 391/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 391/500 - Loss: 2.7744832038879395


Epoch 392/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 392/500 - Loss: 2.8280608654022217


Epoch 393/500: 100%|██████████| 1/1 [00:09<00:00,  9.70s/it]


Epoch 393/500 - Loss: 2.721406936645508


Epoch 394/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 394/500 - Loss: 2.676577568054199


Epoch 395/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 395/500 - Loss: 2.8002190589904785


Epoch 396/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 396/500 - Loss: 2.702548027038574


Epoch 397/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 397/500 - Loss: 2.717754364013672


Epoch 398/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 398/500 - Loss: 2.7190985679626465


Epoch 399/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 399/500 - Loss: 2.708200693130493


Epoch 400/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 400/500 - Loss: 2.720822811126709


Epoch 401/500: 100%|██████████| 1/1 [00:09<00:00,  9.76s/it]


Epoch 401/500 - Loss: 2.986845016479492


Epoch 402/500: 100%|██████████| 1/1 [00:09<00:00,  9.76s/it]


Epoch 402/500 - Loss: 2.6777267456054688


Epoch 403/500: 100%|██████████| 1/1 [00:09<00:00,  9.75s/it]


Epoch 403/500 - Loss: 3.351271152496338


Epoch 404/500: 100%|██████████| 1/1 [00:09<00:00,  9.75s/it]


Epoch 404/500 - Loss: 2.7074153423309326


Epoch 405/500: 100%|██████████| 1/1 [00:09<00:00,  9.70s/it]


Epoch 405/500 - Loss: 2.7904367446899414


Epoch 406/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 406/500 - Loss: 2.8376572132110596


Epoch 407/500: 100%|██████████| 1/1 [00:09<00:00,  9.67s/it]


Epoch 407/500 - Loss: 3.1871824264526367


Epoch 408/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 408/500 - Loss: 3.6638453006744385


Epoch 409/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 409/500 - Loss: 2.7203829288482666


Epoch 410/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 410/500 - Loss: 2.7422733306884766


Epoch 411/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 411/500 - Loss: 2.7906837463378906


Epoch 412/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 412/500 - Loss: 2.7266147136688232


Epoch 413/500: 100%|██████████| 1/1 [00:09<00:00,  9.65s/it]


Epoch 413/500 - Loss: 2.7204127311706543


Epoch 414/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 414/500 - Loss: 2.7770118713378906


Epoch 415/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 415/500 - Loss: 2.734851837158203


Epoch 416/500: 100%|██████████| 1/1 [00:09<00:00,  9.61s/it]


Epoch 416/500 - Loss: 2.766965866088867


Epoch 417/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 417/500 - Loss: 2.726764678955078


Epoch 418/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 418/500 - Loss: 2.694937229156494


Epoch 419/500: 100%|██████████| 1/1 [00:09<00:00,  9.70s/it]


Epoch 419/500 - Loss: 2.6842856407165527


Epoch 420/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 420/500 - Loss: 2.809370994567871


Epoch 421/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 421/500 - Loss: 2.745640754699707


Epoch 422/500: 100%|██████████| 1/1 [00:09<00:00,  9.79s/it]


Epoch 422/500 - Loss: 2.673630952835083


Epoch 423/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 423/500 - Loss: 2.6785874366760254


Epoch 424/500: 100%|██████████| 1/1 [00:09<00:00,  9.91s/it]


Epoch 424/500 - Loss: 2.7825822830200195


Epoch 425/500: 100%|██████████| 1/1 [00:09<00:00,  9.62s/it]


Epoch 425/500 - Loss: 2.6936941146850586


Epoch 426/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 426/500 - Loss: 4.090689659118652


Epoch 427/500: 100%|██████████| 1/1 [00:09<00:00,  9.66s/it]


Epoch 427/500 - Loss: 2.96290922164917


Epoch 428/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 428/500 - Loss: 2.668640613555908


Epoch 429/500: 100%|██████████| 1/1 [00:09<00:00,  9.69s/it]


Epoch 429/500 - Loss: 2.87504506111145


Epoch 430/500: 100%|██████████| 1/1 [00:09<00:00,  9.73s/it]


Epoch 430/500 - Loss: 2.710510730743408


Epoch 431/500: 100%|██████████| 1/1 [00:09<00:00,  9.71s/it]


Epoch 431/500 - Loss: 2.6787023544311523


Epoch 432/500: 100%|██████████| 1/1 [00:09<00:00,  9.68s/it]


Epoch 432/500 - Loss: 2.7146973609924316


Epoch 433/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 433/500 - Loss: 2.7000796794891357


Epoch 434/500: 100%|██████████| 1/1 [00:09<00:00,  9.72s/it]


Epoch 434/500 - Loss: 2.883669376373291


Epoch 435/500: 100%|██████████| 1/1 [00:09<00:00,  9.85s/it]


Epoch 435/500 - Loss: 2.7452378273010254


Epoch 436/500: 100%|██████████| 1/1 [00:09<00:00,  9.68s/it]


Epoch 436/500 - Loss: 2.7664237022399902


Epoch 437/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 437/500 - Loss: 2.6677768230438232


Epoch 438/500: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]


Epoch 438/500 - Loss: 2.7522764205932617


Epoch 439/500: 100%|██████████| 1/1 [00:09<00:00,  9.61s/it]


Epoch 439/500 - Loss: 2.8991947174072266


Epoch 440/500: 100%|██████████| 1/1 [00:09<00:00,  9.62s/it]


Epoch 440/500 - Loss: 4.385024547576904


Epoch 441/500: 100%|██████████| 1/1 [00:09<00:00,  9.64s/it]


Epoch 441/500 - Loss: 2.7058658599853516


Epoch 442/500: 100%|██████████| 1/1 [00:09<00:00,  9.61s/it]


Epoch 442/500 - Loss: 2.668532371520996


Epoch 443/500:   0%|          | 0/1 [00:00<?, ?it/s]