<a href="https://colab.research.google.com/github/Strugoeden121/swinAutoDecoder/blob/main/TransformerDecoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Step 1: Install dependencies (if necessary)
!pip install torch torchvision
!pip install tqdm

from typing import Optional, Tuple, Union

import torch
from torch import Tensor
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torchvision import datasets, models

# Step 2: Dataset Preparation
class ImageSequenceDataset(Dataset):
    """Sliding-window dataset that turns MNIST into fixed-length image sequences.

    Sample ``idx`` is the window of ``seq_len`` consecutive MNIST training
    images starting at position ``idx``; the digit labels are discarded.

    Args:
        transform: Optional callable applied to every image. Its outputs must
            be tensors of identical shape because the frames are combined
            with ``torch.stack`` (e.g. ``transforms.ToTensor()``).
        seq_len: Number of consecutive images per sample; must be >= 1.

    Raises:
        ValueError: If ``seq_len`` is smaller than 1.
    """

    def __init__(self, transform=None, seq_len=10):
        if seq_len < 1:
            raise ValueError(f"seq_len must be >= 1, got {seq_len}")
        self.seq_len = seq_len
        self.transform = transform
        # Using MNIST dataset here (you can replace it with other datasets)
        self.data = datasets.MNIST(root='./data', download=True, train=True)

    def __len__(self):
        """Return the number of windows exposed by the dataset."""
        # Clamp at zero: a negative value would make ``len()`` raise
        # ValueError when the window is longer than the underlying data.
        return max(0, len(self.data) - self.seq_len)

    def __getitem__(self, idx):
        """Return ``(frames, target)`` for the window starting at ``idx``.

        Args:
            idx: Window start position; negative values count from the end.

        Returns:
            ``frames``: the ``seq_len`` frames stacked along a new leading
            dimension. ``target``: ``frames[-1]``, i.e. the last frame of the
            window itself.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        num_windows = len(self)
        if idx < 0:
            # Out-of-place add so a tensor index owned by the caller is not mutated.
            idx = idx + num_windows
        if not 0 <= idx < num_windows:
            raise IndexError(
                f"window index out of range for dataset with {num_windows} windows"
            )

        frames = []
        for offset in range(self.seq_len):
            img, _ = self.data[idx + offset]
            if self.transform is not None:
                img = self.transform(img)
            frames.append(img)

        frames = torch.stack(frames)  # (seq_len, *frame_shape)
        # NOTE(review): the target is part of the input window, so a model can
        # solve the task by copying its last input frame. If next-frame
        # prediction was intended, the target should be the frame at
        # ``idx + seq_len`` instead — confirm with the author.
        return frames, frames[-1]

# Step 3: CNN Encoder (to extract features from each frame)
class CNNEncoder(nn.Module):
    """Three-stage convolutional encoder mapping one frame to a feature vector.

    Each stage is a 3x3 same-padding convolution followed by ReLU and 2x2
    max-pooling, so the spatial size is divided by 8 (rounded down) before
    the final linear projection.

    Args:
        input_channels: Number of channels of the input frames.
        feature_dim: Size of the output feature vector; also the channel
            count produced by the last convolution.
        input_size: Spatial size of the input frames, either a single int
            for square frames or a ``(height, width)`` pair. Defaults to 28,
            which gives the 3x3 pooled map of the original implementation.

    Raises:
        ValueError: If the frames are too small to survive three poolings.
    """

    def __init__(self, input_channels=1, feature_dim=256, input_size=28):
        super(CNNEncoder, self).__init__()
        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size

        # Three floor-halvings are equivalent to one floor-division by 8.
        pooled_h, pooled_w = height // 8, width // 8
        if pooled_h < 1 or pooled_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small: each side must be at least 8"
            )

        self.conv1 = nn.Conv2d(input_channels, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, feature_dim, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc = nn.Linear(feature_dim * pooled_h * pooled_w, feature_dim)

    def forward(self, x):
        """Encode a batch of frames.

        Args:
            x: Tensor of shape ``(batch_size, input_channels, height, width)``
                whose spatial size matches ``input_size``.

        Returns:
            Tensor of shape ``(batch_size, feature_dim)``.
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv3(x)))
        x = x.flatten(1)  # (batch_size, feature_dim * pooled_h * pooled_w)
        return self.fc(x)


# Step 4: Causal Transformer Decoder Layer and Decoder
class CausalTransformerDecoderLayer(nn.TransformerDecoderLayer):
    """Decoder layer that is causal in training and incremental in inference.

    In training mode the stock ``nn.TransformerDecoderLayer.forward`` is used
    with a square subsequent mask, so every position is computed at once.
    In evaluation mode only the output for the last position of ``tgt`` is
    computed, attending over the whole of ``tgt``.

    NOTE(review): the evaluation path slices dim 0 as the time axis and
    applies each LayerNorm after its residual sum — presumably the layer is
    meant to be built with ``batch_first=False`` and ``norm_first=False``;
    confirm before changing those options.
    """

    def forward(
        self,
        tgt: Tensor,
        memory: Optional[Tensor] = None,
        memory_mask: Optional[Tensor] = None,
        tgt_key_padding_mask: Optional[Tensor] = None,
        memory_key_padding_mask: Optional[Tensor] = None,
    ) -> Tensor:
        """Run the layer.

        Args:
            tgt: Target sequence; dim 0 is indexed as the sequence axis.
            memory: Encoder output attended to by cross-attention. When it is
                ``None`` in evaluation mode the cross-attention block is skipped.
            memory_mask: Attention mask for cross-attention.
            tgt_key_padding_mask: Key padding mask for self-attention.
            memory_key_padding_mask: Key padding mask for cross-attention.

        Returns:
            Training mode: the result of the parent ``forward`` for all
            positions. Evaluation mode: the output for the last position
            only (length 1 along dim 0).
        """
        if not self.training:
            return self._decode_last_position(
                tgt,
                memory,
                memory_mask,
                tgt_key_padding_mask,
                memory_key_padding_mask,
            )

        causal_mask = generate_square_subsequent_mask(tgt.size(0), tgt.device)
        return super().forward(
            tgt,
            memory,
            tgt_mask=causal_mask,
            memory_mask=memory_mask,
            tgt_key_padding_mask=tgt_key_padding_mask,
            memory_key_padding_mask=memory_key_padding_mask,
        )

    def _decode_last_position(
        self,
        tgt: Tensor,
        memory: Optional[Tensor],
        memory_mask: Optional[Tensor],
        tgt_key_padding_mask: Optional[Tensor],
        memory_key_padding_mask: Optional[Tensor],
    ) -> Tensor:
        """Compute the layer output for the final position of ``tgt`` only."""
        # No attention mask is needed: the last position may see every key.
        newest = tgt[-1:, :, :]
        self_attended, _ = self.self_attn(
            newest,
            tgt,
            tgt,
            attn_mask=None,
            key_padding_mask=tgt_key_padding_mask,
        )
        newest = self.norm1(newest + self.dropout1(self_attended))

        if memory is not None:
            cross_attended, _ = self.multihead_attn(
                newest,
                memory,
                memory,
                attn_mask=memory_mask,
                key_padding_mask=memory_key_padding_mask,
            )
            newest = self.norm2(newest + self.dropout2(cross_attended))

        # Position-wise feed-forward block.
        hidden = self.activation(self.linear1(newest))
        feed_forward = self.linear2(self.dropout(hidden))
        return self.norm3(newest + self.dropout3(feed_forward))

class CausalTransformerDecoder(nn.TransformerDecoder):
    """Stack of decoder layers with a per-layer cache for incremental decoding.

    Training mode runs every layer over the whole target sequence and returns
    a single tensor. Evaluation mode runs one decoding step, returning the
    newest output together with an updated cache of per-layer outputs.
    """

    def forward(
        self,
        tgt: Tensor,
        memory: Optional[Tensor] = None,
        cache: Optional[Tensor] = None,
        memory_mask: Optional[Tensor] = None,
        tgt_key_padding_mask: Optional[Tensor] = None,
        memory_key_padding_mask: Optional[Tensor] = None,
    ) -> Union[Tensor, Tuple[Tensor, Tensor]]:
        """Decode ``tgt`` against ``memory``.

        Args:
            tgt: Target sequence fed to the first layer.
            memory: Encoder output for cross-attention.
            cache: Per-layer outputs of previous decoding steps, indexed by
                layer on dim 0 and concatenated over steps on dim 1. Must be
                ``None`` in training mode.
            memory_mask: Cross-attention mask, forwarded to every layer.
            tgt_key_padding_mask: Self-attention key padding mask. Only
                applied in training mode.
            memory_key_padding_mask: Cross-attention key padding mask,
                forwarded to every layer.

        Returns:
            Training mode: the output tensor of the last layer.
            Evaluation mode: ``(output, new_cache)``, where ``new_cache`` is
            ``cache`` extended along dim 1 with this step's layer outputs.

        Raises:
            ValueError: If ``cache`` is given in training mode.
        """
        output = tgt
        if self.training:
            if cache is not None:
                raise ValueError("cache parameter should be None in training mode")
            for mod in self.layers:
                output = mod(
                    output,
                    memory,
                    memory_mask=memory_mask,
                    tgt_key_padding_mask=tgt_key_padding_mask,
                    # Previously dropped, so padded memory positions were attended to.
                    memory_key_padding_mask=memory_key_padding_mask,
                )
            return output

        new_token_cache = []
        for i, mod in enumerate(self.layers):
            # The memory masks used to be silently ignored here.
            # NOTE(review): tgt_key_padding_mask is still not forwarded in
            # this path because the sequence length seen by deeper layers
            # depends on whether a cache is supplied — confirm the intended
            # handling before wiring it through.
            output = mod(
                output,
                memory,
                memory_mask=memory_mask,
                memory_key_padding_mask=memory_key_padding_mask,
            )
            new_token_cache.append(output)
            if cache is not None:
                # The next layer needs the full history of this layer's outputs.
                output = torch.cat([cache[i], output], dim=0)

        step_cache = torch.stack(new_token_cache, dim=0)
        if cache is not None:
            new_cache = torch.cat([cache, step_cache], dim=1)
        else:
            new_cache = step_cache

        return output, new_cache

def generate_square_subsequent_mask(sz: int, device: str = "cpu") -> torch.Tensor:
    """Build an additive causal attention mask of shape ``(sz, sz)``.

    Entry ``[i, j]`` is ``0.0`` when ``j <= i`` and ``-inf`` when ``j > i``,
    so position ``i`` cannot attend to later positions.

    Args:
        sz: Side length of the square mask.
        device: Device the finished mask is moved to.

    Returns:
        A float32 tensor on ``device``.
    """
    blocked = torch.full((sz, sz), float("-inf"), dtype=torch.float32)
    # Keep -inf strictly above the diagonal; triu zeroes everything else.
    causal = torch.triu(blocked, diagonal=1)
    return causal.to(device=device)

# Step 5: Model Definition (CNN encoder + causal Transformer decoder)
class ImageSequenceModel(nn.Module):
    """Predict a single 1x28x28 image from a sequence of frames.

    Every frame is encoded by ``CNNEncoder``; the resulting feature sequence
    is used both as the decoder target and as the decoder memory, and the
    decoder output at the last time step is projected to image pixels.

    Args:
        seq_len: Accepted for interface compatibility; not used, the
            sequence length is read from the input tensor.
        feature_dim: Width of the frame features and of the decoder.
        output_dim: Accepted for interface compatibility; not used.
    """

    def __init__(self, seq_len=10, feature_dim=256, output_dim=256):
        super(ImageSequenceModel, self).__init__()
        # Single-channel frames (MNIST).
        self.encoder = CNNEncoder(input_channels=1, feature_dim=feature_dim)
        # nn.TransformerDecoder deep-copies the layer it is given, so this
        # attribute acts only as the template. It is kept as a registered
        # submodule so existing state-dict keys do not change.
        self.decoder_layer = CausalTransformerDecoderLayer(d_model=feature_dim, nhead=4)
        self.decoder = CausalTransformerDecoder(self.decoder_layer, num_layers=6)
        self.fc_out = nn.Linear(feature_dim, 1 * 28 * 28)  # one 1x28x28 image

    def forward(self, x):
        """Run the model.

        Args:
            x: Tensor of shape ``(batch_size, seq_len, C, H, W)``.

        Returns:
            Tensor of shape ``(batch_size, 1, 28, 28)``.
        """
        batch_size, seq_len, C, H, W = x.size()

        # Encode all frames in one batched call instead of one call per step.
        frame_features = self.encoder(x.reshape(batch_size * seq_len, C, H, W))
        memory = (
            frame_features.view(batch_size, seq_len, -1).transpose(0, 1).contiguous()
        )  # (seq_len, batch_size, feature_dim)

        tgt = memory  # Use memory as the target as well for simplicity
        decoded = self.decoder(tgt, memory)
        # The decoder returns a bare tensor in training mode and an
        # (output, cache) tuple in evaluation mode. Indexing the result with
        # [0] unconditionally picked the *first* time step while training.
        if isinstance(decoded, tuple):
            decoded = decoded[0]
        last_step = decoded[-1]  # (batch_size, feature_dim)

        output = self.fc_out(last_step)  # (batch_size, 1 * 28 * 28)
        return output.view(batch_size, 1, 28, 28)
from tqdm import tqdm

# Step 6: Training Loop with Debugging Prints and tqdm
def train(model, dataloader, criterion, optimizer, epochs=10):
    """Train ``model`` and report the mean loss of every epoch.

    Args:
        model: Module called as ``model(inputs)``; put into training mode.
        dataloader: Sized iterable yielding ``(inputs, targets)`` batches.
        criterion: Callable ``criterion(outputs, targets)`` returning a
            scalar loss tensor.
        optimizer: Optimizer stepped once per batch.
        epochs: Number of passes over ``dataloader``.

    Returns:
        List with the mean batch loss of each epoch (``nan`` for an epoch
        without batches).
    """
    model.train()
    print("started training")
    num_batches = len(dataloader)
    epoch_losses = []
    for epoch in range(epochs):
        running_loss = 0.0
        # Adding tqdm for batch progress
        progress = tqdm(dataloader, desc=f"Epoch {epoch+1}", leave=False)
        for i, (inputs, targets) in enumerate(progress):
            # Debugging output goes through tqdm.write so the message does
            # not break up the progress bar the way a bare print() does.
            tqdm.write(f"Epoch {epoch+1}, Batch {i+1}/{num_batches}")

            optimizer.zero_grad()
            outputs = model(inputs)

            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Guard the average against an empty dataloader.
        mean_loss = running_loss / num_batches if num_batches else float("nan")
        epoch_losses.append(mean_loss)
        print(f"Epoch {epoch+1}, Loss: {mean_loss}")

    return epoch_losses

# Step 7: Testing Loop with Debugging Prints and tqdm
def test(model, dataloader, criterion=None):
    """Evaluate ``model`` and report the mean batch loss.

    Args:
        model: Module called as ``model(inputs)``; put into evaluation mode.
        dataloader: Sized iterable yielding ``(inputs, targets)`` batches.
        criterion: Optional loss callable; defaults to ``nn.MSELoss()``.

    Returns:
        Mean batch loss as a float (``nan`` when there are no batches).
    """
    model.eval()
    print("started testing")
    if criterion is None:
        # Built once instead of once per batch.
        criterion = nn.MSELoss()
    num_batches = len(dataloader)
    total_loss = 0.0
    with torch.no_grad():
        # Adding tqdm for batch progress
        for i, (inputs, targets) in enumerate(tqdm(dataloader, desc="Testing", leave=False)):
            # tqdm.write keeps the message from breaking up the progress bar.
            tqdm.write(f"Test Batch {i+1}/{num_batches}")

            outputs = model(inputs)
            total_loss += criterion(outputs, targets).item()

    # Guard the average against an empty dataloader.
    mean_loss = total_loss / num_batches if num_batches else float("nan")
    print(f"Test Loss: {mean_loss}")
    return mean_loss

# Step 8: Main Setup (for Google Colab)
# Entry point: builds the data pipeline, model, loss and optimizer, then
# trains for 10 epochs and evaluates.
if __name__ == "__main__":
    # Preprocessing only (no augmentation): resize to 28x28 and convert to a tensor
    transform = transforms.Compose([transforms.Resize((28, 28)), transforms.ToTensor()])

    # Load the MNIST dataset (can be replaced with a custom dataset for sequences)
    dataset = ImageSequenceDataset(transform=transform, seq_len=10)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

    # Initialize model, loss function, and optimizer
    model = ImageSequenceModel(seq_len=10, feature_dim=256, output_dim=256)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)

    # Train and test the model
    # NOTE(review): test() receives the same dataloader that was used for
    # training, so the reported test loss is measured on training data.
    print("Starting training...")
    train(model, dataloader, criterion, optimizer, epochs=10)
    print("Training complete. Starting testing...")
    test(model, dataloader)


# Step 8: Main Setup (for Google Colab)
# NOTE(review): this cell repeats the setup/train/test sequence of the
# previous cell without the status prints. It builds a fresh model and
# optimizer, so running both cells trains twice from scratch — presumably a
# leftover copy; confirm whether it can be dropped.
if __name__ == "__main__":
    # Preprocessing only (no augmentation): resize to 28x28 and convert to a tensor
    transform = transforms.Compose([transforms.Resize((28, 28)), transforms.ToTensor()])

    # Load the MNIST dataset (can be replaced with a custom dataset for sequences)
    dataset = ImageSequenceDataset(transform=transform, seq_len=10)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

    # Initialize model, loss function, and optimizer
    model = ImageSequenceModel(seq_len=10, feature_dim=256, output_dim=256)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)

    # Train and test the model (the evaluation reuses the training dataloader)
    train(model, dataloader, criterion, optimizer, epochs=10)
    test(model, dataloader)


Starting training...
started training


Epoch 1:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch 1, Batch 1/1875


Epoch 1:   0%|          | 1/1875 [00:00<24:04,  1.30it/s]

Epoch 1, Batch 2/1875


Epoch 1:   0%|          | 2/1875 [00:01<23:27,  1.33it/s]

Epoch 1, Batch 3/1875


Epoch 1:   0%|          | 3/1875 [00:02<22:58,  1.36it/s]

Epoch 1, Batch 4/1875


Epoch 1:   0%|          | 4/1875 [00:02<22:36,  1.38it/s]

Epoch 1, Batch 5/1875


Epoch 1:   0%|          | 5/1875 [00:03<22:23,  1.39it/s]

Epoch 1, Batch 6/1875


Epoch 1:   0%|          | 6/1875 [00:04<22:27,  1.39it/s]

Epoch 1, Batch 7/1875


Epoch 1:   0%|          | 7/1875 [00:05<22:28,  1.39it/s]

Epoch 1, Batch 8/1875


Epoch 1:   0%|          | 8/1875 [00:05<22:27,  1.39it/s]

Epoch 1, Batch 9/1875


Epoch 1:   0%|          | 9/1875 [00:06<22:20,  1.39it/s]

Epoch 1, Batch 10/1875


Epoch 1:   1%|          | 10/1875 [00:07<22:14,  1.40it/s]

Epoch 1, Batch 11/1875


Epoch 1:   1%|          | 11/1875 [00:07<22:13,  1.40it/s]

Epoch 1, Batch 12/1875


Epoch 1:   1%|          | 12/1875 [00:08<24:15,  1.28it/s]

Epoch 1, Batch 13/1875


Epoch 1:   1%|          | 13/1875 [00:09<26:18,  1.18it/s]

Epoch 1, Batch 14/1875


Epoch 1:   1%|          | 14/1875 [00:10<28:46,  1.08it/s]

Epoch 1, Batch 15/1875


Epoch 1:   1%|          | 15/1875 [00:11<27:43,  1.12it/s]

Epoch 1, Batch 16/1875


Epoch 1:   1%|          | 16/1875 [00:12<26:51,  1.15it/s]

Epoch 1, Batch 17/1875


Epoch 1:   1%|          | 17/1875 [00:13<25:14,  1.23it/s]

Epoch 1, Batch 18/1875


Epoch 1:   1%|          | 18/1875 [00:14<24:23,  1.27it/s]

Epoch 1, Batch 19/1875


Epoch 1:   1%|          | 19/1875 [00:14<23:52,  1.30it/s]

Epoch 1, Batch 20/1875


Epoch 1:   1%|          | 20/1875 [00:15<23:23,  1.32it/s]

Epoch 1, Batch 21/1875


Epoch 1:   1%|          | 21/1875 [00:16<22:57,  1.35it/s]

Epoch 1, Batch 22/1875


Epoch 1:   1%|          | 22/1875 [00:16<22:30,  1.37it/s]

Epoch 1, Batch 23/1875


Epoch 1:   1%|          | 23/1875 [00:17<22:05,  1.40it/s]

Epoch 1, Batch 24/1875


Epoch 1:   1%|▏         | 24/1875 [00:18<21:54,  1.41it/s]

Epoch 1, Batch 25/1875


Epoch 1:   1%|▏         | 25/1875 [00:18<21:51,  1.41it/s]

Epoch 1, Batch 26/1875


Epoch 1:   1%|▏         | 26/1875 [00:19<21:43,  1.42it/s]

Epoch 1, Batch 27/1875


Epoch 1:   1%|▏         | 27/1875 [00:20<21:35,  1.43it/s]

Epoch 1, Batch 28/1875


Epoch 1:   1%|▏         | 28/1875 [00:21<22:40,  1.36it/s]

Epoch 1, Batch 29/1875


Epoch 1:   2%|▏         | 29/1875 [00:22<24:49,  1.24it/s]

Epoch 1, Batch 30/1875


Epoch 1:   2%|▏         | 30/1875 [00:23<26:47,  1.15it/s]

Epoch 1, Batch 31/1875


Epoch 1:   2%|▏         | 31/1875 [00:24<26:21,  1.17it/s]

Epoch 1, Batch 32/1875


Epoch 1:   2%|▏         | 32/1875 [00:24<24:51,  1.24it/s]

Epoch 1, Batch 33/1875


Epoch 1:   2%|▏         | 33/1875 [00:25<23:51,  1.29it/s]

Epoch 1, Batch 34/1875


Epoch 1:   2%|▏         | 34/1875 [00:26<23:06,  1.33it/s]

Epoch 1, Batch 35/1875


Epoch 1:   2%|▏         | 35/1875 [00:26<22:36,  1.36it/s]

Epoch 1, Batch 36/1875


Epoch 1:   2%|▏         | 36/1875 [00:27<22:13,  1.38it/s]

Epoch 1, Batch 37/1875


Epoch 1:   2%|▏         | 37/1875 [00:28<22:05,  1.39it/s]

Epoch 1, Batch 38/1875


Epoch 1:   2%|▏         | 38/1875 [00:28<21:39,  1.41it/s]

Epoch 1, Batch 39/1875


Epoch 1:   2%|▏         | 39/1875 [00:29<21:31,  1.42it/s]

Epoch 1, Batch 40/1875


Epoch 1:   2%|▏         | 40/1875 [00:30<21:40,  1.41it/s]

Epoch 1, Batch 41/1875


Epoch 1:   2%|▏         | 41/1875 [00:30<21:20,  1.43it/s]

Epoch 1, Batch 42/1875


Epoch 1:   2%|▏         | 42/1875 [00:31<21:48,  1.40it/s]

Epoch 1, Batch 43/1875


Epoch 1:   2%|▏         | 43/1875 [00:32<21:44,  1.40it/s]

Epoch 1, Batch 44/1875


Epoch 1:   2%|▏         | 44/1875 [00:33<21:27,  1.42it/s]

Epoch 1, Batch 45/1875


Epoch 1:   2%|▏         | 45/1875 [00:33<22:40,  1.34it/s]

Epoch 1, Batch 46/1875


Epoch 1:   2%|▏         | 46/1875 [00:34<25:06,  1.21it/s]

Epoch 1, Batch 47/1875


Epoch 1:   3%|▎         | 47/1875 [00:35<26:33,  1.15it/s]

Epoch 1, Batch 48/1875


Epoch 1:   3%|▎         | 48/1875 [00:36<26:08,  1.16it/s]

Epoch 1, Batch 49/1875


Epoch 1:   3%|▎         | 49/1875 [00:37<24:41,  1.23it/s]

Epoch 1, Batch 50/1875


Epoch 1:   3%|▎         | 50/1875 [00:38<23:36,  1.29it/s]

Epoch 1, Batch 51/1875


Epoch 1:   3%|▎         | 51/1875 [00:38<22:59,  1.32it/s]

Epoch 1, Batch 52/1875


Epoch 1:   3%|▎         | 52/1875 [00:39<22:56,  1.32it/s]

Epoch 1, Batch 53/1875


Epoch 1:   3%|▎         | 53/1875 [00:40<22:30,  1.35it/s]

Epoch 1, Batch 54/1875


Epoch 1:   3%|▎         | 54/1875 [00:41<22:05,  1.37it/s]

Epoch 1, Batch 55/1875


Epoch 1:   3%|▎         | 55/1875 [00:41<21:41,  1.40it/s]

Epoch 1, Batch 56/1875


Epoch 1:   3%|▎         | 56/1875 [00:42<21:22,  1.42it/s]

Epoch 1, Batch 57/1875


Epoch 1:   3%|▎         | 57/1875 [00:43<21:16,  1.42it/s]

Epoch 1, Batch 58/1875


Epoch 1:   3%|▎         | 58/1875 [00:43<21:27,  1.41it/s]

Epoch 1, Batch 59/1875


Epoch 1:   3%|▎         | 59/1875 [00:44<21:19,  1.42it/s]

Epoch 1, Batch 60/1875


Epoch 1:   3%|▎         | 60/1875 [00:45<21:33,  1.40it/s]

Epoch 1, Batch 61/1875


Epoch 1:   3%|▎         | 61/1875 [00:45<21:32,  1.40it/s]

Epoch 1, Batch 62/1875


Epoch 1:   3%|▎         | 62/1875 [00:46<22:24,  1.35it/s]

Epoch 1, Batch 63/1875


Epoch 1:   3%|▎         | 63/1875 [00:47<24:21,  1.24it/s]

Epoch 1, Batch 64/1875


Epoch 1:   3%|▎         | 64/1875 [00:48<25:59,  1.16it/s]

Epoch 1, Batch 65/1875


Epoch 1:   3%|▎         | 65/1875 [00:49<25:39,  1.18it/s]

Epoch 1, Batch 66/1875


Epoch 1:   4%|▎         | 66/1875 [00:50<24:36,  1.23it/s]

Epoch 1, Batch 67/1875


Epoch 1:   4%|▎         | 67/1875 [00:50<23:34,  1.28it/s]

Epoch 1, Batch 68/1875


Epoch 1:   4%|▎         | 68/1875 [00:51<22:40,  1.33it/s]

Epoch 1, Batch 69/1875


Epoch 1:   4%|▎         | 69/1875 [00:52<22:10,  1.36it/s]

Epoch 1, Batch 70/1875


Epoch 1:   4%|▎         | 70/1875 [00:53<21:52,  1.38it/s]

Epoch 1, Batch 71/1875


Epoch 1:   4%|▍         | 71/1875 [00:53<21:32,  1.40it/s]

Epoch 1, Batch 72/1875


Epoch 1:   4%|▍         | 72/1875 [00:54<21:19,  1.41it/s]

Epoch 1, Batch 73/1875


Epoch 1:   4%|▍         | 73/1875 [00:55<21:59,  1.37it/s]

Epoch 1, Batch 74/1875


Epoch 1:   4%|▍         | 74/1875 [00:55<21:50,  1.37it/s]

Epoch 1, Batch 75/1875


Epoch 1:   4%|▍         | 75/1875 [00:56<21:41,  1.38it/s]

Epoch 1, Batch 76/1875


Epoch 1:   4%|▍         | 76/1875 [00:57<21:34,  1.39it/s]

Epoch 1, Batch 77/1875


Epoch 1:   4%|▍         | 77/1875 [00:58<21:34,  1.39it/s]

Epoch 1, Batch 78/1875


Epoch 1:   4%|▍         | 78/1875 [00:59<23:14,  1.29it/s]

Epoch 1, Batch 79/1875


Epoch 1:   4%|▍         | 79/1875 [01:00<30:11,  1.01s/it]

Epoch 1, Batch 80/1875


Epoch 1:   4%|▍         | 80/1875 [01:01<31:47,  1.06s/it]

Epoch 1, Batch 81/1875


Epoch 1:   4%|▍         | 81/1875 [01:02<28:55,  1.03it/s]

Epoch 1, Batch 82/1875


Epoch 1:   4%|▍         | 82/1875 [01:03<26:30,  1.13it/s]

Epoch 1, Batch 83/1875


Epoch 1:   4%|▍         | 83/1875 [01:03<24:57,  1.20it/s]

Epoch 1, Batch 84/1875


Epoch 1:   4%|▍         | 84/1875 [01:04<23:44,  1.26it/s]

Epoch 1, Batch 85/1875


Epoch 1:   5%|▍         | 85/1875 [01:05<22:52,  1.30it/s]

Epoch 1, Batch 86/1875


Epoch 1:   5%|▍         | 86/1875 [01:06<22:27,  1.33it/s]

Epoch 1, Batch 87/1875


Epoch 1:   5%|▍         | 87/1875 [01:06<22:02,  1.35it/s]

Epoch 1, Batch 88/1875


Epoch 1:   5%|▍         | 88/1875 [01:07<21:59,  1.35it/s]

Epoch 1, Batch 89/1875


Epoch 1:   5%|▍         | 89/1875 [01:08<21:40,  1.37it/s]

Epoch 1, Batch 90/1875


Epoch 1:   5%|▍         | 90/1875 [01:08<21:24,  1.39it/s]

Epoch 1, Batch 91/1875


Epoch 1:   5%|▍         | 91/1875 [01:09<21:14,  1.40it/s]

Epoch 1, Batch 92/1875


Epoch 1:   5%|▍         | 92/1875 [01:10<20:59,  1.42it/s]

Epoch 1, Batch 93/1875


Epoch 1:   5%|▍         | 93/1875 [01:10<20:55,  1.42it/s]

Epoch 1, Batch 94/1875


Epoch 1:   5%|▌         | 94/1875 [01:11<20:48,  1.43it/s]

Epoch 1, Batch 95/1875


Epoch 1:   5%|▌         | 95/1875 [01:12<22:21,  1.33it/s]

Epoch 1, Batch 96/1875


Epoch 1:   5%|▌         | 96/1875 [01:13<24:09,  1.23it/s]

Epoch 1, Batch 97/1875


Epoch 1:   5%|▌         | 97/1875 [01:14<25:58,  1.14it/s]

Epoch 1, Batch 98/1875


Epoch 1:   5%|▌         | 98/1875 [01:15<24:48,  1.19it/s]

Epoch 1, Batch 99/1875


Epoch 1:   5%|▌         | 99/1875 [01:15<23:44,  1.25it/s]

Epoch 1, Batch 100/1875


Epoch 1:   5%|▌         | 100/1875 [01:16<22:45,  1.30it/s]

Epoch 1, Batch 101/1875


Epoch 1:   5%|▌         | 101/1875 [01:17<22:07,  1.34it/s]

Epoch 1, Batch 102/1875


Epoch 1:   5%|▌         | 102/1875 [01:18<21:41,  1.36it/s]

Epoch 1, Batch 103/1875


Epoch 1:   5%|▌         | 103/1875 [01:18<21:40,  1.36it/s]

Epoch 1, Batch 104/1875


Epoch 1:   6%|▌         | 104/1875 [01:19<21:33,  1.37it/s]

Epoch 1, Batch 105/1875


Epoch 1:   6%|▌         | 105/1875 [01:20<21:23,  1.38it/s]

Epoch 1, Batch 106/1875


Epoch 1:   6%|▌         | 106/1875 [01:20<21:11,  1.39it/s]

Epoch 1, Batch 107/1875


Epoch 1:   6%|▌         | 107/1875 [01:21<21:03,  1.40it/s]

Epoch 1, Batch 108/1875


Epoch 1:   6%|▌         | 108/1875 [01:22<20:58,  1.40it/s]

Epoch 1, Batch 109/1875


Epoch 1:   6%|▌         | 109/1875 [01:23<20:48,  1.41it/s]

Epoch 1, Batch 110/1875


Epoch 1:   6%|▌         | 110/1875 [01:23<20:34,  1.43it/s]

Epoch 1, Batch 111/1875


Epoch 1:   6%|▌         | 111/1875 [01:24<20:43,  1.42it/s]

Epoch 1, Batch 112/1875


Epoch 1:   6%|▌         | 112/1875 [01:25<22:34,  1.30it/s]

Epoch 1, Batch 113/1875


Epoch 1:   6%|▌         | 113/1875 [01:26<24:30,  1.20it/s]

Epoch 1, Batch 114/1875


Epoch 1:   6%|▌         | 114/1875 [01:27<26:16,  1.12it/s]

Epoch 1, Batch 115/1875


Epoch 1:   6%|▌         | 115/1875 [01:28<24:46,  1.18it/s]

Epoch 1, Batch 116/1875


Epoch 1:   6%|▌         | 116/1875 [01:28<23:40,  1.24it/s]

Epoch 1, Batch 117/1875


Epoch 1:   6%|▌         | 117/1875 [01:29<23:01,  1.27it/s]

Epoch 1, Batch 118/1875


Epoch 1:   6%|▋         | 118/1875 [01:30<22:29,  1.30it/s]

Epoch 1, Batch 119/1875


Epoch 1:   6%|▋         | 119/1875 [01:31<22:05,  1.32it/s]

Epoch 1, Batch 120/1875


Epoch 1:   6%|▋         | 120/1875 [01:31<21:42,  1.35it/s]

Epoch 1, Batch 121/1875


Epoch 1:   6%|▋         | 121/1875 [01:32<21:21,  1.37it/s]

Epoch 1, Batch 122/1875


Epoch 1:   7%|▋         | 122/1875 [01:33<21:10,  1.38it/s]

Epoch 1, Batch 123/1875


Epoch 1:   7%|▋         | 123/1875 [01:33<21:09,  1.38it/s]

Epoch 1, Batch 124/1875


Epoch 1:   7%|▋         | 124/1875 [01:34<21:01,  1.39it/s]

Epoch 1, Batch 125/1875


Epoch 1:   7%|▋         | 125/1875 [01:35<21:12,  1.38it/s]

Epoch 1, Batch 126/1875


Epoch 1:   7%|▋         | 126/1875 [01:36<21:06,  1.38it/s]

Epoch 1, Batch 127/1875


Epoch 1:   7%|▋         | 127/1875 [01:36<21:06,  1.38it/s]

Epoch 1, Batch 128/1875


Epoch 1:   7%|▋         | 128/1875 [01:37<20:57,  1.39it/s]

Epoch 1, Batch 129/1875


Epoch 1:   7%|▋         | 129/1875 [01:38<23:26,  1.24it/s]

Epoch 1, Batch 130/1875


Epoch 1:   7%|▋         | 130/1875 [01:39<25:11,  1.15it/s]

Epoch 1, Batch 131/1875


Epoch 1:   7%|▋         | 131/1875 [01:40<25:55,  1.12it/s]

Epoch 1, Batch 132/1875


Epoch 1:   7%|▋         | 132/1875 [01:41<24:41,  1.18it/s]

Epoch 1, Batch 133/1875


Epoch 1:   7%|▋         | 133/1875 [01:41<23:39,  1.23it/s]

Epoch 1, Batch 134/1875


Epoch 1:   7%|▋         | 134/1875 [01:42<22:32,  1.29it/s]

Epoch 1, Batch 135/1875


Epoch 1:   7%|▋         | 135/1875 [01:43<22:00,  1.32it/s]

Epoch 1, Batch 136/1875


Epoch 1:   7%|▋         | 136/1875 [01:44<21:42,  1.34it/s]

Epoch 1, Batch 137/1875


Epoch 1:   7%|▋         | 137/1875 [01:44<21:14,  1.36it/s]

Epoch 1, Batch 138/1875


Epoch 1:   7%|▋         | 138/1875 [01:45<21:04,  1.37it/s]

Epoch 1, Batch 139/1875


Epoch 1:   7%|▋         | 139/1875 [01:46<20:59,  1.38it/s]

Epoch 1, Batch 140/1875


Epoch 1:   7%|▋         | 140/1875 [01:46<20:43,  1.40it/s]

Epoch 1, Batch 141/1875


Epoch 1:   8%|▊         | 141/1875 [01:47<20:40,  1.40it/s]

Epoch 1, Batch 142/1875


Epoch 1:   8%|▊         | 142/1875 [01:48<20:36,  1.40it/s]

Epoch 1, Batch 143/1875


Epoch 1:   8%|▊         | 143/1875 [01:49<20:32,  1.41it/s]

Epoch 1, Batch 144/1875


Epoch 1:   8%|▊         | 144/1875 [01:49<20:32,  1.40it/s]

Epoch 1, Batch 145/1875


Epoch 1:   8%|▊         | 145/1875 [01:50<21:11,  1.36it/s]

Epoch 1, Batch 146/1875


Epoch 1:   8%|▊         | 146/1875 [01:51<23:22,  1.23it/s]

Epoch 1, Batch 147/1875


Epoch 1:   8%|▊         | 147/1875 [01:52<25:01,  1.15it/s]

Epoch 1, Batch 148/1875


Epoch 1:   8%|▊         | 148/1875 [01:53<25:15,  1.14it/s]

Epoch 1, Batch 149/1875


Epoch 1:   8%|▊         | 149/1875 [01:54<23:53,  1.20it/s]

Epoch 1, Batch 150/1875


Epoch 1:   8%|▊         | 150/1875 [01:54<23:00,  1.25it/s]

Epoch 1, Batch 151/1875


Epoch 1:   8%|▊         | 151/1875 [01:55<22:21,  1.28it/s]

Epoch 1, Batch 152/1875


Epoch 1:   8%|▊         | 152/1875 [01:56<21:56,  1.31it/s]

Epoch 1, Batch 153/1875


Epoch 1:   8%|▊         | 153/1875 [01:57<21:39,  1.33it/s]

Epoch 1, Batch 154/1875


Epoch 1:   8%|▊         | 154/1875 [01:57<21:26,  1.34it/s]

Epoch 1, Batch 155/1875


Epoch 1:   8%|▊         | 155/1875 [01:58<21:09,  1.35it/s]

Epoch 1, Batch 156/1875


Epoch 1:   8%|▊         | 156/1875 [01:59<20:56,  1.37it/s]

Epoch 1, Batch 157/1875


Epoch 1:   8%|▊         | 157/1875 [01:59<20:53,  1.37it/s]

Epoch 1, Batch 158/1875


Epoch 1:   8%|▊         | 158/1875 [02:00<20:48,  1.38it/s]

Epoch 1, Batch 159/1875


Epoch 1:   8%|▊         | 159/1875 [02:01<21:10,  1.35it/s]

Epoch 1, Batch 160/1875


Epoch 1:   9%|▊         | 160/1875 [02:02<21:04,  1.36it/s]

Epoch 1, Batch 161/1875


Epoch 1:   9%|▊         | 161/1875 [02:02<20:55,  1.37it/s]

Epoch 1, Batch 162/1875


Epoch 1:   9%|▊         | 162/1875 [02:03<22:47,  1.25it/s]

Epoch 1, Batch 163/1875


Epoch 1:   9%|▊         | 163/1875 [02:04<24:29,  1.16it/s]

Epoch 1, Batch 164/1875


Epoch 1:   9%|▊         | 164/1875 [02:05<26:16,  1.09it/s]

Epoch 1, Batch 165/1875


Epoch 1:   9%|▉         | 165/1875 [02:06<25:10,  1.13it/s]

Epoch 1, Batch 166/1875


Epoch 1:   9%|▉         | 166/1875 [02:07<23:58,  1.19it/s]

Epoch 1, Batch 167/1875


Epoch 1:   9%|▉         | 167/1875 [02:08<23:00,  1.24it/s]

Epoch 1, Batch 168/1875


Epoch 1:   9%|▉         | 168/1875 [02:08<22:02,  1.29it/s]

Epoch 1, Batch 169/1875


Epoch 1:   9%|▉         | 169/1875 [02:09<21:37,  1.31it/s]

Epoch 1, Batch 170/1875


Epoch 1:   9%|▉         | 170/1875 [02:10<21:18,  1.33it/s]

Epoch 1, Batch 171/1875


Epoch 1:   9%|▉         | 171/1875 [02:11<21:02,  1.35it/s]

Epoch 1, Batch 172/1875


Epoch 1:   9%|▉         | 172/1875 [02:11<20:51,  1.36it/s]

Epoch 1, Batch 173/1875


Epoch 1:   9%|▉         | 173/1875 [02:12<20:44,  1.37it/s]

Epoch 1, Batch 174/1875


Epoch 1:   9%|▉         | 174/1875 [02:13<20:40,  1.37it/s]

Epoch 1, Batch 175/1875


Epoch 1:   9%|▉         | 175/1875 [02:13<20:31,  1.38it/s]

Epoch 1, Batch 176/1875


Epoch 1:   9%|▉         | 176/1875 [02:14<20:19,  1.39it/s]

Epoch 1, Batch 177/1875


Epoch 1:   9%|▉         | 177/1875 [02:15<20:19,  1.39it/s]

Epoch 1, Batch 178/1875


Epoch 1:   9%|▉         | 178/1875 [02:16<20:37,  1.37it/s]

Epoch 1, Batch 179/1875


Epoch 1:  10%|▉         | 179/1875 [02:17<22:51,  1.24it/s]

Epoch 1, Batch 180/1875


Epoch 1:  10%|▉         | 180/1875 [02:18<24:44,  1.14it/s]

Epoch 1, Batch 181/1875


Epoch 1:  10%|▉         | 181/1875 [02:19<25:35,  1.10it/s]

Epoch 1, Batch 182/1875


Epoch 1:  10%|▉         | 182/1875 [02:19<24:05,  1.17it/s]

Epoch 1, Batch 183/1875


Epoch 1:  10%|▉         | 183/1875 [02:20<22:51,  1.23it/s]

Epoch 1, Batch 184/1875


Epoch 1:  10%|▉         | 184/1875 [02:21<21:55,  1.28it/s]

Epoch 1, Batch 185/1875


Epoch 1:  10%|▉         | 185/1875 [02:21<21:30,  1.31it/s]

Epoch 1, Batch 186/1875


Epoch 1:  10%|▉         | 186/1875 [02:22<21:02,  1.34it/s]

Epoch 1, Batch 187/1875


Epoch 1:  10%|▉         | 187/1875 [02:23<20:46,  1.35it/s]

Epoch 1, Batch 188/1875


Epoch 1:  10%|█         | 188/1875 [02:24<20:29,  1.37it/s]

Epoch 1, Batch 189/1875


Epoch 1:  10%|█         | 189/1875 [02:24<20:15,  1.39it/s]

Epoch 1, Batch 190/1875


Epoch 1:  10%|█         | 190/1875 [02:25<20:09,  1.39it/s]

Epoch 1, Batch 191/1875


Epoch 1:  10%|█         | 191/1875 [02:26<19:57,  1.41it/s]

Epoch 1, Batch 192/1875


Epoch 1:  10%|█         | 192/1875 [02:26<19:50,  1.41it/s]

Epoch 1, Batch 193/1875


Epoch 1:  10%|█         | 193/1875 [02:27<19:55,  1.41it/s]

Epoch 1, Batch 194/1875


Epoch 1:  10%|█         | 194/1875 [02:28<19:57,  1.40it/s]

Epoch 1, Batch 195/1875


Epoch 1:  10%|█         | 195/1875 [02:29<20:36,  1.36it/s]

Epoch 1, Batch 196/1875


Epoch 1:  10%|█         | 196/1875 [02:30<22:52,  1.22it/s]

Epoch 1, Batch 197/1875


Epoch 1:  11%|█         | 197/1875 [02:31<24:22,  1.15it/s]

Epoch 1, Batch 198/1875


Epoch 1:  11%|█         | 198/1875 [02:32<24:39,  1.13it/s]

Epoch 1, Batch 199/1875


Epoch 1:  11%|█         | 199/1875 [02:32<23:13,  1.20it/s]

Epoch 1, Batch 200/1875


Epoch 1:  11%|█         | 200/1875 [02:33<22:08,  1.26it/s]

Epoch 1, Batch 201/1875


Epoch 1:  11%|█         | 201/1875 [02:34<21:43,  1.28it/s]

Epoch 1, Batch 202/1875


Epoch 1:  11%|█         | 202/1875 [02:34<21:12,  1.31it/s]

Epoch 1, Batch 203/1875


Epoch 1:  11%|█         | 203/1875 [02:35<20:50,  1.34it/s]

Epoch 1, Batch 204/1875


Epoch 1:  11%|█         | 204/1875 [02:36<20:36,  1.35it/s]

Epoch 1, Batch 205/1875


Epoch 1:  11%|█         | 205/1875 [02:37<20:29,  1.36it/s]

Epoch 1, Batch 206/1875


Epoch 1:  11%|█         | 206/1875 [02:37<20:14,  1.37it/s]

Epoch 1, Batch 207/1875


Epoch 1:  11%|█         | 207/1875 [02:38<20:11,  1.38it/s]

Epoch 1, Batch 208/1875


Epoch 1:  11%|█         | 208/1875 [02:39<20:16,  1.37it/s]

Epoch 1, Batch 209/1875


Epoch 1:  11%|█         | 209/1875 [02:40<20:26,  1.36it/s]

Epoch 1, Batch 210/1875


Epoch 1:  11%|█         | 210/1875 [02:40<20:23,  1.36it/s]

Epoch 1, Batch 211/1875


Epoch 1:  11%|█▏        | 211/1875 [02:41<20:21,  1.36it/s]

Epoch 1, Batch 212/1875


Epoch 1:  11%|█▏        | 212/1875 [02:42<21:52,  1.27it/s]

Epoch 1, Batch 213/1875


Epoch 1:  11%|█▏        | 213/1875 [02:43<23:23,  1.18it/s]

Epoch 1, Batch 214/1875


Epoch 1:  11%|█▏        | 214/1875 [02:44<25:01,  1.11it/s]

Epoch 1, Batch 215/1875


Epoch 1:  11%|█▏        | 215/1875 [02:45<23:54,  1.16it/s]

Epoch 1, Batch 216/1875


Epoch 1:  12%|█▏        | 216/1875 [02:45<22:43,  1.22it/s]

Epoch 1, Batch 217/1875


Epoch 1:  12%|█▏        | 217/1875 [02:46<21:50,  1.27it/s]

Epoch 1, Batch 218/1875


Epoch 1:  12%|█▏        | 218/1875 [02:47<21:09,  1.31it/s]

Epoch 1, Batch 219/1875


Epoch 1:  12%|█▏        | 219/1875 [02:48<20:38,  1.34it/s]

Epoch 1, Batch 220/1875


Epoch 1:  12%|█▏        | 220/1875 [02:48<20:15,  1.36it/s]

Epoch 1, Batch 221/1875


Epoch 1:  12%|█▏        | 221/1875 [02:49<20:08,  1.37it/s]

Epoch 1, Batch 222/1875


Epoch 1:  12%|█▏        | 222/1875 [02:50<20:04,  1.37it/s]

Epoch 1, Batch 223/1875


Epoch 1:  12%|█▏        | 223/1875 [02:50<20:00,  1.38it/s]

Epoch 1, Batch 224/1875


Epoch 1:  12%|█▏        | 224/1875 [02:51<19:57,  1.38it/s]

Epoch 1, Batch 225/1875


Epoch 1:  12%|█▏        | 225/1875 [02:52<19:55,  1.38it/s]

Epoch 1, Batch 226/1875


Epoch 1:  12%|█▏        | 226/1875 [02:53<20:05,  1.37it/s]

Epoch 1, Batch 227/1875


Epoch 1:  12%|█▏        | 227/1875 [02:53<20:06,  1.37it/s]

Epoch 1, Batch 228/1875


Epoch 1:  12%|█▏        | 228/1875 [02:54<20:12,  1.36it/s]

Epoch 1, Batch 229/1875


Epoch 1:  12%|█▏        | 229/1875 [02:55<22:21,  1.23it/s]

Epoch 1, Batch 230/1875


Epoch 1:  12%|█▏        | 230/1875 [02:56<23:52,  1.15it/s]

Epoch 1, Batch 231/1875


Epoch 1:  12%|█▏        | 231/1875 [02:57<24:36,  1.11it/s]

Epoch 1, Batch 232/1875


Epoch 1:  12%|█▏        | 232/1875 [02:58<23:09,  1.18it/s]

Epoch 1, Batch 233/1875


Epoch 1:  12%|█▏        | 233/1875 [02:59<22:04,  1.24it/s]

Epoch 1, Batch 234/1875


Epoch 1:  12%|█▏        | 234/1875 [02:59<21:14,  1.29it/s]

Epoch 1, Batch 235/1875


Epoch 1:  13%|█▎        | 235/1875 [03:00<20:52,  1.31it/s]

Epoch 1, Batch 236/1875


Epoch 1:  13%|█▎        | 236/1875 [03:01<20:40,  1.32it/s]

Epoch 1, Batch 237/1875


Epoch 1:  13%|█▎        | 237/1875 [03:01<20:24,  1.34it/s]

Epoch 1, Batch 238/1875


Epoch 1:  13%|█▎        | 238/1875 [03:02<20:03,  1.36it/s]

Epoch 1, Batch 239/1875


Epoch 1:  13%|█▎        | 239/1875 [03:03<19:59,  1.36it/s]

Epoch 1, Batch 240/1875


Epoch 1:  13%|█▎        | 240/1875 [03:04<20:08,  1.35it/s]

Epoch 1, Batch 241/1875


Epoch 1:  13%|█▎        | 241/1875 [03:04<20:11,  1.35it/s]

Epoch 1, Batch 242/1875


Epoch 1:  13%|█▎        | 242/1875 [03:05<20:10,  1.35it/s]

Epoch 1, Batch 243/1875


Epoch 1:  13%|█▎        | 243/1875 [03:06<20:00,  1.36it/s]

Epoch 1, Batch 244/1875


Epoch 1:  13%|█▎        | 244/1875 [03:07<19:55,  1.36it/s]

Epoch 1, Batch 245/1875


Epoch 1:  13%|█▎        | 245/1875 [03:07<20:51,  1.30it/s]

Epoch 1, Batch 246/1875


Epoch 1:  13%|█▎        | 246/1875 [03:08<22:29,  1.21it/s]

Epoch 1, Batch 247/1875


Epoch 1:  13%|█▎        | 247/1875 [03:09<24:01,  1.13it/s]

Epoch 1, Batch 248/1875


Epoch 1:  13%|█▎        | 248/1875 [03:10<23:20,  1.16it/s]

Epoch 1, Batch 249/1875


Epoch 1:  13%|█▎        | 249/1875 [03:11<22:03,  1.23it/s]

Epoch 1, Batch 250/1875


Epoch 1:  13%|█▎        | 250/1875 [03:12<21:06,  1.28it/s]

Epoch 1, Batch 251/1875


Epoch 1:  13%|█▎        | 251/1875 [03:12<20:31,  1.32it/s]

Epoch 1, Batch 252/1875


Epoch 1:  13%|█▎        | 252/1875 [03:13<20:06,  1.34it/s]

Epoch 1, Batch 253/1875


Epoch 1:  13%|█▎        | 253/1875 [03:14<19:45,  1.37it/s]

Epoch 1, Batch 254/1875


Epoch 1:  14%|█▎        | 254/1875 [03:14<19:30,  1.39it/s]

Epoch 1, Batch 255/1875


Epoch 1:  14%|█▎        | 255/1875 [03:15<19:28,  1.39it/s]

Epoch 1, Batch 256/1875


Epoch 1:  14%|█▎        | 256/1875 [03:16<19:14,  1.40it/s]

Epoch 1, Batch 257/1875


Epoch 1:  14%|█▎        | 257/1875 [03:17<19:18,  1.40it/s]

Epoch 1, Batch 258/1875


Epoch 1:  14%|█▍        | 258/1875 [03:17<19:23,  1.39it/s]

Epoch 1, Batch 259/1875


Epoch 1:  14%|█▍        | 259/1875 [03:18<19:23,  1.39it/s]

Epoch 1, Batch 260/1875


Epoch 1:  14%|█▍        | 260/1875 [03:19<19:16,  1.40it/s]

Epoch 1, Batch 261/1875


Epoch 1:  14%|█▍        | 261/1875 [03:19<19:14,  1.40it/s]

Epoch 1, Batch 262/1875


Epoch 1:  14%|█▍        | 262/1875 [03:20<20:13,  1.33it/s]

Epoch 1, Batch 263/1875


Epoch 1:  14%|█▍        | 263/1875 [03:21<21:50,  1.23it/s]

Epoch 1, Batch 264/1875


Epoch 1:  14%|█▍        | 264/1875 [03:22<23:05,  1.16it/s]

Epoch 1, Batch 265/1875


Epoch 1:  14%|█▍        | 265/1875 [03:23<22:31,  1.19it/s]

Epoch 1, Batch 266/1875


Epoch 1:  14%|█▍        | 266/1875 [03:24<21:41,  1.24it/s]

Epoch 1, Batch 267/1875


Epoch 1:  14%|█▍        | 267/1875 [03:24<20:55,  1.28it/s]

Epoch 1, Batch 268/1875


Epoch 1:  14%|█▍        | 268/1875 [03:25<20:06,  1.33it/s]

Epoch 1, Batch 269/1875


Epoch 1:  14%|█▍        | 269/1875 [03:26<19:46,  1.35it/s]

Epoch 1, Batch 270/1875


Epoch 1:  14%|█▍        | 270/1875 [03:26<19:25,  1.38it/s]

Epoch 1, Batch 271/1875


Epoch 1:  14%|█▍        | 271/1875 [03:27<19:11,  1.39it/s]

Epoch 1, Batch 272/1875


Epoch 1:  15%|█▍        | 272/1875 [03:28<19:03,  1.40it/s]

Epoch 1, Batch 273/1875


Epoch 1:  15%|█▍        | 273/1875 [03:29<18:57,  1.41it/s]

Epoch 1, Batch 274/1875


Epoch 1:  15%|█▍        | 274/1875 [03:29<18:54,  1.41it/s]

Epoch 1, Batch 275/1875


Epoch 1:  15%|█▍        | 275/1875 [03:30<18:57,  1.41it/s]

Epoch 1, Batch 276/1875


Epoch 1:  15%|█▍        | 276/1875 [03:31<18:55,  1.41it/s]

Epoch 1, Batch 277/1875


Epoch 1:  15%|█▍        | 277/1875 [03:31<18:49,  1.41it/s]

Epoch 1, Batch 278/1875


Epoch 1:  15%|█▍        | 278/1875 [03:32<18:48,  1.42it/s]

Epoch 1, Batch 279/1875


Epoch 1:  15%|█▍        | 279/1875 [03:33<20:00,  1.33it/s]

Epoch 1, Batch 280/1875


Epoch 1:  15%|█▍        | 280/1875 [03:34<21:44,  1.22it/s]

Epoch 1, Batch 281/1875


Epoch 1:  15%|█▍        | 281/1875 [03:35<23:16,  1.14it/s]

Epoch 1, Batch 282/1875


Epoch 1:  15%|█▌        | 282/1875 [03:36<22:45,  1.17it/s]

Epoch 1, Batch 283/1875


Epoch 1:  15%|█▌        | 283/1875 [03:37<21:39,  1.23it/s]

Epoch 1, Batch 284/1875


Epoch 1:  15%|█▌        | 284/1875 [03:37<20:59,  1.26it/s]

Epoch 1, Batch 285/1875


Epoch 1:  15%|█▌        | 285/1875 [03:38<20:18,  1.31it/s]

Epoch 1, Batch 286/1875


Epoch 1:  15%|█▌        | 286/1875 [03:39<19:48,  1.34it/s]

Epoch 1, Batch 287/1875


Epoch 1:  15%|█▌        | 287/1875 [03:39<19:43,  1.34it/s]

Epoch 1, Batch 288/1875


Epoch 1:  15%|█▌        | 288/1875 [03:40<19:28,  1.36it/s]

Epoch 1, Batch 289/1875


Epoch 1:  15%|█▌        | 289/1875 [03:41<19:15,  1.37it/s]

Epoch 1, Batch 290/1875


Epoch 1:  15%|█▌        | 290/1875 [03:42<19:04,  1.38it/s]

Epoch 1, Batch 291/1875


Epoch 1:  16%|█▌        | 291/1875 [03:42<18:57,  1.39it/s]

Epoch 1, Batch 292/1875


Epoch 1:  16%|█▌        | 292/1875 [03:43<18:58,  1.39it/s]

Epoch 1, Batch 293/1875


Epoch 1:  16%|█▌        | 293/1875 [03:44<18:51,  1.40it/s]

Epoch 1, Batch 294/1875


Epoch 1:  16%|█▌        | 294/1875 [03:44<18:41,  1.41it/s]

Epoch 1, Batch 295/1875


Epoch 1:  16%|█▌        | 295/1875 [03:45<18:38,  1.41it/s]

Epoch 1, Batch 296/1875


Epoch 1:  16%|█▌        | 296/1875 [03:46<20:16,  1.30it/s]

Epoch 1, Batch 297/1875


Epoch 1:  16%|█▌        | 297/1875 [03:47<21:46,  1.21it/s]

Epoch 1, Batch 298/1875


Epoch 1:  16%|█▌        | 298/1875 [03:48<23:04,  1.14it/s]

Epoch 1, Batch 299/1875


Epoch 1:  16%|█▌        | 299/1875 [03:49<22:12,  1.18it/s]

Epoch 1, Batch 300/1875


Epoch 1:  16%|█▌        | 300/1875 [03:49<21:09,  1.24it/s]

Epoch 1, Batch 301/1875


Epoch 1:  16%|█▌        | 301/1875 [03:50<20:11,  1.30it/s]

Epoch 1, Batch 302/1875


Epoch 1:  16%|█▌        | 302/1875 [03:51<19:49,  1.32it/s]

Epoch 1, Batch 303/1875


Epoch 1:  16%|█▌        | 303/1875 [03:52<19:26,  1.35it/s]

Epoch 1, Batch 304/1875


Epoch 1:  16%|█▌        | 304/1875 [03:52<19:06,  1.37it/s]

Epoch 1, Batch 305/1875


Epoch 1:  16%|█▋        | 305/1875 [03:53<18:51,  1.39it/s]

Epoch 1, Batch 306/1875


Epoch 1:  16%|█▋        | 306/1875 [03:54<18:48,  1.39it/s]

Epoch 1, Batch 307/1875


Epoch 1:  16%|█▋        | 307/1875 [03:54<18:43,  1.40it/s]

Epoch 1, Batch 308/1875


Epoch 1:  16%|█▋        | 308/1875 [03:55<18:31,  1.41it/s]

Epoch 1, Batch 309/1875


Epoch 1:  16%|█▋        | 309/1875 [03:56<18:29,  1.41it/s]

Epoch 1, Batch 310/1875


Epoch 1:  17%|█▋        | 310/1875 [03:56<18:24,  1.42it/s]

Epoch 1, Batch 311/1875


Epoch 1:  17%|█▋        | 311/1875 [03:57<18:23,  1.42it/s]

Epoch 1, Batch 312/1875


Epoch 1:  17%|█▋        | 312/1875 [03:58<18:27,  1.41it/s]

Epoch 1, Batch 313/1875


Epoch 1:  17%|█▋        | 313/1875 [03:59<20:14,  1.29it/s]

Epoch 1, Batch 314/1875


Epoch 1:  17%|█▋        | 314/1875 [04:00<21:59,  1.18it/s]

Epoch 1, Batch 315/1875


Epoch 1:  17%|█▋        | 315/1875 [04:02<31:22,  1.21s/it]

Epoch 1, Batch 316/1875


Epoch 1:  17%|█▋        | 316/1875 [04:03<33:25,  1.29s/it]

Epoch 1, Batch 317/1875


Epoch 1:  17%|█▋        | 317/1875 [04:04<29:04,  1.12s/it]

Epoch 1, Batch 318/1875


Epoch 1:  17%|█▋        | 318/1875 [04:05<25:59,  1.00s/it]

Epoch 1, Batch 319/1875


Epoch 1:  17%|█▋        | 319/1875 [04:06<23:51,  1.09it/s]

Epoch 1, Batch 320/1875


Epoch 1:  17%|█▋        | 320/1875 [04:06<22:20,  1.16it/s]

Epoch 1, Batch 321/1875


Epoch 1:  17%|█▋        | 321/1875 [04:07<21:06,  1.23it/s]

Epoch 1, Batch 322/1875


Epoch 1:  17%|█▋        | 322/1875 [04:08<20:17,  1.28it/s]

Epoch 1, Batch 323/1875


Epoch 1:  17%|█▋        | 323/1875 [04:08<19:37,  1.32it/s]

Epoch 1, Batch 324/1875


Epoch 1:  17%|█▋        | 324/1875 [04:09<19:15,  1.34it/s]

Epoch 1, Batch 325/1875


Epoch 1:  17%|█▋        | 325/1875 [04:10<18:58,  1.36it/s]

Epoch 1, Batch 326/1875


Epoch 1:  17%|█▋        | 326/1875 [04:10<18:37,  1.39it/s]

Epoch 1, Batch 327/1875


Epoch 1:  17%|█▋        | 327/1875 [04:11<19:03,  1.35it/s]

Epoch 1, Batch 328/1875


Epoch 1:  17%|█▋        | 328/1875 [04:12<21:01,  1.23it/s]

Epoch 1, Batch 329/1875


Epoch 1:  18%|█▊        | 329/1875 [04:13<22:33,  1.14it/s]

Epoch 1, Batch 330/1875


Epoch 1:  18%|█▊        | 330/1875 [04:14<22:48,  1.13it/s]

Epoch 1, Batch 331/1875


Epoch 1:  18%|█▊        | 331/1875 [04:15<21:21,  1.21it/s]

Epoch 1, Batch 332/1875


Epoch 1:  18%|█▊        | 332/1875 [04:16<20:25,  1.26it/s]

Epoch 1, Batch 333/1875


Epoch 1:  18%|█▊        | 333/1875 [04:16<19:43,  1.30it/s]

Epoch 1, Batch 334/1875


Epoch 1:  18%|█▊        | 334/1875 [04:17<19:09,  1.34it/s]

Epoch 1, Batch 335/1875


Epoch 1:  18%|█▊        | 335/1875 [04:18<18:47,  1.37it/s]

Epoch 1, Batch 336/1875


Epoch 1:  18%|█▊        | 336/1875 [04:18<18:39,  1.37it/s]

Epoch 1, Batch 337/1875


Epoch 1:  18%|█▊        | 337/1875 [04:19<18:24,  1.39it/s]

Epoch 1, Batch 338/1875


Epoch 1:  18%|█▊        | 338/1875 [04:20<18:27,  1.39it/s]

Epoch 1, Batch 339/1875


Epoch 1:  18%|█▊        | 339/1875 [04:21<18:24,  1.39it/s]

Epoch 1, Batch 340/1875


Epoch 1:  18%|█▊        | 340/1875 [04:21<18:27,  1.39it/s]

Epoch 1, Batch 341/1875


Epoch 1:  18%|█▊        | 341/1875 [04:22<18:10,  1.41it/s]

Epoch 1, Batch 342/1875


Epoch 1:  18%|█▊        | 342/1875 [04:23<18:05,  1.41it/s]

Epoch 1, Batch 343/1875


Epoch 1:  18%|█▊        | 343/1875 [04:23<18:04,  1.41it/s]

Epoch 1, Batch 344/1875


Epoch 1:  18%|█▊        | 344/1875 [04:24<18:26,  1.38it/s]

Epoch 1, Batch 345/1875


Epoch 1:  18%|█▊        | 345/1875 [04:25<20:18,  1.26it/s]

Epoch 1, Batch 346/1875


Epoch 1:  18%|█▊        | 346/1875 [04:26<21:44,  1.17it/s]

Epoch 1, Batch 347/1875


Epoch 1:  19%|█▊        | 347/1875 [04:27<22:02,  1.16it/s]

Epoch 1, Batch 348/1875


Epoch 1:  19%|█▊        | 348/1875 [04:28<20:53,  1.22it/s]

Epoch 1, Batch 349/1875


Epoch 1:  19%|█▊        | 349/1875 [04:28<19:53,  1.28it/s]

Epoch 1, Batch 350/1875


Epoch 1:  19%|█▊        | 350/1875 [04:29<19:13,  1.32it/s]

Epoch 1, Batch 351/1875


Epoch 1:  19%|█▊        | 351/1875 [04:30<18:53,  1.35it/s]

Epoch 1, Batch 352/1875


Epoch 1:  19%|█▉        | 352/1875 [04:31<18:41,  1.36it/s]

Epoch 1, Batch 353/1875


Epoch 1:  19%|█▉        | 353/1875 [04:31<18:34,  1.37it/s]

Epoch 1, Batch 354/1875


Epoch 1:  19%|█▉        | 354/1875 [04:32<18:35,  1.36it/s]

Epoch 1, Batch 355/1875


Epoch 1:  19%|█▉        | 355/1875 [04:33<18:26,  1.37it/s]

Epoch 1, Batch 356/1875


Epoch 1:  19%|█▉        | 356/1875 [04:33<18:11,  1.39it/s]

Epoch 1, Batch 357/1875


Epoch 1:  19%|█▉        | 357/1875 [04:34<18:08,  1.40it/s]

Epoch 1, Batch 358/1875


Epoch 1:  19%|█▉        | 358/1875 [04:35<18:09,  1.39it/s]

Epoch 1, Batch 359/1875


Epoch 1:  19%|█▉        | 359/1875 [04:36<18:08,  1.39it/s]

Epoch 1, Batch 360/1875


Epoch 1:  19%|█▉        | 360/1875 [04:36<17:57,  1.41it/s]

Epoch 1, Batch 361/1875


Epoch 1:  19%|█▉        | 361/1875 [04:37<18:43,  1.35it/s]

Epoch 1, Batch 362/1875


Epoch 1:  19%|█▉        | 362/1875 [04:38<20:31,  1.23it/s]

Epoch 1, Batch 363/1875


Epoch 1:  19%|█▉        | 363/1875 [04:39<21:53,  1.15it/s]

Epoch 1, Batch 364/1875


Epoch 1:  19%|█▉        | 364/1875 [04:40<21:46,  1.16it/s]

Epoch 1, Batch 365/1875


Epoch 1:  19%|█▉        | 365/1875 [04:41<20:33,  1.22it/s]

Epoch 1, Batch 366/1875


Epoch 1:  20%|█▉        | 366/1875 [04:41<19:48,  1.27it/s]

Epoch 1, Batch 367/1875


Epoch 1:  20%|█▉        | 367/1875 [04:42<19:19,  1.30it/s]

Epoch 1, Batch 368/1875


Epoch 1:  20%|█▉        | 368/1875 [04:43<18:51,  1.33it/s]

Epoch 1, Batch 369/1875


Epoch 1:  20%|█▉        | 369/1875 [04:43<18:46,  1.34it/s]

Epoch 1, Batch 370/1875


Epoch 1:  20%|█▉        | 370/1875 [04:44<18:31,  1.35it/s]

Epoch 1, Batch 371/1875


Epoch 1:  20%|█▉        | 371/1875 [04:45<18:15,  1.37it/s]

Epoch 1, Batch 372/1875


Epoch 1:  20%|█▉        | 372/1875 [04:46<18:16,  1.37it/s]

Epoch 1, Batch 373/1875


Epoch 1:  20%|█▉        | 373/1875 [04:46<18:13,  1.37it/s]

Epoch 1, Batch 374/1875


Epoch 1:  20%|█▉        | 374/1875 [04:47<18:04,  1.38it/s]

Epoch 1, Batch 375/1875


Epoch 1:  20%|██        | 375/1875 [04:48<17:55,  1.39it/s]

Epoch 1, Batch 376/1875


Epoch 1:  20%|██        | 376/1875 [04:49<18:02,  1.39it/s]

Epoch 1, Batch 377/1875


Epoch 1:  20%|██        | 377/1875 [04:49<17:54,  1.39it/s]

Epoch 1, Batch 378/1875


Epoch 1:  20%|██        | 378/1875 [04:50<18:54,  1.32it/s]

Epoch 1, Batch 379/1875


Epoch 1:  20%|██        | 379/1875 [04:51<20:29,  1.22it/s]

Epoch 1, Batch 380/1875


Epoch 1:  20%|██        | 380/1875 [04:52<21:50,  1.14it/s]

Epoch 1, Batch 381/1875


Epoch 1:  20%|██        | 381/1875 [04:53<21:09,  1.18it/s]

Epoch 1, Batch 382/1875


Epoch 1:  20%|██        | 382/1875 [04:54<20:12,  1.23it/s]

Epoch 1, Batch 383/1875


Epoch 1:  20%|██        | 383/1875 [04:54<19:22,  1.28it/s]

Epoch 1, Batch 384/1875


Epoch 1:  20%|██        | 384/1875 [04:55<18:49,  1.32it/s]

Epoch 1, Batch 385/1875


Epoch 1:  21%|██        | 385/1875 [04:56<18:35,  1.34it/s]

Epoch 1, Batch 386/1875


Epoch 1:  21%|██        | 386/1875 [04:56<18:12,  1.36it/s]

Epoch 1, Batch 387/1875


Epoch 1:  21%|██        | 387/1875 [04:57<17:59,  1.38it/s]

Epoch 1, Batch 388/1875


Epoch 1:  21%|██        | 388/1875 [04:58<17:52,  1.39it/s]

Epoch 1, Batch 389/1875


Epoch 1:  21%|██        | 389/1875 [04:59<17:48,  1.39it/s]

Epoch 1, Batch 390/1875


Epoch 1:  21%|██        | 390/1875 [04:59<17:35,  1.41it/s]

Epoch 1, Batch 391/1875


Epoch 1:  21%|██        | 391/1875 [05:00<18:58,  1.30it/s]

Epoch 1, Batch 392/1875


Epoch 1:  21%|██        | 392/1875 [05:01<19:42,  1.25it/s]

Epoch 1, Batch 393/1875


Epoch 1:  21%|██        | 393/1875 [05:02<20:16,  1.22it/s]

Epoch 1, Batch 394/1875


Epoch 1:  21%|██        | 394/1875 [05:03<20:23,  1.21it/s]

Epoch 1, Batch 395/1875


Epoch 1:  21%|██        | 395/1875 [05:04<21:33,  1.14it/s]

Epoch 1, Batch 396/1875


Epoch 1:  21%|██        | 396/1875 [05:05<22:22,  1.10it/s]

Epoch 1, Batch 397/1875


Epoch 1:  21%|██        | 397/1875 [05:06<21:47,  1.13it/s]

Epoch 1, Batch 398/1875


Epoch 1:  21%|██        | 398/1875 [05:06<20:33,  1.20it/s]

Epoch 1, Batch 399/1875


Epoch 1:  21%|██▏       | 399/1875 [05:07<19:49,  1.24it/s]

Epoch 1, Batch 400/1875


Epoch 1:  21%|██▏       | 400/1875 [05:08<19:11,  1.28it/s]

Epoch 1, Batch 401/1875


Epoch 1:  21%|██▏       | 401/1875 [05:08<18:42,  1.31it/s]

Epoch 1, Batch 402/1875


Epoch 1:  21%|██▏       | 402/1875 [05:09<18:21,  1.34it/s]

Epoch 1, Batch 403/1875


Epoch 1:  21%|██▏       | 403/1875 [05:10<17:56,  1.37it/s]

Epoch 1, Batch 404/1875


Epoch 1:  22%|██▏       | 404/1875 [05:11<17:41,  1.39it/s]

Epoch 1, Batch 405/1875


Epoch 1:  22%|██▏       | 405/1875 [05:11<17:32,  1.40it/s]

Epoch 1, Batch 406/1875


Epoch 1:  22%|██▏       | 406/1875 [05:12<17:25,  1.40it/s]

Epoch 1, Batch 407/1875


Epoch 1:  22%|██▏       | 407/1875 [05:13<17:26,  1.40it/s]