Imports


In [None]:
import torch
import torchvision
import pandas as pd
import torch.nn as nn
from tqdm import tqdm
import multiprocessing
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import tabulate

Versión torch


In [None]:
print("Torch version: ", torch.__version__)

####################################################################
# Set Device
####################################################################

# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
# Every later cell moves the model and the batches onto this device.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device: ", device)

Torch version:  2.5.1+cu121
Device:  cuda


Preparar datos


In [None]:
####################################################################
# Prepare Data
####################################################################

# Both MNIST splits are read from (and, on first use, downloaded to) ".data/".
# Batching is done later by the DataLoader cell, once the samples have been
# wrapped by the custom Dataset class.
mnist_root = ".data/"
train_set = torchvision.datasets.MNIST(mnist_root, train=True, download=True)
test_set = torchvision.datasets.MNIST(mnist_root, train=False, download=True)

# Peek at the first training sample: a (PIL image, integer label) pair.
first_image, first_label = train_set[0]
print("Train images: ", train_set)
print("Image: ", first_image)
print("Label: ", first_label)
print("Label one hot: ", F.one_hot(torch.tensor(first_label), num_classes=10))

Train images:  Dataset MNIST
    Number of datapoints: 60000
    Root location: .data/
    Split: Train
Image:  <PIL.Image.Image image mode=L size=28x28 at 0x784D9DEB5930>
Label:  5
Label one hot:  tensor([0, 0, 0, 0, 0, 1, 0, 0, 0, 0])


Dataset class


In [None]:
####################################################################
# Dataset Class
####################################################################
class MNIST_dataset(Dataset):
    """Adapter that turns ``(PIL image, int label)`` samples into tensors.

    Each item is returned as a dict with:
        ``"img"``:   the image converted by ``from_pil_to_tensor`` and
                     flattened to a 1-D vector (784 values for 28x28 MNIST).
        ``"label"``: the class index one-hot encoded over 10 classes, as float.

    Args:
        data: Indexable dataset whose items are ``(image, label)`` pairs.
        partition: Name of the split; only used in the log message.
    """

    def __init__(self, data, partition="train"):
        print("\nLoading MNIST ", partition, " Dataset...")
        self.data = data
        self.partition = partition
        print("\tTotal Len.: ", len(self.data), "\n", 50 * "-")

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.data)

    def from_pil_to_tensor(self, image):
        """Convert a PIL image to a tensor with torchvision's ``ToTensor``."""
        return torchvision.transforms.ToTensor()(image)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as ``{"img": ..., "label": ...}``."""
        # Fetch the sample once: indexing the wrapped dataset builds the image
        # on every access, so reading image and label separately doubles the work.
        image, target = self.data[idx]

        # PIL image -> tensor, then flatten because the network expects a flat
        # vector rather than (channels, height, width).
        image_tensor = self.from_pil_to_tensor(image).reshape(-1)

        # Integer class index -> float one-hot vector.
        label = F.one_hot(torch.tensor(target), num_classes=10).float()

        return {"img": image_tensor, "label": label}


# Wrap the raw torchvision splits so that every sample becomes a dict of tensors.
# NOTE(review): the names are rebound in place, so re-running this cell without
# re-running the "Prepare Data" cell first would wrap an already wrapped dataset.
train_set = MNIST_dataset(data=train_set, partition="train")
test_set = MNIST_dataset(data=test_set, partition="test")


Loading MNIST  train  Dataset...
	Total Len.:  60000 
 --------------------------------------------------

Loading MNIST  test  Dataset...
	Total Len.:  10000 
 --------------------------------------------------


DataLoader class


In [None]:
####################################################################
# DataLoader Class
####################################################################

batch_size = 100
# One worker fewer than the number of CPUs reported by the OS.
num_workers = multiprocessing.cpu_count() - 1
print("Num workers", num_workers)

# Training batches are reshuffled every epoch; evaluation keeps a fixed order.
train_dataloader = DataLoader(
    dataset=train_set,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)
test_dataloader = DataLoader(
    dataset=test_set,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
)

Num workers 1


NN Class


In [None]:
####################################################################
# Neural Network Class
####################################################################
# Creating our Neural Network - Fully Connected
class Net(nn.Module):
    """Fully connected classifier: three hidden ReLU layers plus a linear head.

    Args:
        num_classes: Number of output logits.
        in_features: Length of each flattened input vector. Defaults to 784
            (a 28x28 image).
        hidden_features: Width of each of the three hidden layers. Defaults
            to 1024.

    The submodule names (``linear1`` ... ``classifier``) are kept stable so
    that previously saved ``state_dict`` files still load.
    """

    def __init__(self, num_classes, in_features=784, hidden_features=1024):
        super().__init__()
        self.linear1 = nn.Linear(in_features, hidden_features)
        self.relu1 = nn.ReLU()
        self.linear2 = nn.Linear(hidden_features, hidden_features)
        self.relu2 = nn.ReLU()
        self.linear3 = nn.Linear(hidden_features, hidden_features)
        self.relu3 = nn.ReLU()
        self.classifier = nn.Linear(hidden_features, num_classes)

    def forward(self, x):
        """Map a batch of flattened inputs to raw (unnormalized) class logits."""
        out = self.relu1(self.linear1(x))
        out = self.relu2(self.linear2(out))
        out = self.relu3(self.linear3(out))
        # No softmax here: the logits are returned as they are.
        return self.classifier(out)


def count_parameters(model):
    """Return how many scalar parameters of ``model`` are trainable.

    Only parameters with ``requires_grad`` set are counted; frozen ones are
    skipped.
    """
    trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable += param.numel()
    return trainable

Función de entrenamiento


In [None]:
# TRAIN NETWORK
def train_network(optimizer, epoch, criterion, net):
    """Run one training epoch over the global ``train_dataloader``.

    Args:
        optimizer: Optimizer already bound to the parameters of ``net``.
        epoch: Epoch index; only used to label the progress bar.
        criterion: Loss callable invoked as ``criterion(outputs, labels)`` with
            one-hot float labels. It is assumed to return the mean loss of the
            batch (the default reduction of ``nn.CrossEntropyLoss``).
        net: Model to train; batches are moved to the global ``device`` before
            the forward pass, so the model is expected to be there as well.

    Returns:
        ``(train_loss, train_correct)``: the average loss per sample over the
        epoch, and the number of correctly classified training samples.
    """
    train_loss, train_correct = 0.0, 0
    samples_seen = 0
    net.train()
    with tqdm(train_dataloader, desc="Epoch " + str(epoch), unit="batch") as tepoch:
        for batch in tepoch:
            # Returned values of Dataset Class
            images = batch["img"].to(device)
            labels = batch["label"].to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # Forward
            outputs = net(images)
            loss = criterion(outputs, labels)

            # Calculate gradients
            loss.backward()

            # Update parameters
            optimizer.step()

            # one hot -> labels
            targets = torch.argmax(labels, dim=1)
            pred = torch.argmax(outputs, dim=1)
            train_correct += pred.eq(targets).sum().item()

            # The criterion yields a per-batch mean, so weight it by the batch
            # size; dividing a plain sum of batch means by the sample count
            # would under-report the loss by a factor of the batch size.
            batch_len = images.size(0)
            train_loss += loss.item() * batch_len
            samples_seen += batch_len

    # Guard against an empty loader instead of dividing by zero.
    train_loss /= max(samples_seen, 1)
    return train_loss, train_correct

Función de test


In [None]:
# TEST NETWORK
def test_network(net, epoch, criterion):
    """Evaluate ``net`` on the global ``test_dataloader`` without gradients.

    Args:
        net: Model to evaluate; batches are moved to the global ``device``
            before the forward pass.
        epoch: Epoch index; only used to label the progress bar.
        criterion: Loss callable invoked as ``criterion(outputs, labels)`` with
            one-hot float labels. It is assumed to return the mean loss of the
            batch (the default reduction of ``nn.CrossEntropyLoss``).

    Returns:
        ``(test_loss, test_correct, test_accuracy)``: the average loss per
        sample as a Python float, the number of correctly classified samples,
        and the accuracy as a percentage.
    """
    test_loss, test_correct = 0.0, 0
    samples_seen = 0
    net.eval()
    with torch.no_grad():
        with tqdm(test_dataloader, desc="Test " + str(epoch), unit="batch") as tepoch:
            for batch in tepoch:
                images = batch["img"].to(device)
                labels = batch["label"].to(device)

                # Forward
                outputs = net(images)

                # Accumulate a plain float (not a device tensor), weighting the
                # per-batch mean by the batch size so the final value is a true
                # per-sample average.
                batch_len = images.size(0)
                test_loss += criterion(outputs, labels).item() * batch_len
                samples_seen += batch_len

                # one hot -> labels
                targets = torch.argmax(labels, dim=1)
                pred = torch.argmax(outputs, dim=1)

                test_correct += pred.eq(targets).sum().item()

    # Guard against an empty loader instead of dividing by zero.
    evaluated = max(samples_seen, 1)
    test_loss /= evaluated
    test_accuracy = 100.0 * test_correct / evaluated
    return test_loss, test_correct, test_accuracy

Entrenamiento con validación


In [None]:
# Instantiating the network and printing its architecture
num_classes = 10
net = Net(num_classes)
print(net)
print("Params: ", count_parameters(net))
####################################################################
# Training settings
####################################################################

# Training hyperparameters
epochs = 15
criterion = nn.CrossEntropyLoss()
best_results = dict()
# Best test accuracy seen across the WHOLE sweep. "best_model.pt" is only
# rewritten when a run beats it; otherwise every (lr, optimizer) run would
# overwrite the file and it would end up holding the last run's weights.
overall_best_accuracy = -1
for lr in [0.1, 0.01, 0.001, 0.0001]:
    best_results[lr] = dict()
    for optimizer_cls in [optim.AdamW, optim.SGD, optim.Adadelta, optim.Adam]:
        ####################################################################
        # Training
        ####################################################################

        # Every (optimizer, lr) run starts from a freshly initialized network,
        # which is then moved to the selected device.
        net = Net(num_classes)
        net.to(device)
        print("Params: ", count_parameters(net))
        # The optimizer must be built after the network, because it receives
        # the network parameters it is going to update.
        if optimizer_cls is optim.SGD:
            optimizer = optimizer_cls(
                net.parameters(), lr=lr, weight_decay=1e-6, momentum=0.9
            )
        else:
            optimizer = optimizer_cls(net.parameters(), lr=lr, weight_decay=1e-6)

        optimizer_name = type(optimizer).__name__
        # "best_epoch" is the zero-based epoch index (the log line below prints
        # epoch + 1).
        run_best = {
            "best_accuracy": -1,
            "best_epoch": 0,
        }
        best_results[lr][optimizer_name] = run_best

        print("\n---- Start Training ----")
        print(f"Optimizer: {optimizer_name} - Learning Rate: {lr}")
        for epoch in range(epochs):
            train_loss, train_correct = train_network(optimizer, epoch, criterion, net)

            test_loss, test_correct, test_accuracy = test_network(net, epoch, criterion)

            print(
                "[Epoch {}] Train Loss: {:.6f} - Test Loss: {:.6f} - Train Accuracy: {:.2f}% - Test Accuracy: {:.2f}%".format(
                    epoch + 1,
                    train_loss,
                    test_loss,
                    100.0 * train_correct / len(train_dataloader.dataset),
                    test_accuracy,
                )
            )

            if test_accuracy > run_best["best_accuracy"]:
                run_best["best_accuracy"] = test_accuracy
                run_best["best_epoch"] = epoch

                # Save best weights, but only when this is also the best
                # result of the whole sweep so far.
                if test_accuracy > overall_best_accuracy:
                    overall_best_accuracy = test_accuracy
                    torch.save(net.state_dict(), "best_model.pt")

        print(
            "\nBEST TEST ACCURACY: ",
            run_best["best_accuracy"],
            " in epoch ",
            run_best["best_epoch"],
        )

####################################################################
# Results
####################################################################
print(best_results)

Net(
  (linear1): Linear(in_features=784, out_features=1024, bias=True)
  (relu1): ReLU()
  (linear2): Linear(in_features=1024, out_features=1024, bias=True)
  (relu2): ReLU()
  (linear3): Linear(in_features=1024, out_features=1024, bias=True)
  (relu3): ReLU()
  (classifier): Linear(in_features=1024, out_features=10, bias=True)
)
Params:  2913290
Params:  2913290

---- Start Training ----
Optimizer: AdamW - Learning Rate: 0.1


Epoch 0: 100%|██████████| 600/600 [00:17<00:00, 34.48batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 34.27batch/s]

[Epoch 1] Train Loss: 0.505471 - Test Loss: 0.023069 - Train Accuracy: 12.88% - Test Accuracy: 9.58%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 41.45batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 45.84batch/s]

[Epoch 2] Train Loss: 0.023065 - Test Loss: 0.023089 - Train Accuracy: 10.60% - Test Accuracy: 9.80%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 40.99batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 47.14batch/s]


[Epoch 3] Train Loss: 0.023068 - Test Loss: 0.023066 - Train Accuracy: 10.32% - Test Accuracy: 11.35%


Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 41.55batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 38.96batch/s]

[Epoch 4] Train Loss: 0.023087 - Test Loss: 0.023174 - Train Accuracy: 10.31% - Test Accuracy: 11.35%



Epoch 4: 100%|██████████| 600/600 [00:15<00:00, 38.18batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 41.50batch/s]

[Epoch 5] Train Loss: 0.023075 - Test Loss: 0.023093 - Train Accuracy: 10.67% - Test Accuracy: 10.28%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 40.98batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 46.17batch/s]

[Epoch 6] Train Loss: 0.023078 - Test Loss: 0.023112 - Train Accuracy: 10.42% - Test Accuracy: 9.80%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 41.25batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 37.96batch/s]

[Epoch 7] Train Loss: 0.023078 - Test Loss: 0.023060 - Train Accuracy: 10.36% - Test Accuracy: 10.28%



Epoch 7: 100%|██████████| 600/600 [00:15<00:00, 39.22batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 46.22batch/s]

[Epoch 8] Train Loss: 0.023087 - Test Loss: 0.023064 - Train Accuracy: 10.34% - Test Accuracy: 10.09%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 41.11batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 45.82batch/s]

[Epoch 9] Train Loss: 0.023081 - Test Loss: 0.023039 - Train Accuracy: 10.55% - Test Accuracy: 11.35%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 40.93batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 40.12batch/s]

[Epoch 10] Train Loss: 0.023072 - Test Loss: 0.023029 - Train Accuracy: 10.71% - Test Accuracy: 11.35%



Epoch 10: 100%|██████████| 600/600 [00:15<00:00, 39.41batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 44.55batch/s]

[Epoch 11] Train Loss: 0.023085 - Test Loss: 0.023113 - Train Accuracy: 10.49% - Test Accuracy: 10.10%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 41.18batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 45.34batch/s]

[Epoch 12] Train Loss: 0.023086 - Test Loss: 0.023072 - Train Accuracy: 10.46% - Test Accuracy: 9.80%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 40.84batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 42.12batch/s]

[Epoch 13] Train Loss: 0.023083 - Test Loss: 0.023184 - Train Accuracy: 10.48% - Test Accuracy: 10.28%



Epoch 13: 100%|██████████| 600/600 [00:15<00:00, 39.46batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 43.93batch/s]

[Epoch 14] Train Loss: 0.023079 - Test Loss: 0.023075 - Train Accuracy: 10.58% - Test Accuracy: 10.28%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 40.88batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 45.59batch/s]

[Epoch 15] Train Loss: 0.023085 - Test Loss: 0.023075 - Train Accuracy: 10.40% - Test Accuracy: 11.35%

BEST TEST ACCURACY:  11.35  in epoch  2
Params:  2913290

---- Start Training ----
Optimizer: SGD - Learning Rate: 0.1



Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 41.20batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 45.62batch/s]

[Epoch 1] Train Loss: 0.003134 - Test Loss: 0.001123 - Train Accuracy: 90.43% - Test Accuracy: 96.61%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 40.01batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 37.95batch/s]

[Epoch 2] Train Loss: 0.000992 - Test Loss: 0.001134 - Train Accuracy: 96.99% - Test Accuracy: 96.54%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 41.27batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 46.27batch/s]


[Epoch 3] Train Loss: 0.000677 - Test Loss: 0.000727 - Train Accuracy: 97.91% - Test Accuracy: 97.65%


Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 41.62batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 46.30batch/s]

[Epoch 4] Train Loss: 0.000490 - Test Loss: 0.000645 - Train Accuracy: 98.50% - Test Accuracy: 97.99%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 41.31batch/s]
Test 4: 100%|██████████| 100/100 [00:03<00:00, 32.60batch/s]

[Epoch 5] Train Loss: 0.000366 - Test Loss: 0.000742 - Train Accuracy: 98.84% - Test Accuracy: 97.83%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 41.45batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 44.73batch/s]

[Epoch 6] Train Loss: 0.000300 - Test Loss: 0.000890 - Train Accuracy: 99.05% - Test Accuracy: 97.80%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 41.10batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 45.44batch/s]

[Epoch 7] Train Loss: 0.000223 - Test Loss: 0.000715 - Train Accuracy: 99.31% - Test Accuracy: 98.13%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 41.63batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 35.46batch/s]

[Epoch 8] Train Loss: 0.000224 - Test Loss: 0.000744 - Train Accuracy: 99.25% - Test Accuracy: 98.06%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 41.05batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 45.91batch/s]

[Epoch 9] Train Loss: 0.000208 - Test Loss: 0.000978 - Train Accuracy: 99.33% - Test Accuracy: 97.68%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 41.58batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 45.00batch/s]

[Epoch 10] Train Loss: 0.000158 - Test Loss: 0.000687 - Train Accuracy: 99.48% - Test Accuracy: 98.32%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 41.14batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 41.81batch/s]

[Epoch 11] Train Loss: 0.000102 - Test Loss: 0.000759 - Train Accuracy: 99.63% - Test Accuracy: 98.23%



Epoch 11: 100%|██████████| 600/600 [00:15<00:00, 39.81batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 44.76batch/s]

[Epoch 12] Train Loss: 0.000127 - Test Loss: 0.000820 - Train Accuracy: 99.59% - Test Accuracy: 98.27%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 41.74batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 46.26batch/s]

[Epoch 13] Train Loss: 0.000065 - Test Loss: 0.000833 - Train Accuracy: 99.79% - Test Accuracy: 98.13%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 42.04batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 44.50batch/s]

[Epoch 14] Train Loss: 0.000092 - Test Loss: 0.000881 - Train Accuracy: 99.72% - Test Accuracy: 98.12%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 41.52batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 34.65batch/s]


[Epoch 15] Train Loss: 0.000081 - Test Loss: 0.000866 - Train Accuracy: 99.74% - Test Accuracy: 98.06%

BEST TEST ACCURACY:  98.32  in epoch  9
Params:  2913290

---- Start Training ----
Optimizer: Adadelta - Learning Rate: 0.1


Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 40.88batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 45.85batch/s]

[Epoch 1] Train Loss: 0.007036 - Test Loss: 0.002868 - Train Accuracy: 80.35% - Test Accuracy: 91.67%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 40.81batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 46.11batch/s]

[Epoch 2] Train Loss: 0.002299 - Test Loss: 0.001788 - Train Accuracy: 93.33% - Test Accuracy: 94.75%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 40.43batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 33.73batch/s]


[Epoch 3] Train Loss: 0.001551 - Test Loss: 0.001324 - Train Accuracy: 95.47% - Test Accuracy: 96.01%


Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 40.52batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 45.35batch/s]

[Epoch 4] Train Loss: 0.001153 - Test Loss: 0.001042 - Train Accuracy: 96.63% - Test Accuracy: 96.90%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 40.81batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 45.65batch/s]

[Epoch 5] Train Loss: 0.000895 - Test Loss: 0.000930 - Train Accuracy: 97.33% - Test Accuracy: 97.14%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 40.65batch/s]
Test 5: 100%|██████████| 100/100 [00:03<00:00, 32.57batch/s]

[Epoch 6] Train Loss: 0.000713 - Test Loss: 0.000832 - Train Accuracy: 97.91% - Test Accuracy: 97.41%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 40.98batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 43.72batch/s]

[Epoch 7] Train Loss: 0.000584 - Test Loss: 0.000809 - Train Accuracy: 98.31% - Test Accuracy: 97.33%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 40.74batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 45.35batch/s]

[Epoch 8] Train Loss: 0.000482 - Test Loss: 0.000692 - Train Accuracy: 98.59% - Test Accuracy: 97.83%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 40.86batch/s]
Test 8: 100%|██████████| 100/100 [00:03<00:00, 31.97batch/s]

[Epoch 9] Train Loss: 0.000395 - Test Loss: 0.000674 - Train Accuracy: 98.84% - Test Accuracy: 97.80%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 40.74batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 46.31batch/s]

[Epoch 10] Train Loss: 0.000326 - Test Loss: 0.000665 - Train Accuracy: 99.04% - Test Accuracy: 98.01%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 40.38batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 45.55batch/s]

[Epoch 11] Train Loss: 0.000266 - Test Loss: 0.000655 - Train Accuracy: 99.25% - Test Accuracy: 98.03%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 40.85batch/s]
Test 11: 100%|██████████| 100/100 [00:03<00:00, 32.29batch/s]

[Epoch 12] Train Loss: 0.000220 - Test Loss: 0.000627 - Train Accuracy: 99.42% - Test Accuracy: 98.04%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 40.93batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 45.65batch/s]

[Epoch 13] Train Loss: 0.000174 - Test Loss: 0.000646 - Train Accuracy: 99.57% - Test Accuracy: 98.13%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 40.71batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 45.03batch/s]

[Epoch 14] Train Loss: 0.000141 - Test Loss: 0.000854 - Train Accuracy: 99.69% - Test Accuracy: 97.33%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 40.79batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 33.49batch/s]


[Epoch 15] Train Loss: 0.000116 - Test Loss: 0.000618 - Train Accuracy: 99.72% - Test Accuracy: 98.18%

BEST TEST ACCURACY:  98.18  in epoch  14
Params:  2913290

---- Start Training ----
Optimizer: Adam - Learning Rate: 0.1


Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 41.47batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 46.22batch/s]

[Epoch 1] Train Loss: 2.044886 - Test Loss: 0.023056 - Train Accuracy: 10.79% - Test Accuracy: 11.35%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 41.07batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 45.23batch/s]

[Epoch 2] Train Loss: 0.023077 - Test Loss: 0.023114 - Train Accuracy: 10.46% - Test Accuracy: 10.28%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 41.24batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 38.37batch/s]

[Epoch 3] Train Loss: 0.023078 - Test Loss: 0.023125 - Train Accuracy: 10.52% - Test Accuracy: 10.10%



Epoch 3: 100%|██████████| 600/600 [00:15<00:00, 39.71batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 45.99batch/s]

[Epoch 4] Train Loss: 0.023077 - Test Loss: 0.023026 - Train Accuracy: 10.40% - Test Accuracy: 11.35%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 41.21batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 45.16batch/s]

[Epoch 5] Train Loss: 0.023086 - Test Loss: 0.023033 - Train Accuracy: 10.41% - Test Accuracy: 11.35%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 41.18batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 41.73batch/s]

[Epoch 6] Train Loss: 0.023083 - Test Loss: 0.023101 - Train Accuracy: 10.48% - Test Accuracy: 10.28%



Epoch 6: 100%|██████████| 600/600 [00:15<00:00, 39.25batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 45.62batch/s]

[Epoch 7] Train Loss: 0.054354 - Test Loss: 0.023074 - Train Accuracy: 10.69% - Test Accuracy: 11.35%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 41.15batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 45.95batch/s]

[Epoch 8] Train Loss: 0.023078 - Test Loss: 0.023040 - Train Accuracy: 10.33% - Test Accuracy: 11.35%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 41.15batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 44.79batch/s]

[Epoch 9] Train Loss: 0.030731 - Test Loss: 0.023050 - Train Accuracy: 10.40% - Test Accuracy: 10.28%



Epoch 9: 100%|██████████| 600/600 [00:15<00:00, 39.26batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 42.69batch/s]


[Epoch 10] Train Loss: 0.023077 - Test Loss: 0.023091 - Train Accuracy: 10.45% - Test Accuracy: 8.92%


Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 41.32batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 45.93batch/s]

[Epoch 11] Train Loss: 0.023084 - Test Loss: 0.023106 - Train Accuracy: 10.32% - Test Accuracy: 10.32%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 41.35batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 45.38batch/s]

[Epoch 12] Train Loss: 0.023084 - Test Loss: 0.023120 - Train Accuracy: 10.43% - Test Accuracy: 11.35%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 40.14batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 36.44batch/s]

[Epoch 13] Train Loss: 0.023085 - Test Loss: 0.023063 - Train Accuracy: 10.17% - Test Accuracy: 10.32%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 41.52batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 45.48batch/s]

[Epoch 14] Train Loss: 0.023089 - Test Loss: 0.023083 - Train Accuracy: 10.32% - Test Accuracy: 10.09%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 41.23batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 45.64batch/s]

[Epoch 15] Train Loss: 0.023080 - Test Loss: 0.023100 - Train Accuracy: 10.32% - Test Accuracy: 11.35%

BEST TEST ACCURACY:  11.35  in epoch  0
Params:  2913290

---- Start Training ----
Optimizer: AdamW - Learning Rate: 0.01



Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 41.46batch/s]
Test 0: 100%|██████████| 100/100 [00:03<00:00, 32.50batch/s]

[Epoch 1] Train Loss: 0.003599 - Test Loss: 0.002660 - Train Accuracy: 90.15% - Test Accuracy: 93.15%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 41.22batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 45.67batch/s]

[Epoch 2] Train Loss: 0.002109 - Test Loss: 0.002330 - Train Accuracy: 94.43% - Test Accuracy: 93.33%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 40.83batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 46.13batch/s]

[Epoch 3] Train Loss: 0.001809 - Test Loss: 0.001778 - Train Accuracy: 95.14% - Test Accuracy: 95.21%



Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 41.18batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 37.09batch/s]


[Epoch 4] Train Loss: 0.001607 - Test Loss: 0.001640 - Train Accuracy: 95.64% - Test Accuracy: 95.61%


Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 40.11batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 45.68batch/s]

[Epoch 5] Train Loss: 0.001563 - Test Loss: 0.002055 - Train Accuracy: 95.87% - Test Accuracy: 94.80%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 40.95batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 45.03batch/s]

[Epoch 6] Train Loss: 0.001403 - Test Loss: 0.001602 - Train Accuracy: 96.17% - Test Accuracy: 95.88%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 41.55batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 40.07batch/s]

[Epoch 7] Train Loss: 0.001338 - Test Loss: 0.001617 - Train Accuracy: 96.42% - Test Accuracy: 95.96%



Epoch 7: 100%|██████████| 600/600 [00:15<00:00, 39.51batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 46.40batch/s]

[Epoch 8] Train Loss: 0.001260 - Test Loss: 0.001697 - Train Accuracy: 96.61% - Test Accuracy: 96.21%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 41.35batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 45.83batch/s]

[Epoch 9] Train Loss: 0.001243 - Test Loss: 0.001617 - Train Accuracy: 96.70% - Test Accuracy: 96.11%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 41.04batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 45.07batch/s]

[Epoch 10] Train Loss: 0.001202 - Test Loss: 0.002186 - Train Accuracy: 96.83% - Test Accuracy: 95.37%



Epoch 10: 100%|██████████| 600/600 [00:15<00:00, 39.47batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 44.20batch/s]


[Epoch 11] Train Loss: 0.001086 - Test Loss: 0.002171 - Train Accuracy: 97.17% - Test Accuracy: 94.77%


Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 41.22batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 42.42batch/s]

[Epoch 12] Train Loss: 0.001034 - Test Loss: 0.001771 - Train Accuracy: 97.12% - Test Accuracy: 95.93%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 41.26batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 43.81batch/s]


[Epoch 13] Train Loss: 0.001034 - Test Loss: 0.001713 - Train Accuracy: 97.26% - Test Accuracy: 96.66%


Epoch 13: 100%|██████████| 600/600 [00:15<00:00, 39.17batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 43.95batch/s]

[Epoch 14] Train Loss: 0.001003 - Test Loss: 0.001886 - Train Accuracy: 97.28% - Test Accuracy: 96.44%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 41.13batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 45.91batch/s]

[Epoch 15] Train Loss: 0.001085 - Test Loss: 0.001967 - Train Accuracy: 97.23% - Test Accuracy: 96.07%

BEST TEST ACCURACY:  96.66  in epoch  12
Params:  2913290

---- Start Training ----
Optimizer: SGD - Learning Rate: 0.01



Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 41.61batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 44.66batch/s]

[Epoch 1] Train Loss: 0.007348 - Test Loss: 0.002433 - Train Accuracy: 79.29% - Test Accuracy: 93.09%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 40.43batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 37.41batch/s]

[Epoch 2] Train Loss: 0.002001 - Test Loss: 0.001502 - Train Accuracy: 94.17% - Test Accuracy: 95.44%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 41.63batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 46.55batch/s]


[Epoch 3] Train Loss: 0.001285 - Test Loss: 0.001107 - Train Accuracy: 96.23% - Test Accuracy: 96.55%


Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 41.80batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 45.89batch/s]

[Epoch 4] Train Loss: 0.000918 - Test Loss: 0.000933 - Train Accuracy: 97.29% - Test Accuracy: 97.09%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 41.51batch/s]
Test 4: 100%|██████████| 100/100 [00:03<00:00, 32.37batch/s]

[Epoch 5] Train Loss: 0.000713 - Test Loss: 0.000837 - Train Accuracy: 97.91% - Test Accuracy: 97.37%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 41.40batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 46.21batch/s]

[Epoch 6] Train Loss: 0.000558 - Test Loss: 0.000766 - Train Accuracy: 98.32% - Test Accuracy: 97.56%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 41.59batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 45.68batch/s]

[Epoch 7] Train Loss: 0.000440 - Test Loss: 0.000703 - Train Accuracy: 98.69% - Test Accuracy: 97.84%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 41.39batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 35.00batch/s]

[Epoch 8] Train Loss: 0.000336 - Test Loss: 0.000694 - Train Accuracy: 99.06% - Test Accuracy: 97.81%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 41.10batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 45.63batch/s]

[Epoch 9] Train Loss: 0.000272 - Test Loss: 0.000646 - Train Accuracy: 99.20% - Test Accuracy: 97.98%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 41.65batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 45.67batch/s]

[Epoch 10] Train Loss: 0.000206 - Test Loss: 0.000641 - Train Accuracy: 99.43% - Test Accuracy: 98.11%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 41.47batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 39.96batch/s]

[Epoch 11] Train Loss: 0.000154 - Test Loss: 0.000667 - Train Accuracy: 99.63% - Test Accuracy: 97.90%



Epoch 11: 100%|██████████| 600/600 [00:15<00:00, 39.77batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 45.13batch/s]

[Epoch 12] Train Loss: 0.000128 - Test Loss: 0.000670 - Train Accuracy: 99.67% - Test Accuracy: 97.98%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 41.59batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 46.29batch/s]

[Epoch 13] Train Loss: 0.000097 - Test Loss: 0.000661 - Train Accuracy: 99.78% - Test Accuracy: 98.13%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 41.81batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 46.16batch/s]

[Epoch 14] Train Loss: 0.000071 - Test Loss: 0.000636 - Train Accuracy: 99.86% - Test Accuracy: 98.16%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 40.67batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 36.59batch/s]

[Epoch 15] Train Loss: 0.000049 - Test Loss: 0.000675 - Train Accuracy: 99.92% - Test Accuracy: 98.08%

BEST TEST ACCURACY:  98.16  in epoch  13
Params:  2913290

---- Start Training ----
Optimizer: Adadelta - Learning Rate: 0.01



Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 41.20batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 45.33batch/s]

[Epoch 1] Train Loss: 0.022337 - Test Loss: 0.020675 - Train Accuracy: 49.67% - Test Accuracy: 64.22%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 41.18batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 45.50batch/s]

[Epoch 2] Train Loss: 0.013987 - Test Loss: 0.007960 - Train Accuracy: 72.17% - Test Accuracy: 79.71%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 40.06batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 33.74batch/s]

[Epoch 3] Train Loss: 0.006353 - Test Loss: 0.005029 - Train Accuracy: 82.45% - Test Accuracy: 85.81%



Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 40.73batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 45.23batch/s]

[Epoch 4] Train Loss: 0.004696 - Test Loss: 0.004085 - Train Accuracy: 86.79% - Test Accuracy: 88.40%



Epoch 4: 100%|██████████| 600/600 [00:15<00:00, 39.91batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 45.80batch/s]

[Epoch 5] Train Loss: 0.004011 - Test Loss: 0.003649 - Train Accuracy: 88.64% - Test Accuracy: 89.48%



Epoch 5: 100%|██████████| 600/600 [00:15<00:00, 39.85batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 34.11batch/s]

[Epoch 6] Train Loss: 0.003622 - Test Loss: 0.003345 - Train Accuracy: 89.71% - Test Accuracy: 90.39%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 40.27batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 44.63batch/s]

[Epoch 7] Train Loss: 0.003361 - Test Loss: 0.003132 - Train Accuracy: 90.47% - Test Accuracy: 91.07%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 40.16batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 43.98batch/s]

[Epoch 8] Train Loss: 0.003157 - Test Loss: 0.002946 - Train Accuracy: 91.03% - Test Accuracy: 91.44%



Epoch 8: 100%|██████████| 600/600 [00:15<00:00, 39.49batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 35.45batch/s]

[Epoch 9] Train Loss: 0.002985 - Test Loss: 0.002784 - Train Accuracy: 91.50% - Test Accuracy: 91.96%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 40.87batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 44.65batch/s]

[Epoch 10] Train Loss: 0.002831 - Test Loss: 0.002665 - Train Accuracy: 91.95% - Test Accuracy: 92.25%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 41.09batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 46.43batch/s]

[Epoch 11] Train Loss: 0.002688 - Test Loss: 0.002527 - Train Accuracy: 92.33% - Test Accuracy: 92.79%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 40.41batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 33.84batch/s]


[Epoch 12] Train Loss: 0.002555 - Test Loss: 0.002416 - Train Accuracy: 92.68% - Test Accuracy: 93.08%


Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 40.73batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 45.72batch/s]

[Epoch 13] Train Loss: 0.002431 - Test Loss: 0.002306 - Train Accuracy: 93.03% - Test Accuracy: 93.45%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 40.59batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 43.94batch/s]


[Epoch 14] Train Loss: 0.002316 - Test Loss: 0.002215 - Train Accuracy: 93.40% - Test Accuracy: 93.68%


Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 40.53batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 33.35batch/s]


[Epoch 15] Train Loss: 0.002206 - Test Loss: 0.002110 - Train Accuracy: 93.72% - Test Accuracy: 93.90%

BEST TEST ACCURACY:  93.9  in epoch  14
Params:  2913290

---- Start Training ----
Optimizer: Adam - Learning Rate: 0.01


Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 41.06batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 44.63batch/s]

[Epoch 1] Train Loss: 0.003694 - Test Loss: 0.002442 - Train Accuracy: 89.56% - Test Accuracy: 92.86%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 41.09batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 45.14batch/s]

[Epoch 2] Train Loss: 0.002074 - Test Loss: 0.002093 - Train Accuracy: 94.40% - Test Accuracy: 94.47%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 40.86batch/s]
Test 2: 100%|██████████| 100/100 [00:03<00:00, 32.21batch/s]

[Epoch 3] Train Loss: 0.001705 - Test Loss: 0.001839 - Train Accuracy: 95.22% - Test Accuracy: 95.20%



Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 40.64batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 46.26batch/s]

[Epoch 4] Train Loss: 0.001586 - Test Loss: 0.001731 - Train Accuracy: 95.64% - Test Accuracy: 95.40%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 40.93batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 45.32batch/s]

[Epoch 5] Train Loss: 0.001474 - Test Loss: 0.001817 - Train Accuracy: 95.89% - Test Accuracy: 95.21%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 40.33batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 33.85batch/s]

[Epoch 6] Train Loss: 0.001309 - Test Loss: 0.002117 - Train Accuracy: 96.43% - Test Accuracy: 94.41%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 40.95batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 44.33batch/s]

[Epoch 7] Train Loss: 0.001303 - Test Loss: 0.001973 - Train Accuracy: 96.40% - Test Accuracy: 95.00%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 40.84batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 45.30batch/s]

[Epoch 8] Train Loss: 0.001221 - Test Loss: 0.001889 - Train Accuracy: 96.66% - Test Accuracy: 96.00%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 40.50batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 34.42batch/s]

[Epoch 9] Train Loss: 0.001173 - Test Loss: 0.001584 - Train Accuracy: 96.87% - Test Accuracy: 96.16%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 40.70batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 46.14batch/s]

[Epoch 10] Train Loss: 0.001242 - Test Loss: 0.001908 - Train Accuracy: 96.72% - Test Accuracy: 95.71%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 40.88batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 43.83batch/s]

[Epoch 11] Train Loss: 0.001206 - Test Loss: 0.001684 - Train Accuracy: 96.85% - Test Accuracy: 96.13%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 40.69batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 35.86batch/s]

[Epoch 12] Train Loss: 0.001077 - Test Loss: 0.001617 - Train Accuracy: 97.06% - Test Accuracy: 96.30%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 40.26batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 45.46batch/s]

[Epoch 13] Train Loss: 0.001027 - Test Loss: 0.001736 - Train Accuracy: 97.22% - Test Accuracy: 96.33%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 40.96batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 44.71batch/s]

[Epoch 14] Train Loss: 0.001007 - Test Loss: 0.003471 - Train Accuracy: 97.28% - Test Accuracy: 94.11%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 40.94batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 36.38batch/s]


[Epoch 15] Train Loss: 0.001031 - Test Loss: 0.001712 - Train Accuracy: 97.30% - Test Accuracy: 96.39%

BEST TEST ACCURACY:  96.39  in epoch  14
Params:  2913290

---- Start Training ----
Optimizer: AdamW - Learning Rate: 0.001


Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 40.77batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 45.06batch/s]

[Epoch 1] Train Loss: 0.002191 - Test Loss: 0.001488 - Train Accuracy: 93.35% - Test Accuracy: 95.55%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 40.96batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 46.21batch/s]

[Epoch 2] Train Loss: 0.000899 - Test Loss: 0.000814 - Train Accuracy: 97.24% - Test Accuracy: 97.45%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 41.04batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 38.92batch/s]


[Epoch 3] Train Loss: 0.000651 - Test Loss: 0.000801 - Train Accuracy: 98.04% - Test Accuracy: 97.78%


Epoch 3: 100%|██████████| 600/600 [00:15<00:00, 39.53batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 46.47batch/s]

[Epoch 4] Train Loss: 0.000447 - Test Loss: 0.000725 - Train Accuracy: 98.62% - Test Accuracy: 98.00%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 40.83batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 45.47batch/s]

[Epoch 5] Train Loss: 0.000414 - Test Loss: 0.000904 - Train Accuracy: 98.77% - Test Accuracy: 97.98%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 41.02batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 41.15batch/s]

[Epoch 6] Train Loss: 0.000304 - Test Loss: 0.000922 - Train Accuracy: 99.06% - Test Accuracy: 97.70%



Epoch 6: 100%|██████████| 600/600 [00:15<00:00, 39.14batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 44.63batch/s]

[Epoch 7] Train Loss: 0.000272 - Test Loss: 0.000940 - Train Accuracy: 99.18% - Test Accuracy: 97.88%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 41.19batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 45.59batch/s]

[Epoch 8] Train Loss: 0.000257 - Test Loss: 0.000928 - Train Accuracy: 99.21% - Test Accuracy: 98.08%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 40.95batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 45.37batch/s]

[Epoch 9] Train Loss: 0.000222 - Test Loss: 0.000776 - Train Accuracy: 99.39% - Test Accuracy: 98.11%



Epoch 9: 100%|██████████| 600/600 [00:15<00:00, 39.13batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 43.01batch/s]

[Epoch 10] Train Loss: 0.000184 - Test Loss: 0.000780 - Train Accuracy: 99.48% - Test Accuracy: 98.30%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 40.80batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 44.66batch/s]

[Epoch 11] Train Loss: 0.000187 - Test Loss: 0.000872 - Train Accuracy: 99.45% - Test Accuracy: 98.13%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 40.69batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 46.82batch/s]

[Epoch 12] Train Loss: 0.000158 - Test Loss: 0.000754 - Train Accuracy: 99.53% - Test Accuracy: 98.33%



Epoch 12: 100%|██████████| 600/600 [00:15<00:00, 39.10batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 40.94batch/s]

[Epoch 13] Train Loss: 0.000171 - Test Loss: 0.000815 - Train Accuracy: 99.53% - Test Accuracy: 98.32%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 40.62batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 43.77batch/s]

[Epoch 14] Train Loss: 0.000126 - Test Loss: 0.000913 - Train Accuracy: 99.63% - Test Accuracy: 98.06%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 40.86batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 45.19batch/s]


[Epoch 15] Train Loss: 0.000146 - Test Loss: 0.000916 - Train Accuracy: 99.56% - Test Accuracy: 98.25%

BEST TEST ACCURACY:  98.33  in epoch  11
Params:  2913290

---- Start Training ----
Optimizer: SGD - Learning Rate: 0.001


Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 40.18batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 37.58batch/s]

[Epoch 1] Train Loss: 0.022171 - Test Loss: 0.019850 - Train Accuracy: 46.87% - Test Accuracy: 65.58%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 41.59batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 45.22batch/s]

[Epoch 2] Train Loss: 0.011997 - Test Loss: 0.006462 - Train Accuracy: 74.28% - Test Accuracy: 82.48%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 41.69batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 44.04batch/s]

[Epoch 3] Train Loss: 0.005317 - Test Loss: 0.004273 - Train Accuracy: 85.23% - Test Accuracy: 87.78%



Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 40.93batch/s]
Test 3: 100%|██████████| 100/100 [00:03<00:00, 32.60batch/s]

[Epoch 4] Train Loss: 0.004058 - Test Loss: 0.003577 - Train Accuracy: 88.59% - Test Accuracy: 89.68%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 41.45batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 45.31batch/s]

[Epoch 5] Train Loss: 0.003538 - Test Loss: 0.003263 - Train Accuracy: 89.94% - Test Accuracy: 90.51%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 40.90batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 46.18batch/s]


[Epoch 6] Train Loss: 0.003225 - Test Loss: 0.002965 - Train Accuracy: 90.75% - Test Accuracy: 91.29%


Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 41.37batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 34.78batch/s]

[Epoch 7] Train Loss: 0.002976 - Test Loss: 0.002761 - Train Accuracy: 91.45% - Test Accuracy: 91.91%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 40.87batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 44.35batch/s]


[Epoch 8] Train Loss: 0.002768 - Test Loss: 0.002593 - Train Accuracy: 92.08% - Test Accuracy: 92.49%


Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 41.33batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 45.89batch/s]

[Epoch 9] Train Loss: 0.002579 - Test Loss: 0.002414 - Train Accuracy: 92.64% - Test Accuracy: 93.18%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 41.61batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 37.25batch/s]

[Epoch 10] Train Loss: 0.002409 - Test Loss: 0.002276 - Train Accuracy: 93.09% - Test Accuracy: 93.55%



Epoch 10: 100%|██████████| 600/600 [00:15<00:00, 39.99batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 44.55batch/s]

[Epoch 11] Train Loss: 0.002254 - Test Loss: 0.002191 - Train Accuracy: 93.59% - Test Accuracy: 93.72%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 41.55batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 45.90batch/s]

[Epoch 12] Train Loss: 0.002111 - Test Loss: 0.002030 - Train Accuracy: 94.00% - Test Accuracy: 94.08%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 41.30batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 43.15batch/s]

[Epoch 13] Train Loss: 0.001979 - Test Loss: 0.001920 - Train Accuracy: 94.39% - Test Accuracy: 94.64%



Epoch 13: 100%|██████████| 600/600 [00:15<00:00, 39.98batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 41.92batch/s]

[Epoch 14] Train Loss: 0.001864 - Test Loss: 0.001816 - Train Accuracy: 94.67% - Test Accuracy: 94.78%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 41.32batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 45.68batch/s]

[Epoch 15] Train Loss: 0.001755 - Test Loss: 0.001706 - Train Accuracy: 94.99% - Test Accuracy: 94.91%

BEST TEST ACCURACY:  94.91  in epoch  14
Params:  2913290

---- Start Training ----
Optimizer: Adadelta - Learning Rate: 0.001



Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 40.64batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 45.75batch/s]

[Epoch 1] Train Loss: 0.022997 - Test Loss: 0.022953 - Train Accuracy: 10.09% - Test Accuracy: 10.72%



Epoch 1: 100%|██████████| 600/600 [00:15<00:00, 38.78batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 42.68batch/s]

[Epoch 2] Train Loss: 0.022914 - Test Loss: 0.022865 - Train Accuracy: 14.97% - Test Accuracy: 21.49%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 40.12batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 45.02batch/s]

[Epoch 3] Train Loss: 0.022824 - Test Loss: 0.022766 - Train Accuracy: 27.12% - Test Accuracy: 34.48%



Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 40.41batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 41.76batch/s]

[Epoch 4] Train Loss: 0.022719 - Test Loss: 0.022648 - Train Accuracy: 37.43% - Test Accuracy: 42.02%



Epoch 4: 100%|██████████| 600/600 [00:15<00:00, 38.72batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 46.09batch/s]

[Epoch 5] Train Loss: 0.022590 - Test Loss: 0.022501 - Train Accuracy: 44.47% - Test Accuracy: 47.18%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 40.73batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 45.04batch/s]


[Epoch 6] Train Loss: 0.022426 - Test Loss: 0.022309 - Train Accuracy: 49.64% - Test Accuracy: 51.51%


Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 40.32batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 41.99batch/s]

[Epoch 7] Train Loss: 0.022209 - Test Loss: 0.022053 - Train Accuracy: 53.59% - Test Accuracy: 54.79%



Epoch 7: 100%|██████████| 600/600 [00:15<00:00, 38.37batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 45.69batch/s]

[Epoch 8] Train Loss: 0.021916 - Test Loss: 0.021703 - Train Accuracy: 56.40% - Test Accuracy: 57.56%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 40.89batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 45.41batch/s]

[Epoch 9] Train Loss: 0.021510 - Test Loss: 0.021214 - Train Accuracy: 59.15% - Test Accuracy: 60.24%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 40.60batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 40.86batch/s]

[Epoch 10] Train Loss: 0.020938 - Test Loss: 0.020523 - Train Accuracy: 61.98% - Test Accuracy: 62.44%



Epoch 10: 100%|██████████| 600/600 [00:15<00:00, 38.35batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 44.57batch/s]

[Epoch 11] Train Loss: 0.020133 - Test Loss: 0.019553 - Train Accuracy: 64.57% - Test Accuracy: 64.79%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 40.53batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 45.52batch/s]

[Epoch 12] Train Loss: 0.019016 - Test Loss: 0.018233 - Train Accuracy: 66.46% - Test Accuracy: 67.61%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 40.98batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 41.49batch/s]

[Epoch 13] Train Loss: 0.017551 - Test Loss: 0.016580 - Train Accuracy: 68.11% - Test Accuracy: 68.96%



Epoch 13: 100%|██████████| 600/600 [00:15<00:00, 38.60batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 44.72batch/s]

[Epoch 14] Train Loss: 0.015815 - Test Loss: 0.014741 - Train Accuracy: 69.09% - Test Accuracy: 69.91%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 40.48batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 44.94batch/s]


[Epoch 15] Train Loss: 0.014010 - Test Loss: 0.012967 - Train Accuracy: 70.14% - Test Accuracy: 71.96%

BEST TEST ACCURACY:  71.96  in epoch  14
Params:  2913290

---- Start Training ----
Optimizer: Adam - Learning Rate: 0.001


Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 40.99batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 40.84batch/s]

[Epoch 1] Train Loss: 0.002238 - Test Loss: 0.001202 - Train Accuracy: 93.09% - Test Accuracy: 96.44%



Epoch 1: 100%|██████████| 600/600 [00:15<00:00, 38.74batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 44.71batch/s]

[Epoch 2] Train Loss: 0.000856 - Test Loss: 0.001056 - Train Accuracy: 97.33% - Test Accuracy: 97.21%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 41.38batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 45.92batch/s]

[Epoch 3] Train Loss: 0.000612 - Test Loss: 0.001030 - Train Accuracy: 98.12% - Test Accuracy: 97.11%



Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 40.96batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 44.41batch/s]

[Epoch 4] Train Loss: 0.000499 - Test Loss: 0.000634 - Train Accuracy: 98.47% - Test Accuracy: 98.09%



Epoch 4: 100%|██████████| 600/600 [00:15<00:00, 39.19batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 40.61batch/s]

[Epoch 5] Train Loss: 0.000356 - Test Loss: 0.000747 - Train Accuracy: 98.87% - Test Accuracy: 97.90%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 40.75batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 45.75batch/s]

[Epoch 6] Train Loss: 0.000345 - Test Loss: 0.000840 - Train Accuracy: 98.94% - Test Accuracy: 97.75%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 41.12batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 46.01batch/s]

[Epoch 7] Train Loss: 0.000287 - Test Loss: 0.000888 - Train Accuracy: 99.12% - Test Accuracy: 97.71%



Epoch 7: 100%|██████████| 600/600 [00:15<00:00, 39.73batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 37.21batch/s]


[Epoch 8] Train Loss: 0.000241 - Test Loss: 0.000784 - Train Accuracy: 99.27% - Test Accuracy: 98.18%


Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 40.80batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 45.09batch/s]

[Epoch 9] Train Loss: 0.000217 - Test Loss: 0.000792 - Train Accuracy: 99.30% - Test Accuracy: 98.19%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 40.88batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 46.09batch/s]

[Epoch 10] Train Loss: 0.000193 - Test Loss: 0.000916 - Train Accuracy: 99.42% - Test Accuracy: 97.91%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 40.28batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 36.21batch/s]

[Epoch 11] Train Loss: 0.000195 - Test Loss: 0.000729 - Train Accuracy: 99.41% - Test Accuracy: 98.27%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 40.78batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 45.34batch/s]

[Epoch 12] Train Loss: 0.000151 - Test Loss: 0.000925 - Train Accuracy: 99.55% - Test Accuracy: 97.94%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 40.90batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 45.84batch/s]

[Epoch 13] Train Loss: 0.000146 - Test Loss: 0.000839 - Train Accuracy: 99.57% - Test Accuracy: 98.20%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 40.28batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 33.64batch/s]

[Epoch 14] Train Loss: 0.000137 - Test Loss: 0.000982 - Train Accuracy: 99.60% - Test Accuracy: 98.03%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 41.14batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 45.11batch/s]

[Epoch 15] Train Loss: 0.000155 - Test Loss: 0.000707 - Train Accuracy: 99.56% - Test Accuracy: 98.36%

BEST TEST ACCURACY:  98.36  in epoch  14
Params:  2913290

---- Start Training ----
Optimizer: AdamW - Learning Rate: 0.0001



Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 41.08batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 45.44batch/s]

[Epoch 1] Train Loss: 0.004339 - Test Loss: 0.002000 - Train Accuracy: 88.42% - Test Accuracy: 94.08%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 40.86batch/s]
Test 1: 100%|██████████| 100/100 [00:03<00:00, 31.13batch/s]

[Epoch 2] Train Loss: 0.001602 - Test Loss: 0.001245 - Train Accuracy: 95.35% - Test Accuracy: 96.21%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 40.85batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 46.03batch/s]

[Epoch 3] Train Loss: 0.001070 - Test Loss: 0.001063 - Train Accuracy: 96.81% - Test Accuracy: 96.74%



Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 41.06batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 44.63batch/s]

[Epoch 4] Train Loss: 0.000785 - Test Loss: 0.000897 - Train Accuracy: 97.66% - Test Accuracy: 97.22%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 40.87batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 34.45batch/s]


[Epoch 5] Train Loss: 0.000585 - Test Loss: 0.000737 - Train Accuracy: 98.25% - Test Accuracy: 97.72%


Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 40.69batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 46.17batch/s]

[Epoch 6] Train Loss: 0.000456 - Test Loss: 0.000664 - Train Accuracy: 98.61% - Test Accuracy: 97.86%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 41.03batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 44.97batch/s]


[Epoch 7] Train Loss: 0.000335 - Test Loss: 0.000634 - Train Accuracy: 99.01% - Test Accuracy: 98.02%


Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 40.37batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 35.22batch/s]

[Epoch 8] Train Loss: 0.000251 - Test Loss: 0.000650 - Train Accuracy: 99.24% - Test Accuracy: 98.11%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 40.31batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 43.97batch/s]

[Epoch 9] Train Loss: 0.000191 - Test Loss: 0.000638 - Train Accuracy: 99.45% - Test Accuracy: 98.12%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 40.63batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 45.57batch/s]

[Epoch 10] Train Loss: 0.000146 - Test Loss: 0.000725 - Train Accuracy: 99.57% - Test Accuracy: 97.94%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 40.86batch/s]
Test 10: 100%|██████████| 100/100 [00:03<00:00, 32.79batch/s]

[Epoch 11] Train Loss: 0.000110 - Test Loss: 0.000727 - Train Accuracy: 99.70% - Test Accuracy: 97.99%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 40.58batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 43.56batch/s]

[Epoch 12] Train Loss: 0.000106 - Test Loss: 0.000757 - Train Accuracy: 99.66% - Test Accuracy: 97.99%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 40.92batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 45.15batch/s]

[Epoch 13] Train Loss: 0.000081 - Test Loss: 0.000841 - Train Accuracy: 99.75% - Test Accuracy: 97.90%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 40.83batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 35.36batch/s]

[Epoch 14] Train Loss: 0.000055 - Test Loss: 0.000827 - Train Accuracy: 99.84% - Test Accuracy: 98.10%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 40.12batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 45.89batch/s]

[Epoch 15] Train Loss: 0.000036 - Test Loss: 0.000816 - Train Accuracy: 99.90% - Test Accuracy: 98.24%

BEST TEST ACCURACY:  98.24  in epoch  14
Params:  2913290

---- Start Training ----
Optimizer: SGD - Learning Rate: 0.0001



Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 41.30batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 45.03batch/s]

[Epoch 1] Train Loss: 0.022995 - Test Loss: 0.022951 - Train Accuracy: 10.23% - Test Accuracy: 10.27%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 40.60batch/s]
Test 1: 100%|██████████| 100/100 [00:03<00:00, 32.10batch/s]


[Epoch 2] Train Loss: 0.022911 - Test Loss: 0.022862 - Train Accuracy: 12.32% - Test Accuracy: 16.16%


Epoch 2: 100%|██████████| 600/600 [00:15<00:00, 39.12batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 43.04batch/s]

[Epoch 3] Train Loss: 0.022818 - Test Loss: 0.022759 - Train Accuracy: 24.84% - Test Accuracy: 34.63%



Epoch 3: 100%|██████████| 600/600 [00:15<00:00, 39.90batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 43.00batch/s]

[Epoch 4] Train Loss: 0.022707 - Test Loss: 0.022635 - Train Accuracy: 39.49% - Test Accuracy: 45.86%



Epoch 4: 100%|██████████| 600/600 [00:15<00:00, 39.26batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 34.33batch/s]

[Epoch 5] Train Loss: 0.022569 - Test Loss: 0.022476 - Train Accuracy: 48.42% - Test Accuracy: 51.86%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 40.76batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 43.47batch/s]

[Epoch 6] Train Loss: 0.022390 - Test Loss: 0.022267 - Train Accuracy: 52.61% - Test Accuracy: 53.38%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 41.04batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 45.18batch/s]

[Epoch 7] Train Loss: 0.022149 - Test Loss: 0.021982 - Train Accuracy: 52.42% - Test Accuracy: 53.75%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 40.71batch/s]
Test 7: 100%|██████████| 100/100 [00:03<00:00, 32.52batch/s]

[Epoch 8] Train Loss: 0.021815 - Test Loss: 0.021580 - Train Accuracy: 52.79% - Test Accuracy: 54.29%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 40.81batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 44.39batch/s]

[Epoch 9] Train Loss: 0.021340 - Test Loss: 0.021008 - Train Accuracy: 53.59% - Test Accuracy: 55.93%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 41.09batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 45.28batch/s]

[Epoch 10] Train Loss: 0.020660 - Test Loss: 0.020185 - Train Accuracy: 56.45% - Test Accuracy: 58.69%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 40.33batch/s]
Test 10: 100%|██████████| 100/100 [00:03<00:00, 32.02batch/s]

[Epoch 11] Train Loss: 0.019690 - Test Loss: 0.019021 - Train Accuracy: 59.99% - Test Accuracy: 62.34%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 40.58batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 44.79batch/s]

[Epoch 12] Train Loss: 0.018345 - Test Loss: 0.017432 - Train Accuracy: 63.18% - Test Accuracy: 65.00%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 40.93batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 45.07batch/s]

[Epoch 13] Train Loss: 0.016575 - Test Loss: 0.015435 - Train Accuracy: 65.39% - Test Accuracy: 67.38%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 40.51batch/s]
Test 13: 100%|██████████| 100/100 [00:03<00:00, 31.99batch/s]


[Epoch 14] Train Loss: 0.014502 - Test Loss: 0.013288 - Train Accuracy: 68.55% - Test Accuracy: 70.50%


Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 40.55batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 42.91batch/s]


[Epoch 15] Train Loss: 0.012471 - Test Loss: 0.011381 - Train Accuracy: 72.05% - Test Accuracy: 74.65%

BEST TEST ACCURACY:  74.65  in epoch  14
Params:  2913290

---- Start Training ----
Optimizer: Adadelta - Learning Rate: 0.0001


Epoch 0: 100%|██████████| 600/600 [00:15<00:00, 39.90batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 44.05batch/s]

[Epoch 1] Train Loss: 0.023003 - Test Loss: 0.022999 - Train Accuracy: 11.14% - Test Accuracy: 11.29%



Epoch 1: 100%|██████████| 600/600 [00:15<00:00, 38.58batch/s]
Test 1: 100%|██████████| 100/100 [00:03<00:00, 32.72batch/s]

[Epoch 2] Train Loss: 0.022995 - Test Loss: 0.022991 - Train Accuracy: 12.09% - Test Accuracy: 12.00%



Epoch 2: 100%|██████████| 600/600 [00:15<00:00, 38.82batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 43.58batch/s]


[Epoch 3] Train Loss: 0.022986 - Test Loss: 0.022982 - Train Accuracy: 13.08% - Test Accuracy: 13.13%


Epoch 3: 100%|██████████| 600/600 [00:15<00:00, 39.43batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 43.77batch/s]


[Epoch 4] Train Loss: 0.022978 - Test Loss: 0.022974 - Train Accuracy: 14.00% - Test Accuracy: 13.94%


Epoch 4: 100%|██████████| 600/600 [00:16<00:00, 36.43batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 41.33batch/s]

[Epoch 5] Train Loss: 0.022970 - Test Loss: 0.022965 - Train Accuracy: 14.78% - Test Accuracy: 14.70%



Epoch 5: 100%|██████████| 600/600 [00:15<00:00, 38.61batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 42.68batch/s]


[Epoch 6] Train Loss: 0.022961 - Test Loss: 0.022957 - Train Accuracy: 15.51% - Test Accuracy: 15.38%


Epoch 6: 100%|██████████| 600/600 [00:15<00:00, 39.03batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 34.59batch/s]


[Epoch 7] Train Loss: 0.022953 - Test Loss: 0.022948 - Train Accuracy: 16.07% - Test Accuracy: 15.94%


Epoch 7: 100%|██████████| 600/600 [00:15<00:00, 37.79batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 42.58batch/s]


[Epoch 8] Train Loss: 0.022945 - Test Loss: 0.022939 - Train Accuracy: 16.57% - Test Accuracy: 16.37%


Epoch 8: 100%|██████████| 600/600 [00:15<00:00, 38.99batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 43.21batch/s]

[Epoch 9] Train Loss: 0.022936 - Test Loss: 0.022930 - Train Accuracy: 16.92% - Test Accuracy: 16.77%



Epoch 9: 100%|██████████| 600/600 [00:15<00:00, 38.37batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 33.40batch/s]


[Epoch 10] Train Loss: 0.022928 - Test Loss: 0.022922 - Train Accuracy: 17.25% - Test Accuracy: 17.16%


Epoch 10: 100%|██████████| 600/600 [00:15<00:00, 39.07batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 43.88batch/s]

[Epoch 11] Train Loss: 0.022919 - Test Loss: 0.022913 - Train Accuracy: 17.53% - Test Accuracy: 17.48%



Epoch 11: 100%|██████████| 600/600 [00:15<00:00, 39.23batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 43.22batch/s]


[Epoch 12] Train Loss: 0.022910 - Test Loss: 0.022904 - Train Accuracy: 17.84% - Test Accuracy: 17.89%


Epoch 12: 100%|██████████| 600/600 [00:16<00:00, 36.32batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 42.87batch/s]

[Epoch 13] Train Loss: 0.022901 - Test Loss: 0.022895 - Train Accuracy: 18.26% - Test Accuracy: 18.26%



Epoch 13: 100%|██████████| 600/600 [00:15<00:00, 38.38batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 43.44batch/s]

[Epoch 14] Train Loss: 0.022892 - Test Loss: 0.022885 - Train Accuracy: 18.68% - Test Accuracy: 18.70%



Epoch 14: 100%|██████████| 600/600 [00:15<00:00, 39.10batch/s]
Test 14: 100%|██████████| 100/100 [00:03<00:00, 32.27batch/s]


[Epoch 15] Train Loss: 0.022883 - Test Loss: 0.022876 - Train Accuracy: 19.19% - Test Accuracy: 19.18%

BEST TEST ACCURACY:  19.18  in epoch  14
Params:  2913290

---- Start Training ----
Optimizer: Adam - Learning Rate: 0.0001


Epoch 0: 100%|██████████| 600/600 [00:15<00:00, 38.70batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 41.78batch/s]

[Epoch 1] Train Loss: 0.004329 - Test Loss: 0.001849 - Train Accuracy: 88.64% - Test Accuracy: 94.43%



Epoch 1: 100%|██████████| 600/600 [00:15<00:00, 39.13batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 41.67batch/s]

[Epoch 2] Train Loss: 0.001598 - Test Loss: 0.001297 - Train Accuracy: 95.24% - Test Accuracy: 96.12%



Epoch 2: 100%|██████████| 600/600 [00:15<00:00, 38.76batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 34.70batch/s]

[Epoch 3] Train Loss: 0.001069 - Test Loss: 0.001005 - Train Accuracy: 96.84% - Test Accuracy: 96.86%



Epoch 3: 100%|██████████| 600/600 [00:15<00:00, 39.09batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 44.55batch/s]

[Epoch 4] Train Loss: 0.000773 - Test Loss: 0.000815 - Train Accuracy: 97.69% - Test Accuracy: 97.31%



Epoch 4: 100%|██████████| 600/600 [00:15<00:00, 39.97batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 45.03batch/s]

[Epoch 5] Train Loss: 0.000579 - Test Loss: 0.000808 - Train Accuracy: 98.23% - Test Accuracy: 97.51%



Epoch 5: 100%|██████████| 600/600 [00:16<00:00, 37.08batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 43.68batch/s]

[Epoch 6] Train Loss: 0.000451 - Test Loss: 0.000765 - Train Accuracy: 98.64% - Test Accuracy: 97.54%



Epoch 6: 100%|██████████| 600/600 [00:15<00:00, 39.65batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 44.19batch/s]

[Epoch 7] Train Loss: 0.000337 - Test Loss: 0.000701 - Train Accuracy: 98.99% - Test Accuracy: 97.79%



Epoch 7: 100%|██████████| 600/600 [00:15<00:00, 39.05batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 37.88batch/s]

[Epoch 8] Train Loss: 0.000256 - Test Loss: 0.000619 - Train Accuracy: 99.25% - Test Accuracy: 98.07%



Epoch 8: 100%|██████████| 600/600 [00:16<00:00, 37.43batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 45.13batch/s]

[Epoch 9] Train Loss: 0.000202 - Test Loss: 0.000702 - Train Accuracy: 99.38% - Test Accuracy: 97.85%



Epoch 9: 100%|██████████| 600/600 [00:15<00:00, 39.06batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 44.87batch/s]

[Epoch 10] Train Loss: 0.000160 - Test Loss: 0.000653 - Train Accuracy: 99.53% - Test Accuracy: 98.09%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 40.05batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 35.32batch/s]


[Epoch 11] Train Loss: 0.000111 - Test Loss: 0.000740 - Train Accuracy: 99.68% - Test Accuracy: 98.03%


Epoch 11: 100%|██████████| 600/600 [00:15<00:00, 39.10batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 45.15batch/s]

[Epoch 12] Train Loss: 0.000096 - Test Loss: 0.000744 - Train Accuracy: 99.71% - Test Accuracy: 98.19%



Epoch 12: 100%|██████████| 600/600 [00:15<00:00, 39.89batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 42.62batch/s]

[Epoch 13] Train Loss: 0.000070 - Test Loss: 0.000817 - Train Accuracy: 99.79% - Test Accuracy: 97.98%



Epoch 13: 100%|██████████| 600/600 [00:15<00:00, 39.44batch/s]
Test 13: 100%|██████████| 100/100 [00:03<00:00, 32.92batch/s]

[Epoch 14] Train Loss: 0.000079 - Test Loss: 0.000747 - Train Accuracy: 99.73% - Test Accuracy: 98.12%



Epoch 14: 100%|██████████| 600/600 [00:15<00:00, 39.24batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 43.52batch/s]

[Epoch 15] Train Loss: 0.000041 - Test Loss: 0.000851 - Train Accuracy: 99.89% - Test Accuracy: 98.13%

BEST TEST ACCURACY:  98.19  in epoch  11
{0.1: {'AdamW': {'best_accuracy': 11.35, 'best_epoch': 2}, 'SGD': {'best_accuracy': 98.32, 'best_epoch': 9}, 'Adadelta': {'best_accuracy': 98.18, 'best_epoch': 14}, 'Adam': {'best_accuracy': 11.35, 'best_epoch': 0}}, 0.01: {'AdamW': {'best_accuracy': 96.66, 'best_epoch': 12}, 'SGD': {'best_accuracy': 98.16, 'best_epoch': 13}, 'Adadelta': {'best_accuracy': 93.9, 'best_epoch': 14}, 'Adam': {'best_accuracy': 96.39, 'best_epoch': 14}}, 0.001: {'AdamW': {'best_accuracy': 98.33, 'best_epoch': 11}, 'SGD': {'best_accuracy': 94.91, 'best_epoch': 14}, 'Adadelta': {'best_accuracy': 71.96, 'best_epoch': 14}, 'Adam': {'best_accuracy': 98.36, 'best_epoch': 14}}, 0.0001: {'AdamW': {'best_accuracy': 98.24, 'best_epoch': 14}, 'SGD': {'best_accuracy': 74.65, 'best_epoch': 14}, 'Adadelta': {'best_accuracy': 19.18, 'best_epoch': 14}, 'Adam': {'best_accuracy': 98.




In [None]:
# Dump the raw optimizer / learning-rate comparison summary as a plain dict.
# As the printed output shows, it is nested as
#   {learning_rate: {optimizer_name: {'best_accuracy': ..., 'best_epoch': ...}}}
# where 'best_accuracy' values match the test-accuracy percentages in the training logs.
# NOTE(review): 'best_epoch' looks zero-based -- it matches the progress-bar label and is
# one less than the "[Epoch N]" summary line (e.g. 98.19% is logged under "[Epoch 12]"
# but stored as epoch 11). Confirm against the training loop before reporting epochs.
print(best_results)

{0.1: {'AdamW': {'best_accuracy': 11.35, 'best_epoch': 2}, 'SGD': {'best_accuracy': 98.32, 'best_epoch': 9}, 'Adadelta': {'best_accuracy': 98.18, 'best_epoch': 14}, 'Adam': {'best_accuracy': 11.35, 'best_epoch': 0}}, 0.01: {'AdamW': {'best_accuracy': 96.66, 'best_epoch': 12}, 'SGD': {'best_accuracy': 98.16, 'best_epoch': 13}, 'Adadelta': {'best_accuracy': 93.9, 'best_epoch': 14}, 'Adam': {'best_accuracy': 96.39, 'best_epoch': 14}}, 0.001: {'AdamW': {'best_accuracy': 98.33, 'best_epoch': 11}, 'SGD': {'best_accuracy': 94.91, 'best_epoch': 14}, 'Adadelta': {'best_accuracy': 71.96, 'best_epoch': 14}, 'Adam': {'best_accuracy': 98.36, 'best_epoch': 14}}, 0.0001: {'AdamW': {'best_accuracy': 98.24, 'best_epoch': 14}, 'SGD': {'best_accuracy': 74.65, 'best_epoch': 14}, 'Adadelta': {'best_accuracy': 19.18, 'best_epoch': 14}, 'Adam': {'best_accuracy': 98.19, 'best_epoch': 11}}}
