Imports


In [12]:
import torch
import torchvision
import pandas as pd
import torch.nn as nn
from tqdm import tqdm
import multiprocessing
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import tabulate

Versión torch


In [13]:
print("Torch version: ", torch.__version__)

####################################################################
# Set Device
####################################################################

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device: ", device)

Torch version:  2.5.1+cu121
Device:  cuda


Preparar datos


In [14]:
####################################################################
# Prepare Data
####################################################################

train_set = torchvision.datasets.MNIST(".data/", train=True, download=True)
# train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)

test_set = torchvision.datasets.MNIST(".data/", train=False, download=True)
# test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=True)

print("Train images: ", train_set)
print("Image: ", train_set[0][0])
print("Label: ", train_set[0][1])
print("Label one hot: ", F.one_hot(torch.tensor(train_set[0][1]), num_classes=10))

Train images:  Dataset MNIST
    Number of datapoints: 60000
    Root location: .data/
    Split: Train
Image:  <PIL.Image.Image image mode=L size=28x28 at 0x7C7CE048F0A0>
Label:  5
Label one hot:  tensor([0, 0, 0, 0, 0, 1, 0, 0, 0, 0])


Dataset class


In [15]:
####################################################################
# Dataset Class
####################################################################
class MNIST_dataset(Dataset):
    def __init__(self, data, partition="train"):
        print("\nLoading MNIST ", partition, " Dataset...")
        self.data = data
        self.partition = partition
        print("\tTotal Len.: ", len(self.data), "\n", 50 * "-")

    def __len__(self):
        return len(self.data)

    def from_pil_to_tensor(self, image):
        return torchvision.transforms.ToTensor()(image)

    def __getitem__(self, idx):
        # Image
        image = self.data[idx][0]
        # PIL Image to torch tensor
        image_tensor = self.from_pil_to_tensor(image)
        # care! net expect a 784 size vector and our dataset
        # provide 1x28x28 (channels, height, width) -> Reshape!
        image_tensor = image_tensor.view(-1)

        # Label
        label = torch.tensor(self.data[idx][1])
        label = F.one_hot(label, num_classes=10).float()

        return {"img": image_tensor, "label": label}


train_set = MNIST_dataset(train_set, partition="train")
test_set = MNIST_dataset(test_set, partition="test")


Loading MNIST  train  Dataset...
	Total Len.:  60000 
 --------------------------------------------------

Loading MNIST  test  Dataset...
	Total Len.:  10000 
 --------------------------------------------------


DataLoader class


In [16]:
####################################################################
# DataLoader Class
####################################################################

batch_size = 100
num_workers = multiprocessing.cpu_count() - 1
print("Num workers", num_workers)
train_dataloader = DataLoader(
    train_set, batch_size, shuffle=True, num_workers=num_workers
)
test_dataloader = DataLoader(
    test_set, batch_size, shuffle=False, num_workers=num_workers
)

Num workers 1


NN Class


In [17]:
####################################################################
# Neural Network Class
####################################################################
# Creating our Neural Network - Fully Connected
class Net(nn.Module):
    def __init__(self, num_classes):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(784, 256)
        self.relu1 = nn.ReLU()
        self.linear2 = nn.Linear(256, 256)
        self.relu2 = nn.ReLU()
        self.linear3 = nn.Linear(256, 256)
        self.relu3 = nn.ReLU()
        self.classifier = nn.Linear(256, num_classes)

    def forward(self, x):
        out = self.relu1(self.linear1(x))
        out = self.relu2(self.linear2(out))
        out = self.relu3(self.linear3(out))
        out = self.classifier(out)
        return out


def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

Función de entrenamineto


In [18]:
# TRAIN NETWORK
def train_network(optimizer, epoch, criterion, net):
    train_loss, train_correct = 0, 0
    net.train()
    with tqdm(
        iter(train_dataloader), desc="Epoch " + str(epoch), unit="batch"
    ) as tepoch:
        for batch in tepoch:
            # Returned values of Dataset Class
            images = batch["img"].to(device)
            labels = batch["label"].to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # Forward
            outputs = net(images)
            loss = criterion(outputs, labels)

            # Calculate gradients
            loss.backward()

            # Update parameters
            optimizer.step()

            # one hot -> labels
            labels = torch.argmax(labels, dim=1)
            pred = torch.argmax(outputs, dim=1)
            train_correct += pred.eq(labels).sum().item()

            # print statistics
            train_loss += loss.item()

    train_loss /= len(train_dataloader.dataset)
    return train_loss, train_correct

Función de test


In [19]:
# TEST NETWORK
def test_network(net, epoch, criterion):
    test_loss, test_correct = 0, 0
    net.eval()
    with torch.no_grad():
        with tqdm(
            iter(test_dataloader), desc="Test " + str(epoch), unit="batch"
        ) as tepoch:
            for batch in tepoch:
                images = batch["img"].to(device)
                labels = batch["label"].to(device)

                # Forward
                outputs = net(images)
                test_loss += criterion(outputs, labels)

                # one hot -> labels
                labels = torch.argmax(labels, dim=1)
                pred = torch.argmax(outputs, dim=1)

                test_correct += pred.eq(labels).sum().item()

        test_loss /= len(test_dataloader.dataset)
        test_accuracy = 100.0 * test_correct / len(test_dataloader.dataset)
    return test_loss, test_correct, test_accuracy

Entrenamiento con validación


In [20]:
# Instantiating the network and printing its architecture
num_classes = 10
net = Net(num_classes)
print(net)
print("Params: ", count_parameters(net))
####################################################################
# Training settings
####################################################################

# Training hyperparameters
epochs = 15
criterion = nn.CrossEntropyLoss()
best_results = dict()
for lr in [0.1, 0.01, 0.001, 0.0001]:
    best_results[lr] = dict()
    for optimizer in [optim.AdamW, optim.SGD, optim.Adadelta, optim.Adam]:
        ####################################################################
        # Training
        ####################################################################

        # Load model in GPU, cada vez que hacemos un entrenamiento con un tipo de optimizador y lr tenemos que crear la red
        net = Net(num_classes)
        net.to(device)
        print("Params: ", count_parameters(net))
        # y el optimizador debe crearse despues de haber creado la red porque le pasamos los parámetros de la red que serán los que se encargue de optimizar
        if optimizer == optim.SGD:
            optimizer = optimizer(
                net.parameters(), lr=lr, weight_decay=1e-6, momentum=0.9
            )
        else:
            optimizer = optimizer(net.parameters(), lr=lr, weight_decay=1e-6)

        best_results[lr][optimizer.__class__.__name__] = {
            "best_accuracy": -1,
            "best_epoch": 0,
        }

        print("\n---- Start Training ----")
        print(f"Optimizer: {optimizer.__class__.__name__} - Learning Rate: {lr}")
        for epoch in range(epochs):
            train_loss, train_correct = train_network(optimizer, epoch, criterion, net)

            test_loss, test_correct, test_accuracy = test_network(net, epoch, criterion)

            print(
                "[Epoch {}] Train Loss: {:.6f} - Test Loss: {:.6f} - Train Accuracy: {:.2f}% - Test Accuracy: {:.2f}%".format(
                    epoch + 1,
                    train_loss,
                    test_loss,
                    100.0 * train_correct / len(train_dataloader.dataset),
                    test_accuracy,
                )
            )

            if (
                test_accuracy
                > best_results[lr][optimizer.__class__.__name__]["best_accuracy"]
            ):
                best_results[lr][optimizer.__class__.__name__][
                    "best_accuracy"
                ] = test_accuracy
                best_results[lr][optimizer.__class__.__name__]["best_epoch"] = epoch

                # Save best weights
                torch.save(net.state_dict(), "best_model.pt")

        print(
            "\nBEST TEST ACCURACY: ",
            best_results[lr][optimizer.__class__.__name__]["best_accuracy"],
            " in epoch ",
            best_results[lr][optimizer.__class__.__name__]["best_epoch"],
        )

####################################################################
# Results
####################################################################
print(best_results)

Net(
  (linear1): Linear(in_features=784, out_features=256, bias=True)
  (relu1): ReLU()
  (linear2): Linear(in_features=256, out_features=256, bias=True)
  (relu2): ReLU()
  (linear3): Linear(in_features=256, out_features=256, bias=True)
  (relu3): ReLU()
  (classifier): Linear(in_features=256, out_features=10, bias=True)
)
Params:  335114
Params:  335114

---- Start Training ----
Optimizer: AdamW - Learning Rate: 0.1


Epoch 0: 100%|██████████| 600/600 [00:16<00:00, 35.74batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 45.71batch/s]

[Epoch 1] Train Loss: 0.033213 - Test Loss: 0.018195 - Train Accuracy: 21.34% - Test Accuracy: 26.80%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 40.56batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 38.90batch/s]

[Epoch 2] Train Loss: 0.017263 - Test Loss: 0.017299 - Train Accuracy: 29.56% - Test Accuracy: 29.60%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 41.46batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 46.11batch/s]

[Epoch 3] Train Loss: 0.016944 - Test Loss: 0.017409 - Train Accuracy: 31.83% - Test Accuracy: 31.80%



Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 41.32batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 45.55batch/s]

[Epoch 4] Train Loss: 0.016711 - Test Loss: 0.016462 - Train Accuracy: 32.93% - Test Accuracy: 32.83%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 40.09batch/s]
Test 4: 100%|██████████| 100/100 [00:03<00:00, 27.65batch/s]

[Epoch 5] Train Loss: 0.016508 - Test Loss: 0.017010 - Train Accuracy: 32.90% - Test Accuracy: 32.64%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 40.55batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 45.53batch/s]

[Epoch 6] Train Loss: 0.016505 - Test Loss: 0.016947 - Train Accuracy: 33.04% - Test Accuracy: 31.75%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 41.06batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 45.34batch/s]

[Epoch 7] Train Loss: 0.016816 - Test Loss: 0.016213 - Train Accuracy: 32.06% - Test Accuracy: 35.49%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 40.23batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 35.16batch/s]

[Epoch 8] Train Loss: 0.016660 - Test Loss: 0.025968 - Train Accuracy: 33.40% - Test Accuracy: 12.55%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 41.06batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 45.01batch/s]

[Epoch 9] Train Loss: 0.019572 - Test Loss: 0.019906 - Train Accuracy: 18.77% - Test Accuracy: 18.40%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 40.88batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 45.14batch/s]

[Epoch 10] Train Loss: 0.019945 - Test Loss: 0.020933 - Train Accuracy: 18.41% - Test Accuracy: 17.90%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 40.49batch/s]
Test 10: 100%|██████████| 100/100 [00:03<00:00, 33.06batch/s]

[Epoch 11] Train Loss: 0.019644 - Test Loss: 0.019231 - Train Accuracy: 18.89% - Test Accuracy: 20.14%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 40.40batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 45.13batch/s]

[Epoch 12] Train Loss: 0.020084 - Test Loss: 0.019367 - Train Accuracy: 18.50% - Test Accuracy: 20.19%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 40.72batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 44.35batch/s]

[Epoch 13] Train Loss: 0.019291 - Test Loss: 0.019092 - Train Accuracy: 19.07% - Test Accuracy: 20.50%



Epoch 13: 100%|██████████| 600/600 [00:15<00:00, 39.63batch/s]
Test 13: 100%|██████████| 100/100 [00:03<00:00, 32.67batch/s]

[Epoch 14] Train Loss: 0.019065 - Test Loss: 0.018922 - Train Accuracy: 19.41% - Test Accuracy: 18.41%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 41.14batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 45.42batch/s]

[Epoch 15] Train Loss: 0.018909 - Test Loss: 0.018794 - Train Accuracy: 19.43% - Test Accuracy: 18.88%

BEST TEST ACCURACY:  35.49  in epoch  6
Params:  335114

---- Start Training ----
Optimizer: SGD - Learning Rate: 0.1



Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 41.63batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 45.70batch/s]

[Epoch 1] Train Loss: 0.003527 - Test Loss: 0.001219 - Train Accuracy: 88.61% - Test Accuracy: 96.33%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 41.45batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 35.17batch/s]

[Epoch 2] Train Loss: 0.001128 - Test Loss: 0.001353 - Train Accuracy: 96.57% - Test Accuracy: 96.17%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 41.23batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 45.77batch/s]

[Epoch 3] Train Loss: 0.000775 - Test Loss: 0.000856 - Train Accuracy: 97.60% - Test Accuracy: 97.50%



Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 41.35batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 45.68batch/s]

[Epoch 4] Train Loss: 0.000606 - Test Loss: 0.000862 - Train Accuracy: 98.10% - Test Accuracy: 97.34%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 41.66batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 39.79batch/s]

[Epoch 5] Train Loss: 0.000508 - Test Loss: 0.000903 - Train Accuracy: 98.42% - Test Accuracy: 97.58%



Epoch 5: 100%|██████████| 600/600 [00:15<00:00, 39.60batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 45.42batch/s]

[Epoch 6] Train Loss: 0.000373 - Test Loss: 0.000852 - Train Accuracy: 98.82% - Test Accuracy: 97.70%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 41.60batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 46.12batch/s]

[Epoch 7] Train Loss: 0.000339 - Test Loss: 0.000831 - Train Accuracy: 98.88% - Test Accuracy: 97.91%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 41.79batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 46.08batch/s]

[Epoch 8] Train Loss: 0.000285 - Test Loss: 0.000824 - Train Accuracy: 99.11% - Test Accuracy: 97.90%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 40.55batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 36.31batch/s]

[Epoch 9] Train Loss: 0.000243 - Test Loss: 0.000882 - Train Accuracy: 99.23% - Test Accuracy: 97.88%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 41.15batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 45.05batch/s]

[Epoch 10] Train Loss: 0.000191 - Test Loss: 0.001000 - Train Accuracy: 99.39% - Test Accuracy: 97.68%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 41.33batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 45.36batch/s]

[Epoch 11] Train Loss: 0.000232 - Test Loss: 0.001042 - Train Accuracy: 99.27% - Test Accuracy: 97.48%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 41.47batch/s]
Test 11: 100%|██████████| 100/100 [00:03<00:00, 32.54batch/s]


[Epoch 12] Train Loss: 0.000169 - Test Loss: 0.000873 - Train Accuracy: 99.45% - Test Accuracy: 97.96%


Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 41.16batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 45.46batch/s]

[Epoch 13] Train Loss: 0.000154 - Test Loss: 0.000914 - Train Accuracy: 99.50% - Test Accuracy: 98.11%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 41.61batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 45.52batch/s]

[Epoch 14] Train Loss: 0.000127 - Test Loss: 0.001016 - Train Accuracy: 99.59% - Test Accuracy: 97.86%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 41.71batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 39.91batch/s]

[Epoch 15] Train Loss: 0.000145 - Test Loss: 0.001147 - Train Accuracy: 99.55% - Test Accuracy: 97.72%

BEST TEST ACCURACY:  98.11  in epoch  12
Params:  335114

---- Start Training ----
Optimizer: Adadelta - Learning Rate: 0.1



Epoch 0: 100%|██████████| 600/600 [00:15<00:00, 39.33batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 45.42batch/s]

[Epoch 1] Train Loss: 0.008482 - Test Loss: 0.003272 - Train Accuracy: 75.00% - Test Accuracy: 90.47%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 41.16batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 45.11batch/s]

[Epoch 2] Train Loss: 0.002930 - Test Loss: 0.002354 - Train Accuracy: 91.48% - Test Accuracy: 92.98%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 40.91batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 44.17batch/s]


[Epoch 3] Train Loss: 0.002098 - Test Loss: 0.001781 - Train Accuracy: 93.89% - Test Accuracy: 94.72%


Epoch 3: 100%|██████████| 600/600 [00:15<00:00, 39.92batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 40.48batch/s]

[Epoch 4] Train Loss: 0.001618 - Test Loss: 0.001373 - Train Accuracy: 95.26% - Test Accuracy: 95.96%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 40.90batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 45.34batch/s]

[Epoch 5] Train Loss: 0.001300 - Test Loss: 0.001229 - Train Accuracy: 96.22% - Test Accuracy: 96.18%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 41.19batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 45.22batch/s]

[Epoch 6] Train Loss: 0.001075 - Test Loss: 0.001153 - Train Accuracy: 96.83% - Test Accuracy: 96.47%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 40.12batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 36.85batch/s]

[Epoch 7] Train Loss: 0.000914 - Test Loss: 0.001000 - Train Accuracy: 97.31% - Test Accuracy: 96.97%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 40.95batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 45.87batch/s]

[Epoch 8] Train Loss: 0.000799 - Test Loss: 0.000854 - Train Accuracy: 97.59% - Test Accuracy: 97.30%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 40.96batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 45.09batch/s]

[Epoch 9] Train Loss: 0.000697 - Test Loss: 0.000914 - Train Accuracy: 97.91% - Test Accuracy: 97.11%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 40.28batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 33.71batch/s]

[Epoch 10] Train Loss: 0.000607 - Test Loss: 0.000820 - Train Accuracy: 98.19% - Test Accuracy: 97.46%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 41.31batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 45.12batch/s]

[Epoch 11] Train Loss: 0.000540 - Test Loss: 0.000824 - Train Accuracy: 98.39% - Test Accuracy: 97.35%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 40.94batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 45.04batch/s]

[Epoch 12] Train Loss: 0.000482 - Test Loss: 0.000813 - Train Accuracy: 98.59% - Test Accuracy: 97.47%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 41.01batch/s]
Test 12: 100%|██████████| 100/100 [00:03<00:00, 32.59batch/s]

[Epoch 13] Train Loss: 0.000425 - Test Loss: 0.000719 - Train Accuracy: 98.77% - Test Accuracy: 97.79%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 41.17batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 45.48batch/s]

[Epoch 14] Train Loss: 0.000375 - Test Loss: 0.000727 - Train Accuracy: 98.95% - Test Accuracy: 97.69%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 41.07batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 45.61batch/s]

[Epoch 15] Train Loss: 0.000335 - Test Loss: 0.000708 - Train Accuracy: 99.03% - Test Accuracy: 97.87%

BEST TEST ACCURACY:  97.87  in epoch  14
Params:  335114

---- Start Training ----
Optimizer: Adam - Learning Rate: 0.1



Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 41.05batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 36.35batch/s]

[Epoch 1] Train Loss: 0.037069 - Test Loss: 0.023112 - Train Accuracy: 10.43% - Test Accuracy: 10.30%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 40.10batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 45.31batch/s]

[Epoch 2] Train Loss: 0.023135 - Test Loss: 0.023084 - Train Accuracy: 10.40% - Test Accuracy: 9.58%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 41.24batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 45.12batch/s]

[Epoch 3] Train Loss: 0.023075 - Test Loss: 0.023075 - Train Accuracy: 10.40% - Test Accuracy: 10.28%



Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 41.09batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 40.04batch/s]

[Epoch 4] Train Loss: 0.023090 - Test Loss: 0.023063 - Train Accuracy: 10.46% - Test Accuracy: 10.28%



Epoch 4: 100%|██████████| 600/600 [00:15<00:00, 39.69batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 44.69batch/s]

[Epoch 5] Train Loss: 0.023084 - Test Loss: 0.023046 - Train Accuracy: 10.60% - Test Accuracy: 9.74%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 40.95batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 45.74batch/s]

[Epoch 6] Train Loss: 0.023085 - Test Loss: 0.023061 - Train Accuracy: 10.39% - Test Accuracy: 10.28%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 41.08batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 45.10batch/s]

[Epoch 7] Train Loss: 0.023093 - Test Loss: 0.023102 - Train Accuracy: 10.43% - Test Accuracy: 10.28%



Epoch 7: 100%|██████████| 600/600 [00:15<00:00, 39.74batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 38.17batch/s]

[Epoch 8] Train Loss: 0.023085 - Test Loss: 0.023115 - Train Accuracy: 10.43% - Test Accuracy: 11.35%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 41.23batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 44.53batch/s]

[Epoch 9] Train Loss: 0.023079 - Test Loss: 0.023113 - Train Accuracy: 10.46% - Test Accuracy: 10.10%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 41.23batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 45.75batch/s]

[Epoch 10] Train Loss: 0.023078 - Test Loss: 0.023074 - Train Accuracy: 10.33% - Test Accuracy: 9.82%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 40.49batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 34.91batch/s]

[Epoch 11] Train Loss: 0.023090 - Test Loss: 0.023067 - Train Accuracy: 10.30% - Test Accuracy: 11.35%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 41.12batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 43.82batch/s]

[Epoch 12] Train Loss: 0.023082 - Test Loss: 0.023085 - Train Accuracy: 10.53% - Test Accuracy: 10.09%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 40.95batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 45.62batch/s]

[Epoch 13] Train Loss: 0.023086 - Test Loss: 0.023089 - Train Accuracy: 10.33% - Test Accuracy: 11.35%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 40.88batch/s]
Test 13: 100%|██████████| 100/100 [00:03<00:00, 33.02batch/s]

[Epoch 14] Train Loss: 0.023089 - Test Loss: 0.023127 - Train Accuracy: 10.37% - Test Accuracy: 11.35%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 41.17batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 45.34batch/s]

[Epoch 15] Train Loss: 0.023079 - Test Loss: 0.023132 - Train Accuracy: 10.50% - Test Accuracy: 11.35%

BEST TEST ACCURACY:  11.35  in epoch  7
Params:  335114

---- Start Training ----
Optimizer: AdamW - Learning Rate: 0.01



Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 41.33batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 45.25batch/s]

[Epoch 1] Train Loss: 0.002932 - Test Loss: 0.001800 - Train Accuracy: 91.37% - Test Accuracy: 94.76%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 41.21batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 35.49batch/s]

[Epoch 2] Train Loss: 0.001614 - Test Loss: 0.001557 - Train Accuracy: 95.62% - Test Accuracy: 95.91%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 40.47batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 44.93batch/s]

[Epoch 3] Train Loss: 0.001393 - Test Loss: 0.001488 - Train Accuracy: 96.29% - Test Accuracy: 96.23%



Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 41.22batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 44.84batch/s]

[Epoch 4] Train Loss: 0.001229 - Test Loss: 0.001415 - Train Accuracy: 96.71% - Test Accuracy: 96.54%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 41.21batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 38.82batch/s]

[Epoch 5] Train Loss: 0.001193 - Test Loss: 0.001455 - Train Accuracy: 96.88% - Test Accuracy: 96.60%



Epoch 5: 100%|██████████| 600/600 [00:15<00:00, 39.57batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 45.68batch/s]

[Epoch 6] Train Loss: 0.000989 - Test Loss: 0.001182 - Train Accuracy: 97.29% - Test Accuracy: 96.90%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 41.01batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 45.30batch/s]

[Epoch 7] Train Loss: 0.001014 - Test Loss: 0.001455 - Train Accuracy: 97.38% - Test Accuracy: 96.51%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 40.94batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 42.34batch/s]

[Epoch 8] Train Loss: 0.000898 - Test Loss: 0.001507 - Train Accuracy: 97.59% - Test Accuracy: 96.69%



Epoch 8: 100%|██████████| 600/600 [00:15<00:00, 39.79batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 43.56batch/s]

[Epoch 9] Train Loss: 0.000997 - Test Loss: 0.001585 - Train Accuracy: 97.49% - Test Accuracy: 96.43%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 40.78batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 44.75batch/s]

[Epoch 10] Train Loss: 0.000877 - Test Loss: 0.001817 - Train Accuracy: 97.77% - Test Accuracy: 96.45%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 41.15batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 44.83batch/s]

[Epoch 11] Train Loss: 0.000701 - Test Loss: 0.001148 - Train Accuracy: 98.18% - Test Accuracy: 97.35%



Epoch 11: 100%|██████████| 600/600 [00:15<00:00, 39.60batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 39.37batch/s]

[Epoch 12] Train Loss: 0.000742 - Test Loss: 0.001544 - Train Accuracy: 98.16% - Test Accuracy: 96.93%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 41.05batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 44.77batch/s]

[Epoch 13] Train Loss: 0.000800 - Test Loss: 0.001578 - Train Accuracy: 97.97% - Test Accuracy: 97.04%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 41.27batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 45.49batch/s]

[Epoch 14] Train Loss: 0.000613 - Test Loss: 0.001998 - Train Accuracy: 98.41% - Test Accuracy: 96.67%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 40.06batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 34.89batch/s]

[Epoch 15] Train Loss: 0.000791 - Test Loss: 0.001450 - Train Accuracy: 98.15% - Test Accuracy: 96.96%

BEST TEST ACCURACY:  97.35  in epoch  10
Params:  335114

---- Start Training ----
Optimizer: SGD - Learning Rate: 0.01



Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 41.09batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 44.67batch/s]

[Epoch 1] Train Loss: 0.008222 - Test Loss: 0.003020 - Train Accuracy: 76.30% - Test Accuracy: 91.24%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 41.37batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 45.59batch/s]

[Epoch 2] Train Loss: 0.002292 - Test Loss: 0.001659 - Train Accuracy: 93.23% - Test Accuracy: 95.12%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 41.54batch/s]
Test 2: 100%|██████████| 100/100 [00:03<00:00, 33.08batch/s]

[Epoch 3] Train Loss: 0.001468 - Test Loss: 0.001231 - Train Accuracy: 95.65% - Test Accuracy: 96.27%



Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 41.25batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 45.61batch/s]

[Epoch 4] Train Loss: 0.001080 - Test Loss: 0.000986 - Train Accuracy: 96.77% - Test Accuracy: 96.93%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 41.33batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 45.14batch/s]

[Epoch 5] Train Loss: 0.000829 - Test Loss: 0.000878 - Train Accuracy: 97.52% - Test Accuracy: 97.31%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 41.55batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 40.29batch/s]

[Epoch 6] Train Loss: 0.000687 - Test Loss: 0.000785 - Train Accuracy: 97.96% - Test Accuracy: 97.60%



Epoch 6: 100%|██████████| 600/600 [00:15<00:00, 39.81batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 45.32batch/s]

[Epoch 7] Train Loss: 0.000557 - Test Loss: 0.000738 - Train Accuracy: 98.31% - Test Accuracy: 97.63%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 41.53batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 45.50batch/s]

[Epoch 8] Train Loss: 0.000465 - Test Loss: 0.000755 - Train Accuracy: 98.61% - Test Accuracy: 97.63%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 41.30batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 45.13batch/s]

[Epoch 9] Train Loss: 0.000393 - Test Loss: 0.000726 - Train Accuracy: 98.86% - Test Accuracy: 97.79%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 40.49batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 36.55batch/s]

[Epoch 10] Train Loss: 0.000330 - Test Loss: 0.000658 - Train Accuracy: 99.03% - Test Accuracy: 97.96%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 41.37batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 45.55batch/s]

[Epoch 11] Train Loss: 0.000280 - Test Loss: 0.000698 - Train Accuracy: 99.17% - Test Accuracy: 97.79%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 41.52batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 44.46batch/s]

[Epoch 12] Train Loss: 0.000231 - Test Loss: 0.000651 - Train Accuracy: 99.33% - Test Accuracy: 98.08%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 41.54batch/s]
Test 12: 100%|██████████| 100/100 [00:03<00:00, 32.14batch/s]

[Epoch 13] Train Loss: 0.000188 - Test Loss: 0.000671 - Train Accuracy: 99.44% - Test Accuracy: 97.98%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 41.36batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 45.86batch/s]

[Epoch 14] Train Loss: 0.000157 - Test Loss: 0.000734 - Train Accuracy: 99.56% - Test Accuracy: 97.87%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 41.49batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 45.91batch/s]

[Epoch 15] Train Loss: 0.000119 - Test Loss: 0.000681 - Train Accuracy: 99.70% - Test Accuracy: 98.00%

BEST TEST ACCURACY:  98.08  in epoch  11
Params:  335114

---- Start Training ----
Optimizer: Adadelta - Learning Rate: 0.01



Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 41.20batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 39.85batch/s]

[Epoch 1] Train Loss: 0.022744 - Test Loss: 0.022181 - Train Accuracy: 23.84% - Test Accuracy: 46.69%



Epoch 1: 100%|██████████| 600/600 [00:15<00:00, 39.74batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 45.40batch/s]

[Epoch 2] Train Loss: 0.019286 - Test Loss: 0.013851 - Train Accuracy: 55.22% - Test Accuracy: 66.49%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 40.95batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 45.69batch/s]

[Epoch 3] Train Loss: 0.009842 - Test Loss: 0.007145 - Train Accuracy: 75.09% - Test Accuracy: 79.96%



Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 41.20batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 45.01batch/s]

[Epoch 4] Train Loss: 0.006332 - Test Loss: 0.005391 - Train Accuracy: 82.15% - Test Accuracy: 84.53%



Epoch 4: 100%|██████████| 600/600 [00:15<00:00, 39.70batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 39.11batch/s]

[Epoch 5] Train Loss: 0.005119 - Test Loss: 0.004562 - Train Accuracy: 85.46% - Test Accuracy: 86.91%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 41.19batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 45.64batch/s]

[Epoch 6] Train Loss: 0.004472 - Test Loss: 0.004072 - Train Accuracy: 87.38% - Test Accuracy: 88.53%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 41.47batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 45.40batch/s]

[Epoch 7] Train Loss: 0.004068 - Test Loss: 0.003762 - Train Accuracy: 88.55% - Test Accuracy: 89.34%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 40.61batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 34.30batch/s]

[Epoch 8] Train Loss: 0.003796 - Test Loss: 0.003549 - Train Accuracy: 89.29% - Test Accuracy: 90.01%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 41.30batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 45.82batch/s]

[Epoch 9] Train Loss: 0.003594 - Test Loss: 0.003386 - Train Accuracy: 89.84% - Test Accuracy: 90.23%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 40.95batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 45.25batch/s]

[Epoch 10] Train Loss: 0.003429 - Test Loss: 0.003240 - Train Accuracy: 90.28% - Test Accuracy: 90.72%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 41.54batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 33.66batch/s]

[Epoch 11] Train Loss: 0.003290 - Test Loss: 0.003120 - Train Accuracy: 90.67% - Test Accuracy: 90.91%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 41.13batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 45.36batch/s]

[Epoch 12] Train Loss: 0.003165 - Test Loss: 0.003019 - Train Accuracy: 91.00% - Test Accuracy: 91.46%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 41.27batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 45.67batch/s]

[Epoch 13] Train Loss: 0.003054 - Test Loss: 0.002910 - Train Accuracy: 91.27% - Test Accuracy: 91.69%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 41.06batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 38.53batch/s]


[Epoch 14] Train Loss: 0.002948 - Test Loss: 0.002841 - Train Accuracy: 91.58% - Test Accuracy: 91.89%


Epoch 14: 100%|██████████| 600/600 [00:15<00:00, 39.66batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 45.13batch/s]

[Epoch 15] Train Loss: 0.002848 - Test Loss: 0.002721 - Train Accuracy: 91.89% - Test Accuracy: 92.30%

BEST TEST ACCURACY:  92.3  in epoch  14
Params:  335114

---- Start Training ----
Optimizer: Adam - Learning Rate: 0.01



Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 41.48batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 45.45batch/s]

[Epoch 1] Train Loss: 0.002786 - Test Loss: 0.001570 - Train Accuracy: 91.91% - Test Accuracy: 95.88%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 41.22batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 45.94batch/s]

[Epoch 2] Train Loss: 0.001573 - Test Loss: 0.001585 - Train Accuracy: 95.72% - Test Accuracy: 96.00%



Epoch 2: 100%|██████████| 600/600 [00:15<00:00, 39.93batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 37.96batch/s]

[Epoch 3] Train Loss: 0.001239 - Test Loss: 0.001526 - Train Accuracy: 96.62% - Test Accuracy: 96.25%



Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 41.21batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 45.23batch/s]

[Epoch 4] Train Loss: 0.001175 - Test Loss: 0.001351 - Train Accuracy: 96.95% - Test Accuracy: 96.48%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 41.22batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 45.48batch/s]

[Epoch 5] Train Loss: 0.001056 - Test Loss: 0.001455 - Train Accuracy: 97.23% - Test Accuracy: 96.29%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 40.78batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 33.89batch/s]

[Epoch 6] Train Loss: 0.001003 - Test Loss: 0.001333 - Train Accuracy: 97.48% - Test Accuracy: 96.58%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 40.76batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 44.86batch/s]

[Epoch 7] Train Loss: 0.000941 - Test Loss: 0.001478 - Train Accuracy: 97.61% - Test Accuracy: 97.13%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 41.30batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 45.41batch/s]

[Epoch 8] Train Loss: 0.000937 - Test Loss: 0.001815 - Train Accuracy: 97.71% - Test Accuracy: 96.46%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 40.98batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 35.30batch/s]

[Epoch 9] Train Loss: 0.000795 - Test Loss: 0.001604 - Train Accuracy: 98.11% - Test Accuracy: 96.62%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 40.97batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 45.54batch/s]

[Epoch 10] Train Loss: 0.000843 - Test Loss: 0.001430 - Train Accuracy: 97.95% - Test Accuracy: 96.88%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 40.93batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 45.75batch/s]

[Epoch 11] Train Loss: 0.000855 - Test Loss: 0.001566 - Train Accuracy: 97.97% - Test Accuracy: 96.89%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 41.25batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 39.09batch/s]

[Epoch 12] Train Loss: 0.000715 - Test Loss: 0.001760 - Train Accuracy: 98.22% - Test Accuracy: 96.98%



Epoch 12: 100%|██████████| 600/600 [00:15<00:00, 39.55batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 45.31batch/s]

[Epoch 13] Train Loss: 0.000790 - Test Loss: 0.002034 - Train Accuracy: 98.17% - Test Accuracy: 96.80%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 40.96batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 45.53batch/s]

[Epoch 14] Train Loss: 0.000718 - Test Loss: 0.001456 - Train Accuracy: 98.33% - Test Accuracy: 97.04%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 41.30batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 44.99batch/s]


[Epoch 15] Train Loss: 0.000630 - Test Loss: 0.001362 - Train Accuracy: 98.49% - Test Accuracy: 97.59%

BEST TEST ACCURACY:  97.59  in epoch  14
Params:  335114

---- Start Training ----
Optimizer: AdamW - Learning Rate: 0.001


Epoch 0: 100%|██████████| 600/600 [00:15<00:00, 39.52batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 42.11batch/s]

[Epoch 1] Train Loss: 0.002999 - Test Loss: 0.001574 - Train Accuracy: 91.07% - Test Accuracy: 94.93%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 41.08batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 45.25batch/s]

[Epoch 2] Train Loss: 0.001060 - Test Loss: 0.000885 - Train Accuracy: 96.78% - Test Accuracy: 97.13%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 41.05batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 45.69batch/s]

[Epoch 3] Train Loss: 0.000713 - Test Loss: 0.000835 - Train Accuracy: 97.78% - Test Accuracy: 97.55%



Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 40.42batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 34.79batch/s]

[Epoch 4] Train Loss: 0.000528 - Test Loss: 0.000799 - Train Accuracy: 98.31% - Test Accuracy: 97.61%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 41.44batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 45.64batch/s]

[Epoch 5] Train Loss: 0.000433 - Test Loss: 0.000843 - Train Accuracy: 98.59% - Test Accuracy: 97.56%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 41.08batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 45.58batch/s]

[Epoch 6] Train Loss: 0.000327 - Test Loss: 0.000870 - Train Accuracy: 98.97% - Test Accuracy: 97.48%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 41.40batch/s]
Test 6: 100%|██████████| 100/100 [00:03<00:00, 32.70batch/s]

[Epoch 7] Train Loss: 0.000301 - Test Loss: 0.000763 - Train Accuracy: 99.01% - Test Accuracy: 97.97%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 41.19batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 45.67batch/s]

[Epoch 8] Train Loss: 0.000238 - Test Loss: 0.001001 - Train Accuracy: 99.19% - Test Accuracy: 97.41%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 41.26batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 45.31batch/s]

[Epoch 9] Train Loss: 0.000203 - Test Loss: 0.000790 - Train Accuracy: 99.34% - Test Accuracy: 97.78%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 40.74batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 38.58batch/s]

[Epoch 10] Train Loss: 0.000206 - Test Loss: 0.000986 - Train Accuracy: 99.34% - Test Accuracy: 97.81%



Epoch 10: 100%|██████████| 600/600 [00:15<00:00, 39.80batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 44.51batch/s]

[Epoch 11] Train Loss: 0.000176 - Test Loss: 0.000881 - Train Accuracy: 99.42% - Test Accuracy: 97.88%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 41.30batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 45.06batch/s]

[Epoch 12] Train Loss: 0.000159 - Test Loss: 0.000991 - Train Accuracy: 99.49% - Test Accuracy: 98.04%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 41.09batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 40.66batch/s]

[Epoch 13] Train Loss: 0.000171 - Test Loss: 0.000892 - Train Accuracy: 99.45% - Test Accuracy: 97.96%



Epoch 13: 100%|██████████| 600/600 [00:15<00:00, 39.45batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 45.27batch/s]

[Epoch 14] Train Loss: 0.000119 - Test Loss: 0.000975 - Train Accuracy: 99.62% - Test Accuracy: 98.09%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 41.08batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 45.44batch/s]

[Epoch 15] Train Loss: 0.000104 - Test Loss: 0.001070 - Train Accuracy: 99.67% - Test Accuracy: 98.10%

BEST TEST ACCURACY:  98.1  in epoch  14
Params:  335114

---- Start Training ----
Optimizer: SGD - Learning Rate: 0.001



Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 41.32batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 45.53batch/s]

[Epoch 1] Train Loss: 0.022790 - Test Loss: 0.022322 - Train Accuracy: 29.64% - Test Accuracy: 39.97%



Epoch 1: 100%|██████████| 600/600 [00:15<00:00, 39.91batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 36.89batch/s]

[Epoch 2] Train Loss: 0.019136 - Test Loss: 0.012175 - Train Accuracy: 50.69% - Test Accuracy: 70.86%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 41.56batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 45.07batch/s]

[Epoch 3] Train Loss: 0.007923 - Test Loss: 0.005641 - Train Accuracy: 78.55% - Test Accuracy: 83.69%



Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 41.14batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 45.37batch/s]

[Epoch 4] Train Loss: 0.004982 - Test Loss: 0.004247 - Train Accuracy: 85.68% - Test Accuracy: 87.45%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 41.32batch/s]
Test 4: 100%|██████████| 100/100 [00:03<00:00, 32.11batch/s]

[Epoch 5] Train Loss: 0.004068 - Test Loss: 0.003685 - Train Accuracy: 88.38% - Test Accuracy: 89.17%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 41.12batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 45.21batch/s]

[Epoch 6] Train Loss: 0.003635 - Test Loss: 0.003384 - Train Accuracy: 89.58% - Test Accuracy: 90.35%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 41.21batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 45.36batch/s]

[Epoch 7] Train Loss: 0.003356 - Test Loss: 0.003136 - Train Accuracy: 90.47% - Test Accuracy: 90.91%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 41.36batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 37.13batch/s]

[Epoch 8] Train Loss: 0.003129 - Test Loss: 0.002966 - Train Accuracy: 91.10% - Test Accuracy: 91.36%



Epoch 8: 100%|██████████| 600/600 [00:15<00:00, 39.98batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 45.12batch/s]

[Epoch 9] Train Loss: 0.002944 - Test Loss: 0.002796 - Train Accuracy: 91.61% - Test Accuracy: 91.90%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 41.33batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 45.43batch/s]

[Epoch 10] Train Loss: 0.002769 - Test Loss: 0.002649 - Train Accuracy: 92.16% - Test Accuracy: 92.36%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 41.28batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 42.49batch/s]

[Epoch 11] Train Loss: 0.002602 - Test Loss: 0.002477 - Train Accuracy: 92.64% - Test Accuracy: 92.85%



Epoch 11: 100%|██████████| 600/600 [00:15<00:00, 39.52batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 43.09batch/s]

[Epoch 12] Train Loss: 0.002449 - Test Loss: 0.002337 - Train Accuracy: 93.06% - Test Accuracy: 93.33%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 40.98batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 45.28batch/s]

[Epoch 13] Train Loss: 0.002305 - Test Loss: 0.002189 - Train Accuracy: 93.47% - Test Accuracy: 93.78%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 41.06batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 44.97batch/s]

[Epoch 14] Train Loss: 0.002172 - Test Loss: 0.002094 - Train Accuracy: 93.84% - Test Accuracy: 93.99%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 40.24batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 37.00batch/s]

[Epoch 15] Train Loss: 0.002045 - Test Loss: 0.001979 - Train Accuracy: 94.25% - Test Accuracy: 94.34%

BEST TEST ACCURACY:  94.34  in epoch  14
Params:  335114

---- Start Training ----
Optimizer: Adadelta - Learning Rate: 0.001



Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 41.10batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 45.23batch/s]

[Epoch 1] Train Loss: 0.023001 - Test Loss: 0.022984 - Train Accuracy: 14.08% - Test Accuracy: 16.31%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 41.01batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 44.97batch/s]

[Epoch 2] Train Loss: 0.022966 - Test Loss: 0.022947 - Train Accuracy: 17.91% - Test Accuracy: 18.99%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 40.45batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 34.15batch/s]

[Epoch 3] Train Loss: 0.022928 - Test Loss: 0.022905 - Train Accuracy: 19.67% - Test Accuracy: 20.07%



Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 40.85batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 45.66batch/s]

[Epoch 4] Train Loss: 0.022884 - Test Loss: 0.022857 - Train Accuracy: 20.63% - Test Accuracy: 21.01%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 41.06batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 45.22batch/s]

[Epoch 5] Train Loss: 0.022834 - Test Loss: 0.022801 - Train Accuracy: 21.99% - Test Accuracy: 23.16%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 41.05batch/s]
Test 5: 100%|██████████| 100/100 [00:03<00:00, 32.61batch/s]

[Epoch 6] Train Loss: 0.022774 - Test Loss: 0.022734 - Train Accuracy: 24.27% - Test Accuracy: 26.18%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 41.01batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 45.02batch/s]

[Epoch 7] Train Loss: 0.022701 - Test Loss: 0.022652 - Train Accuracy: 28.08% - Test Accuracy: 30.86%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 41.07batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 45.16batch/s]

[Epoch 8] Train Loss: 0.022612 - Test Loss: 0.022549 - Train Accuracy: 33.68% - Test Accuracy: 37.47%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 41.30batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 36.82batch/s]

[Epoch 9] Train Loss: 0.022499 - Test Loss: 0.022419 - Train Accuracy: 40.13% - Test Accuracy: 44.24%



Epoch 9: 100%|██████████| 600/600 [00:15<00:00, 39.85batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 45.26batch/s]

[Epoch 10] Train Loss: 0.022354 - Test Loss: 0.022251 - Train Accuracy: 46.59% - Test Accuracy: 50.75%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 41.03batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 44.58batch/s]

[Epoch 11] Train Loss: 0.022166 - Test Loss: 0.022030 - Train Accuracy: 52.30% - Test Accuracy: 55.72%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 41.16batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 40.93batch/s]

[Epoch 12] Train Loss: 0.021917 - Test Loss: 0.021736 - Train Accuracy: 56.56% - Test Accuracy: 59.07%



Epoch 12: 100%|██████████| 600/600 [00:15<00:00, 39.13batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 45.43batch/s]

[Epoch 13] Train Loss: 0.021584 - Test Loss: 0.021343 - Train Accuracy: 59.28% - Test Accuracy: 60.55%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 41.29batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 45.38batch/s]

[Epoch 14] Train Loss: 0.021139 - Test Loss: 0.020820 - Train Accuracy: 60.30% - Test Accuracy: 61.25%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 40.98batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 44.98batch/s]

[Epoch 15] Train Loss: 0.020549 - Test Loss: 0.020130 - Train Accuracy: 60.83% - Test Accuracy: 61.55%

BEST TEST ACCURACY:  61.55  in epoch  14
Params:  335114

---- Start Training ----
Optimizer: Adam - Learning Rate: 0.001



Epoch 0: 100%|██████████| 600/600 [00:15<00:00, 39.22batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 38.76batch/s]

[Epoch 1] Train Loss: 0.002971 - Test Loss: 0.001267 - Train Accuracy: 91.06% - Test Accuracy: 96.06%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 40.90batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 45.41batch/s]

[Epoch 2] Train Loss: 0.001064 - Test Loss: 0.000968 - Train Accuracy: 96.79% - Test Accuracy: 96.90%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 41.02batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 44.53batch/s]

[Epoch 3] Train Loss: 0.000724 - Test Loss: 0.001076 - Train Accuracy: 97.79% - Test Accuracy: 96.70%



Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 40.59batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 35.50batch/s]

[Epoch 4] Train Loss: 0.000554 - Test Loss: 0.000752 - Train Accuracy: 98.24% - Test Accuracy: 97.58%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 40.89batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 46.15batch/s]

[Epoch 5] Train Loss: 0.000416 - Test Loss: 0.000835 - Train Accuracy: 98.67% - Test Accuracy: 97.61%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 41.34batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 44.78batch/s]

[Epoch 6] Train Loss: 0.000338 - Test Loss: 0.000786 - Train Accuracy: 98.92% - Test Accuracy: 97.80%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 40.90batch/s]
Test 6: 100%|██████████| 100/100 [00:03<00:00, 32.10batch/s]

[Epoch 7] Train Loss: 0.000293 - Test Loss: 0.000792 - Train Accuracy: 99.08% - Test Accuracy: 97.81%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 41.22batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 45.12batch/s]

[Epoch 8] Train Loss: 0.000260 - Test Loss: 0.000833 - Train Accuracy: 99.12% - Test Accuracy: 97.97%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 41.01batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 45.41batch/s]

[Epoch 9] Train Loss: 0.000214 - Test Loss: 0.000967 - Train Accuracy: 99.28% - Test Accuracy: 97.76%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 41.03batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 36.92batch/s]

[Epoch 10] Train Loss: 0.000172 - Test Loss: 0.000959 - Train Accuracy: 99.44% - Test Accuracy: 97.85%



Epoch 10: 100%|██████████| 600/600 [00:15<00:00, 39.76batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 44.84batch/s]

[Epoch 11] Train Loss: 0.000191 - Test Loss: 0.000953 - Train Accuracy: 99.37% - Test Accuracy: 97.86%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 40.95batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 45.44batch/s]

[Epoch 12] Train Loss: 0.000152 - Test Loss: 0.001021 - Train Accuracy: 99.50% - Test Accuracy: 97.82%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 41.07batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 41.44batch/s]

[Epoch 13] Train Loss: 0.000160 - Test Loss: 0.001046 - Train Accuracy: 99.50% - Test Accuracy: 97.92%



Epoch 13: 100%|██████████| 600/600 [00:15<00:00, 38.97batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 44.44batch/s]

[Epoch 14] Train Loss: 0.000122 - Test Loss: 0.001035 - Train Accuracy: 99.59% - Test Accuracy: 97.76%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 41.10batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 44.61batch/s]

[Epoch 15] Train Loss: 0.000145 - Test Loss: 0.000768 - Train Accuracy: 99.52% - Test Accuracy: 98.20%

BEST TEST ACCURACY:  98.2  in epoch  14
Params:  335114

---- Start Training ----
Optimizer: AdamW - Learning Rate: 0.0001



Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 40.85batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 44.84batch/s]

[Epoch 1] Train Loss: 0.007293 - Test Loss: 0.003175 - Train Accuracy: 81.03% - Test Accuracy: 91.09%



Epoch 1: 100%|██████████| 600/600 [00:15<00:00, 39.43batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 37.29batch/s]


[Epoch 2] Train Loss: 0.002828 - Test Loss: 0.002405 - Train Accuracy: 91.81% - Test Accuracy: 93.14%


Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 40.80batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 44.91batch/s]

[Epoch 3] Train Loss: 0.002244 - Test Loss: 0.001987 - Train Accuracy: 93.54% - Test Accuracy: 94.06%



Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 40.30batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 44.74batch/s]

[Epoch 4] Train Loss: 0.001853 - Test Loss: 0.001660 - Train Accuracy: 94.67% - Test Accuracy: 95.08%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 40.31batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 34.48batch/s]

[Epoch 5] Train Loss: 0.001576 - Test Loss: 0.001457 - Train Accuracy: 95.44% - Test Accuracy: 95.75%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 40.72batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 44.86batch/s]

[Epoch 6] Train Loss: 0.001366 - Test Loss: 0.001321 - Train Accuracy: 96.01% - Test Accuracy: 96.08%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 41.08batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 44.97batch/s]

[Epoch 7] Train Loss: 0.001197 - Test Loss: 0.001238 - Train Accuracy: 96.56% - Test Accuracy: 96.39%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 40.62batch/s]
Test 7: 100%|██████████| 100/100 [00:03<00:00, 33.22batch/s]

[Epoch 8] Train Loss: 0.001062 - Test Loss: 0.001079 - Train Accuracy: 96.86% - Test Accuracy: 96.78%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 40.37batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 44.16batch/s]

[Epoch 9] Train Loss: 0.000950 - Test Loss: 0.001056 - Train Accuracy: 97.17% - Test Accuracy: 96.85%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 40.84batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 44.75batch/s]

[Epoch 10] Train Loss: 0.000843 - Test Loss: 0.000957 - Train Accuracy: 97.48% - Test Accuracy: 97.18%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 40.68batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 35.18batch/s]

[Epoch 11] Train Loss: 0.000752 - Test Loss: 0.000902 - Train Accuracy: 97.79% - Test Accuracy: 97.25%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 40.19batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 45.03batch/s]

[Epoch 12] Train Loss: 0.000679 - Test Loss: 0.000879 - Train Accuracy: 97.99% - Test Accuracy: 97.31%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 40.50batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 44.59batch/s]

[Epoch 13] Train Loss: 0.000607 - Test Loss: 0.000844 - Train Accuracy: 98.22% - Test Accuracy: 97.45%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 40.88batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 38.47batch/s]

[Epoch 14] Train Loss: 0.000553 - Test Loss: 0.000807 - Train Accuracy: 98.34% - Test Accuracy: 97.69%



Epoch 14: 100%|██████████| 600/600 [00:15<00:00, 39.24batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 44.80batch/s]

[Epoch 15] Train Loss: 0.000490 - Test Loss: 0.000811 - Train Accuracy: 98.58% - Test Accuracy: 97.60%

BEST TEST ACCURACY:  97.69  in epoch  13
Params:  335114

---- Start Training ----
Optimizer: SGD - Learning Rate: 0.0001



Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 41.15batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 43.27batch/s]

[Epoch 1] Train Loss: 0.023022 - Test Loss: 0.023001 - Train Accuracy: 10.24% - Test Accuracy: 10.49%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 40.97batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 41.23batch/s]

[Epoch 2] Train Loss: 0.022986 - Test Loss: 0.022964 - Train Accuracy: 10.92% - Test Accuracy: 11.43%



Epoch 2: 100%|██████████| 600/600 [00:15<00:00, 39.14batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 44.30batch/s]

[Epoch 3] Train Loss: 0.022949 - Test Loss: 0.022925 - Train Accuracy: 12.18% - Test Accuracy: 13.84%



Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 41.37batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 44.43batch/s]

[Epoch 4] Train Loss: 0.022908 - Test Loss: 0.022881 - Train Accuracy: 14.72% - Test Accuracy: 17.00%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 41.20batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 45.23batch/s]

[Epoch 5] Train Loss: 0.022862 - Test Loss: 0.022829 - Train Accuracy: 18.06% - Test Accuracy: 21.77%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 40.25batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 36.70batch/s]

[Epoch 6] Train Loss: 0.022807 - Test Loss: 0.022768 - Train Accuracy: 25.27% - Test Accuracy: 31.36%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 40.82batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 44.57batch/s]

[Epoch 7] Train Loss: 0.022740 - Test Loss: 0.022692 - Train Accuracy: 34.36% - Test Accuracy: 39.16%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 41.04batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 44.27batch/s]

[Epoch 8] Train Loss: 0.022656 - Test Loss: 0.022595 - Train Accuracy: 41.53% - Test Accuracy: 44.73%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 40.87batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 34.10batch/s]

[Epoch 9] Train Loss: 0.022549 - Test Loss: 0.022469 - Train Accuracy: 46.31% - Test Accuracy: 48.37%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 41.13batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 45.16batch/s]

[Epoch 10] Train Loss: 0.022406 - Test Loss: 0.022302 - Train Accuracy: 49.32% - Test Accuracy: 50.80%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 41.54batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 45.42batch/s]

[Epoch 11] Train Loss: 0.022214 - Test Loss: 0.022073 - Train Accuracy: 51.54% - Test Accuracy: 52.27%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 41.17batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 33.94batch/s]


[Epoch 12] Train Loss: 0.021948 - Test Loss: 0.021753 - Train Accuracy: 52.46% - Test Accuracy: 52.91%


Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 40.95batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 44.87batch/s]

[Epoch 13] Train Loss: 0.021570 - Test Loss: 0.021293 - Train Accuracy: 52.88% - Test Accuracy: 52.96%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 41.14batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 45.16batch/s]

[Epoch 14] Train Loss: 0.021023 - Test Loss: 0.020628 - Train Accuracy: 52.89% - Test Accuracy: 52.81%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 41.40batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 38.94batch/s]

[Epoch 15] Train Loss: 0.020232 - Test Loss: 0.019671 - Train Accuracy: 53.23% - Test Accuracy: 53.00%

BEST TEST ACCURACY:  53.0  in epoch  14
Params:  335114

---- Start Training ----
Optimizer: Adadelta - Learning Rate: 0.0001



Epoch 0: 100%|██████████| 600/600 [00:15<00:00, 39.45batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 45.30batch/s]

[Epoch 1] Train Loss: 0.023004 - Test Loss: 0.023000 - Train Accuracy: 10.79% - Test Accuracy: 10.93%



Epoch 1: 100%|██████████| 600/600 [00:14<00:00, 40.78batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 45.23batch/s]

[Epoch 2] Train Loss: 0.023000 - Test Loss: 0.022996 - Train Accuracy: 11.02% - Test Accuracy: 11.29%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 41.05batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 45.38batch/s]

[Epoch 3] Train Loss: 0.022996 - Test Loss: 0.022992 - Train Accuracy: 11.19% - Test Accuracy: 11.58%



Epoch 3: 100%|██████████| 600/600 [00:15<00:00, 39.38batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 41.15batch/s]

[Epoch 4] Train Loss: 0.022993 - Test Loss: 0.022989 - Train Accuracy: 11.43% - Test Accuracy: 11.85%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 41.03batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 45.38batch/s]

[Epoch 5] Train Loss: 0.022989 - Test Loss: 0.022985 - Train Accuracy: 11.70% - Test Accuracy: 12.04%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 40.82batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 44.81batch/s]

[Epoch 6] Train Loss: 0.022985 - Test Loss: 0.022981 - Train Accuracy: 11.96% - Test Accuracy: 12.29%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 40.11batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 35.51batch/s]

[Epoch 7] Train Loss: 0.022981 - Test Loss: 0.022977 - Train Accuracy: 12.22% - Test Accuracy: 12.48%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 41.13batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 45.45batch/s]

[Epoch 8] Train Loss: 0.022978 - Test Loss: 0.022973 - Train Accuracy: 12.49% - Test Accuracy: 12.74%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 40.86batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 44.87batch/s]

[Epoch 9] Train Loss: 0.022974 - Test Loss: 0.022969 - Train Accuracy: 12.73% - Test Accuracy: 13.00%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 40.20batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 33.68batch/s]

[Epoch 10] Train Loss: 0.022970 - Test Loss: 0.022965 - Train Accuracy: 13.01% - Test Accuracy: 13.28%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 40.79batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 45.15batch/s]

[Epoch 11] Train Loss: 0.022966 - Test Loss: 0.022961 - Train Accuracy: 13.27% - Test Accuracy: 13.47%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 41.07batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 44.50batch/s]

[Epoch 12] Train Loss: 0.022962 - Test Loss: 0.022957 - Train Accuracy: 13.56% - Test Accuracy: 13.66%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 40.79batch/s]
Test 12: 100%|██████████| 100/100 [00:03<00:00, 32.20batch/s]

[Epoch 13] Train Loss: 0.022958 - Test Loss: 0.022953 - Train Accuracy: 13.84% - Test Accuracy: 14.00%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 41.11batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 44.66batch/s]

[Epoch 14] Train Loss: 0.022954 - Test Loss: 0.022949 - Train Accuracy: 14.10% - Test Accuracy: 14.22%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 41.21batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 45.10batch/s]

[Epoch 15] Train Loss: 0.022950 - Test Loss: 0.022945 - Train Accuracy: 14.40% - Test Accuracy: 14.47%

BEST TEST ACCURACY:  14.47  in epoch  14
Params:  335114

---- Start Training ----
Optimizer: Adam - Learning Rate: 0.0001



Epoch 0: 100%|██████████| 600/600 [00:14<00:00, 41.16batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 39.76batch/s]

[Epoch 1] Train Loss: 0.007201 - Test Loss: 0.003118 - Train Accuracy: 81.60% - Test Accuracy: 90.91%



Epoch 1: 100%|██████████| 600/600 [00:15<00:00, 39.49batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 44.83batch/s]

[Epoch 2] Train Loss: 0.002851 - Test Loss: 0.002438 - Train Accuracy: 91.84% - Test Accuracy: 92.88%



Epoch 2: 100%|██████████| 600/600 [00:14<00:00, 41.14batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 45.75batch/s]

[Epoch 3] Train Loss: 0.002253 - Test Loss: 0.002039 - Train Accuracy: 93.48% - Test Accuracy: 94.14%



Epoch 3: 100%|██████████| 600/600 [00:14<00:00, 41.29batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 42.88batch/s]

[Epoch 4] Train Loss: 0.001860 - Test Loss: 0.001726 - Train Accuracy: 94.60% - Test Accuracy: 94.91%



Epoch 4: 100%|██████████| 600/600 [00:15<00:00, 39.58batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 42.24batch/s]

[Epoch 5] Train Loss: 0.001571 - Test Loss: 0.001582 - Train Accuracy: 95.44% - Test Accuracy: 95.33%



Epoch 5: 100%|██████████| 600/600 [00:14<00:00, 40.80batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 45.26batch/s]

[Epoch 6] Train Loss: 0.001363 - Test Loss: 0.001367 - Train Accuracy: 96.03% - Test Accuracy: 95.77%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 41.38batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 45.92batch/s]

[Epoch 7] Train Loss: 0.001205 - Test Loss: 0.001262 - Train Accuracy: 96.42% - Test Accuracy: 96.35%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 40.18batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 36.11batch/s]

[Epoch 8] Train Loss: 0.001061 - Test Loss: 0.001133 - Train Accuracy: 96.88% - Test Accuracy: 96.49%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 41.36batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 44.83batch/s]

[Epoch 9] Train Loss: 0.000949 - Test Loss: 0.001026 - Train Accuracy: 97.26% - Test Accuracy: 96.93%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 41.00batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 45.68batch/s]

[Epoch 10] Train Loss: 0.000852 - Test Loss: 0.001045 - Train Accuracy: 97.50% - Test Accuracy: 96.93%



Epoch 10: 100%|██████████| 600/600 [00:14<00:00, 41.17batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 34.05batch/s]

[Epoch 11] Train Loss: 0.000764 - Test Loss: 0.000921 - Train Accuracy: 97.73% - Test Accuracy: 97.24%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 40.27batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 45.47batch/s]

[Epoch 12] Train Loss: 0.000690 - Test Loss: 0.000895 - Train Accuracy: 97.96% - Test Accuracy: 97.49%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 40.79batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 44.06batch/s]

[Epoch 13] Train Loss: 0.000623 - Test Loss: 0.000861 - Train Accuracy: 98.17% - Test Accuracy: 97.45%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 40.76batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 37.78batch/s]

[Epoch 14] Train Loss: 0.000553 - Test Loss: 0.000852 - Train Accuracy: 98.39% - Test Accuracy: 97.58%



Epoch 14: 100%|██████████| 600/600 [00:15<00:00, 39.79batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 44.92batch/s]

[Epoch 15] Train Loss: 0.000500 - Test Loss: 0.000810 - Train Accuracy: 98.55% - Test Accuracy: 97.68%

BEST TEST ACCURACY:  97.68  in epoch  14
{0.1: {'AdamW': {'best_accuracy': 35.49, 'best_epoch': 6}, 'SGD': {'best_accuracy': 98.11, 'best_epoch': 12}, 'Adadelta': {'best_accuracy': 97.87, 'best_epoch': 14}, 'Adam': {'best_accuracy': 11.35, 'best_epoch': 7}}, 0.01: {'AdamW': {'best_accuracy': 97.35, 'best_epoch': 10}, 'SGD': {'best_accuracy': 98.08, 'best_epoch': 11}, 'Adadelta': {'best_accuracy': 92.3, 'best_epoch': 14}, 'Adam': {'best_accuracy': 97.59, 'best_epoch': 14}}, 0.001: {'AdamW': {'best_accuracy': 98.1, 'best_epoch': 14}, 'SGD': {'best_accuracy': 94.34, 'best_epoch': 14}, 'Adadelta': {'best_accuracy': 61.55, 'best_epoch': 14}, 'Adam': {'best_accuracy': 98.2, 'best_epoch': 14}}, 0.0001: {'AdamW': {'best_accuracy': 97.69, 'best_epoch': 13}, 'SGD': {'best_accuracy': 53.0, 'best_epoch': 14}, 'Adadelta': {'best_accuracy': 14.47, 'best_epoch': 14}, 'Adam': {'best_accuracy': 97.68




In [21]:
print(best_results)

{0.1: {'AdamW': {'best_accuracy': 35.49, 'best_epoch': 6}, 'SGD': {'best_accuracy': 98.11, 'best_epoch': 12}, 'Adadelta': {'best_accuracy': 97.87, 'best_epoch': 14}, 'Adam': {'best_accuracy': 11.35, 'best_epoch': 7}}, 0.01: {'AdamW': {'best_accuracy': 97.35, 'best_epoch': 10}, 'SGD': {'best_accuracy': 98.08, 'best_epoch': 11}, 'Adadelta': {'best_accuracy': 92.3, 'best_epoch': 14}, 'Adam': {'best_accuracy': 97.59, 'best_epoch': 14}}, 0.001: {'AdamW': {'best_accuracy': 98.1, 'best_epoch': 14}, 'SGD': {'best_accuracy': 94.34, 'best_epoch': 14}, 'Adadelta': {'best_accuracy': 61.55, 'best_epoch': 14}, 'Adam': {'best_accuracy': 98.2, 'best_epoch': 14}}, 0.0001: {'AdamW': {'best_accuracy': 97.69, 'best_epoch': 13}, 'SGD': {'best_accuracy': 53.0, 'best_epoch': 14}, 'Adadelta': {'best_accuracy': 14.47, 'best_epoch': 14}, 'Adam': {'best_accuracy': 97.68, 'best_epoch': 14}}}


In [22]:
# mostrar resultats del diccionari amb tabulate
for lr in best_results:
    for optimizer in best_results[lr]:
        best_results[lr][optimizer] = (
            round(best_results[lr][optimizer]["best_accuracy"], 2),
            best_results[lr][optimizer]["best_epoch"],
        )

headers = best_results[list(best_results.keys())[0]].keys()
values = [[name, *inner.values()] for name, inner in best_results.items()]

print(tabulate.tabulate(values, headers=headers))


best_accuracy = -1
best_lr = None
best_optimizer = None
for lr in best_results:
    for optimizer in best_results[lr]:
        if best_results[lr][optimizer][0] > best_accuracy:
            best_accuracy = best_results[lr][optimizer][0]
            best_lr = lr
            best_optimizer = optimizer

print(
    "\nBEST TEST ACCURACY: ",
    best_accuracy,
    " with lr: ",
    best_lr,
    " and optimizer: ",
    best_optimizer,
)
####################################################################
# Load best weights
####################################################################

# Load best weights, tenemos que tener una red creada con la misma arquitectura que la que hemos entrenado
net = Net(num_classes)
net.load_state_dict(torch.load("best_model.pt"))

_, _, test_accuracy = test_network(net, 0, criterion)
print("\nFinal best acc: ", test_accuracy)

        AdamW        SGD          Adadelta     Adam
------  -----------  -----------  -----------  -----------
0.1     (35.49, 6)   (98.11, 12)  (97.87, 14)  (11.35, 7)
0.01    (97.35, 10)  (98.08, 11)  (92.3, 14)   (97.59, 14)
0.001   (98.1, 14)   (94.34, 14)  (61.55, 14)  (98.2, 14)
0.0001  (97.69, 13)  (53.0, 14)   (14.47, 14)  (97.68, 14)

BEST TEST ACCURACY:  98.2  with lr:  0.001  and optimizer:  Adam


  net.load_state_dict(torch.load("best_model.pt"))
Test 0:   0%|          | 0/100 [00:00<?, ?batch/s]


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)