In [None]:
import torch
import torchvision
import pandas as pd
import torch.nn as nn
from tqdm import tqdm
import multiprocessing
import torch.optim as optim
import torch.nn.functional as  F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms

print("Torch version: ", torch. __version__)

Torch version:  2.2.1+cu121


In [None]:
# Set device

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device: ", device)

Device:  cuda


In [None]:
# Prepare data

train_set = torchvision.datasets.MNIST('.data/', train=True, download=True)
test_set = torchvision.datasets.MNIST('.data/', train=False, download=True)

print("Train images: ", train_set)
print("Image: ", train_set[0][0])
print("Label: ", train_set[0][1])
print("Label one hot: ", F.one_hot(torch.tensor(train_set[0][1]), num_classes=10))

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to .data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 120402167.47it/s]


Extracting .data/MNIST/raw/train-images-idx3-ubyte.gz to .data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to .data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 28739191.89it/s]


Extracting .data/MNIST/raw/train-labels-idx1-ubyte.gz to .data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to .data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 31674436.08it/s]


Extracting .data/MNIST/raw/t10k-images-idx3-ubyte.gz to .data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to .data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 22491769.50it/s]


Extracting .data/MNIST/raw/t10k-labels-idx1-ubyte.gz to .data/MNIST/raw

Train images:  Dataset MNIST
    Number of datapoints: 60000
    Root location: .data/
    Split: Train
Image:  <PIL.Image.Image image mode=L size=28x28 at 0x79CA80980100>
Label:  5
Label one hot:  tensor([0, 0, 0, 0, 0, 1, 0, 0, 0, 0])


In [None]:
# Dataset class

class MNIST_dataset(Dataset):

    def __init__(self, data, partition = "train"):
        print("\nLoading MNIST ", partition, " Dataset...")
        self.data = data
        self.partition = partition
        print("\tTotal Len.: ", len(self.data), "\n", 50*"-")

    def __len__(self):
        return len(self.data)

    def from_pil_to_tensor(self, image):
        return torchvision.transforms.ToTensor()(image)

    def __getitem__(self, idx):
        # Image
        image = self.data[idx][0]
        # PIL Image to torch tensor
        image_tensor = self.from_pil_to_tensor(image)
        # care! net expect a 784 size vector and our dataset
        # provide 1x28x28 (channels, height, width) -> Reshape!
        image_tensor = image_tensor.view(-1)

        # Label
        label = torch.tensor(self.data[idx][1])
        label = F.one_hot(label, num_classes=10).float()

        return {"img": image_tensor, "label": label}

train_dataset = MNIST_dataset(train_set, partition="train")
test_dataset = MNIST_dataset(test_set, partition="test")


Loading MNIST  train  Dataset...
	Total Len.:  60000 
 --------------------------------------------------

Loading MNIST  test  Dataset...
	Total Len.:  10000 
 --------------------------------------------------


In [None]:
# Dataloader class

batch_size = 100
num_workers = multiprocessing.cpu_count()-1
print("Num workers", num_workers)
train_dataloader = DataLoader(train_dataset, batch_size, shuffle=True, num_workers=num_workers)
test_dataloader = DataLoader(test_dataset, batch_size, shuffle=False, num_workers=num_workers)

Num workers 1


In [None]:
# Neural network class

class Net(nn.Module):
    def __init__(self, num_classes):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(784, 1024)
        self.relu1 = nn.ReLU()
        self.linear2 = nn.Linear(1024, 1024)
        self.relu2 = nn.ReLU()
        self.linear3 = nn.Linear(1024, 1024)
        self.relu3 = nn.ReLU()
        self.classifier = nn.Linear(1024, num_classes)

    def forward(self, x):
        out = self.relu1(self.linear1(x))
        out = self.relu2(self.linear2(out))
        out = self.relu3(self.linear3(out))
        out = self.classifier(out)
        return out


# Instantiating the network and printing its architecture
num_classes = 10
net = Net(num_classes)
print(net)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Params: ", count_parameters(net))

Net(
  (linear1): Linear(in_features=784, out_features=1024, bias=True)
  (relu1): ReLU()
  (linear2): Linear(in_features=1024, out_features=1024, bias=True)
  (relu2): ReLU()
  (linear3): Linear(in_features=1024, out_features=1024, bias=True)
  (relu3): ReLU()
  (classifier): Linear(in_features=1024, out_features=10, bias=True)
)
Params:  2913290


In [None]:
# Training hyperparameters
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, weight_decay=1e-6, momentum=0.9)
epochs = 25

In [None]:
# Load model in GPU
net.to(device)

print("\n---- Start Training ----")
best_accuracy = -1
best_epoch = 0
for epoch in range(epochs):
    # TRAIN NETWORK
    train_loss, train_correct = 0, 0
    net.train()
    with tqdm(iter(train_dataloader), desc="Epoch " + str(epoch), unit="batch") as tepoch:
        for batch in tepoch:

            # Returned values of Dataset Class
            images = batch["img"].to(device)
            labels = batch["label"].to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # Forward
            outputs = net(images)
            loss = criterion(outputs, labels)

            # Calculate gradients
            loss.backward()

            # Update gradients
            optimizer.step()

            # one hot -> labels
            labels = torch.argmax(labels, dim=1)
            pred = torch.argmax(outputs, dim=1)
            train_correct += pred.eq(labels).sum().item()

            # print statistics
            train_loss += loss.item()

    train_loss /= len(train_dataloader.dataset)

    # TEST NETWORK
    test_loss, test_correct = 0, 0
    net.eval()
    with torch.no_grad():
      with tqdm(iter(test_dataloader), desc="Test " + str(epoch), unit="batch") as tepoch:
          for batch in tepoch:

            images = batch["img"].to(device)
            labels = batch["label"].to(device)

            # Forward
            outputs = net(images)
            test_loss += criterion(outputs, labels)

            # one hot -> labels
            labels = torch.argmax(labels, dim=1)
            pred = torch.argmax(outputs, dim=1)

            test_correct += pred.eq(labels).sum().item()

    test_loss /= len(test_dataloader.dataset)
    test_accuracy = 100. * test_correct / len(test_dataloader.dataset)

    print("[Epoch {}] Train Loss: {:.6f} - Test Loss: {:.6f} - Train Accuracy: {:.2f}% - Test Accuracy: {:.2f}%".format(
        epoch + 1, train_loss, test_loss, 100. * train_correct / len(train_dataloader.dataset), test_accuracy
    ))

    if test_accuracy > best_accuracy:
        best_accuracy = test_accuracy
        best_epoch = epoch

        # Save best weights
        torch.save(net.state_dict(), "best_model.pt")

print("\nBEST TEST ACCURACY: ", best_accuracy, " in epoch ", best_epoch)


---- Start Training ----


  self.pid = os.fork()
Epoch 0: 100%|██████████| 600/600 [00:15<00:00, 37.69batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 37.30batch/s]

[Epoch 1] Train Loss: 0.007362 - Test Loss: 0.002402 - Train Accuracy: 79.12% - Test Accuracy: 92.97%



Epoch 1: 100%|██████████| 600/600 [00:16<00:00, 37.31batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 46.21batch/s]

[Epoch 2] Train Loss: 0.001986 - Test Loss: 0.001502 - Train Accuracy: 94.19% - Test Accuracy: 95.46%



Epoch 2: 100%|██████████| 600/600 [00:15<00:00, 38.56batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 39.29batch/s]

[Epoch 3] Train Loss: 0.001289 - Test Loss: 0.001072 - Train Accuracy: 96.22% - Test Accuracy: 96.62%



Epoch 3: 100%|██████████| 600/600 [00:15<00:00, 37.64batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 46.00batch/s]

[Epoch 4] Train Loss: 0.000912 - Test Loss: 0.000998 - Train Accuracy: 97.31% - Test Accuracy: 96.81%



Epoch 4: 100%|██████████| 600/600 [00:14<00:00, 40.90batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 45.51batch/s]

[Epoch 5] Train Loss: 0.000706 - Test Loss: 0.000792 - Train Accuracy: 97.95% - Test Accuracy: 97.60%



Epoch 5: 100%|██████████| 600/600 [00:15<00:00, 38.79batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 45.74batch/s]

[Epoch 6] Train Loss: 0.000553 - Test Loss: 0.000721 - Train Accuracy: 98.33% - Test Accuracy: 97.75%



Epoch 6: 100%|██████████| 600/600 [00:14<00:00, 41.74batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 45.60batch/s]

[Epoch 7] Train Loss: 0.000444 - Test Loss: 0.000698 - Train Accuracy: 98.65% - Test Accuracy: 97.82%



Epoch 7: 100%|██████████| 600/600 [00:14<00:00, 41.79batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 35.84batch/s]

[Epoch 8] Train Loss: 0.000349 - Test Loss: 0.000640 - Train Accuracy: 98.99% - Test Accuracy: 97.88%



Epoch 8: 100%|██████████| 600/600 [00:14<00:00, 42.13batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 45.15batch/s]

[Epoch 9] Train Loss: 0.000274 - Test Loss: 0.000651 - Train Accuracy: 99.15% - Test Accuracy: 98.00%



Epoch 9: 100%|██████████| 600/600 [00:14<00:00, 41.67batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 44.07batch/s]

[Epoch 10] Train Loss: 0.000214 - Test Loss: 0.000667 - Train Accuracy: 99.39% - Test Accuracy: 98.08%



Epoch 10: 100%|██████████| 600/600 [00:15<00:00, 39.49batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 44.60batch/s]

[Epoch 11] Train Loss: 0.000155 - Test Loss: 0.000657 - Train Accuracy: 99.62% - Test Accuracy: 98.03%



Epoch 11: 100%|██████████| 600/600 [00:14<00:00, 42.41batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 45.14batch/s]

[Epoch 12] Train Loss: 0.000130 - Test Loss: 0.000695 - Train Accuracy: 99.67% - Test Accuracy: 98.03%



Epoch 12: 100%|██████████| 600/600 [00:14<00:00, 41.84batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 38.31batch/s]

[Epoch 13] Train Loss: 0.000090 - Test Loss: 0.000678 - Train Accuracy: 99.80% - Test Accuracy: 98.01%



Epoch 13: 100%|██████████| 600/600 [00:14<00:00, 40.81batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 45.67batch/s]

[Epoch 14] Train Loss: 0.000070 - Test Loss: 0.000645 - Train Accuracy: 99.86% - Test Accuracy: 98.11%



Epoch 14: 100%|██████████| 600/600 [00:14<00:00, 41.65batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 40.37batch/s]

[Epoch 15] Train Loss: 0.000048 - Test Loss: 0.000658 - Train Accuracy: 99.92% - Test Accuracy: 98.15%



Epoch 15: 100%|██████████| 600/600 [00:15<00:00, 39.35batch/s]
Test 15: 100%|██████████| 100/100 [00:02<00:00, 44.83batch/s]

[Epoch 16] Train Loss: 0.000039 - Test Loss: 0.000726 - Train Accuracy: 99.95% - Test Accuracy: 98.09%



Epoch 16: 100%|██████████| 600/600 [00:14<00:00, 41.26batch/s]
Test 16: 100%|██████████| 100/100 [00:02<00:00, 44.13batch/s]

[Epoch 17] Train Loss: 0.000024 - Test Loss: 0.000668 - Train Accuracy: 99.99% - Test Accuracy: 98.27%



Epoch 17: 100%|██████████| 600/600 [00:14<00:00, 41.39batch/s]
Test 17: 100%|██████████| 100/100 [00:02<00:00, 35.14batch/s]


[Epoch 18] Train Loss: 0.000018 - Test Loss: 0.000660 - Train Accuracy: 99.99% - Test Accuracy: 98.31%


Epoch 18: 100%|██████████| 600/600 [00:14<00:00, 41.75batch/s]
Test 18: 100%|██████████| 100/100 [00:02<00:00, 45.89batch/s]

[Epoch 19] Train Loss: 0.000013 - Test Loss: 0.000679 - Train Accuracy: 100.00% - Test Accuracy: 98.24%



Epoch 19: 100%|██████████| 600/600 [00:14<00:00, 41.23batch/s]
Test 19: 100%|██████████| 100/100 [00:02<00:00, 44.32batch/s]

[Epoch 20] Train Loss: 0.000011 - Test Loss: 0.000672 - Train Accuracy: 100.00% - Test Accuracy: 98.29%



Epoch 20: 100%|██████████| 600/600 [00:15<00:00, 39.56batch/s]
Test 20: 100%|██████████| 100/100 [00:02<00:00, 44.36batch/s]


[Epoch 21] Train Loss: 0.000010 - Test Loss: 0.000689 - Train Accuracy: 100.00% - Test Accuracy: 98.36%


Epoch 21: 100%|██████████| 600/600 [00:14<00:00, 41.05batch/s]
Test 21: 100%|██████████| 100/100 [00:02<00:00, 45.03batch/s]

[Epoch 22] Train Loss: 0.000009 - Test Loss: 0.000704 - Train Accuracy: 100.00% - Test Accuracy: 98.30%



Epoch 22: 100%|██████████| 600/600 [00:14<00:00, 41.53batch/s]
Test 22: 100%|██████████| 100/100 [00:02<00:00, 36.30batch/s]

[Epoch 23] Train Loss: 0.000008 - Test Loss: 0.000712 - Train Accuracy: 100.00% - Test Accuracy: 98.29%



Epoch 23: 100%|██████████| 600/600 [00:14<00:00, 41.24batch/s]
Test 23: 100%|██████████| 100/100 [00:02<00:00, 44.13batch/s]

[Epoch 24] Train Loss: 0.000007 - Test Loss: 0.000708 - Train Accuracy: 100.00% - Test Accuracy: 98.31%



Epoch 24: 100%|██████████| 600/600 [00:14<00:00, 41.20batch/s]
Test 24: 100%|██████████| 100/100 [00:02<00:00, 44.42batch/s]

[Epoch 25] Train Loss: 0.000006 - Test Loss: 0.000725 - Train Accuracy: 100.00% - Test Accuracy: 98.30%

BEST TEST ACCURACY:  98.36  in epoch  20





In [None]:
# Add batchnorm

class Net(nn.Module):
    def __init__(self, sizes=[[784, 512], [512, 512], [512, 512], [512, 10]], criterion=None):
        super(Net, self).__init__()

        self.layers = nn.ModuleList()

        for i in range(len(sizes)-1):
            dims = sizes[i]
            self.layers.append(nn.Linear(dims[0], dims[1]))
            self.layers.append(nn.BatchNorm1d(dims[1]))
            self.layers.append(nn.ReLU())

        dims = sizes[-1]
        self.classifier = nn.Linear(dims[0], dims[1])

        self.criterion = criterion

    def forward(self, x, y=None):
        for layer in self.layers:
            x = layer(x)
        x = self.classifier(x)

        if y != None:
            loss = self.criterion(x, labels)
            return loss, x
        return x

# Instantiating the network and printing its architecture
num_classes = 10
net = Net(sizes=[[784, 512], [512, 512], [512, 512], [512, num_classes]], criterion=criterion)
print(net)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Params: ", count_parameters(net))

Net(
  (layers): ModuleList(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Linear(in_features=512, out_features=512, bias=True)
    (4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Linear(in_features=512, out_features=512, bias=True)
    (7): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
  )
  (classifier): Linear(in_features=512, out_features=10, bias=True)
  (criterion): CrossEntropyLoss()
)
Params:  935434


In [None]:
# Add LRA

# Training hyperparameters
optimizer = optim.SGD(net.parameters(), lr=0.1, weight_decay=1e-6, momentum=0.9)

# Learning Rate Annealing (LRA) scheduling
# lr = 0.1     if epoch < 25
# lr = 0.01    if 25 <= epoch < 50
# lr = 0.001   if epoch >= 50
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[25, 50], gamma=0.1)

net = net.to(device)

# Start training
epochs = 75

In [None]:
# Add DA

train_transform = transforms.Compose([
    transforms.RandomRotation(degrees=10),  # Rotación aleatoria hasta 10 grados
    transforms.RandomAffine(degrees=20, translate=(0.1, 0.1), scale=(0.9, 1.1)),  # Rotación, traslación y escala aleatorias
    transforms.ToTensor(),
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
])

In [None]:
class MNIST_dataset(Dataset):

    def __init__(self, partition = "train", transform=None):

        print("\nLoading MNIST ", partition, " Dataset...")
        self.partition = partition
        self.transform = transform
        if self.partition == "train":
            self.data = torchvision.datasets.MNIST('.data/', train=True, download=True)
        else:
            self.data = torchvision.datasets.MNIST('.data/', train=False, download=True)
        print("\tTotal Len.: ", len(self.data), "\n", 50*"-")

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):

        # Image
        image = self.data[idx][0]
        image = self.transform(image)
        # care! net expect a 784 size vector and our dataset
        # provide 1x28x28 (channels, height, width) -> Reshape!
        image = image.view(-1)

        # Label
        label = torch.tensor(self.data[idx][1])
        label = F.one_hot(label, num_classes=10).float()

        return {"idx": idx, "img": image, "label": label}

train_dataset = MNIST_dataset(partition="train", transform=train_transform)
test_dataset = MNIST_dataset(partition="test", transform=test_transform)


Loading MNIST  train  Dataset...
	Total Len.:  60000 
 --------------------------------------------------

Loading MNIST  test  Dataset...
	Total Len.:  10000 
 --------------------------------------------------


In [None]:
batch_size = 100
num_workers = multiprocessing.cpu_count()-1
print("Num workers", num_workers)
train_dataloader = DataLoader(train_dataset, batch_size, shuffle=True, num_workers=num_workers)
test_dataloader = DataLoader(test_dataset, batch_size, shuffle=False, num_workers=num_workers)

Num workers 1


In [None]:
class Net(nn.Module):
    def __init__(self, sizes=[[784, 1024], [1024, 1024], [1024, 1024], [1024, 10]], criterion=None):
        super(Net, self).__init__()

        self.layers = nn.ModuleList()

        for i in range(len(sizes)-1):
            dims = sizes[i]
            self.layers.append(nn.Linear(dims[0], dims[1]))
            self.layers.append(nn.BatchNorm1d(dims[1]))
            self.layers.append(nn.ReLU())

        dims = sizes[-1]
        self.classifier = nn.Linear(dims[0], dims[1])

        self.criterion = criterion

    def forward(self, x, y=None):
        for layer in self.layers:
            x = layer(x)
        x = self.classifier(x)

        if y != None:
            loss = self.criterion(x, y)
            return loss, x
        return x

# Training Settings
criterion = nn.CrossEntropyLoss()

# Instantiating the network and printing its architecture
num_classes = 10
net = Net(sizes=[[784, 1024], [1024, 1024], [1024, 1024], [1024, num_classes]], criterion=criterion)
print(net)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Params: ", count_parameters(net))

Net(
  (layers): ModuleList(
    (0): Linear(in_features=784, out_features=1024, bias=True)
    (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Linear(in_features=1024, out_features=1024, bias=True)
    (4): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Linear(in_features=1024, out_features=1024, bias=True)
    (7): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
  )
  (classifier): Linear(in_features=1024, out_features=10, bias=True)
  (criterion): CrossEntropyLoss()
)
Params:  2919434


In [None]:
# Training hyperparameters
optimizer = optim.SGD(net.parameters(), lr=0.1, weight_decay=1e-6, momentum=0.9)

# Learning Rate Annealing (LRA) scheduling
# lr = 0.1     if epoch < 25
# lr = 0.01    if 25 <= epoch < 50
# lr = 0.001   if epoch >= 50
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[25, 50], gamma=0.1)

net = net.to(device)

# Start training
epochs = 75

In [None]:
# Start training
epochs = 75

print("\n---- Start Training ----")
best_accuracy = -1
best_epoch = 0
for epoch in range(epochs):


    # TRAIN NETWORK
    train_loss, train_correct = 0, 0
    net.train()
    with tqdm(iter(train_dataloader), desc="Epoch " + str(epoch), unit="batch") as tepoch:
        for batch in tepoch:

          images = batch["img"].to(device)
          labels = batch["label"].to(device)
          ids = batch["idx"].to('cpu').numpy()

          # zero the parameter gradients
          optimizer.zero_grad()

          #  Forward
          loss, outputs = net(images, labels)

          loss.backward()

          optimizer.step()

          # one hot -> labels
          labels = torch.argmax(labels, dim=1)
          pred = torch.argmax(outputs, dim=1)

          train_correct += pred.eq(labels).sum().item()

          # print statistics
          train_loss += loss.item()

        scheduler.step()
        print("\tLR: ", optimizer.param_groups[0]['lr'])

    train_loss /= len(train_dataloader.dataset)

    # TEST NETWORK
    test_loss, test_correct = 0, 0
    net.eval()
    with torch.no_grad():
      with tqdm(iter(test_dataloader), desc="Test " + str(epoch), unit="batch") as tepoch:
          for batch in tepoch:

            images = batch["img"].to(device)
            labels = batch["label"].to(device)
            ids = batch["idx"].to('cpu').numpy()

            #  Forward
            outputs = net(images)
            test_loss += criterion(outputs, labels)

            # one hot -> labels
            labels = torch.argmax(labels, dim=1)
            pred = torch.argmax(outputs, dim=1)

            test_correct += pred.eq(labels).sum().item()

    test_loss /= len(test_dataloader.dataset)
    test_accuracy = 100. * test_correct / len(test_dataloader.dataset)

    print("[Epoch {}] Train Loss: {:.6f} - Test Loss: {:.6f} - Train Accuracy: {:.2f}% - Test Accuracy: {:.2f}%".format(
        epoch + 1, train_loss, test_loss, 100. * train_correct / len(train_dataloader.dataset), test_accuracy
    ))

    if test_accuracy > best_accuracy:
        best_accuracy = test_accuracy
        best_epoch = epoch

        # Save best weights
        torch.save(net.state_dict(), "best_model.pt")

print("\nBEST TEST ACCURACY: ", best_accuracy, " in epoch ", best_epoch)


# Load best weights
net.load_state_dict(torch.load("best_model.pt"))

test_loss, test_correct = 0, 0
net.eval()
with torch.no_grad():
    with tqdm(iter(test_dataloader), desc="Test " + str(epoch), unit="batch") as tepoch:
        for batch in tepoch:

            images = batch["img"].to(device)
            labels = batch["label"].to(device)
            ids = batch["idx"].to('cpu').numpy()

            #  Forward
            outputs = net(images)
            test_loss += criterion(outputs, labels)

            # one hot -> labels
            labels = torch.argmax(labels, dim=1)
            pred = torch.argmax(outputs, dim=1)

            test_correct += pred.eq(labels).sum().item()

    test_loss /= len(test_dataloader.dataset)
    test_accuracy = 100. * test_correct / len(test_dataloader.dataset)
print("Final best acc: ", test_accuracy)


---- Start Training ----


Epoch 0: 100%|██████████| 600/600 [00:27<00:00, 22.08batch/s]

	LR:  0.1



Test 0: 100%|██████████| 100/100 [00:02<00:00, 42.78batch/s]

[Epoch 1] Train Loss: 0.006287 - Test Loss: 0.001391 - Train Accuracy: 82.70% - Test Accuracy: 95.70%



Epoch 1: 100%|██████████| 600/600 [00:26<00:00, 22.40batch/s]

	LR:  0.1



Test 1: 100%|██████████| 100/100 [00:02<00:00, 34.16batch/s]

[Epoch 2] Train Loss: 0.002671 - Test Loss: 0.000899 - Train Accuracy: 91.61% - Test Accuracy: 97.19%



Epoch 2: 100%|██████████| 600/600 [00:27<00:00, 22.18batch/s]

	LR:  0.1



Test 2: 100%|██████████| 100/100 [00:02<00:00, 43.49batch/s]

[Epoch 3] Train Loss: 0.002045 - Test Loss: 0.000746 - Train Accuracy: 93.59% - Test Accuracy: 97.63%



Epoch 3: 100%|██████████| 600/600 [00:26<00:00, 22.49batch/s]

	LR:  0.1



Test 3: 100%|██████████| 100/100 [00:02<00:00, 43.52batch/s]

[Epoch 4] Train Loss: 0.001781 - Test Loss: 0.000592 - Train Accuracy: 94.51% - Test Accuracy: 98.11%



Epoch 4: 100%|██████████| 600/600 [00:27<00:00, 21.98batch/s]

	LR:  0.1



Test 4: 100%|██████████| 100/100 [00:02<00:00, 42.80batch/s]

[Epoch 5] Train Loss: 0.001556 - Test Loss: 0.000512 - Train Accuracy: 95.03% - Test Accuracy: 98.52%



Epoch 5: 100%|██████████| 600/600 [00:27<00:00, 22.02batch/s]

	LR:  0.1



Test 5: 100%|██████████| 100/100 [00:02<00:00, 43.70batch/s]

[Epoch 6] Train Loss: 0.001409 - Test Loss: 0.000535 - Train Accuracy: 95.53% - Test Accuracy: 98.24%



Epoch 6: 100%|██████████| 600/600 [00:27<00:00, 21.77batch/s]

	LR:  0.1



Test 6: 100%|██████████| 100/100 [00:02<00:00, 40.83batch/s]

[Epoch 7] Train Loss: 0.001318 - Test Loss: 0.000488 - Train Accuracy: 95.87% - Test Accuracy: 98.54%



Epoch 7: 100%|██████████| 600/600 [00:26<00:00, 22.24batch/s]

	LR:  0.1



Test 7: 100%|██████████| 100/100 [00:02<00:00, 43.35batch/s]

[Epoch 8] Train Loss: 0.001250 - Test Loss: 0.000493 - Train Accuracy: 96.03% - Test Accuracy: 98.37%



Epoch 8: 100%|██████████| 600/600 [00:26<00:00, 22.29batch/s]

	LR:  0.1



Test 8: 100%|██████████| 100/100 [00:02<00:00, 34.01batch/s]


[Epoch 9] Train Loss: 0.001159 - Test Loss: 0.000397 - Train Accuracy: 96.32% - Test Accuracy: 98.61%


Epoch 9: 100%|██████████| 600/600 [00:26<00:00, 22.32batch/s]

	LR:  0.1



Test 9: 100%|██████████| 100/100 [00:02<00:00, 43.43batch/s]

[Epoch 10] Train Loss: 0.001158 - Test Loss: 0.000375 - Train Accuracy: 96.35% - Test Accuracy: 98.78%



Epoch 10: 100%|██████████| 600/600 [00:26<00:00, 22.36batch/s]

	LR:  0.1



Test 10: 100%|██████████| 100/100 [00:02<00:00, 43.87batch/s]

[Epoch 11] Train Loss: 0.001007 - Test Loss: 0.000337 - Train Accuracy: 96.77% - Test Accuracy: 98.94%



Epoch 11: 100%|██████████| 600/600 [00:27<00:00, 21.92batch/s]

	LR:  0.1



Test 11: 100%|██████████| 100/100 [00:02<00:00, 44.40batch/s]

[Epoch 12] Train Loss: 0.001016 - Test Loss: 0.000402 - Train Accuracy: 96.74% - Test Accuracy: 98.58%



Epoch 12: 100%|██████████| 600/600 [00:26<00:00, 22.38batch/s]

	LR:  0.1



Test 12: 100%|██████████| 100/100 [00:02<00:00, 42.89batch/s]

[Epoch 13] Train Loss: 0.000975 - Test Loss: 0.000443 - Train Accuracy: 96.93% - Test Accuracy: 98.54%



Epoch 13: 100%|██████████| 600/600 [00:27<00:00, 22.06batch/s]

	LR:  0.1



Test 13: 100%|██████████| 100/100 [00:02<00:00, 36.77batch/s]

[Epoch 14] Train Loss: 0.000922 - Test Loss: 0.000358 - Train Accuracy: 97.06% - Test Accuracy: 98.91%



Epoch 14: 100%|██████████| 600/600 [00:26<00:00, 22.40batch/s]

	LR:  0.1



Test 14: 100%|██████████| 100/100 [00:02<00:00, 44.42batch/s]

[Epoch 15] Train Loss: 0.000883 - Test Loss: 0.000328 - Train Accuracy: 97.18% - Test Accuracy: 98.87%



Epoch 15: 100%|██████████| 600/600 [00:26<00:00, 22.46batch/s]

	LR:  0.1



Test 15: 100%|██████████| 100/100 [00:02<00:00, 40.09batch/s]

[Epoch 16] Train Loss: 0.000894 - Test Loss: 0.000318 - Train Accuracy: 97.15% - Test Accuracy: 98.92%



Epoch 16: 100%|██████████| 600/600 [00:27<00:00, 22.04batch/s]

	LR:  0.1



Test 16: 100%|██████████| 100/100 [00:02<00:00, 43.69batch/s]

[Epoch 17] Train Loss: 0.000859 - Test Loss: 0.000301 - Train Accuracy: 97.22% - Test Accuracy: 99.18%



Epoch 17: 100%|██████████| 600/600 [00:26<00:00, 22.32batch/s]

	LR:  0.1



Test 17: 100%|██████████| 100/100 [00:02<00:00, 43.71batch/s]

[Epoch 18] Train Loss: 0.000841 - Test Loss: 0.000335 - Train Accuracy: 97.31% - Test Accuracy: 99.05%



Epoch 18: 100%|██████████| 600/600 [00:27<00:00, 22.02batch/s]

	LR:  0.1



Test 18: 100%|██████████| 100/100 [00:02<00:00, 39.37batch/s]

[Epoch 19] Train Loss: 0.000831 - Test Loss: 0.000361 - Train Accuracy: 97.35% - Test Accuracy: 98.84%



Epoch 19: 100%|██████████| 600/600 [00:26<00:00, 22.28batch/s]

	LR:  0.1



Test 19: 100%|██████████| 100/100 [00:02<00:00, 44.52batch/s]

[Epoch 20] Train Loss: 0.000795 - Test Loss: 0.000301 - Train Accuracy: 97.56% - Test Accuracy: 98.98%



Epoch 20: 100%|██████████| 600/600 [00:26<00:00, 22.34batch/s]

	LR:  0.1



Test 20: 100%|██████████| 100/100 [00:02<00:00, 34.84batch/s]

[Epoch 21] Train Loss: 0.000781 - Test Loss: 0.000329 - Train Accuracy: 97.54% - Test Accuracy: 98.93%



Epoch 21: 100%|██████████| 600/600 [00:26<00:00, 22.27batch/s]

	LR:  0.1



Test 21: 100%|██████████| 100/100 [00:02<00:00, 43.42batch/s]

[Epoch 22] Train Loss: 0.000758 - Test Loss: 0.000278 - Train Accuracy: 97.55% - Test Accuracy: 99.10%



Epoch 22: 100%|██████████| 600/600 [00:26<00:00, 22.35batch/s]

	LR:  0.1



Test 22: 100%|██████████| 100/100 [00:02<00:00, 41.77batch/s]

[Epoch 23] Train Loss: 0.000732 - Test Loss: 0.000307 - Train Accuracy: 97.62% - Test Accuracy: 98.95%



Epoch 23: 100%|██████████| 600/600 [00:27<00:00, 21.91batch/s]

	LR:  0.1



Test 23: 100%|██████████| 100/100 [00:02<00:00, 43.58batch/s]

[Epoch 24] Train Loss: 0.000731 - Test Loss: 0.000282 - Train Accuracy: 97.62% - Test Accuracy: 99.09%



Epoch 24: 100%|██████████| 600/600 [00:26<00:00, 22.34batch/s]

	LR:  0.010000000000000002



Test 24: 100%|██████████| 100/100 [00:02<00:00, 43.23batch/s]

[Epoch 25] Train Loss: 0.000726 - Test Loss: 0.000296 - Train Accuracy: 97.69% - Test Accuracy: 98.97%



Epoch 25: 100%|██████████| 600/600 [00:26<00:00, 22.23batch/s]

	LR:  0.010000000000000002



Test 25: 100%|██████████| 100/100 [00:02<00:00, 36.50batch/s]

[Epoch 26] Train Loss: 0.000589 - Test Loss: 0.000229 - Train Accuracy: 98.12% - Test Accuracy: 99.18%



Epoch 26: 100%|██████████| 600/600 [00:26<00:00, 22.29batch/s]

	LR:  0.010000000000000002



Test 26: 100%|██████████| 100/100 [00:02<00:00, 42.59batch/s]

[Epoch 27] Train Loss: 0.000555 - Test Loss: 0.000222 - Train Accuracy: 98.21% - Test Accuracy: 99.17%



Epoch 27: 100%|██████████| 600/600 [00:27<00:00, 22.13batch/s]

	LR:  0.010000000000000002



Test 27: 100%|██████████| 100/100 [00:02<00:00, 36.95batch/s]

[Epoch 28] Train Loss: 0.000506 - Test Loss: 0.000217 - Train Accuracy: 98.34% - Test Accuracy: 99.22%



Epoch 28: 100%|██████████| 600/600 [00:26<00:00, 22.28batch/s]

	LR:  0.010000000000000002



Test 28: 100%|██████████| 100/100 [00:02<00:00, 43.74batch/s]

[Epoch 29] Train Loss: 0.000514 - Test Loss: 0.000201 - Train Accuracy: 98.37% - Test Accuracy: 99.30%



Epoch 29: 100%|██████████| 600/600 [00:26<00:00, 22.37batch/s]

	LR:  0.010000000000000002



Test 29: 100%|██████████| 100/100 [00:02<00:00, 44.30batch/s]

[Epoch 30] Train Loss: 0.000515 - Test Loss: 0.000206 - Train Accuracy: 98.35% - Test Accuracy: 99.26%



Epoch 30: 100%|██████████| 600/600 [00:27<00:00, 21.96batch/s]

	LR:  0.010000000000000002



Test 30: 100%|██████████| 100/100 [00:02<00:00, 43.28batch/s]

[Epoch 31] Train Loss: 0.000498 - Test Loss: 0.000203 - Train Accuracy: 98.40% - Test Accuracy: 99.30%



Epoch 31: 100%|██████████| 600/600 [00:26<00:00, 22.60batch/s]

	LR:  0.010000000000000002



Test 31: 100%|██████████| 100/100 [00:02<00:00, 44.03batch/s]

[Epoch 32] Train Loss: 0.000501 - Test Loss: 0.000200 - Train Accuracy: 98.39% - Test Accuracy: 99.26%



Epoch 32: 100%|██████████| 600/600 [00:26<00:00, 22.56batch/s]

	LR:  0.010000000000000002



Test 32: 100%|██████████| 100/100 [00:02<00:00, 34.26batch/s]

[Epoch 33] Train Loss: 0.000478 - Test Loss: 0.000201 - Train Accuracy: 98.42% - Test Accuracy: 99.32%



Epoch 33: 100%|██████████| 600/600 [00:26<00:00, 22.43batch/s]

	LR:  0.010000000000000002



Test 33: 100%|██████████| 100/100 [00:02<00:00, 42.44batch/s]

[Epoch 34] Train Loss: 0.000482 - Test Loss: 0.000194 - Train Accuracy: 98.48% - Test Accuracy: 99.32%



Epoch 34: 100%|██████████| 600/600 [00:26<00:00, 22.25batch/s]

	LR:  0.010000000000000002



Test 34: 100%|██████████| 100/100 [00:02<00:00, 44.55batch/s]

[Epoch 35] Train Loss: 0.000456 - Test Loss: 0.000192 - Train Accuracy: 98.52% - Test Accuracy: 99.31%



Epoch 35: 100%|██████████| 600/600 [00:27<00:00, 21.89batch/s]

	LR:  0.010000000000000002



Test 35: 100%|██████████| 100/100 [00:02<00:00, 44.69batch/s]

[Epoch 36] Train Loss: 0.000475 - Test Loss: 0.000191 - Train Accuracy: 98.47% - Test Accuracy: 99.32%



Epoch 36: 100%|██████████| 600/600 [00:26<00:00, 22.50batch/s]

	LR:  0.010000000000000002



Test 36: 100%|██████████| 100/100 [00:02<00:00, 42.54batch/s]


[Epoch 37] Train Loss: 0.000462 - Test Loss: 0.000190 - Train Accuracy: 98.50% - Test Accuracy: 99.34%


Epoch 37: 100%|██████████| 600/600 [00:26<00:00, 22.25batch/s]

	LR:  0.010000000000000002



Test 37: 100%|██████████| 100/100 [00:02<00:00, 37.24batch/s]

[Epoch 38] Train Loss: 0.000463 - Test Loss: 0.000188 - Train Accuracy: 98.48% - Test Accuracy: 99.34%



Epoch 38: 100%|██████████| 600/600 [00:26<00:00, 22.70batch/s]

	LR:  0.010000000000000002



Test 38: 100%|██████████| 100/100 [00:02<00:00, 43.67batch/s]

[Epoch 39] Train Loss: 0.000465 - Test Loss: 0.000190 - Train Accuracy: 98.54% - Test Accuracy: 99.32%



Epoch 39: 100%|██████████| 600/600 [00:26<00:00, 22.84batch/s]

	LR:  0.010000000000000002



Test 39: 100%|██████████| 100/100 [00:02<00:00, 43.30batch/s]

[Epoch 40] Train Loss: 0.000449 - Test Loss: 0.000197 - Train Accuracy: 98.53% - Test Accuracy: 99.33%



Epoch 40: 100%|██████████| 600/600 [00:27<00:00, 22.11batch/s]


	LR:  0.010000000000000002


Test 40: 100%|██████████| 100/100 [00:02<00:00, 42.93batch/s]

[Epoch 41] Train Loss: 0.000464 - Test Loss: 0.000190 - Train Accuracy: 98.52% - Test Accuracy: 99.25%



Epoch 41: 100%|██████████| 600/600 [00:27<00:00, 22.16batch/s]

	LR:  0.010000000000000002



Test 41: 100%|██████████| 100/100 [00:02<00:00, 43.00batch/s]

[Epoch 42] Train Loss: 0.000440 - Test Loss: 0.000192 - Train Accuracy: 98.54% - Test Accuracy: 99.28%



Epoch 42: 100%|██████████| 600/600 [00:26<00:00, 22.23batch/s]

	LR:  0.010000000000000002



Test 42: 100%|██████████| 100/100 [00:02<00:00, 36.40batch/s]

[Epoch 43] Train Loss: 0.000427 - Test Loss: 0.000199 - Train Accuracy: 98.62% - Test Accuracy: 99.27%



Epoch 43: 100%|██████████| 600/600 [00:26<00:00, 22.67batch/s]

	LR:  0.010000000000000002



Test 43: 100%|██████████| 100/100 [00:02<00:00, 43.47batch/s]

[Epoch 44] Train Loss: 0.000445 - Test Loss: 0.000189 - Train Accuracy: 98.56% - Test Accuracy: 99.29%



Epoch 44: 100%|██████████| 600/600 [00:26<00:00, 22.62batch/s]

	LR:  0.010000000000000002



Test 44: 100%|██████████| 100/100 [00:02<00:00, 41.33batch/s]

[Epoch 45] Train Loss: 0.000446 - Test Loss: 0.000186 - Train Accuracy: 98.53% - Test Accuracy: 99.34%



Epoch 45: 100%|██████████| 600/600 [00:28<00:00, 21.17batch/s]

	LR:  0.010000000000000002



Test 45: 100%|██████████| 100/100 [00:02<00:00, 38.45batch/s]

[Epoch 46] Train Loss: 0.000441 - Test Loss: 0.000186 - Train Accuracy: 98.54% - Test Accuracy: 99.29%



Epoch 46: 100%|██████████| 600/600 [00:28<00:00, 21.23batch/s]

	LR:  0.010000000000000002



Test 46: 100%|██████████| 100/100 [00:02<00:00, 40.60batch/s]

[Epoch 47] Train Loss: 0.000436 - Test Loss: 0.000193 - Train Accuracy: 98.59% - Test Accuracy: 99.31%



Epoch 47: 100%|██████████| 600/600 [00:27<00:00, 21.92batch/s]

	LR:  0.010000000000000002



Test 47: 100%|██████████| 100/100 [00:02<00:00, 44.07batch/s]

[Epoch 48] Train Loss: 0.000428 - Test Loss: 0.000197 - Train Accuracy: 98.63% - Test Accuracy: 99.26%



Epoch 48: 100%|██████████| 600/600 [00:26<00:00, 22.74batch/s]

	LR:  0.010000000000000002



Test 48: 100%|██████████| 100/100 [00:02<00:00, 44.60batch/s]

[Epoch 49] Train Loss: 0.000434 - Test Loss: 0.000182 - Train Accuracy: 98.57% - Test Accuracy: 99.33%



Epoch 49: 100%|██████████| 600/600 [00:26<00:00, 22.38batch/s]

	LR:  0.0010000000000000002



Test 49: 100%|██████████| 100/100 [00:02<00:00, 35.77batch/s]

[Epoch 50] Train Loss: 0.000421 - Test Loss: 0.000185 - Train Accuracy: 98.61% - Test Accuracy: 99.30%



Epoch 50: 100%|██████████| 600/600 [00:26<00:00, 22.61batch/s]

	LR:  0.0010000000000000002



Test 50: 100%|██████████| 100/100 [00:02<00:00, 43.67batch/s]

[Epoch 51] Train Loss: 0.000422 - Test Loss: 0.000180 - Train Accuracy: 98.64% - Test Accuracy: 99.30%



Epoch 51: 100%|██████████| 600/600 [00:26<00:00, 22.33batch/s]

	LR:  0.0010000000000000002



Test 51: 100%|██████████| 100/100 [00:02<00:00, 41.54batch/s]

[Epoch 52] Train Loss: 0.000397 - Test Loss: 0.000181 - Train Accuracy: 98.65% - Test Accuracy: 99.29%



Epoch 52: 100%|██████████| 600/600 [00:27<00:00, 22.01batch/s]

	LR:  0.0010000000000000002



Test 52: 100%|██████████| 100/100 [00:02<00:00, 43.52batch/s]

[Epoch 53] Train Loss: 0.000421 - Test Loss: 0.000175 - Train Accuracy: 98.63% - Test Accuracy: 99.32%



Epoch 53: 100%|██████████| 600/600 [00:26<00:00, 22.26batch/s]

	LR:  0.0010000000000000002



Test 53: 100%|██████████| 100/100 [00:02<00:00, 42.29batch/s]

[Epoch 54] Train Loss: 0.000433 - Test Loss: 0.000186 - Train Accuracy: 98.58% - Test Accuracy: 99.31%



Epoch 54: 100%|██████████| 600/600 [00:27<00:00, 21.73batch/s]

	LR:  0.0010000000000000002



Test 54: 100%|██████████| 100/100 [00:02<00:00, 40.16batch/s]

[Epoch 55] Train Loss: 0.000399 - Test Loss: 0.000179 - Train Accuracy: 98.69% - Test Accuracy: 99.31%



Epoch 55: 100%|██████████| 600/600 [00:27<00:00, 21.85batch/s]

	LR:  0.0010000000000000002



Test 55: 100%|██████████| 100/100 [00:02<00:00, 43.33batch/s]

[Epoch 56] Train Loss: 0.000400 - Test Loss: 0.000181 - Train Accuracy: 98.72% - Test Accuracy: 99.35%



Epoch 56: 100%|██████████| 600/600 [00:26<00:00, 22.24batch/s]

	LR:  0.0010000000000000002



Test 56: 100%|██████████| 100/100 [00:02<00:00, 35.48batch/s]

[Epoch 57] Train Loss: 0.000418 - Test Loss: 0.000184 - Train Accuracy: 98.64% - Test Accuracy: 99.32%



Epoch 57: 100%|██████████| 600/600 [00:26<00:00, 22.35batch/s]

	LR:  0.0010000000000000002



Test 57: 100%|██████████| 100/100 [00:02<00:00, 43.39batch/s]

[Epoch 58] Train Loss: 0.000418 - Test Loss: 0.000183 - Train Accuracy: 98.63% - Test Accuracy: 99.30%



Epoch 58: 100%|██████████| 600/600 [00:26<00:00, 22.50batch/s]

	LR:  0.0010000000000000002



Test 58: 100%|██████████| 100/100 [00:02<00:00, 41.34batch/s]

[Epoch 59] Train Loss: 0.000405 - Test Loss: 0.000182 - Train Accuracy: 98.69% - Test Accuracy: 99.33%



Epoch 59: 100%|██████████| 600/600 [00:27<00:00, 22.22batch/s]

	LR:  0.0010000000000000002



Test 59: 100%|██████████| 100/100 [00:02<00:00, 44.68batch/s]

[Epoch 60] Train Loss: 0.000414 - Test Loss: 0.000179 - Train Accuracy: 98.69% - Test Accuracy: 99.32%



Epoch 60: 100%|██████████| 600/600 [00:26<00:00, 22.55batch/s]

	LR:  0.0010000000000000002



Test 60: 100%|██████████| 100/100 [00:02<00:00, 44.14batch/s]

[Epoch 61] Train Loss: 0.000403 - Test Loss: 0.000181 - Train Accuracy: 98.71% - Test Accuracy: 99.32%



Epoch 61: 100%|██████████| 600/600 [00:26<00:00, 22.53batch/s]

	LR:  0.0010000000000000002



Test 61: 100%|██████████| 100/100 [00:02<00:00, 35.07batch/s]

[Epoch 62] Train Loss: 0.000409 - Test Loss: 0.000179 - Train Accuracy: 98.65% - Test Accuracy: 99.33%



Epoch 62: 100%|██████████| 600/600 [00:26<00:00, 22.50batch/s]

	LR:  0.0010000000000000002



Test 62: 100%|██████████| 100/100 [00:02<00:00, 44.35batch/s]

[Epoch 63] Train Loss: 0.000420 - Test Loss: 0.000179 - Train Accuracy: 98.68% - Test Accuracy: 99.31%



Epoch 63: 100%|██████████| 600/600 [00:26<00:00, 22.54batch/s]

	LR:  0.0010000000000000002



Test 63: 100%|██████████| 100/100 [00:02<00:00, 43.78batch/s]

[Epoch 64] Train Loss: 0.000419 - Test Loss: 0.000186 - Train Accuracy: 98.64% - Test Accuracy: 99.29%



Epoch 64: 100%|██████████| 600/600 [00:27<00:00, 21.96batch/s]

	LR:  0.0010000000000000002



Test 64: 100%|██████████| 100/100 [00:02<00:00, 44.64batch/s]

[Epoch 65] Train Loss: 0.000382 - Test Loss: 0.000182 - Train Accuracy: 98.75% - Test Accuracy: 99.31%



Epoch 65: 100%|██████████| 600/600 [00:26<00:00, 22.57batch/s]

	LR:  0.0010000000000000002



Test 65: 100%|██████████| 100/100 [00:02<00:00, 43.60batch/s]

[Epoch 66] Train Loss: 0.000407 - Test Loss: 0.000181 - Train Accuracy: 98.67% - Test Accuracy: 99.33%



Epoch 66: 100%|██████████| 600/600 [00:26<00:00, 22.52batch/s]

	LR:  0.0010000000000000002



Test 66: 100%|██████████| 100/100 [00:02<00:00, 34.28batch/s]

[Epoch 67] Train Loss: 0.000413 - Test Loss: 0.000178 - Train Accuracy: 98.64% - Test Accuracy: 99.39%



Epoch 67: 100%|██████████| 600/600 [00:26<00:00, 22.39batch/s]

	LR:  0.0010000000000000002



Test 67: 100%|██████████| 100/100 [00:02<00:00, 42.54batch/s]

[Epoch 68] Train Loss: 0.000417 - Test Loss: 0.000176 - Train Accuracy: 98.66% - Test Accuracy: 99.33%



Epoch 68: 100%|██████████| 600/600 [00:26<00:00, 22.46batch/s]

	LR:  0.0010000000000000002



Test 68: 100%|██████████| 100/100 [00:02<00:00, 44.34batch/s]

[Epoch 69] Train Loss: 0.000403 - Test Loss: 0.000176 - Train Accuracy: 98.70% - Test Accuracy: 99.30%



Epoch 69: 100%|██████████| 600/600 [00:27<00:00, 22.03batch/s]

	LR:  0.0010000000000000002



Test 69: 100%|██████████| 100/100 [00:02<00:00, 43.73batch/s]

[Epoch 70] Train Loss: 0.000405 - Test Loss: 0.000177 - Train Accuracy: 98.70% - Test Accuracy: 99.32%



Epoch 70: 100%|██████████| 600/600 [00:26<00:00, 22.56batch/s]

	LR:  0.0010000000000000002



Test 70: 100%|██████████| 100/100 [00:02<00:00, 44.06batch/s]

[Epoch 71] Train Loss: 0.000414 - Test Loss: 0.000183 - Train Accuracy: 98.68% - Test Accuracy: 99.34%



Epoch 71: 100%|██████████| 600/600 [00:26<00:00, 22.56batch/s]

	LR:  0.0010000000000000002



Test 71: 100%|██████████| 100/100 [00:02<00:00, 34.85batch/s]

[Epoch 72] Train Loss: 0.000400 - Test Loss: 0.000180 - Train Accuracy: 98.71% - Test Accuracy: 99.32%



Epoch 72: 100%|██████████| 600/600 [00:26<00:00, 22.68batch/s]

	LR:  0.0010000000000000002



Test 72: 100%|██████████| 100/100 [00:02<00:00, 44.37batch/s]

[Epoch 73] Train Loss: 0.000406 - Test Loss: 0.000183 - Train Accuracy: 98.73% - Test Accuracy: 99.31%



Epoch 73: 100%|██████████| 600/600 [00:27<00:00, 21.93batch/s]

	LR:  0.0010000000000000002



Test 73: 100%|██████████| 100/100 [00:02<00:00, 43.16batch/s]

[Epoch 74] Train Loss: 0.000396 - Test Loss: 0.000174 - Train Accuracy: 98.75% - Test Accuracy: 99.32%



Epoch 74: 100%|██████████| 600/600 [00:27<00:00, 21.72batch/s]

	LR:  0.0010000000000000002



Test 74: 100%|██████████| 100/100 [00:02<00:00, 43.32batch/s]

[Epoch 75] Train Loss: 0.000395 - Test Loss: 0.000177 - Train Accuracy: 98.72% - Test Accuracy: 99.32%

BEST TEST ACCURACY:  99.39  in epoch  66



Test 74: 100%|██████████| 100/100 [00:02<00:00, 44.06batch/s]

Final best acc:  99.39



