# **Achieving 99.4% accuracy in MNIST using MLPs**

In [1]:
import torch
import torchvision
import pandas as pd
import torch.nn as nn
from tqdm import tqdm
import multiprocessing
import torch.optim as optim
import torch.nn.functional as  F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

print("Torch version: ", torch. __version__)

####################################################################
# Set Device
####################################################################

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device: ", device)

Torch version:  2.10.0.dev20251124+cu130
Device:  cuda


In [3]:
####################################################################
# Prepare Data
####################################################################

train_set = torchvision.datasets.MNIST('.data/', train=True, download=True)
#train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)

test_set = torchvision.datasets.MNIST('.data/', train=False, download=True)
#test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=True)

print("Train images: ", train_set)
print("Image: ", train_set[0][0])
print("Label: ", train_set[0][1])
print("Label one hot: ", F.one_hot(torch.tensor(train_set[0][1]), num_classes=10))

Train images:  Dataset MNIST
    Number of datapoints: 60000
    Root location: .data/
    Split: Train
Image:  <PIL.Image.Image image mode=L size=28x28 at 0x222FB8CA410>
Label:  5
Label one hot:  tensor([0, 0, 0, 0, 0, 1, 0, 0, 0, 0])


In [4]:
####################################################################
# Dataset Class
####################################################################

class GaussianNoise(object):
    def __init__(self, std=0.05):
        self.std = std

    def __call__(self, tensor):
        return tensor + torch.randn_like(tensor) * self.std

class MNIST_dataset(Dataset):

    def __init__(self, data, partition = "train"):

        print("\nLoading MNIST ", partition, " Dataset...")
        self.data = data
        self.partition = partition
        self.transform_train = transforms.Compose([
            # Rotaciones suaves
            transforms.RandomRotation(10),
            # Desplazamientos y escalados
            transforms.RandomAffine(
                degrees=0,
                translate=(0.1, 0.1),
                scale=(0.9, 1.1)
            ),
            # Deformaciones elásticas
            transforms.ElasticTransform(alpha=15.0, sigma=5.0),
            transforms.ToTensor(),
            GaussianNoise(0.05),
            # Normalización
            transforms.Normalize((0.1307,), (0.3081,)),
        ])
        self.transform_test = transforms.Compose([
            transforms.ToTensor(),
            # Normalización
            transforms.Normalize((0.1307,), (0.3081,)),
        ])
        print("\tTotal Len.: ", len(self.data), "\n", 50*"-")

    # Métodos de herencia obligatorios __len__ y __item__
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # Image
        image = self.data[idx][0]
        # PIL Image to torch tensor
        if self.partition == "train":
            image_tensor = self.transform_train(image)
        else:
            image_tensor = self.transform_test(image)
        # care! net expect a 784 size vector and our dataset
        # provide 1x28x28 (channels, height, width) -> Reshape!
        # Con el view -1 pasamos de 28x28 (28, 28) a 764 (, 764)
        image_tensor = image_tensor.view(-1)

        # Label
        label = torch.tensor(self.data[idx][1])
        label = F.one_hot(label, num_classes=10).float()

        return {"img": image_tensor, "label": label}

train_dataset = MNIST_dataset(train_set, partition="train")
test_dataset = MNIST_dataset(test_set, partition="test")


Loading MNIST  train  Dataset...
	Total Len.:  60000 
 --------------------------------------------------

Loading MNIST  test  Dataset...
	Total Len.:  10000 
 --------------------------------------------------


In [5]:
####################################################################
# DataLoader Class
####################################################################

batch_size = 100
# num_workers = multiprocessing.cpu_count()-1
num_workers = 0
print("Num workers", num_workers)
train_dataloader = DataLoader(train_dataset, batch_size, shuffle=True, num_workers=num_workers, pin_memory=True)
test_dataloader = DataLoader(test_dataset, batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)

Num workers 0


In [6]:
####################################################################
# Neural Network Class
####################################################################

# Creating our Neural Network - Fully Connected
# Hereda de nn.Module
class Net(nn.Module):
    def __init__(self, num_classes):
        super(Net, self).__init__()
        # Inicio
        self.linear1 = nn.Linear(784, 1024)
        self.batch_norm1 = nn.BatchNorm1d(1024)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.2)

        # BLoque residual estándar
        self.residual_bloq1 = nn.Sequential(
            nn.Linear(1024, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(1024, 1024),
            nn.BatchNorm1d(1024)
        )

        # BLoque residual estándar
        self.residual_bloq2 = nn.Sequential(
            nn.Linear(1024, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(1024, 1024),
            nn.BatchNorm1d(1024)
        )
        

        self.classifier = nn.Linear(1024, num_classes)

    # Determina el orden de las capas
    def forward(self, x):
        # Capa inicial
        out = self.linear1(x)
        out = self.batch_norm1(out)
        out = self.relu1(out)
        out = self.dropout1(out)

        # Bloque residual 1
        residual = out
        out = self.residual_bloq1(out)
        out = out + residual
        out = torch.relu(out) # Activación después de la residual

        # Bloque residual 2
        residual = out
        out = self.residual_bloq2(out)
        out = out + residual
        out = torch.relu(out) # Activación después de la residual

        # Clasificador
        out = self.classifier(out)
        return out



# Instantiating the network and printing its architecture
num_classes = 10
net = Net(num_classes)
print(net)

# Cuenta parámetros entrenables
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Params: ", count_parameters(net))

Net(
  (linear1): Linear(in_features=784, out_features=1024, bias=True)
  (batch_norm1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (dropout1): Dropout(p=0.2, inplace=False)
  (residual_bloq1): Sequential(
    (0): Linear(in_features=1024, out_features=1024, bias=True)
    (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=1024, out_features=1024, bias=True)
    (5): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (residual_bloq2): Sequential(
    (0): Linear(in_features=1024, out_features=1024, bias=True)
    (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=1024, out_features=1024, bias=True)
    (5): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=T

In [6]:
####################################################################
# Training settings
####################################################################

# Training hyperparameters
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, weight_decay=1e-6, momentum=0.9)
# scheduler (OneCycleLR)
# steps = longitud del dataset / tamaño de batch
epochs = 100

In [7]:
####################################################################
# Training
####################################################################

# Load model in GPU
net.to(device)

print("\n---- Start Training ----")
best_accuracy = -1
best_epoch = 0
for epoch in range(epochs):


    # TRAIN NETWORK
    train_loss, train_correct = 0, 0
    # Modo de entrenamiento del modelo
    net.train()
    with tqdm(iter(train_dataloader), desc="Epoch " + str(epoch), unit="batch") as tepoch:
        for batch in tepoch:

            # Returned values of Dataset Class
            images = batch["img"].to(device)
            labels = batch["label"].to(device)

            # zero the parameter gradients
            # Por cada step, reiniciar los gradientes para que no haga max
            optimizer.zero_grad()

            # Forward
            outputs = net(images)
            loss = criterion(outputs, labels)

            # Calculate gradients
            loss.backward()

            # Update gradients
            optimizer.step()

            # one hot -> labels
            labels = torch.argmax(labels, dim=1)
            pred = torch.argmax(outputs, dim=1)
            train_correct += pred.eq(labels).sum().item()

            # print statistics
            train_loss += loss.item()

    train_loss /= len(train_dataloader.dataset)

    # TEST NETWORK
    test_loss, test_correct = 0, 0
    net.eval()
    with torch.no_grad():
      with tqdm(iter(test_dataloader), desc="Test " + str(epoch), unit="batch") as tepoch:
          for batch in tepoch:

            images = batch["img"].to(device)
            labels = batch["label"].to(device)

            # Forward
            outputs = net(images)
            test_loss += criterion(outputs, labels)

            # one hot -> labels
            labels = torch.argmax(labels, dim=1)
            pred = torch.argmax(outputs, dim=1)

            test_correct += pred.eq(labels).sum().item()

    test_loss /= len(test_dataloader.dataset)
    test_accuracy = 100. * test_correct / len(test_dataloader.dataset)

    print("[Epoch {}] Train Loss: {:.6f} - Test Loss: {:.6f} - Train Accuracy: {:.2f}% - Test Accuracy: {:.2f}%".format(
        epoch + 1, train_loss, test_loss, 100. * train_correct / len(train_dataloader.dataset), test_accuracy
    ))

    if test_accuracy > best_accuracy:
        best_accuracy = test_accuracy
        best_epoch = epoch

        # Save best weights
        torch.save(net.state_dict(), "best_model.pt")

print("\nBEST TEST ACCURACY: ", best_accuracy, " in epoch ", best_epoch)


---- Start Training ----


Epoch 0: 100%|██████████| 600/600 [01:39<00:00,  6.04batch/s]
Test 0: 100%|██████████| 100/100 [00:02<00:00, 40.33batch/s]


[Epoch 1] Train Loss: 0.005713 - Test Loss: 0.001174 - Train Accuracy: 82.63% - Test Accuracy: 96.25%


Epoch 1: 100%|██████████| 600/600 [01:39<00:00,  6.05batch/s]
Test 1: 100%|██████████| 100/100 [00:02<00:00, 41.40batch/s]


[Epoch 2] Train Loss: 0.003107 - Test Loss: 0.000827 - Train Accuracy: 90.64% - Test Accuracy: 97.39%


Epoch 2: 100%|██████████| 600/600 [01:41<00:00,  5.93batch/s]
Test 2: 100%|██████████| 100/100 [00:02<00:00, 38.42batch/s]


[Epoch 3] Train Loss: 0.002500 - Test Loss: 0.000696 - Train Accuracy: 92.48% - Test Accuracy: 97.61%


Epoch 3: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 3: 100%|██████████| 100/100 [00:02<00:00, 39.18batch/s]


[Epoch 4] Train Loss: 0.002128 - Test Loss: 0.000639 - Train Accuracy: 93.48% - Test Accuracy: 97.98%


Epoch 4: 100%|██████████| 600/600 [01:45<00:00,  5.70batch/s]
Test 4: 100%|██████████| 100/100 [00:02<00:00, 38.91batch/s]


[Epoch 5] Train Loss: 0.001921 - Test Loss: 0.000623 - Train Accuracy: 94.10% - Test Accuracy: 98.22%


Epoch 5: 100%|██████████| 600/600 [01:45<00:00,  5.71batch/s]
Test 5: 100%|██████████| 100/100 [00:02<00:00, 38.96batch/s]


[Epoch 6] Train Loss: 0.001785 - Test Loss: 0.000557 - Train Accuracy: 94.52% - Test Accuracy: 98.39%


Epoch 6: 100%|██████████| 600/600 [01:44<00:00,  5.72batch/s]
Test 6: 100%|██████████| 100/100 [00:02<00:00, 39.28batch/s]


[Epoch 7] Train Loss: 0.001657 - Test Loss: 0.000494 - Train Accuracy: 94.90% - Test Accuracy: 98.39%


Epoch 7: 100%|██████████| 600/600 [01:45<00:00,  5.70batch/s]
Test 7: 100%|██████████| 100/100 [00:02<00:00, 38.87batch/s]


[Epoch 8] Train Loss: 0.001564 - Test Loss: 0.000502 - Train Accuracy: 95.10% - Test Accuracy: 98.46%


Epoch 8: 100%|██████████| 600/600 [01:45<00:00,  5.70batch/s]
Test 8: 100%|██████████| 100/100 [00:02<00:00, 38.92batch/s]


[Epoch 9] Train Loss: 0.001505 - Test Loss: 0.000415 - Train Accuracy: 95.38% - Test Accuracy: 98.73%


Epoch 9: 100%|██████████| 600/600 [01:45<00:00,  5.68batch/s]
Test 9: 100%|██████████| 100/100 [00:02<00:00, 38.70batch/s]


[Epoch 10] Train Loss: 0.001423 - Test Loss: 0.000423 - Train Accuracy: 95.65% - Test Accuracy: 98.70%


Epoch 10: 100%|██████████| 600/600 [01:45<00:00,  5.70batch/s]
Test 10: 100%|██████████| 100/100 [00:02<00:00, 38.73batch/s]


[Epoch 11] Train Loss: 0.001356 - Test Loss: 0.000434 - Train Accuracy: 95.81% - Test Accuracy: 98.68%


Epoch 11: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 11: 100%|██████████| 100/100 [00:02<00:00, 38.98batch/s]


[Epoch 12] Train Loss: 0.001343 - Test Loss: 0.000386 - Train Accuracy: 95.85% - Test Accuracy: 98.69%


Epoch 12: 100%|██████████| 600/600 [01:45<00:00,  5.70batch/s]
Test 12: 100%|██████████| 100/100 [00:02<00:00, 38.77batch/s]


[Epoch 13] Train Loss: 0.001253 - Test Loss: 0.000398 - Train Accuracy: 96.15% - Test Accuracy: 98.65%


Epoch 13: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 13: 100%|██████████| 100/100 [00:02<00:00, 38.96batch/s]


[Epoch 14] Train Loss: 0.001224 - Test Loss: 0.000344 - Train Accuracy: 96.25% - Test Accuracy: 98.93%


Epoch 14: 100%|██████████| 600/600 [01:45<00:00,  5.68batch/s]
Test 14: 100%|██████████| 100/100 [00:02<00:00, 38.66batch/s]


[Epoch 15] Train Loss: 0.001221 - Test Loss: 0.000409 - Train Accuracy: 96.26% - Test Accuracy: 98.74%


Epoch 15: 100%|██████████| 600/600 [01:45<00:00,  5.70batch/s]
Test 15: 100%|██████████| 100/100 [00:02<00:00, 39.01batch/s]


[Epoch 16] Train Loss: 0.001149 - Test Loss: 0.000358 - Train Accuracy: 96.40% - Test Accuracy: 98.83%


Epoch 16: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 16: 100%|██████████| 100/100 [00:02<00:00, 38.52batch/s]


[Epoch 17] Train Loss: 0.001077 - Test Loss: 0.000353 - Train Accuracy: 96.61% - Test Accuracy: 98.94%


Epoch 17: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 17: 100%|██████████| 100/100 [00:02<00:00, 38.77batch/s]


[Epoch 18] Train Loss: 0.001066 - Test Loss: 0.000337 - Train Accuracy: 96.69% - Test Accuracy: 98.98%


Epoch 18: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 18: 100%|██████████| 100/100 [00:02<00:00, 39.45batch/s]


[Epoch 19] Train Loss: 0.001080 - Test Loss: 0.000331 - Train Accuracy: 96.67% - Test Accuracy: 99.01%


Epoch 19: 100%|██████████| 600/600 [01:45<00:00,  5.70batch/s]
Test 19: 100%|██████████| 100/100 [00:02<00:00, 39.01batch/s]


[Epoch 20] Train Loss: 0.001037 - Test Loss: 0.000371 - Train Accuracy: 96.84% - Test Accuracy: 98.92%


Epoch 20: 100%|██████████| 600/600 [01:45<00:00,  5.67batch/s]
Test 20: 100%|██████████| 100/100 [00:02<00:00, 39.37batch/s]


[Epoch 21] Train Loss: 0.001030 - Test Loss: 0.000316 - Train Accuracy: 96.78% - Test Accuracy: 99.04%


Epoch 21: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 21: 100%|██████████| 100/100 [00:02<00:00, 39.21batch/s]


[Epoch 22] Train Loss: 0.000994 - Test Loss: 0.000310 - Train Accuracy: 96.89% - Test Accuracy: 98.94%


Epoch 22: 100%|██████████| 600/600 [01:45<00:00,  5.70batch/s]
Test 22: 100%|██████████| 100/100 [00:02<00:00, 38.95batch/s]


[Epoch 23] Train Loss: 0.000940 - Test Loss: 0.000309 - Train Accuracy: 97.02% - Test Accuracy: 99.03%


Epoch 23: 100%|██████████| 600/600 [01:45<00:00,  5.70batch/s]
Test 23: 100%|██████████| 100/100 [00:02<00:00, 39.24batch/s]


[Epoch 24] Train Loss: 0.000962 - Test Loss: 0.000312 - Train Accuracy: 96.98% - Test Accuracy: 99.05%


Epoch 24: 100%|██████████| 600/600 [01:45<00:00,  5.68batch/s]
Test 24: 100%|██████████| 100/100 [00:02<00:00, 38.97batch/s]


[Epoch 25] Train Loss: 0.000988 - Test Loss: 0.000306 - Train Accuracy: 96.97% - Test Accuracy: 99.11%


Epoch 25: 100%|██████████| 600/600 [01:45<00:00,  5.68batch/s]
Test 25: 100%|██████████| 100/100 [00:02<00:00, 38.75batch/s]


[Epoch 26] Train Loss: 0.000920 - Test Loss: 0.000295 - Train Accuracy: 97.13% - Test Accuracy: 99.14%


Epoch 26: 100%|██████████| 600/600 [01:45<00:00,  5.68batch/s]
Test 26: 100%|██████████| 100/100 [00:02<00:00, 38.92batch/s]


[Epoch 27] Train Loss: 0.000898 - Test Loss: 0.000310 - Train Accuracy: 97.13% - Test Accuracy: 99.05%


Epoch 27: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 27: 100%|██████████| 100/100 [00:02<00:00, 38.22batch/s]


[Epoch 28] Train Loss: 0.000893 - Test Loss: 0.000286 - Train Accuracy: 97.16% - Test Accuracy: 99.13%


Epoch 28: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 28: 100%|██████████| 100/100 [00:02<00:00, 39.17batch/s]


[Epoch 29] Train Loss: 0.000887 - Test Loss: 0.000305 - Train Accuracy: 97.22% - Test Accuracy: 99.12%


Epoch 29: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 29: 100%|██████████| 100/100 [00:02<00:00, 39.22batch/s]


[Epoch 30] Train Loss: 0.000869 - Test Loss: 0.000295 - Train Accuracy: 97.21% - Test Accuracy: 99.12%


Epoch 30: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 30: 100%|██████████| 100/100 [00:02<00:00, 38.84batch/s]


[Epoch 31] Train Loss: 0.000843 - Test Loss: 0.000272 - Train Accuracy: 97.33% - Test Accuracy: 99.11%


Epoch 31: 100%|██████████| 600/600 [01:45<00:00,  5.67batch/s]
Test 31: 100%|██████████| 100/100 [00:02<00:00, 38.95batch/s]


[Epoch 32] Train Loss: 0.000860 - Test Loss: 0.000291 - Train Accuracy: 97.30% - Test Accuracy: 99.11%


Epoch 32: 100%|██████████| 600/600 [01:45<00:00,  5.68batch/s]
Test 32: 100%|██████████| 100/100 [00:02<00:00, 38.46batch/s]


[Epoch 33] Train Loss: 0.000827 - Test Loss: 0.000276 - Train Accuracy: 97.36% - Test Accuracy: 99.06%


Epoch 33: 100%|██████████| 600/600 [01:45<00:00,  5.68batch/s]
Test 33: 100%|██████████| 100/100 [00:02<00:00, 38.70batch/s]


[Epoch 34] Train Loss: 0.000839 - Test Loss: 0.000278 - Train Accuracy: 97.38% - Test Accuracy: 99.13%


Epoch 34: 100%|██████████| 600/600 [01:45<00:00,  5.70batch/s]
Test 34: 100%|██████████| 100/100 [00:02<00:00, 38.82batch/s]


[Epoch 35] Train Loss: 0.000816 - Test Loss: 0.000251 - Train Accuracy: 97.41% - Test Accuracy: 99.19%


Epoch 35: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 35: 100%|██████████| 100/100 [00:02<00:00, 38.88batch/s]


[Epoch 36] Train Loss: 0.000791 - Test Loss: 0.000251 - Train Accuracy: 97.51% - Test Accuracy: 99.17%


Epoch 36: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 36: 100%|██████████| 100/100 [00:02<00:00, 38.74batch/s]


[Epoch 37] Train Loss: 0.000787 - Test Loss: 0.000226 - Train Accuracy: 97.57% - Test Accuracy: 99.26%


Epoch 37: 100%|██████████| 600/600 [01:45<00:00,  5.67batch/s]
Test 37: 100%|██████████| 100/100 [00:02<00:00, 38.70batch/s]


[Epoch 38] Train Loss: 0.000782 - Test Loss: 0.000244 - Train Accuracy: 97.49% - Test Accuracy: 99.13%


Epoch 38: 100%|██████████| 600/600 [01:45<00:00,  5.70batch/s]
Test 38: 100%|██████████| 100/100 [00:02<00:00, 37.95batch/s]


[Epoch 39] Train Loss: 0.000741 - Test Loss: 0.000230 - Train Accuracy: 97.70% - Test Accuracy: 99.14%


Epoch 39: 100%|██████████| 600/600 [01:45<00:00,  5.66batch/s]
Test 39: 100%|██████████| 100/100 [00:02<00:00, 38.20batch/s]


[Epoch 40] Train Loss: 0.000770 - Test Loss: 0.000247 - Train Accuracy: 97.56% - Test Accuracy: 99.14%


Epoch 40: 100%|██████████| 600/600 [01:45<00:00,  5.67batch/s]
Test 40: 100%|██████████| 100/100 [00:02<00:00, 39.20batch/s]


[Epoch 41] Train Loss: 0.000761 - Test Loss: 0.000224 - Train Accuracy: 97.59% - Test Accuracy: 99.26%


Epoch 41: 100%|██████████| 600/600 [01:45<00:00,  5.70batch/s]
Test 41: 100%|██████████| 100/100 [00:02<00:00, 39.25batch/s]


[Epoch 42] Train Loss: 0.000744 - Test Loss: 0.000261 - Train Accuracy: 97.73% - Test Accuracy: 99.17%


Epoch 42: 100%|██████████| 600/600 [01:46<00:00,  5.66batch/s]
Test 42: 100%|██████████| 100/100 [00:02<00:00, 38.64batch/s]


[Epoch 43] Train Loss: 0.000733 - Test Loss: 0.000243 - Train Accuracy: 97.62% - Test Accuracy: 99.21%


Epoch 43: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 43: 100%|██████████| 100/100 [00:02<00:00, 38.94batch/s]


[Epoch 44] Train Loss: 0.000743 - Test Loss: 0.000258 - Train Accuracy: 97.67% - Test Accuracy: 99.17%


Epoch 44: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 44: 100%|██████████| 100/100 [00:02<00:00, 38.47batch/s]


[Epoch 45] Train Loss: 0.000712 - Test Loss: 0.000251 - Train Accuracy: 97.72% - Test Accuracy: 99.13%


Epoch 45: 100%|██████████| 600/600 [01:45<00:00,  5.70batch/s]
Test 45: 100%|██████████| 100/100 [00:02<00:00, 38.17batch/s]


[Epoch 46] Train Loss: 0.000713 - Test Loss: 0.000314 - Train Accuracy: 97.69% - Test Accuracy: 99.08%


Epoch 46: 100%|██████████| 600/600 [01:45<00:00,  5.70batch/s]
Test 46: 100%|██████████| 100/100 [00:02<00:00, 39.16batch/s]


[Epoch 47] Train Loss: 0.000741 - Test Loss: 0.000260 - Train Accuracy: 97.65% - Test Accuracy: 99.16%


Epoch 47: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 47: 100%|██████████| 100/100 [00:02<00:00, 38.92batch/s]


[Epoch 48] Train Loss: 0.000693 - Test Loss: 0.000220 - Train Accuracy: 97.79% - Test Accuracy: 99.25%


Epoch 48: 100%|██████████| 600/600 [01:45<00:00,  5.67batch/s]
Test 48: 100%|██████████| 100/100 [00:02<00:00, 38.47batch/s]


[Epoch 49] Train Loss: 0.000691 - Test Loss: 0.000255 - Train Accuracy: 97.77% - Test Accuracy: 99.21%


Epoch 49: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 49: 100%|██████████| 100/100 [00:02<00:00, 38.97batch/s]


[Epoch 50] Train Loss: 0.000690 - Test Loss: 0.000243 - Train Accuracy: 97.80% - Test Accuracy: 99.28%


Epoch 50: 100%|██████████| 600/600 [01:45<00:00,  5.68batch/s]
Test 50: 100%|██████████| 100/100 [00:02<00:00, 39.61batch/s]


[Epoch 51] Train Loss: 0.000697 - Test Loss: 0.000221 - Train Accuracy: 97.77% - Test Accuracy: 99.35%


Epoch 51: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 51: 100%|██████████| 100/100 [00:02<00:00, 38.81batch/s]


[Epoch 52] Train Loss: 0.000670 - Test Loss: 0.000242 - Train Accuracy: 97.92% - Test Accuracy: 99.27%


Epoch 52: 100%|██████████| 600/600 [01:45<00:00,  5.70batch/s]
Test 52: 100%|██████████| 100/100 [00:02<00:00, 38.98batch/s]


[Epoch 53] Train Loss: 0.000674 - Test Loss: 0.000255 - Train Accuracy: 97.86% - Test Accuracy: 99.22%


Epoch 53: 100%|██████████| 600/600 [01:45<00:00,  5.68batch/s]
Test 53: 100%|██████████| 100/100 [00:02<00:00, 38.94batch/s]


[Epoch 54] Train Loss: 0.000656 - Test Loss: 0.000218 - Train Accuracy: 97.95% - Test Accuracy: 99.29%


Epoch 54: 100%|██████████| 600/600 [01:45<00:00,  5.71batch/s]
Test 54: 100%|██████████| 100/100 [00:02<00:00, 38.97batch/s]


[Epoch 55] Train Loss: 0.000659 - Test Loss: 0.000229 - Train Accuracy: 97.87% - Test Accuracy: 99.22%


Epoch 55: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 55: 100%|██████████| 100/100 [00:02<00:00, 38.75batch/s]


[Epoch 56] Train Loss: 0.000663 - Test Loss: 0.000191 - Train Accuracy: 97.92% - Test Accuracy: 99.37%


Epoch 56: 100%|██████████| 600/600 [01:45<00:00,  5.68batch/s]
Test 56: 100%|██████████| 100/100 [00:02<00:00, 38.97batch/s]


[Epoch 57] Train Loss: 0.000658 - Test Loss: 0.000227 - Train Accuracy: 97.94% - Test Accuracy: 99.33%


Epoch 57: 100%|██████████| 600/600 [01:45<00:00,  5.67batch/s]
Test 57: 100%|██████████| 100/100 [00:02<00:00, 38.65batch/s]


[Epoch 58] Train Loss: 0.000639 - Test Loss: 0.000262 - Train Accuracy: 97.95% - Test Accuracy: 99.23%


Epoch 58: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 58: 100%|██████████| 100/100 [00:02<00:00, 38.92batch/s]


[Epoch 59] Train Loss: 0.000662 - Test Loss: 0.000202 - Train Accuracy: 97.90% - Test Accuracy: 99.25%


Epoch 59: 100%|██████████| 600/600 [01:45<00:00,  5.67batch/s]
Test 59: 100%|██████████| 100/100 [00:02<00:00, 38.70batch/s]


[Epoch 60] Train Loss: 0.000624 - Test Loss: 0.000235 - Train Accuracy: 98.03% - Test Accuracy: 99.30%


Epoch 60: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 60: 100%|██████████| 100/100 [00:02<00:00, 38.77batch/s]


[Epoch 61] Train Loss: 0.000612 - Test Loss: 0.000234 - Train Accuracy: 97.94% - Test Accuracy: 99.27%


Epoch 61: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 61: 100%|██████████| 100/100 [00:02<00:00, 38.58batch/s]


[Epoch 62] Train Loss: 0.000644 - Test Loss: 0.000205 - Train Accuracy: 97.97% - Test Accuracy: 99.32%


Epoch 62: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 62: 100%|██████████| 100/100 [00:02<00:00, 38.96batch/s]


[Epoch 63] Train Loss: 0.000638 - Test Loss: 0.000208 - Train Accuracy: 97.95% - Test Accuracy: 99.42%


Epoch 63: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 63: 100%|██████████| 100/100 [00:02<00:00, 38.71batch/s]


[Epoch 64] Train Loss: 0.000589 - Test Loss: 0.000188 - Train Accuracy: 98.10% - Test Accuracy: 99.36%


Epoch 64: 100%|██████████| 600/600 [01:45<00:00,  5.67batch/s]
Test 64: 100%|██████████| 100/100 [00:02<00:00, 38.46batch/s]


[Epoch 65] Train Loss: 0.000652 - Test Loss: 0.000233 - Train Accuracy: 97.96% - Test Accuracy: 99.28%


Epoch 65: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 65: 100%|██████████| 100/100 [00:02<00:00, 39.12batch/s]


[Epoch 66] Train Loss: 0.000620 - Test Loss: 0.000192 - Train Accuracy: 98.01% - Test Accuracy: 99.47%


Epoch 66: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 66: 100%|██████████| 100/100 [00:02<00:00, 38.91batch/s]


[Epoch 67] Train Loss: 0.000594 - Test Loss: 0.000251 - Train Accuracy: 98.15% - Test Accuracy: 99.23%


Epoch 67: 100%|██████████| 600/600 [01:45<00:00,  5.68batch/s]
Test 67: 100%|██████████| 100/100 [00:02<00:00, 38.97batch/s]


[Epoch 68] Train Loss: 0.000593 - Test Loss: 0.000199 - Train Accuracy: 98.06% - Test Accuracy: 99.36%


Epoch 68: 100%|██████████| 600/600 [01:45<00:00,  5.70batch/s]
Test 68: 100%|██████████| 100/100 [00:02<00:00, 38.17batch/s]


[Epoch 69] Train Loss: 0.000634 - Test Loss: 0.000216 - Train Accuracy: 98.05% - Test Accuracy: 99.30%


Epoch 69: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 69: 100%|██████████| 100/100 [00:02<00:00, 38.52batch/s]


[Epoch 70] Train Loss: 0.000589 - Test Loss: 0.000218 - Train Accuracy: 98.13% - Test Accuracy: 99.27%


Epoch 70: 100%|██████████| 600/600 [01:45<00:00,  5.68batch/s]
Test 70: 100%|██████████| 100/100 [00:02<00:00, 38.93batch/s]


[Epoch 71] Train Loss: 0.000566 - Test Loss: 0.000238 - Train Accuracy: 98.15% - Test Accuracy: 99.32%


Epoch 71: 100%|██████████| 600/600 [01:45<00:00,  5.68batch/s]
Test 71: 100%|██████████| 100/100 [00:02<00:00, 39.21batch/s]


[Epoch 72] Train Loss: 0.000584 - Test Loss: 0.000233 - Train Accuracy: 98.14% - Test Accuracy: 99.37%


Epoch 72: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 72: 100%|██████████| 100/100 [00:02<00:00, 39.00batch/s]


[Epoch 73] Train Loss: 0.000568 - Test Loss: 0.000238 - Train Accuracy: 98.13% - Test Accuracy: 99.22%


Epoch 73: 100%|██████████| 600/600 [01:45<00:00,  5.68batch/s]
Test 73: 100%|██████████| 100/100 [00:02<00:00, 39.46batch/s]


[Epoch 74] Train Loss: 0.000553 - Test Loss: 0.000222 - Train Accuracy: 98.24% - Test Accuracy: 99.28%


Epoch 74: 100%|██████████| 600/600 [01:45<00:00,  5.68batch/s]
Test 74: 100%|██████████| 100/100 [00:02<00:00, 38.49batch/s]


[Epoch 75] Train Loss: 0.000559 - Test Loss: 0.000221 - Train Accuracy: 98.22% - Test Accuracy: 99.27%


Epoch 75: 100%|██████████| 600/600 [01:46<00:00,  5.66batch/s]
Test 75: 100%|██████████| 100/100 [00:02<00:00, 38.76batch/s]


[Epoch 76] Train Loss: 0.000562 - Test Loss: 0.000194 - Train Accuracy: 98.22% - Test Accuracy: 99.39%


Epoch 76: 100%|██████████| 600/600 [01:45<00:00,  5.68batch/s]
Test 76: 100%|██████████| 100/100 [00:02<00:00, 38.77batch/s]


[Epoch 77] Train Loss: 0.000562 - Test Loss: 0.000213 - Train Accuracy: 98.22% - Test Accuracy: 99.33%


Epoch 77: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 77: 100%|██████████| 100/100 [00:02<00:00, 38.76batch/s]


[Epoch 78] Train Loss: 0.000522 - Test Loss: 0.000210 - Train Accuracy: 98.29% - Test Accuracy: 99.35%


Epoch 78: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 78: 100%|██████████| 100/100 [00:02<00:00, 38.71batch/s]


[Epoch 79] Train Loss: 0.000556 - Test Loss: 0.000216 - Train Accuracy: 98.18% - Test Accuracy: 99.29%


Epoch 79: 100%|██████████| 600/600 [01:45<00:00,  5.70batch/s]
Test 79: 100%|██████████| 100/100 [00:02<00:00, 39.23batch/s]


[Epoch 80] Train Loss: 0.000552 - Test Loss: 0.000209 - Train Accuracy: 98.23% - Test Accuracy: 99.33%


Epoch 80: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 80: 100%|██████████| 100/100 [00:02<00:00, 39.52batch/s]


[Epoch 81] Train Loss: 0.000545 - Test Loss: 0.000200 - Train Accuracy: 98.22% - Test Accuracy: 99.43%


Epoch 81: 100%|██████████| 600/600 [01:45<00:00,  5.67batch/s]
Test 81: 100%|██████████| 100/100 [00:02<00:00, 38.73batch/s]


[Epoch 82] Train Loss: 0.000536 - Test Loss: 0.000233 - Train Accuracy: 98.26% - Test Accuracy: 99.31%


Epoch 82: 100%|██████████| 600/600 [01:45<00:00,  5.70batch/s]
Test 82: 100%|██████████| 100/100 [00:02<00:00, 38.94batch/s]


[Epoch 83] Train Loss: 0.000537 - Test Loss: 0.000218 - Train Accuracy: 98.30% - Test Accuracy: 99.27%


Epoch 83: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 83: 100%|██████████| 100/100 [00:02<00:00, 38.95batch/s]


[Epoch 84] Train Loss: 0.000535 - Test Loss: 0.000206 - Train Accuracy: 98.25% - Test Accuracy: 99.30%


Epoch 84: 100%|██████████| 600/600 [01:44<00:00,  5.72batch/s]
Test 84: 100%|██████████| 100/100 [00:02<00:00, 39.08batch/s]


[Epoch 85] Train Loss: 0.000542 - Test Loss: 0.000200 - Train Accuracy: 98.31% - Test Accuracy: 99.34%


Epoch 85: 100%|██████████| 600/600 [01:44<00:00,  5.73batch/s]
Test 85: 100%|██████████| 100/100 [00:02<00:00, 38.46batch/s]


[Epoch 86] Train Loss: 0.000536 - Test Loss: 0.000216 - Train Accuracy: 98.28% - Test Accuracy: 99.23%


Epoch 86: 100%|██████████| 600/600 [01:45<00:00,  5.71batch/s]
Test 86: 100%|██████████| 100/100 [00:02<00:00, 38.69batch/s]


[Epoch 87] Train Loss: 0.000521 - Test Loss: 0.000200 - Train Accuracy: 98.30% - Test Accuracy: 99.36%


Epoch 87: 100%|██████████| 600/600 [01:45<00:00,  5.71batch/s]
Test 87: 100%|██████████| 100/100 [00:02<00:00, 38.94batch/s]


[Epoch 88] Train Loss: 0.000544 - Test Loss: 0.000236 - Train Accuracy: 98.19% - Test Accuracy: 99.22%


Epoch 88: 100%|██████████| 600/600 [01:44<00:00,  5.73batch/s]
Test 88: 100%|██████████| 100/100 [00:02<00:00, 38.51batch/s]


[Epoch 89] Train Loss: 0.000541 - Test Loss: 0.000184 - Train Accuracy: 98.31% - Test Accuracy: 99.35%


Epoch 89: 100%|██████████| 600/600 [01:45<00:00,  5.71batch/s]
Test 89: 100%|██████████| 100/100 [00:02<00:00, 38.96batch/s]


[Epoch 90] Train Loss: 0.000523 - Test Loss: 0.000178 - Train Accuracy: 98.29% - Test Accuracy: 99.39%


Epoch 90: 100%|██████████| 600/600 [01:44<00:00,  5.72batch/s]
Test 90: 100%|██████████| 100/100 [00:02<00:00, 39.41batch/s]


[Epoch 91] Train Loss: 0.000529 - Test Loss: 0.000183 - Train Accuracy: 98.28% - Test Accuracy: 99.41%


Epoch 91: 100%|██████████| 600/600 [01:45<00:00,  5.71batch/s]
Test 91: 100%|██████████| 100/100 [00:02<00:00, 39.23batch/s]


[Epoch 92] Train Loss: 0.000504 - Test Loss: 0.000218 - Train Accuracy: 98.35% - Test Accuracy: 99.26%


Epoch 92: 100%|██████████| 600/600 [01:45<00:00,  5.69batch/s]
Test 92: 100%|██████████| 100/100 [00:02<00:00, 39.21batch/s]


[Epoch 93] Train Loss: 0.000513 - Test Loss: 0.000196 - Train Accuracy: 98.36% - Test Accuracy: 99.33%


Epoch 93: 100%|██████████| 600/600 [01:44<00:00,  5.72batch/s]
Test 93: 100%|██████████| 100/100 [00:02<00:00, 38.95batch/s]


[Epoch 94] Train Loss: 0.000526 - Test Loss: 0.000186 - Train Accuracy: 98.31% - Test Accuracy: 99.35%


Epoch 94: 100%|██████████| 600/600 [01:44<00:00,  5.74batch/s]
Test 94: 100%|██████████| 100/100 [00:02<00:00, 38.66batch/s]


[Epoch 95] Train Loss: 0.000499 - Test Loss: 0.000225 - Train Accuracy: 98.41% - Test Accuracy: 99.36%


Epoch 95: 100%|██████████| 600/600 [01:44<00:00,  5.73batch/s]
Test 95: 100%|██████████| 100/100 [00:02<00:00, 39.34batch/s]


[Epoch 96] Train Loss: 0.000523 - Test Loss: 0.000202 - Train Accuracy: 98.31% - Test Accuracy: 99.38%


Epoch 96: 100%|██████████| 600/600 [01:44<00:00,  5.73batch/s]
Test 96: 100%|██████████| 100/100 [00:02<00:00, 38.72batch/s]


[Epoch 97] Train Loss: 0.000511 - Test Loss: 0.000181 - Train Accuracy: 98.37% - Test Accuracy: 99.44%


Epoch 97: 100%|██████████| 600/600 [01:44<00:00,  5.73batch/s]
Test 97: 100%|██████████| 100/100 [00:02<00:00, 39.22batch/s]


[Epoch 98] Train Loss: 0.000511 - Test Loss: 0.000179 - Train Accuracy: 98.37% - Test Accuracy: 99.46%


Epoch 98: 100%|██████████| 600/600 [01:45<00:00,  5.70batch/s]
Test 98: 100%|██████████| 100/100 [00:02<00:00, 38.96batch/s]


[Epoch 99] Train Loss: 0.000520 - Test Loss: 0.000197 - Train Accuracy: 98.36% - Test Accuracy: 99.40%


Epoch 99: 100%|██████████| 600/600 [01:44<00:00,  5.73batch/s]
Test 99: 100%|██████████| 100/100 [00:02<00:00, 38.97batch/s]

[Epoch 100] Train Loss: 0.000499 - Test Loss: 0.000230 - Train Accuracy: 98.38% - Test Accuracy: 99.31%

BEST TEST ACCURACY:  99.47  in epoch  65





In [8]:
import torch 
print("Torch:", torch.__version__)

Torch: 2.10.0.dev20251124+cu130
