In [6]:
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

In [2]:
import tensorflow

(x_train, y_train), (x_test, y_test) = tensorflow.keras.datasets.mnist.load_data()
x_train = torch.tensor(x_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.uint8)
x_test = torch.tensor(x_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.uint8)

2025-01-22 07:09:28.778886: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-22 07:09:28.880290: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1737509968.918919    6301 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1737509968.930178    6301 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-22 07:09:29.023216: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [18]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.ff1 = nn.Linear(784, 100)
        self.ff2 = nn.Linear(100, 100)
        self.ff3 = nn.Linear(100, 100)
        self.output = nn.Linear(100, 10)
        self.activation = nn.ReLU()
        self.loss = nn.CrossEntropyLoss()
        self.trainloader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(x_train, y_train),
            batch_size=1000,
            shuffle=True,
        )
        self.validloader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(x_test, y_test),
            batch_size=1000,
            shuffle=True,
        )
        self.optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
        self.lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, mode="min", factor=0.5, patience=15, verbose=True
        )

    def forward(self, x):
        x = self.ff1(x)
        x = self.activation(x)
        x = self.ff2(x)
        x = self.activation(x)
        x = self.ff3(x)
        x = self.activation(x)
        x = self.output(x)
        return x

    def train_loop(self, run_name, epochs=100):
        previous_lr = self.optimizer.param_groups[0]["lr"]
        writer = SummaryWriter(log_dir=f"runs/{run_name}")
        for i in range(epochs):
            t = 0
            self.train()
            train_progress = tqdm(
                enumerate(self.trainloader),
                desc=f"Epoch {i} - Training",
                total=len(self.trainloader),
            )
            for j, k in train_progress:
                image, label = k
                image, label = image.to("cuda"), label.to("cuda")
                self.optimizer.zero_grad()
                output = self(image.view(-1, 784))
                l = self.loss(output, label)
                l.backward()
                self.optimizer.step()
                t += l.item()
                train_progress.set_postfix(loss=t / (j + 1))
            t /= len(self.trainloader)
            val_loss = self.val_loop(i)
            self.lr_scheduler.step(val_loss)
            writer.add_scalars("Losses", {"Train": t, "Validation": val_loss}, i)
            writer.flush()
            current_lr = self.optimizer.param_groups[0]["lr"]
            if current_lr != previous_lr:
                print(f"Learning Rate changed to {current_lr}")
                previous_lr = current_lr
        writer.close()

    def val_loop(self, i):
        t = 0
        self.eval()
        val_progress = tqdm(
            enumerate(self.validloader),
            desc=f"Epoch {i} - Validation",
            total=len(self.validloader),
        )
        correct = 0
        for j, k in val_progress:
            image, label = k
            image, label = image.to("cuda"), label.to("cuda")
            output = self(image.view(-1, 784))
            l = self.loss(output, label)
            pred = output.softmax(dim=1).argmax(dim=1)
            correct += (pred == label).sum().item()
            t += l.item()
            val_progress.set_postfix(loss=t / (j + 1))
            val_progress.set_postfix(accuracy=correct / ((j + 1) * 1000))
        t /= len(self.validloader)
        return t

In [19]:
model = Model().to("cuda")
model.train_loop("No-Regularization")

Epoch 0 - Training: 100%|██████████| 60/60 [00:00<00:00, 152.68it/s, loss=0.553]
Epoch 0 - Validation: 100%|██████████| 10/10 [00:00<00:00, 145.38it/s, accuracy=0.943]
Epoch 1 - Training: 100%|██████████| 60/60 [00:00<00:00, 123.40it/s, loss=0.158]
Epoch 1 - Validation: 100%|██████████| 10/10 [00:00<00:00, 172.04it/s, accuracy=0.957]
Epoch 2 - Training: 100%|██████████| 60/60 [00:00<00:00, 147.36it/s, loss=0.113]
Epoch 2 - Validation: 100%|██████████| 10/10 [00:00<00:00, 179.76it/s, accuracy=0.964]
Epoch 3 - Training: 100%|██████████| 60/60 [00:00<00:00, 123.70it/s, loss=0.086] 
Epoch 3 - Validation: 100%|██████████| 10/10 [00:00<00:00, 148.30it/s, accuracy=0.967]
Epoch 4 - Training: 100%|██████████| 60/60 [00:00<00:00, 168.44it/s, loss=0.0658]
Epoch 4 - Validation: 100%|██████████| 10/10 [00:00<00:00, 170.42it/s, accuracy=0.968]
Epoch 5 - Training: 100%|██████████| 60/60 [00:00<00:00, 166.16it/s, loss=0.0532]
Epoch 5 - Validation: 100%|██████████| 10/10 [00:00<00:00, 54.23it/s, accura

Learning Rate changed to 0.0005


Epoch 23 - Training: 100%|██████████| 60/60 [00:00<00:00, 114.35it/s, loss=0.00294]
Epoch 23 - Validation: 100%|██████████| 10/10 [00:00<00:00, 159.18it/s, accuracy=0.977]
Epoch 24 - Training: 100%|██████████| 60/60 [00:00<00:00, 161.77it/s, loss=0.000865]
Epoch 24 - Validation: 100%|██████████| 10/10 [00:00<00:00, 155.23it/s, accuracy=0.978]
Epoch 25 - Training: 100%|██████████| 60/60 [00:00<00:00, 118.28it/s, loss=0.000383]
Epoch 25 - Validation: 100%|██████████| 10/10 [00:00<00:00, 157.31it/s, accuracy=0.978]
Epoch 26 - Training: 100%|██████████| 60/60 [00:00<00:00, 150.67it/s, loss=0.000302]
Epoch 26 - Validation: 100%|██████████| 10/10 [00:00<00:00, 168.85it/s, accuracy=0.978]
Epoch 27 - Training: 100%|██████████| 60/60 [00:00<00:00, 156.26it/s, loss=0.000265]
Epoch 27 - Validation: 100%|██████████| 10/10 [00:00<00:00, 151.23it/s, accuracy=0.978]
Epoch 28 - Training: 100%|██████████| 60/60 [00:00<00:00, 125.45it/s, loss=0.000234]
Epoch 28 - Validation: 100%|██████████| 10/10 [00:0

Learning Rate changed to 0.00025


Epoch 39 - Training: 100%|██████████| 60/60 [00:00<00:00, 169.00it/s, loss=0.000111]
Epoch 39 - Validation: 100%|██████████| 10/10 [00:00<00:00, 127.97it/s, accuracy=0.979]
Epoch 40 - Training: 100%|██████████| 60/60 [00:00<00:00, 130.58it/s, loss=0.000108]
Epoch 40 - Validation: 100%|██████████| 10/10 [00:00<00:00, 142.99it/s, accuracy=0.979]
Epoch 41 - Training: 100%|██████████| 60/60 [00:00<00:00, 169.22it/s, loss=0.000105]
Epoch 41 - Validation: 100%|██████████| 10/10 [00:00<00:00, 152.74it/s, accuracy=0.979]
Epoch 42 - Training: 100%|██████████| 60/60 [00:00<00:00, 170.26it/s, loss=0.000103]
Epoch 42 - Validation: 100%|██████████| 10/10 [00:00<00:00, 139.74it/s, accuracy=0.979]
Epoch 43 - Training: 100%|██████████| 60/60 [00:00<00:00, 128.91it/s, loss=0.0001]  
Epoch 43 - Validation: 100%|██████████| 10/10 [00:00<00:00, 125.26it/s, accuracy=0.979]
Epoch 44 - Training: 100%|██████████| 60/60 [00:00<00:00, 167.19it/s, loss=9.79e-5] 
Epoch 44 - Validation: 100%|██████████| 10/10 [00:

Learning Rate changed to 0.000125


Epoch 55 - Training: 100%|██████████| 60/60 [00:00<00:00, 127.41it/s, loss=7.34e-5]
Epoch 55 - Validation: 100%|██████████| 10/10 [00:00<00:00, 144.08it/s, accuracy=0.979]
Epoch 56 - Training: 100%|██████████| 60/60 [00:00<00:00, 160.46it/s, loss=7.25e-5]
Epoch 56 - Validation: 100%|██████████| 10/10 [00:00<00:00, 180.67it/s, accuracy=0.979]
Epoch 57 - Training: 100%|██████████| 60/60 [00:00<00:00, 166.07it/s, loss=7.15e-5]
Epoch 57 - Validation: 100%|██████████| 10/10 [00:00<00:00, 54.61it/s, accuracy=0.979]
Epoch 58 - Training: 100%|██████████| 60/60 [00:00<00:00, 155.11it/s, loss=7.06e-5]
Epoch 58 - Validation: 100%|██████████| 10/10 [00:00<00:00, 183.41it/s, accuracy=0.979]
Epoch 59 - Training: 100%|██████████| 60/60 [00:00<00:00, 158.31it/s, loss=6.97e-5]
Epoch 59 - Validation: 100%|██████████| 10/10 [00:00<00:00, 173.17it/s, accuracy=0.979]
Epoch 60 - Training: 100%|██████████| 60/60 [00:00<00:00, 120.02it/s, loss=6.87e-5]
Epoch 60 - Validation: 100%|██████████| 10/10 [00:00<00:0

Learning Rate changed to 6.25e-05


Epoch 71 - Training: 100%|██████████| 60/60 [00:00<00:00, 148.69it/s, loss=5.81e-5]
Epoch 71 - Validation: 100%|██████████| 10/10 [00:00<00:00, 151.17it/s, accuracy=0.979]
Epoch 72 - Training: 100%|██████████| 60/60 [00:00<00:00, 114.17it/s, loss=5.76e-5]
Epoch 72 - Validation: 100%|██████████| 10/10 [00:00<00:00, 167.05it/s, accuracy=0.979]
Epoch 73 - Training: 100%|██████████| 60/60 [00:00<00:00, 164.01it/s, loss=5.71e-5]
Epoch 73 - Validation: 100%|██████████| 10/10 [00:00<00:00, 155.88it/s, accuracy=0.979]
Epoch 74 - Training: 100%|██████████| 60/60 [00:00<00:00, 166.14it/s, loss=5.66e-5]
Epoch 74 - Validation: 100%|██████████| 10/10 [00:00<00:00, 172.39it/s, accuracy=0.979]
Epoch 75 - Training: 100%|██████████| 60/60 [00:00<00:00, 118.99it/s, loss=5.61e-5]
Epoch 75 - Validation: 100%|██████████| 10/10 [00:00<00:00, 181.00it/s, accuracy=0.979]
Epoch 76 - Training: 100%|██████████| 60/60 [00:00<00:00, 160.54it/s, loss=5.56e-5]
Epoch 76 - Validation: 100%|██████████| 10/10 [00:00<00:

Learning Rate changed to 3.125e-05


Epoch 87 - Training: 100%|██████████| 60/60 [00:00<00:00, 120.95it/s, loss=4.97e-5]
Epoch 87 - Validation: 100%|██████████| 10/10 [00:00<00:00, 164.07it/s, accuracy=0.979]
Epoch 88 - Training: 100%|██████████| 60/60 [00:00<00:00, 166.68it/s, loss=4.95e-5]
Epoch 88 - Validation: 100%|██████████| 10/10 [00:00<00:00, 161.31it/s, accuracy=0.979]
Epoch 89 - Training: 100%|██████████| 60/60 [00:00<00:00, 156.01it/s, loss=4.92e-5]
Epoch 89 - Validation: 100%|██████████| 10/10 [00:00<00:00, 147.28it/s, accuracy=0.979]
Epoch 90 - Training: 100%|██████████| 60/60 [00:00<00:00, 128.79it/s, loss=4.89e-5]
Epoch 90 - Validation: 100%|██████████| 10/10 [00:00<00:00, 157.40it/s, accuracy=0.979]
Epoch 91 - Training: 100%|██████████| 60/60 [00:00<00:00, 157.82it/s, loss=4.86e-5]
Epoch 91 - Validation: 100%|██████████| 10/10 [00:00<00:00, 156.13it/s, accuracy=0.979]
Epoch 92 - Training: 100%|██████████| 60/60 [00:00<00:00, 125.34it/s, loss=4.83e-5]
Epoch 92 - Validation: 100%|██████████| 10/10 [00:00<00: