In [1]:
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

2025-01-22 21:33:22.679363: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-22 21:33:22.769173: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1737561802.808723    6029 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1737561802.820075    6029 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-22 21:33:22.915395: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [2]:
import tensorflow

(x_train, y_train), (x_test, y_test) = tensorflow.keras.datasets.mnist.load_data()
x_train = torch.tensor(x_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.uint8)
x_test = torch.tensor(x_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.uint8)
x_train = x_train
x_test = x_test

In [3]:
class Model(nn.Module):
    def __init__(self, patience=4, dropout=0):
        super(Model, self).__init__()
        self.ff1 = nn.Linear(784, 100)
        self.ff2 = nn.Linear(100, 100)
        self.ff3 = nn.Linear(100, 100)
        self.output = nn.Linear(100, 10)
        self.activation = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.loss = nn.CrossEntropyLoss()
        self.trainloader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(x_train, y_train),
            batch_size=1000,
            shuffle=True,
        )
        self.validloader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(x_test, y_test),
            batch_size=1000,
            shuffle=True,
        )
        self.optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
        self.lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, mode="min", factor=0.5, patience=15
        )
        self.early_stopping_patience = patience
        self.early_stopping_counter = 0
        self.early_stopping_best_loss = float("inf")

    def forward(self, x):
        x = self.ff1(x)
        x = self.activation(x)
        x = self.dropout(x)
        x = self.ff2(x)
        x = self.activation(x)
        x = self.dropout(x)
        x = self.ff3(x)
        x = self.activation(x)
        x = self.dropout(x)
        x = self.output(x)
        return x

    def early_stopping(self, val_loss):
        if val_loss < self.early_stopping_best_loss:
            self.early_stopping_best_loss = val_loss
            self.early_stopping_counter = 0
        else:
            self.early_stopping_counter += 1
            if self.early_stopping_counter >= self.early_stopping_patience:
                return True
        return False



    def train_loop(self, run_name, epochs=100, l1=False, l1_lambda=0.0001, early_stopping=False):
        previous_lr = self.optimizer.param_groups[0]["lr"]
        writer = SummaryWriter(log_dir=f"runs/{run_name}")
        for i in range(epochs):
            t = 0
            self.train()
            train_progress = tqdm(
                enumerate(self.trainloader),
                desc=f"Epoch {i} - Training",
                total=len(self.trainloader),
            )
            for j, k in train_progress:
                image, label = k
                image, label = image.to("cuda"), label.to("cuda")
                self.optimizer.zero_grad()
                output = self(image.view(-1, 784))
                l = self.loss(output, label)
                if l1:
                    l1_norm = sum(p.abs().sum() for p in self.parameters())
                    l += l + l1_lambda * l1_norm
                l.backward()
                self.optimizer.step()
                t += l.item()
                train_progress.set_postfix(loss=t / (j + 1))
            t /= len(self.trainloader)
            val_loss = self.val_loop(i)
            if early_stopping and self.early_stopping(val_loss):
                print("No improvement, early stopping")
                break
            self.lr_scheduler.step(val_loss)
            writer.add_scalars("Losses", {"Train": t, "Validation": val_loss}, i)
            writer.flush()
            current_lr = self.optimizer.param_groups[0]["lr"]
            if current_lr != previous_lr:
                print(f"Learning Rate changed to {current_lr}")
                previous_lr = current_lr
        writer.close()

    def val_loop(self, i):
        t = 0
        self.eval()
        val_progress = tqdm(
            enumerate(self.validloader),
            desc=f"Epoch {i} - Validation",
            total=len(self.validloader),
        )
        correct = 0
        for j, k in val_progress:
            image, label = k
            image, label = image.to("cuda"), label.to("cuda")
            output = self(image.view(-1, 784))
            l = self.loss(output, label)
            pred = output.softmax(dim=1).argmax(dim=1)
            correct += (pred == label).sum().item()
            t += l.item()
            val_progress.set_postfix(
                loss=t / (j + 1), accuracy=correct / ((j + 1) * 1000)
            )
        t /= len(self.validloader)
        return t

In [4]:
model = Model().to("cuda")
model.train_loop("No-Regularization", 50)

Epoch 0 - Training: 100%|██████████| 60/60 [00:00<00:00, 100.03it/s, loss=0.881]
Epoch 0 - Validation: 100%|██████████| 10/10 [00:00<00:00, 130.87it/s, accuracy=0.919, loss=0.269]
Epoch 1 - Training: 100%|██████████| 60/60 [00:00<00:00, 166.16it/s, loss=0.224]
Epoch 1 - Validation: 100%|██████████| 10/10 [00:00<00:00, 184.48it/s, accuracy=0.946, loss=0.185]
Epoch 2 - Training: 100%|██████████| 60/60 [00:00<00:00, 132.15it/s, loss=0.159]
Epoch 2 - Validation: 100%|██████████| 10/10 [00:00<00:00, 182.90it/s, accuracy=0.955, loss=0.155]
Epoch 3 - Training: 100%|██████████| 60/60 [00:00<00:00, 163.48it/s, loss=0.127]
Epoch 3 - Validation: 100%|██████████| 10/10 [00:00<00:00, 202.48it/s, accuracy=0.959, loss=0.138]
Epoch 4 - Training: 100%|██████████| 60/60 [00:00<00:00, 169.87it/s, loss=0.106]
Epoch 4 - Validation: 100%|██████████| 10/10 [00:00<00:00, 61.18it/s, accuracy=0.963, loss=0.122]
Epoch 5 - Training: 100%|██████████| 60/60 [00:00<00:00, 162.21it/s, loss=0.0843]
Epoch 5 - Validatio

Learning Rate changed to 0.0005


Epoch 28 - Training: 100%|██████████| 60/60 [00:00<00:00, 115.11it/s, loss=0.00884]
Epoch 28 - Validation: 100%|██████████| 10/10 [00:00<00:00, 123.50it/s, accuracy=0.972, loss=0.143]
Epoch 29 - Training: 100%|██████████| 60/60 [00:00<00:00, 124.00it/s, loss=0.00222]
Epoch 29 - Validation: 100%|██████████| 10/10 [00:00<00:00, 170.76it/s, accuracy=0.975, loss=0.135]
Epoch 30 - Training: 100%|██████████| 60/60 [00:00<00:00, 114.56it/s, loss=0.00118]
Epoch 30 - Validation: 100%|██████████| 10/10 [00:00<00:00, 177.26it/s, accuracy=0.975, loss=0.135]
Epoch 31 - Training: 100%|██████████| 60/60 [00:00<00:00, 159.07it/s, loss=0.000809]
Epoch 31 - Validation: 100%|██████████| 10/10 [00:00<00:00, 199.45it/s, accuracy=0.976, loss=0.137]
Epoch 32 - Training: 100%|██████████| 60/60 [00:00<00:00, 128.38it/s, loss=0.000642]
Epoch 32 - Validation: 100%|██████████| 10/10 [00:00<00:00, 138.08it/s, accuracy=0.976, loss=0.138]
Epoch 33 - Training: 100%|██████████| 60/60 [00:00<00:00, 161.67it/s, loss=0.0

Learning Rate changed to 0.00025


Epoch 44 - Training: 100%|██████████| 60/60 [00:00<00:00, 126.32it/s, loss=0.000235]
Epoch 44 - Validation: 100%|██████████| 10/10 [00:00<00:00, 197.11it/s, accuracy=0.975, loss=0.147]
Epoch 45 - Training: 100%|██████████| 60/60 [00:00<00:00, 162.11it/s, loss=0.000229]
Epoch 45 - Validation: 100%|██████████| 10/10 [00:00<00:00, 158.28it/s, accuracy=0.975, loss=0.147]
Epoch 46 - Training: 100%|██████████| 60/60 [00:00<00:00, 127.04it/s, loss=0.000224]
Epoch 46 - Validation: 100%|██████████| 10/10 [00:00<00:00, 182.00it/s, accuracy=0.975, loss=0.148]
Epoch 47 - Training: 100%|██████████| 60/60 [00:00<00:00, 162.48it/s, loss=0.000218]
Epoch 47 - Validation: 100%|██████████| 10/10 [00:00<00:00, 150.37it/s, accuracy=0.975, loss=0.148]
Epoch 48 - Training: 100%|██████████| 60/60 [00:00<00:00, 124.37it/s, loss=0.000212]
Epoch 48 - Validation: 100%|██████████| 10/10 [00:00<00:00, 177.08it/s, accuracy=0.975, loss=0.148]
Epoch 49 - Training: 100%|██████████| 60/60 [00:00<00:00, 162.87it/s, loss=

In [5]:
model = Model().to("cuda")
model.train_loop("L1-Regularization", 50, l1=True)

Epoch 0 - Training: 100%|██████████| 60/60 [00:00<00:00, 143.27it/s, loss=1.61]
Epoch 0 - Validation: 100%|██████████| 10/10 [00:00<00:00, 191.96it/s, accuracy=0.927, loss=0.237]
Epoch 1 - Training: 100%|██████████| 60/60 [00:00<00:00, 111.00it/s, loss=0.58] 
Epoch 1 - Validation: 100%|██████████| 10/10 [00:00<00:00, 172.16it/s, accuracy=0.951, loss=0.165]
Epoch 2 - Training: 100%|██████████| 60/60 [00:00<00:00, 144.35it/s, loss=0.463]
Epoch 2 - Validation: 100%|██████████| 10/10 [00:00<00:00, 159.68it/s, accuracy=0.958, loss=0.138]
Epoch 3 - Training: 100%|██████████| 60/60 [00:00<00:00, 118.65it/s, loss=0.391]
Epoch 3 - Validation: 100%|██████████| 10/10 [00:00<00:00, 179.99it/s, accuracy=0.966, loss=0.119]
Epoch 4 - Training: 100%|██████████| 60/60 [00:00<00:00, 148.87it/s, loss=0.343]
Epoch 4 - Validation: 100%|██████████| 10/10 [00:00<00:00, 150.69it/s, accuracy=0.969, loss=0.105]
Epoch 5 - Training: 100%|██████████| 60/60 [00:00<00:00, 121.55it/s, loss=0.315]
Epoch 5 - Validation

Learning Rate changed to 0.0005


Epoch 33 - Training: 100%|██████████| 60/60 [00:00<00:00, 120.48it/s, loss=0.144]
Epoch 33 - Validation: 100%|██████████| 10/10 [00:00<00:00, 189.36it/s, accuracy=0.978, loss=0.0923]
Epoch 34 - Training: 100%|██████████| 60/60 [00:00<00:00, 154.15it/s, loss=0.132]
Epoch 34 - Validation: 100%|██████████| 10/10 [00:00<00:00, 172.15it/s, accuracy=0.978, loss=0.0934]
Epoch 35 - Training: 100%|██████████| 60/60 [00:00<00:00, 119.09it/s, loss=0.128]
Epoch 35 - Validation: 100%|██████████| 10/10 [00:00<00:00, 160.57it/s, accuracy=0.979, loss=0.092]
Epoch 36 - Training: 100%|██████████| 60/60 [00:00<00:00, 154.99it/s, loss=0.125]
Epoch 36 - Validation: 100%|██████████| 10/10 [00:00<00:00, 188.52it/s, accuracy=0.979, loss=0.0922]
Epoch 37 - Training: 100%|██████████| 60/60 [00:00<00:00, 119.96it/s, loss=0.123]
Epoch 37 - Validation: 100%|██████████| 10/10 [00:00<00:00, 168.81it/s, accuracy=0.979, loss=0.0922]
Epoch 38 - Training: 100%|██████████| 60/60 [00:00<00:00, 145.09it/s, loss=0.12] 
Epoc

In [6]:
model = Model().to("cuda")
model.optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
model.train_loop("L2-Regularization", 50)

Epoch 0 - Training: 100%|██████████| 60/60 [00:00<00:00, 172.24it/s, loss=0.634]
Epoch 0 - Validation: 100%|██████████| 10/10 [00:00<00:00, 193.67it/s, accuracy=0.932, loss=0.223]
Epoch 1 - Training: 100%|██████████| 60/60 [00:00<00:00, 121.95it/s, loss=0.18] 
Epoch 1 - Validation: 100%|██████████| 10/10 [00:00<00:00, 171.93it/s, accuracy=0.953, loss=0.152]
Epoch 2 - Training: 100%|██████████| 60/60 [00:00<00:00, 164.36it/s, loss=0.123]
Epoch 2 - Validation: 100%|██████████| 10/10 [00:00<00:00, 160.42it/s, accuracy=0.963, loss=0.119]
Epoch 3 - Training: 100%|██████████| 60/60 [00:00<00:00, 168.75it/s, loss=0.0968]
Epoch 3 - Validation: 100%|██████████| 10/10 [00:00<00:00, 59.92it/s, accuracy=0.961, loss=0.122]
Epoch 4 - Training: 100%|██████████| 60/60 [00:00<00:00, 170.98it/s, loss=0.077] 
Epoch 4 - Validation: 100%|██████████| 10/10 [00:00<00:00, 179.32it/s, accuracy=0.966, loss=0.117]
Epoch 5 - Training: 100%|██████████| 60/60 [00:00<00:00, 162.38it/s, loss=0.0639]
Epoch 5 - Validat

In [7]:
from torchvision import transforms


transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1)),
    transforms.ToTensor()
])

x_train_transformed = torch.stack([transform(img) for img in x_train.unsqueeze(1)])
x_test_transformed = torch.stack([transform(img) for img in x_test.unsqueeze(1)])

In [8]:
model = Model().to("cuda")
model.trainloader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(x_train_transformed, y_train),
    batch_size=1000,
    shuffle=True,
)
model.validloader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(x_test_transformed, y_test),
    batch_size=1000,
    shuffle=True,
)

model.train_loop("DataAug", 50)

Epoch 0 - Training: 100%|██████████| 60/60 [00:00<00:00, 168.00it/s, loss=1.87]
Epoch 0 - Validation: 100%|██████████| 10/10 [00:00<00:00, 198.36it/s, accuracy=0.51, loss=1.43]
Epoch 1 - Training: 100%|██████████| 60/60 [00:00<00:00, 155.46it/s, loss=1.34]
Epoch 1 - Validation: 100%|██████████| 10/10 [00:00<00:00, 191.13it/s, accuracy=0.568, loss=1.25]
Epoch 2 - Training: 100%|██████████| 60/60 [00:00<00:00, 126.48it/s, loss=1.17]
Epoch 2 - Validation: 100%|██████████| 10/10 [00:00<00:00, 192.23it/s, accuracy=0.623, loss=1.09]
Epoch 3 - Training: 100%|██████████| 60/60 [00:00<00:00, 160.72it/s, loss=1.02]
Epoch 3 - Validation: 100%|██████████| 10/10 [00:00<00:00, 192.34it/s, accuracy=0.671, loss=0.957]
Epoch 4 - Training: 100%|██████████| 60/60 [00:00<00:00, 124.59it/s, loss=0.893]
Epoch 4 - Validation: 100%|██████████| 10/10 [00:00<00:00, 208.87it/s, accuracy=0.697, loss=0.874]
Epoch 5 - Training: 100%|██████████| 60/60 [00:00<00:00, 158.11it/s, loss=0.804]
Epoch 5 - Validation: 100%|

Learning Rate changed to 0.0005


Epoch 32 - Training: 100%|██████████| 60/60 [00:00<00:00, 166.65it/s, loss=0.153]
Epoch 32 - Validation: 100%|██████████| 10/10 [00:00<00:00, 59.57it/s, accuracy=0.78, loss=0.913]
Epoch 33 - Training: 100%|██████████| 60/60 [00:00<00:00, 168.81it/s, loss=0.14] 
Epoch 33 - Validation: 100%|██████████| 10/10 [00:00<00:00, 191.72it/s, accuracy=0.781, loss=0.933]
Epoch 34 - Training: 100%|██████████| 60/60 [00:00<00:00, 163.29it/s, loss=0.133]
Epoch 34 - Validation: 100%|██████████| 10/10 [00:00<00:00, 179.67it/s, accuracy=0.782, loss=0.951]
Epoch 35 - Training: 100%|██████████| 60/60 [00:00<00:00, 123.48it/s, loss=0.13] 
Epoch 35 - Validation: 100%|██████████| 10/10 [00:00<00:00, 196.10it/s, accuracy=0.778, loss=0.973]
Epoch 36 - Training: 100%|██████████| 60/60 [00:00<00:00, 157.86it/s, loss=0.124]
Epoch 36 - Validation: 100%|██████████| 10/10 [00:00<00:00, 201.31it/s, accuracy=0.78, loss=0.99]
Epoch 37 - Training: 100%|██████████| 60/60 [00:00<00:00, 120.68it/s, loss=0.117]
Epoch 37 - V

Learning Rate changed to 0.00025


Epoch 48 - Training: 100%|██████████| 60/60 [00:00<00:00, 172.92it/s, loss=0.0653]
Epoch 48 - Validation: 100%|██████████| 10/10 [00:00<00:00, 162.42it/s, accuracy=0.776, loss=1.24]
Epoch 49 - Training: 100%|██████████| 60/60 [00:00<00:00, 126.44it/s, loss=0.0624]
Epoch 49 - Validation: 100%|██████████| 10/10 [00:00<00:00, 185.19it/s, accuracy=0.776, loss=1.25]


In [9]:
model = Model().to("cuda")
model.train_loop("EarlyStopping", 50, early_stopping=True)

Epoch 0 - Training: 100%|██████████| 60/60 [00:00<00:00, 172.63it/s, loss=0.621]
Epoch 0 - Validation: 100%|██████████| 10/10 [00:00<00:00, 204.60it/s, accuracy=0.934, loss=0.21]
Epoch 1 - Training: 100%|██████████| 60/60 [00:00<00:00, 113.65it/s, loss=0.168]
Epoch 1 - Validation: 100%|██████████| 10/10 [00:00<00:00, 181.98it/s, accuracy=0.953, loss=0.148]
Epoch 2 - Training: 100%|██████████| 60/60 [00:00<00:00, 169.68it/s, loss=0.115]
Epoch 2 - Validation: 100%|██████████| 10/10 [00:00<00:00, 211.37it/s, accuracy=0.961, loss=0.124]
Epoch 3 - Training: 100%|██████████| 60/60 [00:00<00:00, 156.30it/s, loss=0.0881]
Epoch 3 - Validation: 100%|██████████| 10/10 [00:00<00:00, 171.92it/s, accuracy=0.967, loss=0.114]
Epoch 4 - Training: 100%|██████████| 60/60 [00:00<00:00, 124.76it/s, loss=0.0726]
Epoch 4 - Validation: 100%|██████████| 10/10 [00:00<00:00, 189.28it/s, accuracy=0.968, loss=0.11]
Epoch 5 - Training: 100%|██████████| 60/60 [00:00<00:00, 167.33it/s, loss=0.0599]
Epoch 5 - Validati

No improvement, early stopping





In [10]:
model = Model(dropout=0.5).to("cuda")
model.train_loop("Dropout", 100)

Epoch 0 - Training: 100%|██████████| 60/60 [00:00<00:00, 119.49it/s, loss=2.58]
Epoch 0 - Validation: 100%|██████████| 10/10 [00:00<00:00, 186.02it/s, accuracy=0.48, loss=1.58]
Epoch 1 - Training: 100%|██████████| 60/60 [00:00<00:00, 156.06it/s, loss=1.73]
Epoch 1 - Validation: 100%|██████████| 10/10 [00:00<00:00, 188.11it/s, accuracy=0.502, loss=1.45]
Epoch 2 - Training: 100%|██████████| 60/60 [00:00<00:00, 122.72it/s, loss=1.57]
Epoch 2 - Validation: 100%|██████████| 10/10 [00:00<00:00, 176.68it/s, accuracy=0.564, loss=1.27]
Epoch 3 - Training: 100%|██████████| 60/60 [00:00<00:00, 154.57it/s, loss=1.44]
Epoch 3 - Validation: 100%|██████████| 10/10 [00:00<00:00, 186.06it/s, accuracy=0.629, loss=1.14]
Epoch 4 - Training: 100%|██████████| 60/60 [00:00<00:00, 127.95it/s, loss=1.36]
Epoch 4 - Validation: 100%|██████████| 10/10 [00:00<00:00, 208.42it/s, accuracy=0.652, loss=1.04]
Epoch 5 - Training: 100%|██████████| 60/60 [00:00<00:00, 169.28it/s, loss=1.31]
Epoch 5 - Validation: 100%|████