In [1]:
# Shell out to nvidia-smi to show which GPU (if any) is attached to this runtime.
!nvidia-smi

Wed Feb 12 00:14:04 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   55C    P8             10W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
# Use the %pip magic (not !pip) so the package is installed into the environment
# of the running kernel; -q keeps the installer log out of the notebook.
# NOTE(review): consider pinning the `datasets` version once the one used here is known.
%pip install -q datasets



In [3]:
from datasets import load_dataset
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

import torch
from torch import nn
import torch.optim as optim

from tqdm import tqdm

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Training schedule shared by all three models.
NUM_EPOCHS = 10  # upper bound on the number of epochs per model
PATIENCE = 3     # epochs without validation-loss improvement before early stopping

# Seed PyTorch's global RNG so weight initialisation, dropout masks and
# DataLoader shuffling are repeatable under Restart & Run All.
SEED = 42
torch.manual_seed(SEED);

In [4]:
# Run on CUDA when PyTorch can see a GPU; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [5]:
def load_mnist_dataset(batch_size: int = 64):
    """Load MNIST via ``datasets.load_dataset`` and wrap both splits in DataLoaders.

    Every example's ``"image"`` is converted with ``transforms.ToTensor()`` and
    the dataset is switched to torch format for the ``"image"`` and ``"label"``
    columns, so each batch is a dict with those two keys.

    Args:
        batch_size: Number of samples per batch for both loaders. Defaults to
            64, the value that was previously hard-coded.

    Returns:
        tuple: ``(train_loader, test_loader)``. The training loader reshuffles
        every epoch; the test loader iterates in dataset order.
    """
    mnist_dataset = load_dataset("mnist")
    transform = transforms.ToTensor()

    def _transform_example(example):
        # Replace the stored image with its tensor form.
        example["image"] = transform(example["image"])
        return example

    mnist_dataset = mnist_dataset.map(_transform_example)
    mnist_dataset.set_format(type="torch", columns=["image", "label"])

    train_dataset = mnist_dataset["train"]
    test_dataset = mnist_dataset["test"]

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    # Evaluation does not benefit from shuffling; a fixed order keeps
    # per-sample predictions reproducible between runs.
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, test_loader

In [6]:
class Autoencoder(nn.Module):
    """Fully connected autoencoder with a 64-dimensional bottleneck.

    Encoder: 784 -> 128 -> 64, each linear layer followed by BatchNorm1d,
    ReLU and Dropout(0.2). Decoder: 64 -> 128 -> 784, ending in a Sigmoid so
    reconstructions lie in [0, 1].
    """

    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(28 * 28, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.2)
        )
        self.decoder = nn.Sequential(
            nn.Linear(64, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 28 * 28),
            nn.Sigmoid()  # Normalise output to [0, 1]
        )

    def forward(self, x):
        """Reconstruct a batch of images.

        Args:
            x: Tensor whose first dimension is the batch and whose remaining
                dimensions hold 784 values per sample (e.g. [Batch, 1, 28, 28]).

        Returns:
            Tensor of shape [Batch, 1, 28, 28].
        """
        latent = self.get_latent(x)  # [Batch, ...] -> [Batch, 64]
        decoded = self.decoder(latent)
        return decoded.view(x.size(0), 1, 28, 28)  # [Batch, 784] -> [Batch, 1, 28, 28]

    def get_latent(self, x):
        """Return the encoder output (shape [Batch, 64]) for a batch of images."""
        # reshape instead of view: view raises on non-contiguous inputs
        # (e.g. a transposed or sliced batch), reshape copies when it has to.
        flat = x.reshape(x.size(0), -1)
        return self.encoder(flat)

In [7]:
class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers with a skip connection.

    Dropout is applied between the two convolutions. When the stride is not 1
    or the channel count changes, the skip path is projected with a 1x1
    convolution plus batch norm so that both paths have matching shapes.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by the block.
        stride: Stride of the first convolution (and of the projection).
        dropout_prob: Dropout probability between the two convolutions.
    """

    def __init__(self, in_channels: int, out_channels: int, stride: int = 1, dropout_prob: float = 0.2):
        super().__init__()
        # Main path, first half.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        # Main path, second half.
        self.dropout = nn.Dropout(dropout_prob)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Skip path: identity unless the output shape differs from the input shape.
        shape_changes = stride != 1 or in_channels != out_channels
        self.downsample = (
            nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
            if shape_changes
            else None
        )

    def forward(self, x):
        """Apply the block to ``x`` and return the activated sum of both paths."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(self.dropout(out)))

        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)

class ResidualCNN(nn.Module):
    """Small residual classifier for single-channel images with 10 output logits.

    A 3x3 stem convolution (1 -> 16 channels) is followed by four residual
    blocks (16 -> 16 -> 32 -> 64 -> 64 channels, the second and third with
    stride 2), global average pooling, dropout and a linear layer.
    """

    def __init__(self):
        super().__init__()
        # Stem.
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        # Residual stages.
        self.layer1 = ResidualBlock(16, 16, stride=1, dropout_prob=0.2)
        self.layer2 = ResidualBlock(16, 32, stride=2, dropout_prob=0.2)
        self.layer3 = ResidualBlock(32, 64, stride=2, dropout_prob=0.2)
        self.layer4 = ResidualBlock(64, 64, stride=1, dropout_prob=0.2)
        # Classification head.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout_fc = nn.Dropout(0.5)
        self.fc = nn.Linear(64, 10)

    def forward(self, x):
        """Return the class logits (shape [Batch, 10]) for a batch of images."""
        x = self.relu(self.bn1(self.conv1(x)))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        # Pool each channel to one value, then drop the 1x1 spatial dimensions.
        pooled = self.avgpool(x).flatten(1)
        return self.fc(self.dropout_fc(pooled))

In [8]:
class LatentCNN(nn.Module):
    """CNN classifier that treats each 64-value latent vector as an 8x8 image.

    Two 3x3 conv + batch-norm + ReLU + dropout stages (1 -> 16 -> 32 channels,
    spatial size kept at 8x8 by the padding) feed a linear layer that outputs
    10 class logits.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(p=0.5)  # dropout probability of 50%
        self.fc = nn.Linear(32 * 8 * 8, 10)

    def forward(self, x):
        """Classify a batch of latent vectors.

        Args:
            x: Tensor with 64 values per sample, batch first (e.g. [Batch, 64]
                or [Batch, 1, 8, 8]). A single sample of exactly 64 values is
                treated as a batch of one.

        Returns:
            Logits of shape [Batch, 10].

        Raises:
            RuntimeError: If the per-sample size is not 64.
        """
        # Keep the batch dimension fixed when reshaping. The previous
        # view(-1, 1, 8, 8) silently turned e.g. a [B, 128] input into 2*B
        # samples; with an explicit batch size the mismatch raises instead.
        batch_size = 1 if x.numel() == 8 * 8 else x.size(0)
        x = x.reshape(batch_size, 1, 8, 8)

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.dropout(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.dropout(x)

        x = torch.flatten(x, 1)  # [Batch, 32, 8, 8] -> [Batch, 2048]
        x = self.fc(x)
        return x

In [9]:
# ---------------------------
# Evaluation functions
# ---------------------------
def evaluate_autoencoder(model, data_loader, criterion, device):
    """Compute the average reconstruction loss of ``model`` over ``data_loader``.

    The model is switched to eval mode and run without gradient tracking.

    Args:
        model: Module mapping an image batch to its reconstruction.
        data_loader: Iterable of dict batches with an ``"image"`` tensor.
        criterion: Loss called as ``criterion(outputs, imgs)``; assumed to
            return the mean over the batch, since it is re-weighted by the
            batch size here.
        device: Device the images are moved to.

    Returns:
        float: Sample-weighted mean loss over all samples actually iterated.

    Raises:
        ValueError: If ``data_loader`` yields no samples.
    """
    model.eval()
    total_loss = 0.0
    total = 0
    with torch.no_grad():
        for batch in data_loader:
            imgs = batch["image"].to(device)
            outputs = model(imgs)
            loss = criterion(outputs, imgs)
            batch_size = imgs.size(0)
            total_loss += loss.item() * batch_size
            total += batch_size
    if total == 0:
        raise ValueError("data_loader yielded no samples to evaluate")
    # Divide by the samples seen, not len(data_loader.dataset): the two differ
    # with drop_last=True or a sampler that visits only part of the dataset.
    return total_loss / total

def evaluate_classifier(model, data_loader, criterion, device):
    """Compute average loss and accuracy of a classifier over ``data_loader``.

    The model is switched to eval mode and run without gradient tracking.

    Args:
        model: Module mapping an image batch to class logits.
        data_loader: Iterable of dict batches with ``"image"`` and ``"label"``.
        criterion: Loss called as ``criterion(outputs, labels)``; assumed to
            return the mean over the batch, since it is re-weighted by the
            batch size here.
        device: Device the tensors are moved to.

    Returns:
        tuple: ``(avg_loss, accuracy)``, both normalised by the number of
        samples actually iterated.

    Raises:
        ValueError: If ``data_loader`` yields no samples.
    """
    model.eval()
    total_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in data_loader:
            imgs = batch["image"].to(device)
            labels = batch["label"].to(device)
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            total_loss += loss.item() * imgs.size(0)
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    if total == 0:
        raise ValueError("data_loader yielded no samples to evaluate")
    # Use the same denominator for loss and accuracy; len(data_loader.dataset)
    # over-counts when drop_last=True or a sampler skips part of the dataset.
    avg_loss = total_loss / total
    accuracy = correct / total
    return avg_loss, accuracy

def evaluate_latentcnn(autoencoder, latentcnn, data_loader, criterion, device):
    """Evaluate a classifier that operates on the autoencoder's latent codes.

    Both modules are switched to eval mode and run without gradient tracking.
    Each image batch is encoded with ``autoencoder.get_latent`` and the result
    is passed to ``latentcnn``.

    Args:
        autoencoder: Module exposing ``get_latent(imgs)``.
        latentcnn: Module mapping latent codes to class logits.
        data_loader: Iterable of dict batches with ``"image"`` and ``"label"``.
        criterion: Loss called as ``criterion(outputs, labels)``; assumed to
            return the mean over the batch, since it is re-weighted by the
            batch size here.
        device: Device the tensors are moved to.

    Returns:
        tuple: ``(avg_loss, accuracy)``, both normalised by the number of
        samples actually iterated.

    Raises:
        ValueError: If ``data_loader`` yields no samples.
    """
    autoencoder.eval()
    latentcnn.eval()
    total_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in data_loader:
            imgs = batch["image"].to(device)
            labels = batch["label"].to(device)
            latent_imgs = autoencoder.get_latent(imgs)
            outputs = latentcnn(latent_imgs)
            loss = criterion(outputs, labels)
            total_loss += loss.item() * imgs.size(0)
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    if total == 0:
        raise ValueError("data_loader yielded no samples to evaluate")
    # Use the same denominator for loss and accuracy; len(data_loader.dataset)
    # over-counts when drop_last=True or a sampler skips part of the dataset.
    avg_loss = total_loss / total
    accuracy = correct / total
    return avg_loss, accuracy

In [10]:
# Build the MNIST loaders once; every training and evaluation cell reuses them.
train_loader, test_loader = load_mnist_dataset()

# Create the three networks on the selected device, in the order
# autoencoder -> latent classifier -> residual classifier.
autoencoder_model, latentcnn_model, residualcnn_model = (
    net_cls().to(device) for net_cls in (Autoencoder, LatentCNN, ResidualCNN)
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [11]:
# ---------------------------------
# Training Autoencoder (reconstruction, MSELoss)
# ---------------------------------
print("Training Autoencoder")
criterion_ae = nn.MSELoss()
optimizer_ae = optim.Adam(autoencoder_model.parameters(), lr=1e-3)

best_val_loss = float('inf')
epochs_no_improve = 0
best_state_ae = None  # weights from the epoch with the lowest validation loss

for epoch in range(NUM_EPOCHS):
    autoencoder_model.train()
    progress_bar = tqdm(train_loader, desc=f"Autoencoder Epoch {epoch+1}/{NUM_EPOCHS}", unit="batch")
    for batch in progress_bar:
        imgs = batch["image"].to(device)
        outputs = autoencoder_model(imgs)
        loss = criterion_ae(outputs, imgs)  # the target is the input itself

        optimizer_ae.zero_grad()
        loss.backward()
        optimizer_ae.step()

        progress_bar.set_postfix(loss=loss.item())

    # NOTE(review): the test split doubles as the validation set here, so the
    # early-stopping decision is tuned on test data.
    val_loss = evaluate_autoencoder(autoencoder_model, test_loader, criterion_ae, device)
    print(f"Autoencoder Epoch {epoch+1} Validation Loss: {val_loss:.4f}")

    # Early stopping logic
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_no_improve = 0
        # state_dict() hands back live tensors, so clone them to freeze this epoch.
        best_state_ae = {
            name: tensor.detach().clone()
            for name, tensor in autoencoder_model.state_dict().items()
        }
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= PATIENCE:
            print("Early stopping for Autoencoder")
            break

# Without this, an early-stopped model keeps the weights from PATIENCE epochs
# after its best epoch.
if best_state_ae is not None:
    autoencoder_model.load_state_dict(best_state_ae)

Training Autoencoder


Autoencoder Epoch 1/10: 100%|██████████| 938/938 [00:50<00:00, 18.71batch/s, loss=0.0307]


Autoencoder Epoch 1 Validation Loss: 0.0222


Autoencoder Epoch 2/10: 100%|██████████| 938/938 [00:37<00:00, 24.85batch/s, loss=0.0259]


Autoencoder Epoch 2 Validation Loss: 0.0190


Autoencoder Epoch 3/10: 100%|██████████| 938/938 [00:35<00:00, 26.36batch/s, loss=0.0308]


Autoencoder Epoch 3 Validation Loss: 0.0179


Autoencoder Epoch 4/10: 100%|██████████| 938/938 [00:35<00:00, 26.33batch/s, loss=0.0287]


Autoencoder Epoch 4 Validation Loss: 0.0171


Autoencoder Epoch 5/10: 100%|██████████| 938/938 [00:36<00:00, 25.47batch/s, loss=0.0289]


Autoencoder Epoch 5 Validation Loss: 0.0166


Autoencoder Epoch 6/10: 100%|██████████| 938/938 [00:36<00:00, 26.06batch/s, loss=0.0253]


Autoencoder Epoch 6 Validation Loss: 0.0163


Autoencoder Epoch 7/10: 100%|██████████| 938/938 [00:36<00:00, 25.98batch/s, loss=0.0271]


Autoencoder Epoch 7 Validation Loss: 0.0159


Autoencoder Epoch 8/10: 100%|██████████| 938/938 [00:36<00:00, 25.91batch/s, loss=0.0231]


Autoencoder Epoch 8 Validation Loss: 0.0158


Autoencoder Epoch 9/10: 100%|██████████| 938/938 [00:37<00:00, 24.84batch/s, loss=0.0248]


Autoencoder Epoch 9 Validation Loss: 0.0155


Autoencoder Epoch 10/10: 100%|██████████| 938/938 [00:36<00:00, 26.01batch/s, loss=0.0267]


Autoencoder Epoch 10 Validation Loss: 0.0153


In [12]:
# Switch the autoencoder to evaluation mode before later cells use its encoder
# to produce latent codes.
autoencoder_model.eval()

Autoencoder(
  (encoder): Sequential(
    (0): Linear(in_features=784, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.2, inplace=False)
  )
  (decoder): Sequential(
    (0): Linear(in_features=64, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=128, out_features=784, bias=True)
    (5): Sigmoid()
  )
)

In [13]:
# ---------------------------------
# Training LatentCNN (classification on the autoencoder's latent space)
# ---------------------------------
print("\nTraining LatentCNN")
# Cross-entropy loss for classification; the optimizer only covers the
# parameters of latentcnn_model, so the autoencoder is not updated here.
criterion_cls = nn.CrossEntropyLoss()
optimizer_lat = optim.Adam(latentcnn_model.parameters(), lr=1e-3)

best_val_loss = float('inf')
epochs_no_improve = 0
best_state_lat = None  # weights from the epoch with the lowest validation loss

# The autoencoder is a fixed feature extractor: set eval mode explicitly
# instead of relying on an earlier cell having done so.
autoencoder_model.eval()

for epoch in range(NUM_EPOCHS):
    latentcnn_model.train()
    progress_bar = tqdm(train_loader, desc=f"LatentCNN Epoch {epoch+1}/{NUM_EPOCHS}", unit="batch")
    for batch in progress_bar:
        imgs = batch["image"].to(device)
        labels = batch["label"].to(device)
        # Fetch the latent representation from the autoencoder. no_grad avoids
        # building a graph through the encoder and accumulating gradients on
        # its parameters that no optimizer ever uses or clears.
        with torch.no_grad():
            latent_imgs = autoencoder_model.get_latent(imgs)
        outputs = latentcnn_model(latent_imgs)
        loss = criterion_cls(outputs, labels)

        optimizer_lat.zero_grad()
        loss.backward()
        optimizer_lat.step()

        progress_bar.set_postfix(loss=loss.item())

    # NOTE(review): the test split doubles as the validation set here, so the
    # early-stopping decision is tuned on test data.
    val_loss, val_acc = evaluate_latentcnn(autoencoder_model, latentcnn_model, test_loader, criterion_cls, device)
    print(f"LatentCNN Epoch {epoch+1} Validation Loss: {val_loss:.4f}, Accuracy: {val_acc:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_no_improve = 0
        # state_dict() hands back live tensors, so clone them to freeze this epoch.
        best_state_lat = {
            name: tensor.detach().clone()
            for name, tensor in latentcnn_model.state_dict().items()
        }
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= PATIENCE:
            print("Early stopping for LatentCNN")
            break

# Without this, an early-stopped model keeps the weights from PATIENCE epochs
# after its best epoch.
if best_state_lat is not None:
    latentcnn_model.load_state_dict(best_state_lat)


Training LatentCNN


LatentCNN Epoch 1/10: 100%|██████████| 938/938 [00:36<00:00, 25.65batch/s, loss=0.375]


LatentCNN Epoch 1 Validation Loss: 0.1968, Accuracy: 0.9424


LatentCNN Epoch 2/10: 100%|██████████| 938/938 [00:36<00:00, 25.37batch/s, loss=0.413]


LatentCNN Epoch 2 Validation Loss: 0.1677, Accuracy: 0.9507


LatentCNN Epoch 3/10: 100%|██████████| 938/938 [00:38<00:00, 24.32batch/s, loss=0.118]


LatentCNN Epoch 3 Validation Loss: 0.1574, Accuracy: 0.9533


LatentCNN Epoch 4/10: 100%|██████████| 938/938 [00:35<00:00, 26.32batch/s, loss=0.27]


LatentCNN Epoch 4 Validation Loss: 0.1513, Accuracy: 0.9552


LatentCNN Epoch 5/10: 100%|██████████| 938/938 [00:36<00:00, 25.56batch/s, loss=0.332]


LatentCNN Epoch 5 Validation Loss: 0.1378, Accuracy: 0.9602


LatentCNN Epoch 6/10: 100%|██████████| 938/938 [00:35<00:00, 26.33batch/s, loss=0.0488]


LatentCNN Epoch 6 Validation Loss: 0.1334, Accuracy: 0.9610


LatentCNN Epoch 7/10: 100%|██████████| 938/938 [00:37<00:00, 24.74batch/s, loss=0.129]


LatentCNN Epoch 7 Validation Loss: 0.1283, Accuracy: 0.9623


LatentCNN Epoch 8/10: 100%|██████████| 938/938 [00:37<00:00, 24.74batch/s, loss=0.342]


LatentCNN Epoch 8 Validation Loss: 0.1294, Accuracy: 0.9618


LatentCNN Epoch 9/10: 100%|██████████| 938/938 [00:37<00:00, 24.76batch/s, loss=0.192]


LatentCNN Epoch 9 Validation Loss: 0.1256, Accuracy: 0.9636


LatentCNN Epoch 10/10: 100%|██████████| 938/938 [00:37<00:00, 25.06batch/s, loss=0.211]


LatentCNN Epoch 10 Validation Loss: 0.1251, Accuracy: 0.9640


In [14]:
# ---------------------------------
# Training ResidualCNN (classification, CrossEntropyLoss)
# ---------------------------------
print("\nTraining ResidualCNN")
criterion_cls = nn.CrossEntropyLoss()
optimizer_res = optim.Adam(residualcnn_model.parameters(), lr=1e-3)

best_val_loss = float('inf')
epochs_no_improve = 0
best_state_res = None  # weights from the epoch with the lowest validation loss

for epoch in range(NUM_EPOCHS):
    residualcnn_model.train()
    progress_bar = tqdm(train_loader, desc=f"ResidualCNN Epoch {epoch+1}/{NUM_EPOCHS}", unit="batch")
    for batch in progress_bar:
        imgs = batch["image"].to(device)
        labels = batch["label"].to(device)
        outputs = residualcnn_model(imgs)
        loss = criterion_cls(outputs, labels)

        optimizer_res.zero_grad()
        loss.backward()
        optimizer_res.step()

        progress_bar.set_postfix(loss=loss.item())

    # NOTE(review): the test split doubles as the validation set here, so the
    # early-stopping decision is tuned on test data.
    val_loss, val_acc = evaluate_classifier(residualcnn_model, test_loader, criterion_cls, device)
    print(f"ResidualCNN Epoch {epoch+1} Validation Loss: {val_loss:.4f}, Accuracy: {val_acc:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_no_improve = 0
        # state_dict() hands back live tensors, so clone them to freeze this epoch.
        best_state_res = {
            name: tensor.detach().clone()
            for name, tensor in residualcnn_model.state_dict().items()
        }
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= PATIENCE:
            print("Early stopping for ResidualCNN")
            break

# Without this, an early-stopped model keeps the weights from PATIENCE epochs
# after its best epoch.
if best_state_res is not None:
    residualcnn_model.load_state_dict(best_state_res)


Training ResidualCNN


ResidualCNN Epoch 1/10: 100%|██████████| 938/938 [00:43<00:00, 21.47batch/s, loss=0.019]


ResidualCNN Epoch 1 Validation Loss: 0.0400, Accuracy: 0.9880


ResidualCNN Epoch 2/10: 100%|██████████| 938/938 [00:42<00:00, 22.26batch/s, loss=0.0994]


ResidualCNN Epoch 2 Validation Loss: 0.0357, Accuracy: 0.9883


ResidualCNN Epoch 3/10: 100%|██████████| 938/938 [00:42<00:00, 21.89batch/s, loss=0.0191]


ResidualCNN Epoch 3 Validation Loss: 0.0265, Accuracy: 0.9915


ResidualCNN Epoch 4/10: 100%|██████████| 938/938 [00:42<00:00, 21.91batch/s, loss=0.274]


ResidualCNN Epoch 4 Validation Loss: 0.0244, Accuracy: 0.9927


ResidualCNN Epoch 5/10: 100%|██████████| 938/938 [00:41<00:00, 22.46batch/s, loss=0.207]


ResidualCNN Epoch 5 Validation Loss: 0.0279, Accuracy: 0.9915


ResidualCNN Epoch 6/10: 100%|██████████| 938/938 [00:41<00:00, 22.82batch/s, loss=0.245]


ResidualCNN Epoch 6 Validation Loss: 0.0208, Accuracy: 0.9933


ResidualCNN Epoch 7/10: 100%|██████████| 938/938 [00:41<00:00, 22.41batch/s, loss=0.00429]


ResidualCNN Epoch 7 Validation Loss: 0.0232, Accuracy: 0.9935


ResidualCNN Epoch 8/10: 100%|██████████| 938/938 [00:42<00:00, 22.03batch/s, loss=0.00395]


ResidualCNN Epoch 8 Validation Loss: 0.0205, Accuracy: 0.9938


ResidualCNN Epoch 9/10: 100%|██████████| 938/938 [00:40<00:00, 22.88batch/s, loss=0.0229]


ResidualCNN Epoch 9 Validation Loss: 0.0222, Accuracy: 0.9934


ResidualCNN Epoch 10/10: 100%|██████████| 938/938 [00:40<00:00, 22.88batch/s, loss=0.00708]


ResidualCNN Epoch 10 Validation Loss: 0.0177, Accuracy: 0.9948


In [16]:
# Test-set metrics for the autoencoder + LatentCNN pipeline.
# Both stages must be in eval mode: the encoder contains dropout and batch
# norm, so leaving it in train mode would perturb the latent codes.
autoencoder_model.eval()
latentcnn_model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for batch in tqdm(test_loader, desc="Evaluating", unit="batch"):
        imgs = batch["image"].to(device)
        labels = batch["label"].to(device)
        x = autoencoder_model.get_latent(imgs)
        outputs = latentcnn_model(x)
        preds = outputs.argmax(dim=1)
        # tolist() yields plain Python ints rather than a list of NumPy scalars.
        all_preds.extend(preds.cpu().tolist())
        all_labels.extend(labels.cpu().tolist())

# Compute the metrics with scikit-learn (macro average over the classes)
accuracy = accuracy_score(all_labels, all_preds)
precision = precision_score(all_labels, all_preds, average='macro')
recall = recall_score(all_labels, all_preds, average='macro')
f1 = f1_score(all_labels, all_preds, average='macro')

print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test F1 Score: {f1:.4f}")

Evaluating: 100%|██████████| 157/157 [00:05<00:00, 29.07batch/s]


Test Accuracy: 0.9640
Test Precision: 0.9638
Test Recall: 0.9636
Test F1 Score: 0.9637


In [17]:
# Test-set metrics for the ResidualCNN, which classifies the raw images directly.
residualcnn_model.eval()
all_preds, all_labels = [], []
with torch.no_grad():
    for batch in tqdm(test_loader, desc="Evaluating", unit="batch"):
        imgs, labels = batch["image"].to(device), batch["label"].to(device)
        outputs = residualcnn_model(imgs)
        preds = outputs.argmax(dim=1)
        # Collect predictions and targets on the CPU for scikit-learn.
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Compute the metrics with scikit-learn; all but accuracy are macro-averaged.
accuracy = accuracy_score(all_labels, all_preds)
precision, recall, f1 = (
    macro_metric(all_labels, all_preds, average='macro')
    for macro_metric in (precision_score, recall_score, f1_score)
)

print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test F1 Score: {f1:.4f}")

Evaluating: 100%|██████████| 157/157 [00:05<00:00, 31.36batch/s]


Test Accuracy: 0.9948
Test Precision: 0.9949
Test Recall: 0.9947
Test F1 Score: 0.9948
