# KIBV Übung 3

In [39]:
import torch
import torchvision
import os
import random

from RobSceneSegDataset import RobSceneSegDataset
from config import directory

In [40]:
# Seed every random number generator this notebook draws from.
# Python's `random` drives the train/val split below; torch's generator drives
# weight initialisation and the shuffling of the training DataLoader, so it has
# to be seeded as well for a Restart & Run All to be reproducible.
SEED = 42
torch.manual_seed(SEED)
random.seed(SEED)

## Fully Convolutional Network (FCN)

In [41]:
# Print the layer layout of a stock torchvision VGG16 (constructed without
# any weights argument) for comparison with the FCN32s printed further down.
print(torchvision.models.vgg16())

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [42]:
from FCN32s import FCN32s

In [43]:
# Instantiate the project's FCN32s with its default arguments so its structure
# can be inspected in the next cell.
model = FCN32s()

In [44]:
# Show the module tree of the FCN32s instance.
print(model)

FCN32s(
  (vgg16): VGG(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace=True)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (6): ReLU(inplace=True)
      (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (8): ReLU(inplace=True)
      (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace=True)
      (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (13): ReLU(inplace=True)
      (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (15): ReLU(inplace=True)
      (16): M

## Training

In [45]:
# Collect the names of all sub-directories of the dataset root.
# os.listdir returns entries in arbitrary, filesystem-dependent order, so the
# names are sorted: the seeded random split in the next cell then selects the
# same sequences on every machine.
sequences = sorted(
    entry
    for entry in os.listdir(directory)
    if os.path.isdir(os.path.join(directory, entry))
)
print(sequences)

['seq_1', 'seq_10', 'seq_11', 'seq_12', 'seq_13', 'seq_14', 'seq_15', 'seq_16', 'seq_2', 'seq_3', 'seq_4', 'seq_5', 'seq_6', 'seq_7', 'seq_9']


In [46]:
# 70 % of the sequence folders (rounded down) are drawn at random for training;
# every folder that was not drawn goes to validation, in its original order.
sample_size = int(0.7 * len(sequences))

train_sequences = random.sample(sequences, sample_size)
val_sequences = list(filter(lambda name: name not in train_sequences, sequences))

print(f"{train_sequences=}\n{val_sequences=}")

train_sequences=['seq_4', 'seq_10', 'seq_1', 'seq_5', 'seq_13', 'seq_12', 'seq_3', 'seq_11', 'seq_14', 'seq_6']
val_sequences=['seq_15', 'seq_16', 'seq_2', 'seq_7', 'seq_9']


In [47]:
# Transforms for images and labels.
# Images are converted to tensors and then normalised per channel with
# mean 0.5 and std 0.5.
image_transform = torchvision.transforms.Compose(
    [
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)
# NOTE(review): ToTensor rescales uint8 input to floats in [0, 1]. If the label
# images store class indices, this would destroy them - confirm against
# RobSceneSegDataset how labels are encoded.
label_transform = torchvision.transforms.ToTensor()

In [48]:
# Build the datasets: one over the training sequences, one over the validation
# sequences, both using the same image and label transforms.
train_dataset = RobSceneSegDataset(
    directory,
    train_sequences,
    transform=image_transform,
    target_transform=label_transform,
)
val_dataset = RobSceneSegDataset(
    directory,
    val_sequences,
    transform=image_transform,
    target_transform=label_transform,
)

In [50]:
# Batch size shared by both loaders.
batch_size = 16

# Only the training loader shuffles its samples; validation keeps dataset order.
# Each loader uses four worker processes.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
)

In [52]:
# Initialise the model: a fresh FCN32s that replaces the instance created earlier.
# NOTE(review): the move to the GPU is commented out, so after this cell the
# model's parameters are wherever FCN32s() creates them (presumably the CPU).
model = FCN32s() #.to('cuda')

In [53]:
# Loss function used for both training and validation: cross-entropy with
# PyTorch's default settings.
criterion = torch.nn.CrossEntropyLoss()

In [54]:
# Plain SGD with momentum over all model parameters.
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=1e-3,
    momentum=0.9,
)

In [56]:
# Train for a fixed number of epochs, validate after every epoch and keep the
# checkpoint with the lowest mean validation loss.
num_epochs = 20
best_val_loss = float('inf')
checkpoint_path = 'fcn32s_best_checkpoint.pth'

# Use ONE device for the model and for every batch. Previously the training
# batches stayed on the CPU while the validation batches were sent to 'cuda'
# and the model was never moved, so the two loops could not both run.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)  # nn.Module.to moves the parameters in place

for epoch in range(num_epochs):
    # ---- Training ----
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        # Same preparation as in validation: integer targets on the shared device.
        # NOTE(review): with class-index targets CrossEntropyLoss expects labels
        # of shape (N, H, W) - confirm what RobSceneSegDataset delivers.
        images, labels = images.to(device), labels.long().to(device)

        # Reset gradients accumulated by the previous step
        optimizer.zero_grad()

        # Forward + loss + backward + parameter update
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # max(..., 1) avoids a ZeroDivisionError when a loader yields no batches
    train_loss = running_loss / max(len(train_loader), 1)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss:.4f}")

    # ---- Validation ----
    model.eval()
    val_loss = 0.0
    with torch.no_grad():  # no gradients needed for evaluation
        for images, labels in val_loader:
            images, labels = images.to(device), labels.long().to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

    avg_val_loss = val_loss / max(len(val_loader), 1)
    print(f"Validation Loss: {avg_val_loss:.4f}")

    # Save the best model checkpoint. The mean (the value printed above) is
    # compared and stored, so the saved 'val_loss' matches the log output.
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_loss': best_val_loss,
        }, checkpoint_path)
        print("Best model checkpoint saved.")

RuntimeError: [enforce fail at alloc_cpu.cpp:114] data. DefaultCPUAllocator: not enough memory: you tried to allocate 5368709120 bytes.

In [None]:
# Restore model and optimizer from the best checkpoint written by the training
# loop. File name and keys must match what the training cell saves:
# 'fcn32s_best_checkpoint.pth' with 'epoch', 'model_state_dict',
# 'optimizer_state_dict' and 'val_loss' (there is no 'loss' entry).
# map_location='cpu' lets a checkpoint saved on a GPU be read on a CPU-only
# machine; load_state_dict then copies the values into the existing parameters.
checkpoint = torch.load('fcn32s_best_checkpoint.pth', map_location='cpu')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']      # 1-based epoch at which the checkpoint was saved
loss = checkpoint['val_loss']    # validation loss stored alongside the weights