In [1]:
import torch
import torch.nn as nn

# Zelle 2: Beispiel CNN-Modell
class ConvNet(nn.Module):
    """Small CNN that regresses a fixed number of values from an RGB image.

    The feature extractor consists of five ``Conv2d(3x3, padding=1) -> ReLU ->
    MaxPool2d(2)`` stages with channel widths 3 -> 16 -> 32 -> 64 -> 128 -> 256.
    Each pooling stage halves height and width (rounding down), so the spatial
    size shrinks by a factor of 32 overall. The flattened feature map is fed
    through a two-layer fully connected head.

    Args:
        input_size: ``(height, width)`` of the images the model will receive.
            The default ``(512, 512)`` yields the original ``256*16*16`` input
            features for the first linear layer.
        num_outputs: Number of regressed values. The default ``2`` corresponds
            to one (x, y) object position.

    Raises:
        ValueError: If either side of ``input_size`` is smaller than 32, i.e.
            the feature map would be empty after the five pooling stages.
    """

    def __init__(self, input_size=(512, 512), num_outputs=2):
        super().__init__()
        height, width = input_size

        # Layers are appended in the same order as before, so the indices
        # inside ``self.conv`` (and therefore state_dict keys) are unchanged.
        channels = (3, 16, 32, 64, 128, 256)
        stages = []
        for c_in, c_out in zip(channels, channels[1:]):
            stages.append(nn.Conv2d(c_in, c_out, 3, padding=1))
            stages.append(nn.ReLU())
            stages.append(nn.MaxPool2d(2))
        self.conv = nn.Sequential(*stages)

        # padding=1 keeps the size through each 3x3 conv; every MaxPool2d(2)
        # floors the size to half, so five of them floor-divide by 2**5.
        downscale = 2 ** (len(channels) - 1)
        feat_h, feat_w = height // downscale, width // downscale
        if feat_h < 1 or feat_w < 1:
            raise ValueError(
                f"input_size {tuple(input_size)} is too small; "
                f"each side must be at least {downscale}"
            )

        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(channels[-1] * feat_h * feat_w, 128),
            nn.ReLU(),
            nn.Linear(128, num_outputs)  # by default the (x, y) position of the object
        )

    def forward(self, x):
        """Run the convolutional stack followed by the fully connected head."""
        return self.fc(self.conv(x))


# Instantiate the network with its default arguments and print the module
# tree as a quick sanity check of the layer layout.
model = ConvNet()
print(model)


ConvNet(
  (conv): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): ReLU()
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_fe

In [6]:
import cv2
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image
import os

class VideoTrackingDataset(Dataset):
    """Frame-level dataset over ``.mp4`` videos with per-frame CSV labels.

    Every ``<name>.mp4`` in ``video_dir`` that has a matching ``<name>.csv`` in
    ``label_dir`` contributes all of its frames. The CSV is read with the
    columns ``t`` (frame index), ``hexbug`` (object id), ``x`` and ``y``
    (pixel coordinates). An item is ``(image, coords)`` where ``coords`` is a
    float32 tensor of shape ``(num_bugs, 2)`` holding x/y divided by the frame
    width/height; rows without a label stay at ``-1``.

    Args:
        video_dir: Directory containing the ``.mp4`` files.
        label_dir: Directory containing the ``.csv`` label files.
        limit: Optional maximum number of videos to use (``None`` = all).
        resize: Target size passed to the default ``Resize`` transform.
        transform: Callable applied to the PIL image. Defaults to
            ``Resize(resize)`` followed by ``ToTensor()``.
        max_bugs: Optional fixed number of label rows per item. When ``None``
            the row count is ``max(hexbug) + 1`` of the requested frame (zero
            if the frame has no labels), so shapes may differ between frames.
            Set it to get a constant label shape for batching.

    Note:
        Frame counts and parsed label files are cached on first use; the cache
        is not refreshed if ``samples`` is modified afterwards.
    """

    def __init__(self, video_dir, label_dir, limit=None, resize=(512, 512),
                 transform=None, max_bugs=None):
        self.video_dir = video_dir
        self.label_dir = label_dir
        self.resize = resize
        self.max_bugs = max_bugs
        if transform is None:
            transform = transforms.Compose([
                transforms.Resize(resize),
                transforms.ToTensor()
            ])
        self.transform = transform

        self.samples = self._gather_samples()
        if limit is not None:
            self.samples = self.samples[:limit]

        # Filled lazily: opening every video is expensive, so do it only once.
        self._frame_counts = None
        # label_path -> parsed DataFrame, so each CSV is read once.
        self._label_cache = {}

    def _gather_samples(self):
        """Return sorted ``(video_path, label_path)`` pairs that both exist."""
        samples = []
        # os.listdir order is arbitrary; sort so that ``limit`` and the
        # index -> frame mapping are reproducible across runs and machines.
        for file in sorted(os.listdir(self.video_dir)):
            if file.endswith(".mp4"):
                base = file[:-4]
                video_path = os.path.join(self.video_dir, file)
                label_path = os.path.join(self.label_dir, f"{base}.csv")
                if os.path.exists(label_path):
                    samples.append((video_path, label_path))
        return samples

    def _get_frame_counts(self):
        """Return the cached per-video frame counts, computing them once."""
        if self._frame_counts is None:
            self._frame_counts = [self._count_frames(v) for v, _ in self.samples]
        return self._frame_counts

    def __len__(self):
        return sum(self._get_frame_counts())

    def _count_frames(self, video_path):
        """Return the frame count reported by OpenCV for ``video_path``."""
        cap = cv2.VideoCapture(video_path)
        try:
            # NOTE(review): this is the container-reported count; presumably it
            # can deviate from the number of decodable frames -- confirm.
            return int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        finally:
            cap.release()

    def _locate(self, index):
        """Map a flat dataset index to ``(sample_idx, frame_idx)``.

        Negative indices count from the end.

        Raises:
            IndexError: If ``index`` is outside the dataset.
        """
        counts = self._get_frame_counts()
        total = sum(counts)
        if index < 0:
            index += total
        if not 0 <= index < total:
            raise IndexError("Index außerhalb des Datasets")
        for sample_idx, num_frames in enumerate(counts):
            if index < num_frames:
                return sample_idx, index
            index -= num_frames
        # Unreachable given the range check above; kept as a safety net.
        raise IndexError("Index außerhalb des Datasets")

    def __getitem__(self, index):
        # Find the video + frame that the flat index refers to.
        sample_idx, frame_idx = self._locate(index)
        video_path, label_path = self.samples[sample_idx]
        return self._get_sample(video_path, label_path, frame_idx)

    def _load_coords(self, label_path, frame_idx, width, height):
        """Build the normalized ``(num_bugs, 2)`` label tensor for one frame.

        Raises:
            ValueError: If a ``hexbug`` id is negative or does not fit into
                the label array (only possible when ``max_bugs`` is set too
                small or the id is negative).
        """
        df = self._label_cache.get(label_path)
        if df is None:
            df = pd.read_csv(label_path)
            self._label_cache[label_path] = df
        frame_data = df[df['t'] == frame_idx]

        if self.max_bugs is not None:
            num_bugs = int(self.max_bugs)
        elif frame_data.empty:
            # max() of an empty column is NaN, which cannot size an array.
            num_bugs = 0
        else:
            num_bugs = int(frame_data['hexbug'].max()) + 1

        # -1 marks "no label for this object in this frame".
        coords = np.full((num_bugs, 2), -1.0, dtype=np.float32)

        for bug_id, x, y in zip(frame_data['hexbug'], frame_data['x'], frame_data['y']):
            bug_id = int(bug_id)
            if not 0 <= bug_id < num_bugs:
                raise ValueError(
                    f"hexbug id {bug_id} in {label_path} (frame {frame_idx}) "
                    f"does not fit into {num_bugs} label rows"
                )
            coords[bug_id, 0] = x / width
            coords[bug_id, 1] = y / height

        return torch.tensor(coords, dtype=torch.float32)

    def _get_sample(self, video_path, label_path, frame_idx):
        """Decode one frame and return ``(transformed_image, coords)``.

        Raises:
            ValueError: If the frame cannot be read from the video.
        """
        # Load the frame with OpenCV; always release the capture handle.
        cap = cv2.VideoCapture(video_path)
        try:
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
            ret, frame = cap.read()
        finally:
            cap.release()
        if not ret:
            raise ValueError(f"Frame {frame_idx} konnte nicht gelesen werden.")

        # OpenCV decodes to BGR; PIL and the transforms expect RGB.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = Image.fromarray(frame)
        original_width, original_height = image.size

        # Labels are normalized by the original (pre-resize) frame size.
        coords = self._load_coords(label_path, frame_idx, original_width, original_height)

        image = self.transform(image)
        return image, coords


IndentationError: expected an indented block after 'if' statement on line 21 (3855890932.py, line 23)

In [3]:
# Videos and their CSV labels are read from the same directory; only the
# first video found is used here.
dataset = VideoTrackingDataset(
    video_dir="../training",
    label_dir="../training",
    limit=1,
)
# Shuffled mini-batches of 32 frames.
loader = torch.utils.data.DataLoader(
    dataset=dataset,
    batch_size=32,
    shuffle=True,
)



In [5]:
from torch import optim

# Zelle 4: Training
# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ConvNet().to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

for epoch in range(1):
    running_loss = 0.0
    num_batches = 0
    for imgs, labels in loader:
        imgs, labels = imgs.to(device), labels.to(device)
        preds = model(imgs)

        # The dataset yields labels shaped (batch, num_bugs, 2) while the model
        # emits (batch, num_outputs). Flatten the labels so both are 2-D and
        # refuse to continue on a mismatch: MSELoss would otherwise broadcast
        # the operands and silently optimise a meaningless loss.
        labels = labels.reshape(labels.size(0), -1)
        if labels.shape != preds.shape:
            raise ValueError(
                f"label shape {tuple(labels.shape)} does not match "
                f"prediction shape {tuple(preds.shape)}"
            )

        loss = criterion(preds, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        raise RuntimeError("loader yielded no batches; nothing to train on")
    # Report the mean over the epoch rather than only the last batch's loss.
    print(f"Epoch {epoch+1}, Loss: {running_loss / num_batches:.4f}")


TypeError: 'DataLoader' object is not subscriptable

In [None]:
# Serialize the trained model object to "conv_net.pth" in the working directory.
# NOTE(review): this passes the whole module to torch.save rather than
# model.state_dict(); presumably loading the file then requires the ConvNet
# class to be importable under the same name and full (non weights-only)
# unpickling -- confirm whether saving the state_dict would be preferable.
torch.save(model, "conv_net.pth")