In [1]:
import os
import sys
if os.getcwd().endswith('notebooks'):
    os.chdir('..')
sys.path.insert(0, os.getcwd())
print(f"Aktuelles Arbeitsverzeichnis: {os.getcwd()}")

Aktuelles Arbeitsverzeichnis: c:\Users\hp\OneDrive\Desktop\DBU\wai81-ai-theory\ml_picture_recognition


In [2]:
import torch
from utils.model import HockeyActionModelResNet18, HockeyActionModelResNet34, R3D18Model
from utils.dataloader import HockeyDataset  # dein HockeyDataset
from torch.utils.data import DataLoader
import pandas as pd
import os
import numpy as np
from torchvision import transforms
from PIL import Image


# --- Setup ---
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# --- Test Dataset Klasse anpassen: Labels sind ja 'Unknown' ---
class HockeyTestDataset(torch.utils.data.Dataset):
    def __init__(self, csv_file, frames_root, transform=None, frames_per_clip=100):
        self.df = pd.read_csv(csv_file)
        self.frames_root = frames_root
        self.frames_per_clip = frames_per_clip

        if transform is None:
            self.transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor()
            ])
        else:
            self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        clip_name = self.df.iloc[idx]['clip_name']
        frames_dir = os.path.join(self.frames_root, clip_name.replace('.mp4', ''))

        frames = sorted([
            os.path.join(frames_dir, f)
            for f in os.listdir(frames_dir)
            if f.endswith('.jpg')
        ])

        if not frames:
            raise ValueError(f"Keine Frames gefunden: {frames_dir}")

        if len(frames) >= self.frames_per_clip:
            frames = frames[-self.frames_per_clip:]
        else:
            last_frame = frames[-1]
            while len(frames) < self.frames_per_clip:
                frames.insert(0, last_frame)

        images = []
        for frame_path in frames:
            image = Image.open(frame_path).convert("RGB")
            image = self.transform(image)
            images.append(image)

        video_tensor = torch.stack(images)  # [T, 3, 224, 224]

        return video_tensor, clip_name

# --- Lade Test-Daten ---
test_dataset = HockeyTestDataset(
    csv_file='data/labels_test.csv',
    frames_root='data/test_frames/',
    frames_per_clip=100
)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# --- Modell auswählen ---
model_paths = {
    'ResNet18_LSTM': 'models/best_resnet18_lstm.pth',
    'ResNet34_LSTM': 'models/best_resnet34_lstm.pth',
    'R3D18': 'models/best_r3d18.pth'
}

model_classes = {
    'ResNet18_LSTM': HockeyActionModelResNet18,
    'ResNet34_LSTM': HockeyActionModelResNet34,
    'R3D18': R3D18Model
}

# --- Ergebnisse speichern ---
for model_name in model_paths.keys():
    print(f"\nPredicting with {model_name}...")

    # Modell initialisieren
    model = model_classes[model_name](num_classes=4)
    model.load_state_dict(torch.load(model_paths[model_name], map_location=device), strict=False)
    model = model.to(device)
    model.eval()

    results = []

    with torch.no_grad():
        for videos, clip_names in test_loader:
            videos = videos.to(device)
            outputs = model(videos)

            outputs = torch.sigmoid(outputs)
            preds = (outputs > 0.5).cpu().numpy().astype(int)  # [B, 4]

            for clip_name, pred in zip(clip_names, preds):
                labels = []
                if pred[0] == 1:
                    labels.append('Check')
                if pred[1] == 1:
                    labels.append('Neutral')
                if pred[2] == 1:
                    labels.append('Schuss')
                if pred[3] == 1:
                    labels.append('Tor')

                label_string = ",".join(labels) if labels else "None"
                results.append({
                    'clip_name': clip_name,
                    'predicted_labels': label_string
                })

    # --- Speichere Vorhersagen ---
    results_df = pd.DataFrame(results)
    results_df.to_csv(f'predictions_{model_name}.csv', index=False)

    print(f"Prediction CSV saved as predictions_{model_name}.csv")



Predicting with ResNet18_LSTM...


  model.load_state_dict(torch.load(model_paths[model_name], map_location=device), strict=False)


Prediction CSV saved as predictions_ResNet18_LSTM.csv

Predicting with ResNet34_LSTM...




Prediction CSV saved as predictions_ResNet34_LSTM.csv

Predicting with R3D18...




RuntimeError: Given groups=1, weight of size [64, 3, 3, 7, 7], expected input[1, 100, 3, 224, 224] to have 3 channels, but got 100 channels instead