In [None]:
import yaml
from pathlib import Path

# Notebook kernels do not define `__file__`, so the project root is resolved
# from the current working directory instead (one level above it), which is
# the same convention the evaluation cell further down uses.
base_dir = Path.cwd().parent
config_path = base_dir / "notebooks" / "00_config.yaml"
print(base_dir)

# Parse the shared YAML configuration into a plain dict
with open(config_path, "r") as file:
    config = yaml.safe_load(file)


In [None]:
# Imported here so this cell also runs on a fresh kernel (Restart & Run All)
from pathlib import Path

import torch

# Load logging and storage location names from the YAML configuration
tensorboard_logs = config["foldernames"]["tensorboard_logs"]
checkpoints = config["foldernames"]["checkpoints"]
logfiles = config["foldernames"]["logfiles"]
class_counts_file = config["foldernames"]["class_counts_file"]
sample_weights_file = config["foldernames"]["sample_weights_file"]

# Experiment identifiers used to build the directory layout below
# (may be adjusted depending on the inference setup)
experiment_group = config["data"]["experiment_group"]
experiment_id = config["data"]["experiment_id"]

# The per-user home prefix comes from the config, as in the evaluation cell
USER = config["data"]["user"]

# Example: directory for TensorBoard logs
# NOTE(review): the folder names are hard-coded here although `tensorboard_logs`
# and `checkpoints` are read from the config above — confirm which is intended.
tensorboard_dir = Path(f"/dss/dsshome1/08/{USER}/{experiment_group}/tensorboard_logs/{experiment_id}")
tensorboard_dir.mkdir(parents=True, exist_ok=True)

# Example: directory for checkpoints
checkpoint_dir = Path(f"/dss/dsshome1/08/{USER}/{experiment_group}/checkpoints")
checkpoint_dir.mkdir(parents=True, exist_ok=True)

# Example: load a checkpoint (if needed)
checkpoint_path = checkpoint_dir / f"{experiment_group}_{experiment_id}_best_siamese_unet_state.pth"
if not checkpoint_path.exists():
    print(f"Kein Checkpoint unter {checkpoint_path} gefunden.")
elif globals().get("model") is None:
    # No model has been built yet in this kernel; report instead of raising NameError
    print(f"Checkpoint gefunden, aber noch kein Modell definiert: {checkpoint_path}")
else:
    # map_location="cpu" lets a GPU-saved checkpoint load on any host;
    # load_state_dict then copies the values into the model's own parameters.
    model.load_state_dict(torch.load(checkpoint_path, map_location="cpu"))
    print(f"Checkpoint geladen: {checkpoint_path}")
    

In [None]:
from sklearn.metrics import accuracy_score, f1_score

In [4]:
import torch
import numpy as np
import yaml
from pathlib import Path
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from tqdm import tqdm

from utils.helperfunctions import get_data_folder
from utils.dataset import xView2Dataset, collate_fn, transform, image_transform
from model.siameseNetwork import SiameseUnet
from utils.helperfunctions import find_best_checkpoint, load_checkpoint

import os
# Project root is one directory above the current working directory
base_dir = Path.cwd().parent
config_path = base_dir.joinpath("notebooks", "00_config.yaml")
with config_path.open("r") as file:
    config = yaml.safe_load(file)

# Derive user- and experiment-specific locations from the config
data_cfg = config["data"]
USER = data_cfg["user"]
USER_HOME_PATH = Path(f"/dss/dsshome1/08/{USER}")
EXPERIMENT_GROUP = data_cfg["experiment_group"]
EXPERIMENT_ID = data_cfg["experiment_id"]
CHECKPOINTS_DIR = USER_HOME_PATH.joinpath(EXPERIMENT_GROUP, "checkpoints")

# Resolve the folders of the evaluation split
(
    DATA_ROOT,
    EVAL_ROOT,
    EVAL_IMG,
    EVAL_LABEL,
    EVAL_TARGET,
    EVAL_PNG_IMAGES,
) = get_data_folder(
    data_cfg["validation_name"],
    main_dataset=data_cfg["use_main_dataset"],
)

# Build the dataset that is evaluated below
test_dataset = xView2Dataset(
    png_path=EVAL_PNG_IMAGES,
    target_path=EVAL_TARGET,
    transform=transform(),
    image_transform=image_transform(),
)

# Prefer CUDA when it is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Verwende Gerät: {device}")

# Instantiate the network; wrap it for multi-GPU use when more than one GPU is visible
model = SiameseUnet(num_pre_classes=2, num_post_classes=6)
gpu_count = torch.cuda.device_count()
if gpu_count > 1:
    print(f"Verwende {gpu_count} GPUs!")
    model = torch.nn.DataParallel(model)
model = model.to(device)

# Look up the best checkpoint of this experiment and load it into the model.
# Eval mode is not set here; evaluate_model() switches the model to eval mode itself.
best_checkpoint_path = find_best_checkpoint(CHECKPOINTS_DIR, EXPERIMENT_ID)
model = load_checkpoint(model, best_checkpoint_path)

# Deterministic (unshuffled) loader over the evaluation dataset
loader_options = dict(
    batch_size=config["training"]["batch_size"],
    shuffle=False,
    num_workers=4,
    collate_fn=collate_fn,
    pin_memory=True,
)
test_dataloader = DataLoader(test_dataset, **loader_options)

def evaluate_model(model, dataloader, device):
    """Run ``model`` over ``dataloader`` and collect flattened predictions and labels.

    The model is put into eval mode and called as ``model(pre_images, post_images)``
    under ``torch.no_grad()``. Its output is sliced along dim 1: the first two
    channels are treated as the pre-disaster logits and all remaining channels as
    the post-disaster logits; the predicted class is the argmax over those channels.

    Args:
        model: Callable module taking ``(pre_images, post_images)`` and returning a
            4-D tensor with the class channels in dim 1.
        dataloader: Iterable yielding ``(pre_images, post_images, pre_labels,
            post_labels)`` batches. Labels are squeezed on dim 1 and cast to long.
        device: Device the input images are moved to before the forward pass.

    Returns:
        dict with keys ``'pre_preds'``, ``'pre_true'``, ``'post_preds'`` and
        ``'post_true'``, each a 1-D NumPy array over all evaluated pixels
        (an empty array when the dataloader yields no batches).
    """
    model.eval()
    # One NumPy array per batch is collected and concatenated once at the end;
    # extending a Python list element-by-element would create one Python object
    # per pixel, which is very slow and memory-hungry for full-resolution masks.
    collected = {
        'pre_preds': [],
        'pre_true': [],
        'post_preds': [],
        'post_true': [],
    }

    def _flatten(tensor):
        # reshape (rather than view) also handles non-contiguous tensors
        return tensor.reshape(-1).cpu().numpy()

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluiere"):
            # Unpack the batch
            pre_images, post_images, pre_labels, post_labels = batch

            # Only the images are needed on the device; the labels are merely
            # reshaped and copied into NumPy, so they can stay where they are.
            pre_images = pre_images.to(device)
            post_images = post_images.to(device)

            # Forward pass
            outputs = model(pre_images, post_images)

            # Assumption: the model returns one fused tensor, split here into pre/post.
            # NOTE(review): the model in this notebook is built with num_post_classes=6,
            # while the original comment spoke of 4 remaining channels — confirm the
            # channel layout of the model output.
            pre_output = outputs[:, :2, :, :]   # first 2 channels -> pre
            post_output = outputs[:, 2:, :, :]  # all remaining channels -> post

            # Predicted class per pixel, shape (B, H, W)
            pre_preds = torch.argmax(pre_output, dim=1)
            post_preds = torch.argmax(post_output, dim=1)

            # Bring labels into matching shape: (B, 1, H, W) -> (B, H, W)
            pre_labels = pre_labels.squeeze(1).long()
            post_labels = post_labels.squeeze(1).long()

            # Flatten and collect
            collected['pre_preds'].append(_flatten(pre_preds))
            collected['pre_true'].append(_flatten(pre_labels))
            collected['post_preds'].append(_flatten(post_preds))
            collected['post_true'].append(_flatten(post_labels))

    # np.concatenate rejects an empty list, so keep the empty-array result for that case
    return {
        key: (np.concatenate(chunks) if chunks else np.array([]))
        for key, chunks in collected.items()
    }

def _score(y_true, y_pred):
    """Return accuracy, weighted F1, macro F1 and the confusion matrix for one label set."""
    return (
        accuracy_score(y_true, y_pred),
        f1_score(y_true, y_pred, average='weighted'),
        f1_score(y_true, y_pred, average='macro'),
        confusion_matrix(y_true, y_pred),
    )


def _report(title, accuracy, f1_weighted, f1_macro, cm):
    """Format one metrics section as newline-joined text (no trailing newline)."""
    return "\n".join([
        f"{title}:",
        f"Accuracy: {accuracy:.4f}",
        f"F1 Score (weighted): {f1_weighted:.4f}",
        f"F1 Score (macro): {f1_macro:.4f}",
        "Confusion Matrix:",
        str(cm),
    ])


# Run the evaluation pass over the test dataloader
print("Starte Modell-Evaluierung...")
results = evaluate_model(model, test_dataloader, device)

# Compute the metrics for both prediction heads
pre_accuracy, pre_f1_weighted, pre_f1_macro, pre_cm = _score(
    results['pre_true'], results['pre_preds']
)
post_accuracy, post_f1_weighted, post_f1_macro, post_cm = _score(
    results['post_true'], results['post_preds']
)

pre_report = _report(
    "Pre-Disaster Performance", pre_accuracy, pre_f1_weighted, pre_f1_macro, pre_cm
)
post_report = _report(
    "Post-Disaster Performance", post_accuracy, post_f1_weighted, post_f1_macro, post_cm
)

# Show the results, each section preceded by a blank line
print("\n" + pre_report)
print("\n" + post_report)

# Persist the same report as a text file below the experiment group folder
results_dir = USER_HOME_PATH / EXPERIMENT_GROUP / "evaluation_results"
results_dir.mkdir(parents=True, exist_ok=True)
result_file = results_dir / f"{EXPERIMENT_ID}_evaluation_results.txt"

with open(result_file, 'w') as f:
    f.write("MODEL EVALUATION RESULTS\n")
    f.write("=======================\n\n")
    f.write(pre_report + "\n\n")
    f.write(post_report + "\n")

print(f"\nEvaluierungsergebnisse gespeichert in: {result_file}")

Verwende Gerät: cuda
Verwende 3 GPUs!
Loaded raw state_dict from /dss/dsshome1/08/di97ren/xView2_Subset/checkpoints/003_best_siamese_unet_state.pth
Checkpoint erfolgreich in DataParallel-Modell geladen.
Starte Modell-Evaluierung...


Evaluiere:   9%|▊         | 10/117 [00:18<02:39,  1.49s/it]