In [22]:
# System
import os
import json
import random
import shutil
import logging
from pathlib import Path
from typing import Dict, List, Tuple, Union, Optional, Any
from dataclasses import dataclass

# Data analysis & visualization
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2

# Image processing
from PIL import Image

# ML/DL Libraries
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, ConfusionMatrixDisplay
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split, Subset
import torch.optim as optim
import torchvision
import torchvision.transforms as T
from torchvision import transforms, models
from torchvision.datasets import ImageFolder

# Other
from tqdm import tqdm
from collections import Counter
import time
import pickle
from datetime import datetime

---
### 1. CONFIGURATION CLASS 
---

In [2]:
@dataclass
class Config:
    """Single place for hyperparameters, folder locations and file names.

    Creating an instance has a side effect: ``MODEL_FOLDER`` and
    ``SAMPLE_FOLDER`` are created on disk if they do not exist yet.
    """

    # --- Optimisation hyperparameters ---
    BATCH_SIZE: int = 16
    LEARNING_RATE: float = 1e-4
    EPOCHS: int = 10
    IMG_SIZE: int = 224          # images are resized to IMG_SIZE x IMG_SIZE
    SEED: int = 42

    # --- Folders ---
    DATA_FOLDER: Path = Path('popular_street_foods/dataset')
    MODEL_FOLDER: str = "models"
    SAMPLE_FOLDER: str = "sample"

    # --- File names ---
    PLOT_IMAGE_PATH: str = 'plot_image.png'
    HISTORY_PATH: str = "history.json"
    BEST_F1_MODEL_PATH: str = 'best_f1_model.pth'
    BEST_ACC_MODEL_PATH: str = 'best_acc_model.pth'
    BEST_LOSS_MODEL_PATH: str = 'best_loss_model.pth'

    # --- Training control ---
    PATIENCE: int = 5            # epochs without improvement before early stopping
    GAMMA: float = 0.1           # multiplicative factor of the LR scheduler

    # --- Prediction ---
    PREDICTION_THRESHOLD: float = 0.4
    UNKNOWN_LABEL: str = "unknown"

    # --- Data split ---
    TRAIN_SPLIT: float = 0.8     # fraction of the data used for training

    def __post_init__(self):
        """Make sure the output folders exist."""
        for folder in (self.MODEL_FOLDER, self.SAMPLE_FOLDER):
            os.makedirs(folder, exist_ok=True)

---
### 2. LOGGER SETUP
---

In [3]:
def setup_logger(name: str = __name__, level: int = logging.INFO) -> logging.Logger:
    """
    Return a logger that writes to ``training.log`` and to the console.

    The file handler uses UTF-8 and the requested ``level``; the console
    handler only lets WARNING and above through. Handlers are attached only
    when the logger has none yet, so calling this repeatedly (e.g. re-running
    the cell) does not duplicate output.

    Args:
        name: Name of the logger to configure.
        level: Level set on the logger and on the file handler.

    Returns:
        The configured logger.
    """
    log = logging.getLogger(name)
    log.setLevel(level)

    # Already configured earlier: keep the existing handlers untouched.
    if log.handlers:
        return log

    # Plain-text format (no emojis) to stay safe on Windows consoles.
    line_format = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    to_file = logging.FileHandler('training.log', encoding='utf-8')
    to_console = logging.StreamHandler()

    # (handler, threshold): full detail in the file, only warnings on screen.
    for handler, threshold in ((to_file, level), (to_console, logging.WARNING)):
        handler.setLevel(threshold)
        handler.setFormatter(line_format)
        log.addHandler(handler)

    return log

---
### 3. UTILITY FUNCTIONS
---

In [4]:
def seed_everything(seed: int) -> None:
    """
    Seed every random number generator used in this notebook.

    Covers Python's ``random``, NumPy and PyTorch (CPU and CUDA), and switches
    cuDNN to deterministic mode with autotuning disabled.

    Args:
        seed: Seed value applied to all generators.
    """
    # NOTE(review): the running interpreter presumably read PYTHONHASHSEED at
    # startup already, so this mainly matters for child processes - confirm.
    os.environ["PYTHONHASHSEED"] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Trade cuDNN speed for repeatable results.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

def get_device() -> torch.device:
    """
    Pick the compute device and log the choice.

    Returns:
        torch.device: ``cuda`` when CUDA is available, otherwise ``cpu``.
    """
    backend = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(backend)
    logging.getLogger(__name__).info(f"Using device: {device}")
    return device


---
### 4. DATA HANDLING CLASS
---

In [5]:
class DataManager:
    """Builds the image transforms and the train/validation DataLoaders."""
    
    def __init__(self, config: Config):
        """
        Store the configuration and check that it has the required fields.

        Args:
            config: Configuration object.

        Raises:
            AttributeError: If ``config`` has no ``DATA_FOLDER`` or no
                ``BATCH_SIZE`` attribute.
        """
        self.config = config
        self.logger = logging.getLogger(__name__)
        
        # Fail early on a config object that lacks what this class reads.
        if not hasattr(config, 'DATA_FOLDER'):
            raise AttributeError("Config must have DATA_FOLDER attribute")
        if not hasattr(config, 'BATCH_SIZE'):
            raise AttributeError("Config must have BATCH_SIZE attribute")
            
    def get_transforms(self) -> Tuple[T.Compose, T.Compose]:
        """
        Create the transform pipelines for training and validation.

        Both pipelines resize to ``IMG_SIZE`` x ``IMG_SIZE``, convert to a
        tensor and normalise. The training pipeline additionally applies
        random flips, a random rotation and colour jitter.
        
        Returns:
            Tuple ``(train_transform, val_transform)``.
        """
        target_size = (self.config.IMG_SIZE, self.config.IMG_SIZE)
        norm_mean = [0.485, 0.456, 0.406]
        norm_std = [0.229, 0.224, 0.225]

        train_transform = T.Compose([
            T.Resize(target_size),
            T.RandomHorizontalFlip(0.5),
            T.RandomVerticalFlip(0.5),
            T.RandomRotation(45),
            T.ToTensor(),
            T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.05),
            T.Normalize(mean=norm_mean, std=norm_std)
        ])

        val_transform = T.Compose([
            T.Resize(target_size),
            T.ToTensor(),
            T.Normalize(mean=norm_mean, std=norm_std)
        ])
        
        return train_transform, val_transform
    
    def create_dataloaders(self) -> Tuple[DataLoader, DataLoader, int, List[str]]:
        """
        Create the training and validation DataLoaders using a stratified split.

        The folder is indexed twice, once per transform pipeline; both views
        are then restricted to disjoint index sets with ``Subset``.
        
        Returns:
            Tuple ``(train_loader, val_loader, num_classes, class_names)``.
        """
        train_transform, val_transform = self.get_transforms()
        
        # Two views of the same folder that differ only in their transform.
        train_dataset = ImageFolder(self.config.DATA_FOLDER, transform=train_transform)
        val_dataset = ImageFolder(self.config.DATA_FOLDER, transform=val_transform)

        class_names = train_dataset.classes
        num_classes = len(class_names)
        
        # Labels come from the index built when the folder was scanned;
        # indexing the dataset instead would decode every single image.
        targets = train_dataset.targets
        indices = list(range(len(train_dataset)))
        
        # Stratified split so both subsets keep the class proportions.
        train_indices, val_indices = train_test_split(
            indices,
            test_size=1-self.config.TRAIN_SPLIT,
            stratify=targets,
            random_state=self.config.SEED
        )
        
        train_subset = Subset(train_dataset, train_indices)
        val_subset = Subset(val_dataset, val_indices)
        
        # os.cpu_count() may return None; fall back to one worker, cap at 8.
        num_workers = min(os.cpu_count() or 1, 8)
        use_pinned_memory = torch.cuda.is_available()
        
        train_loader = DataLoader(
            train_subset, 
            batch_size=self.config.BATCH_SIZE, 
            shuffle=True, 
            num_workers=num_workers,
            pin_memory=use_pinned_memory
        )
        
        val_loader = DataLoader(
            val_subset, 
            batch_size=self.config.BATCH_SIZE, 
            shuffle=False, 
            num_workers=num_workers,
            pin_memory=use_pinned_memory
        )
        
        self.logger.info('[STRATIFIED] Split completed')
        self.logger.info(f'Train Dataset: {len(train_subset)} images')
        self.logger.info(f'Val Dataset: {len(val_subset)} images')
        self.logger.info(f'Number of classes: {num_classes}')
        
        return train_loader, val_loader, num_classes, class_names

---
### 5. MODEL MANAGER CLASS
---

In [6]:
class ModelManager:
    """Creates the model and optimizer and restores saved weights."""
    
    def __init__(self, config: Config, num_classes: int, device: torch.device):
        """
        Args:
            config: Configuration object (``LEARNING_RATE`` and
                ``MODEL_FOLDER`` are read by the methods below).
            num_classes: Number of output classes of the classifier head.
            device: Device the model is moved to.
        """
        self.config = config
        self.num_classes = num_classes
        self.device = device
        self.logger = logging.getLogger(__name__)
        
    def create_model(self) -> nn.Module:
        """
        Create a ResNet-18 with default pretrained weights and a new head.

        The final fully connected layer is replaced by a linear layer with
        ``num_classes`` outputs, and the model is moved to ``self.device``.
        
        Returns:
            The configured PyTorch model.
        """
        model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        model.fc = nn.Linear(model.fc.in_features, self.num_classes)
        model = model.to(self.device)
        
        self.logger.info(f"Model created with {self.num_classes} classes")
        return model
    
    def create_optimizer(self, model: nn.Module) -> torch.optim.Optimizer:
        """
        Create an Adam optimizer over all parameters of ``model``.
        
        Args:
            model: PyTorch model whose parameters are optimised.
            
        Returns:
            Adam optimizer using ``config.LEARNING_RATE``.
        """
        return optim.Adam(model.parameters(), lr=self.config.LEARNING_RATE)
    
    def load_model(self, model: nn.Module, model_path: str) -> nn.Module:
        """
        Load saved weights from ``MODEL_FOLDER`` into ``model``.

        If the file does not exist, a warning is logged and the model is
        returned unchanged.
        
        Args:
            model: PyTorch model that receives the weights.
            model_path: File name of the checkpoint inside ``MODEL_FOLDER``.
            
        Returns:
            The same model instance.
        """
        full_path = os.path.join(self.config.MODEL_FOLDER, model_path)

        if not os.path.exists(full_path):
            self.logger.warning(f"Model file {full_path} not found")
            return model

        # Map tensors straight onto the target device. This also covers
        # checkpoints saved on a GPU index this machine does not have.
        state_dict = torch.load(full_path, weights_only=True, map_location=self.device)
        model.load_state_dict(state_dict)
        model.to(self.device)
        self.logger.info(f"Model loaded from {full_path}")

        return model

---
### 6. METRICS
---

In [7]:
@dataclass
class Metrics:
    """Value object holding loss, accuracy and F1 of one evaluation."""
    loss: float
    accuracy: float
    f1: float
    
    def __str__(self) -> str:
        """Render all three values with four decimals on one line."""
        parts = (
            "Loss: {:.4f}".format(self.loss),
            "Acc: {:.4f}".format(self.accuracy),
            "F1: {:.4f}".format(self.f1),
        )
        return ", ".join(parts)

class MetricsCalculator:
    """Stateless helper that turns raw predictions into a ``Metrics`` object."""
    
    @staticmethod
    def calculate_metrics(predictions: np.ndarray, labels: np.ndarray, 
                         total_loss: float, dataset_size: int) -> Metrics:
        """
        Compute average loss, accuracy and weighted F1.
        
        Args:
            predictions: Predicted labels.
            labels: True labels.
            total_loss: Loss summed over the dataset.
            dataset_size: Number of samples; ``total_loss`` is divided by it.
            
        Returns:
            ``Metrics`` with the averaged loss, the accuracy and the
            weighted-average F1 score.
        """
        return Metrics(
            loss=total_loss / dataset_size,
            accuracy=accuracy_score(labels, predictions),
            f1=f1_score(labels, predictions, average='weighted'),
        )

---
### 7. METRICS MANAGER
---

In [18]:
class MetricsManager:
    """Saves evaluation results as JSON and pickle and loads them back."""
    
    def __init__(self, config):
        """
        Args:
            config: Configuration object; ``config.SAMPLE_FOLDER`` is the
                parent of the ``evaluation_results`` folder used here.
        """
        self.config = config
        self.results_folder = Path(config.SAMPLE_FOLDER) / "evaluation_results"
        # parents=True so this also works when SAMPLE_FOLDER does not exist yet.
        self.results_folder.mkdir(parents=True, exist_ok=True)
        
    def save_evaluation_results(self, 
                              results: Dict, 
                              class_names: List[str],
                              model_name: str = "model",
                              dataset_type: str = "validation") -> str:
        """
        Save evaluation results as a JSON file and as a pickle file.

        Relies on ``convert_numpy_types`` and ``NumpyEncoder``, which must be
        defined elsewhere in the notebook before this method is called.
        
        Args:
            results: Dictionary with the keys ``loss``, ``accuracy``, ``f1``,
                ``predictions`` and ``labels``.
            class_names: Names of the classes.
            model_name: Name of the model (part of the file name).
            dataset_type: Kind of dataset (part of the file name).
            
        Returns:
            Path of the written JSON file.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"{model_name}_{dataset_type}_results_{timestamp}"
        
        # Convert everything to JSON-friendly types up front.
        save_data = {
            'model_name': model_name,
            'dataset_type': dataset_type,
            'timestamp': timestamp,
            'class_names': class_names,
            'metrics': {
                'loss': convert_numpy_types(results['loss']),
                'accuracy': convert_numpy_types(results['accuracy']),
                'f1': convert_numpy_types(results['f1'])
            },
            'predictions': convert_numpy_types(results['predictions']),
            'labels': convert_numpy_types(results['labels']),
            'num_classes': len(class_names),
            'dataset_size': len(results['labels'])
        }
        
        # Serialise to a string first so a failure never leaves a truncated
        # file behind. NumpyEncoder is a second safety net for numpy types.
        json_path = self.results_folder / f"{filename}.json"
        try:
            json_text = json.dumps(save_data, indent=2, ensure_ascii=False, cls=NumpyEncoder)
        except TypeError as e:
            # ASCII-only message: consoles with a legacy code page cannot
            # print emojis.
            print(f"[WARNING] JSON serialization error: {e}")
            # Best-effort fallback that can actually succeed: stringify
            # whatever the JSON encoder does not understand.
            json_text = json.dumps(save_data, indent=2, ensure_ascii=False, default=str)
        with open(json_path, 'w', encoding='utf-8') as f:
            f.write(json_text)
            
        # Pickle keeps the full Python objects regardless of JSON support.
        pickle_path = self.results_folder / f"{filename}.pkl"
        with open(pickle_path, 'wb') as f:
            pickle.dump(save_data, f)
            
        print("[SAVED] Evaluation results saved:")
        print(f"  JSON: {json_path}")
        print(f"  Pickle: {pickle_path}")
        
        return str(json_path)
    
    def load_evaluation_results(self, file_path: Union[str, Path]) -> Dict:
        """
        Load evaluation results from a ``.json`` or ``.pkl`` file.

        ``predictions`` and ``labels`` are converted to numpy arrays and a
        short summary is printed.
        
        Args:
            file_path: Path to the file (.json or .pkl).
            
        Returns:
            Dictionary with the evaluation results.

        Raises:
            ValueError: If the file suffix is neither ``.json`` nor ``.pkl``.
        """
        file_path = Path(file_path)
        
        if file_path.suffix == '.json':
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        elif file_path.suffix == '.pkl':
            # SECURITY: unpickling can execute arbitrary code. Only load
            # pickle files that were written by this notebook.
            with open(file_path, 'rb') as f:
                data = pickle.load(f)
        else:
            raise ValueError("File must be .json or .pkl")
            
        # Restore the array form used by the rest of the notebook.
        data['predictions'] = np.array(data['predictions'])
        data['labels'] = np.array(data['labels'])
        
        print(f"[LOADED] Evaluation results from {file_path}")
        print(f"  Model: {data['model_name']}")
        print(f"  Dataset: {data['dataset_type']}") 
        print(f"  Timestamp: {data['timestamp']}")
        print(f"  Accuracy: {data['metrics']['accuracy']:.4f}")
        
        return data
    
    def list_saved_results(self) -> List[Dict]:
        """
        List all saved JSON results, newest first, and print an overview.

        Files that cannot be read or lack expected keys are reported on the
        console and skipped.

        Returns:
            One summary dictionary per readable file with the keys ``file``,
            ``model_name``, ``dataset_type``, ``timestamp``, ``accuracy``
            and ``f1``.
        """
        results = []
        
        for json_file in self.results_folder.glob("*.json"):
            try:
                with open(json_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                results.append({
                    'file': str(json_file),
                    'model_name': data['model_name'],
                    'dataset_type': data['dataset_type'],
                    'timestamp': data['timestamp'],
                    'accuracy': data['metrics']['accuracy'],
                    'f1': data['metrics']['f1']
                })
            except Exception as e:
                # Deliberately best-effort: one broken file must not hide the rest.
                print(f"Error reading {json_file}: {e}")
                
        # Timestamps are formatted YYYYmmdd_HHMMSS, so string order is time order.
        results.sort(key=lambda x: x['timestamp'], reverse=True)
        
        print(f"\n[AVAILABLE RESULTS] Found {len(results)} saved evaluation results:")
        print("-" * 80)
        for position, result in enumerate(results, start=1):
            print(f"{position:2d}. {result['model_name']} ({result['dataset_type']}) - "
                  f"Acc: {result['accuracy']:.4f} - {result['timestamp']}")
        print("-" * 80)
        
        return results

---
### 8. TRAINER CLASS
---

In [9]:
class Trainer:
    """Runs training, validation, checkpointing and early stopping."""
    
    def __init__(self, config: Config, model: nn.Module, optimizer: torch.optim.Optimizer,
                 device: torch.device):
        """
        Initialise the trainer.
        
        Args:
            config: Configuration object (``GAMMA``, ``EPOCHS``, ``PATIENCE``
                and ``MODEL_FOLDER`` are read).
            model: PyTorch model to train.
            optimizer: Optimizer updating the model parameters.
            device: Device (CPU/GPU) batches are moved to.
        """
        self.config = config
        self.model = model
        self.optimizer = optimizer
        self.device = device
        self.logger = logging.getLogger(__name__)
        
        # Best validation values seen so far.
        self.best_loss = float('inf')
        self.best_accuracy = 0.0
        self.best_f1 = 0.0
        
        # Per-epoch metric history for both splits.
        self.history = {
            "train": {"loss": [], "accuracy": [], "f1": []}, 
            "val": {"loss": [], "accuracy": [], "f1": []}
        }
        
        # mode='max' because fit() steps the scheduler with the validation F1.
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=self.config.GAMMA, patience=3
        )
    
    @torch.no_grad()
    def evaluate(self, data_loader):
        """
        Evaluate the model on ``data_loader`` without computing gradients.

        Args:
            data_loader: Iterable yielding ``(images, labels)`` batches.

        Returns:
            Dictionary with ``loss`` (per-sample average), ``accuracy``,
            ``f1`` (weighted), ``predictions`` and ``labels`` (lists of
            plain Python ints).

        Raises:
            ValueError: If the loader yields no samples.
        """
        self.model.eval()
        all_preds = []
        all_labels = []
        total_loss = 0.0
        n_samples = 0
        
        for images, labels in tqdm(data_loader, desc="Evaluating", leave=False):
            images, labels = images.to(self.device), labels.to(self.device)
            outputs = self.model(images)
            loss = F.cross_entropy(outputs, labels)

            # cross_entropy returns the batch mean; weight by batch size so
            # the final value is a true per-sample average.
            batch_size = images.size(0)
            total_loss += loss.item() * batch_size
            n_samples += batch_size
            
            # tolist() yields Python ints, which serialise to JSON as-is.
            all_preds.extend(outputs.argmax(dim=1).tolist())
            all_labels.extend(labels.tolist())

        if n_samples == 0:
            raise ValueError("data_loader yielded no samples")
        
        # Divide by the samples actually seen; this stays correct for
        # loaders that drop batches or use a sampler.
        return {
            'loss': total_loss / n_samples, 
            'accuracy': float(accuracy_score(all_labels, all_preds)), 
            'f1': float(f1_score(all_labels, all_preds, average='weighted')),
            'predictions': all_preds,
            'labels': all_labels
        }
    
    def train_epoch(self, train_loader: DataLoader) -> Dict[str, float]:
        """
        Train the model for one epoch.
        
        Args:
            train_loader: DataLoader yielding ``(images, labels)`` batches.
            
        Returns:
            Dictionary with ``loss``, ``accuracy`` and ``f1``; the loss is
            averaged per sample, the same way ``evaluate()`` does it.

        Raises:
            ValueError: If the loader yields no samples.
        """
        self.model.train()
        
        total_loss = 0.0
        n_samples = 0
        all_preds = []
        all_labels = []
        
        loop = tqdm(train_loader, desc="Training", leave=False)
        for images, labels in loop:
            images, labels = images.to(self.device), labels.to(self.device)
            
            # Forward pass
            outputs = self.model(images)
            loss = F.cross_entropy(outputs, labels)
            
            # Backward pass
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Sample-weighted accumulation: a smaller last batch must not
            # count as much as a full one.
            batch_loss = loss.item()
            batch_size = images.size(0)
            total_loss += batch_loss * batch_size
            n_samples += batch_size
            
            # Predictions for the epoch-level metrics
            predicted = outputs.argmax(dim=1)
            all_preds.extend(predicted.tolist())
            all_labels.extend(labels.tolist())
            
            # Live feedback in the progress bar
            batch_acc = (predicted == labels).float().mean().item()
            loop.set_postfix(loss=batch_loss, accuracy=batch_acc)

        if n_samples == 0:
            raise ValueError("train_loader yielded no samples")
        
        return {
            'loss': total_loss / n_samples,
            'accuracy': float(accuracy_score(all_labels, all_preds)),
            'f1': float(f1_score(all_labels, all_preds, average='weighted'))
        }
    
    def save_best_models(self, val_metrics, epoch):
        """
        Save a checkpoint for every validation metric that improved.

        File names come from the config (``BEST_LOSS_MODEL_PATH``,
        ``BEST_ACC_MODEL_PATH``, ``BEST_F1_MODEL_PATH``); the previous
        hard-coded names are used when the config lacks them.

        Args:
            val_metrics: Dictionary with ``loss``, ``accuracy`` and ``f1``.
            epoch: Zero-based epoch index (printed one-based).

        Returns:
            True if at least one metric improved, otherwise False.
        """
        updated = False
        
        # Best loss (lower is better)
        if val_metrics['loss'] < self.best_loss:
            self.best_loss = val_metrics['loss']
            self._save_model(getattr(self.config, 'BEST_LOSS_MODEL_PATH', 'best_loss_model.pth'))
            print(f"[BEST LOSS] Model saved at epoch {epoch + 1} (Loss: {val_metrics['loss']:.4f})")
            updated = True
        
        # Best accuracy (higher is better)
        if val_metrics['accuracy'] > self.best_accuracy:
            self.best_accuracy = val_metrics['accuracy']
            self._save_model(getattr(self.config, 'BEST_ACC_MODEL_PATH', 'best_acc_model.pth'))
            print(f"[BEST ACC] Model saved at epoch {epoch + 1} (Acc: {val_metrics['accuracy']:.4f})")
            updated = True
            
        # Best F1 (higher is better)
        if val_metrics['f1'] > self.best_f1:
            self.best_f1 = val_metrics['f1']
            self._save_model(getattr(self.config, 'BEST_F1_MODEL_PATH', 'best_f1_model.pth'))
            print(f"[BEST F1] Model saved at epoch {epoch + 1} (F1: {val_metrics['f1']:.4f})")
            updated = True
            
        return updated
    
    def _save_model(self, filename: str) -> None:
        """Write the model's state dict to ``MODEL_FOLDER/filename``."""
        path = os.path.join(self.config.MODEL_FOLDER, filename)
        torch.save(self.model.state_dict(), path)
    
    def fit(self, train_loader, val_loader):
        """
        Main training loop with checkpointing, LR scheduling and early stopping.

        Training stops early once ``PATIENCE`` consecutive epochs brought no
        improvement in any of validation loss, accuracy or F1.

        Args:
            train_loader: DataLoader for training.
            val_loader: DataLoader for validation.

        Returns:
            The history dictionary with per-epoch metrics for both splits.
        """
        early_stop_counter = 0
        
        for epoch in range(self.config.EPOCHS):
            start_time = time.time()
            
            print(f"{'='*20} Epoch {epoch + 1}/{self.config.EPOCHS} {'='*20}")
            
            train_metrics = self.train_epoch(train_loader)
            val_metrics = self.evaluate(val_loader)
            
            # Record this epoch in the history.
            for split, metrics in (('train', train_metrics), ('val', val_metrics)):
                for key in ('loss', 'accuracy', 'f1'):
                    self.history[split][key].append(metrics[key])
            
            # Checkpoint on improvement; otherwise count towards early stop.
            if self.save_best_models(val_metrics, epoch):
                early_stop_counter = 0
            else:
                early_stop_counter += 1
                print(f"[NO IMPROVEMENT] Early stop counter: {early_stop_counter}/{self.config.PATIENCE}")
            
            epoch_time = time.time() - start_time
            current_lr = self.optimizer.param_groups[0]['lr']
            
            print(f"Train - Loss: {train_metrics['loss']:.4f}, "
                  f"Acc: {train_metrics['accuracy']:.4f}, F1: {train_metrics['f1']:.4f}")
            print(f"Val   - Loss: {val_metrics['loss']:.4f}, "
                  f"Acc: {val_metrics['accuracy']:.4f}, F1: {val_metrics['f1']:.4f}")
            print(f"LR: {current_lr:.6f}, Time: {epoch_time:.2f}s")
            
            # Early stopping
            if early_stop_counter >= self.config.PATIENCE:
                print("[EARLY STOP] Training stopped due to no improvement!")
                break
            
            # Reduce the learning rate when the validation F1 plateaus.
            self.scheduler.step(val_metrics['f1'])
            
            print()  # blank line between epochs
        
        return self.history

---
### 8. VISUALIZATION CLASS
---

In [10]:
class Visualizer:
    """Handhabt alle Visualisierungen."""
    
    def __init__(self, config):
        """
        Store the configuration used by the plotting methods.

        Args:
            config: Configuration object; the methods of this class read
                ``config.SAMPLE_FOLDER`` as the folder for saved files.
        """
        self.config = config
    
    def plot_history(self, history, save: bool = True):
        """
        Plot loss, accuracy and F1 curves for training and validation.

        Draws three panels side by side. With ``save=True`` the figure is
        written to ``training_history.png`` and the history itself to
        ``history.json``, both inside ``SAMPLE_FOLDER``.
        
        Args:
            history: Dictionary with the keys ``train`` and ``val``, each
                holding lists under ``loss``, ``accuracy`` and ``f1``.
            save: Whether the plot and the history are written to disk.
        """
        train_history, val_history = history['train'], history['val']
        epochs = range(1, len(train_history['loss']) + 1)

        # (history key, name used in the legend, name used for axis and title)
        panels = (
            ('loss', 'Loss', 'Loss'),
            ('accuracy', 'Accuracy', 'Accuracy'),
            ('f1', 'F1', 'F1 Score'),
        )

        plt.figure(figsize=(15, 5))
        for position, (key, legend_name, axis_name) in enumerate(panels, start=1):
            plt.subplot(1, 3, position)
            plt.plot(epochs, train_history[key], 'bo-', label=f'Train {legend_name}', linewidth=2)
            plt.plot(epochs, val_history[key], 'ro-', label=f'Val {legend_name}', linewidth=2)
            plt.xlabel('Epoch')
            plt.ylabel(axis_name)
            plt.title(f'Training and Validation {axis_name}')
            plt.legend()
            plt.grid(True, alpha=0.3)

        plt.tight_layout()

        if save:
            out_dir = self.config.SAMPLE_FOLDER
            plot_path = os.path.join(out_dir, 'training_history.png')
            history_path = os.path.join(out_dir, 'history.json')

            plt.savefig(plot_path, dpi=300, bbox_inches='tight')
            with open(history_path, 'w') as f:
                json.dump(history, f, indent=4)

            print(f"[SAVED] Training plots saved to: {plot_path}")
            print(f"[SAVED] History saved to: {history_path}")

        plt.show()
    
    def plot_confusion_matrix(self, y_true, y_pred, class_names, 
                            title="Confusion Matrix", save: bool = True, 
                            save_name: Optional[str] = None, show: bool = True):
        """
        Plot the confusion matrix as absolute counts and row-normalised.

        Labels are assumed to be integer indices into ``class_names``. The
        matrix always covers every entry of ``class_names``, so classes that
        are absent from the data appear as empty rows/columns.
        
        Args:
            y_true: True labels.
            y_pred: Predicted labels.
            class_names: Names of the classes, ordered by label index.
            title: Title prefix of both panels.
            save: Whether the figure is written to ``SAMPLE_FOLDER``.
            save_name: Custom file name; a timestamped name is used if None.
            show: Whether the figure is displayed (it is closed otherwise).
            
        Returns:
            Path of the saved file, or None when ``save`` is False.
        """
        # Fix the label set explicitly so the matrix shape always matches
        # class_names, even if a class never occurs in y_true/y_pred.
        label_ids = np.arange(len(class_names))
        cm = confusion_matrix(y_true, y_pred, labels=label_ids)
        
        # Row-normalise; rows without any true sample stay at 0 instead of
        # turning into NaN through a division by zero.
        row_totals = cm.sum(axis=1, keepdims=True)
        cm_normalized = np.divide(
            cm.astype(float), row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        )
        
        # Two panels: absolute counts and normalised values
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))
        
        # Panel 1: absolute confusion matrix
        disp1 = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
        disp1.plot(ax=ax1, cmap='Blues', xticks_rotation=45)
        ax1.set_title(f'{title} - Absolute Values')
        ax1.grid(False)
        
        # Panel 2: normalised confusion matrix
        disp2 = ConfusionMatrixDisplay(confusion_matrix=cm_normalized, display_labels=class_names)
        disp2.plot(ax=ax2, cmap='Blues', xticks_rotation=45, values_format='.2f')
        ax2.set_title(f'{title} - Normalized')
        ax2.grid(False)
        
        fig.tight_layout()
        
        saved_path = None  # returned as-is when nothing is saved
        if save:
            if save_name is None:
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                save_name = f"confusion_matrix_{timestamp}.png"
            
            saved_path = os.path.join(self.config.SAMPLE_FOLDER, save_name)
            fig.savefig(saved_path, dpi=300, bbox_inches='tight')
            print(f"[SAVED] Confusion Matrix saved to: {saved_path}")
        
        if show:
            plt.show()
        else:
            # Close exactly the figure created here, not whichever is current.
            plt.close(fig)
        
        # Print the textual classification report as well
        self.print_classification_report(y_true, y_pred, class_names)
        
        return saved_path
    
    def print_classification_report(self, y_true, y_pred, class_names):
        """
        Print a detailed classification report.

        Labels are assumed to be integer indices into ``class_names``. Only
        classes that occur in ``y_true`` or ``y_pred`` are reported, and each
        is listed under its own name even when other classes are absent.
        
        Args:
            y_true: True labels.
            y_pred: Predicted labels.
            class_names: Names of the classes, ordered by label index.
        """
        from sklearn.metrics import classification_report
        
        # Labels that actually occur. Passing them explicitly keeps names and
        # rows aligned when a class is missing from the data; relying on the
        # default would raise or mislabel rows in that case.
        present = np.unique(np.concatenate([
            np.asarray(y_true).ravel(), np.asarray(y_pred).ravel()
        ]))
        present_names = [class_names[int(label)] for label in present]

        print("\n" + "="*80)
        print("DETAILED CLASSIFICATION REPORT")
        print("="*80)
        
        report = classification_report(
            y_true, y_pred, 
            labels=present,
            target_names=present_names,
            digits=4,
            zero_division=0
        )
        print(report)
        
        # Additional overall metrics (zero_division=0 matches the report above
        # and avoids warnings for classes that were never predicted).
        overall_accuracy = accuracy_score(y_true, y_pred)
        macro_f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)
        weighted_f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)
        
        print("\nOVERALL METRICS:")
        print(f"  Overall Accuracy: {overall_accuracy:.4f}")
        print(f"  Macro F1 Score:   {macro_f1:.4f}")
        print(f"  Weighted F1 Score: {weighted_f1:.4f}")
        
        # Per-class accuracy: diagonal over row total. A class without any
        # true sample has no defined accuracy and is shown as nan, without
        # triggering a divide-by-zero warning.
        cm = confusion_matrix(y_true, y_pred, labels=present)
        row_totals = cm.sum(axis=1)
        per_class_acc = np.divide(
            cm.diagonal().astype(float), row_totals,
            out=np.full(len(present), np.nan),
            where=row_totals != 0,
        )
        
        print("\nPER-CLASS ACCURACY:")
        for class_name, acc in zip(present_names, per_class_acc):
            print(f"  {class_name[:20]:<20}: {acc:.4f}")
    
    def plot_model_performance_summary(self, history, y_true, y_pred, class_names):
        """
        Render, save and show a six-panel performance dashboard.

        Top row: train/validation curves for loss, accuracy and F1 score.
        Bottom row: row-normalized confusion matrix, per-class accuracy bars
        (best class green, worst class red) and a text summary.

        The figure is written to
        ``<config.SAMPLE_FOLDER>/performance_dashboard.png``.

        Args:
            history: Dict with 'train' and 'val' entries, each holding
                per-epoch sequences under 'loss', 'accuracy' and 'f1'.
            y_true: True labels. Assumed to be integer class indices that
                index into ``class_names`` (the confusion matrix rows are
                matched to ``class_names`` by position).
            y_pred: Predicted labels, same encoding as ``y_true``.
            class_names: Class names, ordered by class index.
        """
        fig, axes = plt.subplots(2, 3, figsize=(20, 12))

        train_history = history['train']
        val_history = history['val']
        epochs = range(1, len(train_history['loss']) + 1)

        # Top row: one train-vs-validation curve panel per tracked metric.
        curve_panels = (
            ('loss', 'Loss', 'Training Loss'),
            ('accuracy', 'Accuracy', 'Training Accuracy'),
            ('f1', 'F1 Score', 'Training F1 Score'),
        )
        for ax, (metric_key, ylabel, panel_title) in zip(axes[0], curve_panels):
            ax.plot(epochs, train_history[metric_key], 'b-', label='Train', linewidth=2)
            ax.plot(epochs, val_history[metric_key], 'r-', label='Validation', linewidth=2)
            ax.set_xlabel('Epoch')
            ax.set_ylabel(ylabel)
            ax.set_title(panel_title)
            ax.legend()
            ax.grid(True, alpha=0.3)

        # Pin the label set so the matrix always has one row/column per entry
        # of class_names, even when a class is absent from y_true and y_pred.
        tick_marks = np.arange(len(class_names))
        cm = confusion_matrix(y_true, y_pred, labels=tick_marks)

        # Classes without any true sample would divide by zero; divide by 1
        # instead so their (all-zero) row stays zero rather than NaN.
        row_totals = cm.sum(axis=1)
        has_support = row_totals > 0
        safe_totals = np.where(has_support, row_totals, 1)
        cm_normalized = cm.astype('float') / safe_totals[:, np.newaxis]
        per_class_acc = cm.diagonal() / safe_totals

        # Confusion matrix (bottom left)
        ax_cm = axes[1, 0]
        im = ax_cm.imshow(cm_normalized, interpolation='nearest', cmap='Blues')
        ax_cm.set_title('Confusion Matrix (Normalized)')
        fig.colorbar(im, ax=ax_cm, fraction=0.046, pad=0.04)

        # Class labels are truncated to keep the ticks readable.
        short_labels = [name[:8] for name in class_names]
        ax_cm.set_xticks(tick_marks)
        ax_cm.set_xticklabels(short_labels, rotation=45)
        ax_cm.set_yticks(tick_marks)
        ax_cm.set_yticklabels(short_labels)
        ax_cm.set_ylabel('True Label')
        ax_cm.set_xlabel('Predicted Label')

        # Per-class accuracy (bottom middle)
        ax_acc = axes[1, 1]
        bars = ax_acc.bar(tick_marks, per_class_acc, color='skyblue', alpha=0.7)
        ax_acc.set_xlabel('Class')
        ax_acc.set_ylabel('Accuracy')
        ax_acc.set_title('Per-Class Accuracy')
        ax_acc.set_xticks(tick_marks)
        ax_acc.set_xticklabels(short_labels, rotation=45)
        ax_acc.grid(True, alpha=0.3)

        # Rank only classes that actually occur in y_true, so an unsupported
        # class is never reported as the "worst" one.
        ranked = np.where(has_support, per_class_acc, np.nan) if has_support.any() else per_class_acc
        best_idx = int(np.nanargmax(ranked))
        worst_idx = int(np.nanargmin(ranked))
        bars[best_idx].set_color('green')
        bars[worst_idx].set_color('red')

        # Summary statistics (bottom right)
        ax_summary = axes[1, 2]
        ax_summary.axis('off')

        overall_acc = accuracy_score(y_true, y_pred)
        macro_f1 = f1_score(y_true, y_pred, average='macro')
        weighted_f1 = f1_score(y_true, y_pred, average='weighted')

        best_class = class_names[best_idx]
        worst_class = class_names[worst_idx]

        summary_text = f"""
        FINAL PERFORMANCE SUMMARY
        
        Overall Accuracy: {overall_acc:.3f}
        Macro F1 Score: {macro_f1:.3f}
        Weighted F1 Score: {weighted_f1:.3f}
        
        Best Performing Class:
        {best_class} ({per_class_acc[best_idx]:.3f})
        
        Worst Performing Class:
        {worst_class} ({per_class_acc[worst_idx]:.3f})
        
        Total Training Epochs: {len(epochs)}
        Final Train Loss: {train_history['loss'][-1]:.4f}
        Final Val Loss: {val_history['loss'][-1]:.4f}
        """

        ax_summary.text(0.1, 0.9, summary_text, transform=ax_summary.transAxes,
                        fontsize=12, verticalalignment='top', fontfamily='monospace',
                        bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.8))

        fig.tight_layout()

        # Config.__post_init__ only creates MODEL_FOLDER, so make sure the
        # output folder exists before writing into it.
        os.makedirs(self.config.SAMPLE_FOLDER, exist_ok=True)
        dashboard_path = os.path.join(self.config.SAMPLE_FOLDER, 'performance_dashboard.png')
        fig.savefig(dashboard_path, dpi=300, bbox_inches='tight')
        print(f"[SAVED] Performance Dashboard saved to: {dashboard_path}")

        plt.show()

    
    def create_confusion_matrix_from_data(self, evaluation_data: Dict,
                                        title: Optional[str] = None,
                                        save: bool = True, show: bool = True,
                                        save_name: Optional[str] = None) -> Optional[str]:
        """
        Plot a confusion matrix from previously loaded evaluation data.

        Thin adapter around ``self.plot_confusion_matrix``: it pulls labels,
        predictions and class names out of ``evaluation_data`` and forwards
        them together with the plotting options.

        Args:
            evaluation_data: Dict providing 'labels', 'predictions' and
                'class_names'; 'model_name' and 'dataset_type' are read only
                when no title is given.
            title: Plot title. Defaults to "<model_name> - <Dataset_Type>".
            save: Whether the plot should be saved.
            show: Whether the plot should be displayed.
            save_name: Custom file name for the saved plot.

        Returns:
            Whatever ``plot_confusion_matrix`` returns (the saved file path
            when save=True, per the annotated return type).
        """
        labels, predictions, names = (
            evaluation_data[key] for key in ('labels', 'predictions', 'class_names')
        )

        plot_title = title
        if plot_title is None:
            model = evaluation_data['model_name']
            split = evaluation_data['dataset_type'].title()
            plot_title = f"{model} - {split}"

        # Arguments are passed positionally, in the order the plotting method expects.
        return self.plot_confusion_matrix(
            labels, predictions, names, plot_title, save, save_name, show
        )
    
    def create_performance_comparison(self, evaluation_results: List[Dict],
                                    save: bool = True, show: bool = True,
                                    save_name: Optional[str] = None) -> Optional[str]:
        """
        Draw side-by-side bar charts comparing several evaluation results.

        One panel each for accuracy, F1 score and loss; every bar is
        annotated with its value.

        Args:
            evaluation_results: List of evaluation dicts. Each must provide
                'model_name', 'dataset_type' and a 'metrics' dict with
                'accuracy', 'f1' and 'loss'.
            save: Whether the plot should be saved to config.SAMPLE_FOLDER.
            show: Whether the plot should be displayed (otherwise the figure
                is closed).
            save_name: Custom file name; defaults to a timestamped
                "model_comparison_<YYYYmmdd_HHMMSS>.png".

        Returns:
            Path of the saved file, or None when nothing was saved or fewer
            than two results were supplied.
        """
        if len(evaluation_results) < 2:
            print("Need at least 2 evaluation results for comparison")
            return None

        # Collect everything up front so a malformed result fails before any
        # figure is created. (Named run_labels rather than `models` to avoid
        # shadowing the torchvision.models import.)
        run_labels = [
            f"{result['model_name']}\n({result['dataset_type']})"
            for result in evaluation_results
        ]
        panels = (
            ('accuracy', 'Accuracy', 'Accuracy Comparison', 'skyblue'),
            ('f1', 'F1 Score', 'F1 Score Comparison', 'lightgreen'),
            ('loss', 'Loss', 'Loss Comparison', 'lightcoral'),
        )
        series = {
            metric_key: [result['metrics'][metric_key] for result in evaluation_results]
            for metric_key, _, _, _ in panels
        }

        # Numeric x positions: with string categories, two runs sharing the
        # same label would be drawn on top of each other at one position.
        positions = list(range(len(run_labels)))

        fig, axes = plt.subplots(1, 3, figsize=(18, 6))
        for ax, (metric_key, ylabel, panel_title, color) in zip(axes, panels):
            values = series[metric_key]
            bars = ax.bar(positions, values, color=color, alpha=0.7)
            ax.set_title(panel_title)
            ax.set_ylabel(ylabel)
            ax.set_xticks(positions)
            ax.set_xticklabels(run_labels, rotation=45)
            ax.grid(True, alpha=0.3)

            # Print each value slightly above its bar.
            for bar, value in zip(bars, values):
                height = bar.get_height()
                ax.text(bar.get_x() + bar.get_width() / 2., height + height * 0.01,
                        f'{value:.3f}', ha='center', va='bottom', fontsize=10)

        fig.tight_layout()

        saved_path = None
        if save:
            if save_name is None:
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                save_name = f"model_comparison_{timestamp}.png"

            # Config.__post_init__ only creates MODEL_FOLDER; ensure the
            # target folder exists before saving.
            os.makedirs(self.config.SAMPLE_FOLDER, exist_ok=True)
            saved_path = os.path.join(self.config.SAMPLE_FOLDER, save_name)
            fig.savefig(saved_path, dpi=300, bbox_inches='tight')
            print(f"[SAVED] Model Comparison: {saved_path}")

        if show:
            plt.show()
        else:
            plt.close(fig)

        return saved_path
    
    def create_training_history_plot(self, history_data: Dict,
                                   title: Optional[str] = None,
                                   save: bool = True, show: bool = True,
                                   save_name: Optional[str] = None) -> Optional[str]:
        """
        Plot train/validation curves for loss, accuracy and F1 score.

        Args:
            history_data: Training history dict with 'train' and 'val'
                entries, each holding per-epoch sequences under 'loss',
                'accuracy' and 'f1'.
            title: Title prefix for the three panels (default
                "Training History").
            save: Whether the plot should be saved to config.SAMPLE_FOLDER.
            show: Whether the plot should be displayed (otherwise the figure
                is closed).
            save_name: Custom file name; defaults to a timestamped
                "training_history_<YYYYmmdd_HHMMSS>.png".

        Returns:
            Path of the saved file, or None when save is False.
        """
        train_history = history_data['train']
        val_history = history_data['val']
        epochs = range(1, len(train_history['loss']) + 1)

        if title is None:
            title = "Training History"

        # (history key, legend suffix, axis/title name) for each panel.
        panels = (
            ('loss', 'Loss', 'Loss'),
            ('accuracy', 'Accuracy', 'Accuracy'),
            ('f1', 'F1', 'F1 Score'),
        )

        fig, axes = plt.subplots(1, 3, figsize=(15, 5))
        for ax, (metric_key, legend_name, axis_name) in zip(axes, panels):
            ax.plot(epochs, train_history[metric_key], 'bo-',
                    label=f'Train {legend_name}', linewidth=2)
            ax.plot(epochs, val_history[metric_key], 'ro-',
                    label=f'Val {legend_name}', linewidth=2)
            ax.set_xlabel('Epoch')
            ax.set_ylabel(axis_name)
            ax.set_title(f'{title} - {axis_name}')
            ax.legend()
            ax.grid(True, alpha=0.3)

        fig.tight_layout()

        saved_path = None
        if save:
            if save_name is None:
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                save_name = f"training_history_{timestamp}.png"

            # Config.__post_init__ only creates MODEL_FOLDER; ensure the
            # target folder exists before saving.
            os.makedirs(self.config.SAMPLE_FOLDER, exist_ok=True)
            saved_path = os.path.join(self.config.SAMPLE_FOLDER, save_name)
            fig.savefig(saved_path, dpi=300, bbox_inches='tight')
            print(f"[SAVED] Training History: {saved_path}")

        if show:
            plt.show()
        else:
            plt.close(fig)

        return saved_path

---
### 9. EVALUATION WORKFLOW KLASSE
---

In [11]:
class EvaluationWorkflow:
    """Convenience layer tying together result persistence and visualization."""

    def __init__(self, config: Config):
        self.config = config
        self.metrics_manager = MetricsManager(config)
        self.visualizer = Visualizer(config)

    def save_training_results(self,
                            classifier,
                            model_name: str = "resnet18",
                            save_train: bool = True,
                            save_val: bool = True) -> Tuple[Optional[str], Optional[str]]:
        """
        Evaluate the classifier on its loaders and persist the results.

        Args:
            classifier: Object exposing ``trainer``, ``train_loader``,
                ``val_loader`` and ``class_names``.
            model_name: Name stored alongside the results.
            save_train: Evaluate and save the training split.
            save_val: Evaluate and save the validation split.

        Returns:
            ``(train_path, val_path)``; an entry is None when its split was
            skipped.
        """
        def evaluate_and_store(loader, dataset_type):
            # Run the evaluation, then hand the results to the metrics manager.
            outcome = classifier.trainer.evaluate(loader)
            return self.metrics_manager.save_evaluation_results(
                outcome, classifier.class_names, model_name, dataset_type
            )

        train_path = (
            evaluate_and_store(classifier.train_loader, "training") if save_train else None
        )
        val_path = (
            evaluate_and_store(classifier.val_loader, "validation") if save_val else None
        )
        return train_path, val_path

    def load_and_visualize(self,
                          result_file: Union[str, Path],
                          create_confusion_matrix: bool = True,
                          save_plots: bool = True,
                          show_plots: bool = True) -> Dict[str, Optional[str]]:
        """
        Load stored evaluation results and create the requested plots.

        Returns:
            Dict mapping plot kind ('confusion_matrix') to the value returned
            by the visualizer; empty when no plot was requested.
        """
        evaluation_data = self.metrics_manager.load_evaluation_results(result_file)

        if not create_confusion_matrix:
            return {}

        return {
            'confusion_matrix': self.visualizer.create_confusion_matrix_from_data(
                evaluation_data, save=save_plots, show=show_plots
            )
        }

    def compare_models(self,
                      result_files: List[Union[str, Path]],
                      save_comparison: bool = True,
                      show_comparison: bool = True) -> Optional[str]:
        """
        Load several stored results and plot them side by side.

        Returns:
            Whatever the visualizer's comparison plot returns (saved path or
            None).
        """
        loaded = [
            self.metrics_manager.load_evaluation_results(path) for path in result_files
        ]
        return self.visualizer.create_performance_comparison(
            loaded, save=save_comparison, show=show_comparison
        )


---
### 10. STANDALONE MODEL EVALUATOR KLASSE
---

In [12]:
class StandaloneModelEvaluator:
    """
    Loads saved models and evaluates them independently of a training run.
    """

    def __init__(self, config: Config):
        self.config = config
        self.device = get_device()

        # The data manager supplies the loaders plus class metadata.
        self.data_manager = DataManager(config)
        loader_bundle = self.data_manager.create_dataloaders()
        self.train_loader, self.val_loader, self.num_classes, self.class_names = loader_bundle

        self.model_manager = ModelManager(config, self.num_classes, self.device)
        self.eval_workflow = EvaluationWorkflow(config)

    def evaluate_saved_model(self,
                           model_path: str,
                           model_name: str = "loaded_model",
                           visualize: bool = True,
                           save_results: bool = True) -> Dict:
        """
        Load a saved model and evaluate it on the validation and training sets.

        Args:
            model_path: Path to the model file (.pth).
            model_name: Name used when storing results and titling plots.
            visualize: Whether confusion matrices should be plotted (and saved).
            save_results: Whether the evaluation results should be persisted.

        Returns:
            Dict with keys 'validation', 'training', 'class_names' and
            'model_name'; additionally 'saved_files' when save_results is True.
        """
        print(f"[EVALUATING] Loading model from {model_path}")

        fresh_model = self.model_manager.create_model()
        model = self.model_manager.load_model(fresh_model, model_path)

        # The Trainer constructor takes an optimizer; this one is a dummy that
        # only exists to satisfy that signature.
        trainer = Trainer(
            self.config, model, self.model_manager.create_optimizer(model), self.device
        )

        # Validation first, then training; this order is kept for every step below.
        split_loaders = (
            ("validation", self.val_loader),
            ("training", self.train_loader),
        )

        evaluated = {}
        for split_name, loader in split_loaders:
            print(f"[EVALUATING] Running evaluation on {split_name} set...")
            evaluated[split_name] = trainer.evaluate(loader)

        results = {
            **evaluated,
            'class_names': self.class_names,
            'model_name': model_name,
        }

        if save_results:
            print("[SAVING] Saving evaluation results...")
            results['saved_files'] = {
                split_name: self.eval_workflow.metrics_manager.save_evaluation_results(
                    split_results, self.class_names, model_name, split_name
                )
                for split_name, split_results in evaluated.items()
            }

        if visualize:
            print("[VISUALIZING] Creating visualizations...")
            for split_name, split_results in evaluated.items():
                self.eval_workflow.visualizer.plot_confusion_matrix(
                    split_results['labels'],
                    split_results['predictions'],
                    self.class_names,
                    title=f"{model_name} - {split_name.title()} Results",
                    save=True
                )

        return results


---
### 11. PREDICTOR CLASS
---

In [13]:
class Predictor:
    """Runs single-image predictions with a trained model."""

    def __init__(self, model: nn.Module, val_transform: transforms.Compose,
                 class_names: List[str], device: torch.device, config: Config):
        self.model = model
        self.val_transform = val_transform
        self.class_names = class_names
        self.device = device
        self.config = config

    @staticmethod
    def _load_rgb_image(source):
        """Turn a file path or a NumPy array into an RGB PIL image."""
        if isinstance(source, (str, os.PathLike)):
            if not os.path.exists(source):
                raise FileNotFoundError(f"{source} does not exist")
            # convert() returns a new image, so the file handle opened by
            # Image.open can be released right away instead of leaking.
            with Image.open(source) as opened:
                return opened.convert('RGB')

        if isinstance(source, np.ndarray):
            # Expand grayscale input (H, W) or (H, W, 1) to three channels.
            if source.ndim == 2:
                source = np.stack([source] * 3, axis=-1)
            elif source.ndim == 3 and source.shape[2] == 1:
                source = np.repeat(source, 3, axis=2)
            # NOTE(review): values are cast to uint8 as-is and the channel
            # order is taken as RGB; float images in [0, 1] or BGR arrays
            # (e.g. from OpenCV) would need conversion by the caller.
            return Image.fromarray(source.astype('uint8')).convert('RGB')

        raise TypeError("Input must be a file path (str) or image array (np.ndarray)")

    def predict_image(self, source: Union[str, os.PathLike, np.ndarray]) -> str:
        """
        Predict the class of a single image.

        Args:
            source: File path (str or path-like object) or image array.

        Returns:
            The predicted class name, or ``config.UNKNOWN_LABEL`` when the
            top softmax probability is below ``config.PREDICTION_THRESHOLD``.

        Raises:
            FileNotFoundError: If a path is given that does not exist.
            TypeError: If ``source`` is neither a path nor a NumPy array.
        """
        self.model.eval()

        image = self._load_rgb_image(source)

        # Add the batch dimension and move the tensor to the model's device.
        input_tensor = self.val_transform(image).unsqueeze(0).to(self.device)

        with torch.no_grad():
            outputs = self.model(input_tensor)
            probs = F.softmax(outputs, dim=1)
            confidence, predicted = torch.max(probs, 1)

        if confidence.item() < self.config.PREDICTION_THRESHOLD:
            return self.config.UNKNOWN_LABEL
        return self.class_names[predicted.item()]

---
### 12. UTILITY FUNKTIONEN
---

In [23]:
def quick_save_results(classifier, model_name="model"):
    """Shortcut: evaluate a trained classifier and store its train/val results."""
    return EvaluationWorkflow(classifier.config).save_training_results(
        classifier, model_name
    )

def quick_visualize_latest(config):
    """Shortcut: visualize the first entry returned by ``list_saved_results``.

    Returns the dict produced by ``load_and_visualize``, or None (after
    printing a notice) when no saved results exist.
    """
    workflow = EvaluationWorkflow(config)
    saved = workflow.metrics_manager.list_saved_results()

    if not saved:
        print("No saved results found!")
        return None

    # presumably list_saved_results is ordered newest-first — TODO confirm
    return workflow.load_and_visualize(saved[0]['file'])

def quick_compare_all(config, max_models: int = 5):
    """Shortcut: compare the stored evaluation results in one plot.

    Args:
        config: Config object used to build the evaluation workflow.
        max_models: Upper bound on how many saved results (taken from the
            front of ``list_saved_results``) are compared. Defaults to 5,
            the previously hard-coded limit.

    Returns:
        The value returned by ``compare_models``, or None (after printing a
        notice) when fewer than two saved results exist.
    """
    eval_workflow = EvaluationWorkflow(config)

    available = eval_workflow.metrics_manager.list_saved_results()
    if len(available) < 2:
        print("Need at least 2 saved results for comparison!")
        return None

    files = [entry['file'] for entry in available[:max_models]]
    return eval_workflow.compare_models(files)

def evaluate_model_from_file(model_path: str, config: Config = None, model_name: str = "loaded_model"):
    """
    Evaluate a saved model without going through a training run.

    Args:
        model_path: Path to the .pth model file.
        config: Config object; a default ``Config()`` is created when None.
        model_name: Name used when storing the results.

    Returns:
        The evaluation results dict from
        ``StandaloneModelEvaluator.evaluate_saved_model``.
    """
    effective_config = Config() if config is None else config
    return StandaloneModelEvaluator(effective_config).evaluate_saved_model(
        model_path, model_name
    )

def show_confusion_matrix_for_loaded_model(model_path):
    """Load a model from ``model_path`` and plot its validation confusion matrix.

    Builds a ``StreetFoodClassifier`` with a default ``Config``, loads the
    weights via ``load_and_evaluate`` (which also runs its own evaluation),
    then evaluates on the validation loader and saves the plot.
    """
    classifier = StreetFoodClassifier(Config())

    # Load the stored weights instead of training.
    classifier.load_and_evaluate(model_path)

    evaluation = classifier.trainer.evaluate(classifier.val_loader)
    plot_kwargs = {
        'y_true': evaluation['labels'],
        'y_pred': evaluation['predictions'],
        'class_names': classifier.class_names,
        'title': f"Results for {model_path}",
        'save': True,
    }
    classifier.visualizer.plot_confusion_matrix(**plot_kwargs)

class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that understands NumPy scalars and arrays."""

    # (numpy type, converter) pairs, checked in order.
    _CONVERTERS = (
        (np.integer, int),
        (np.floating, float),
        (np.ndarray, lambda array: array.tolist()),
        (np.bool_, bool),
    )

    def default(self, obj):
        """Map NumPy values to built-in types; defer everything else to the base class."""
        for numpy_type, to_builtin in self._CONVERTERS:
            if isinstance(obj, numpy_type):
                return to_builtin(obj)
        return super().default(obj)

def convert_numpy_types(obj: Any) -> Any:
    """
    Recursively convert NumPy data types to built-in Python types.

    NumPy integers, floats and booleans become ``int``/``float``/``bool``,
    arrays become (nested) lists, and dicts, lists and tuples are walked
    recursively. Tuples come back as lists. NumPy scalar dict keys are
    converted as well, so the result can be passed to ``json.dumps``.

    Args:
        obj: Object to convert.

    Returns:
        The converted object; values of any other type are returned as-is.
    """
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, dict):
        # Only NumPy scalar keys are converted; other keys (e.g. tuples) are
        # kept untouched so they stay hashable.
        numpy_scalars = (np.integer, np.floating, np.bool_)
        return {
            (convert_numpy_types(key) if isinstance(key, numpy_scalars) else key):
                convert_numpy_types(value)
            for key, value in obj.items()
        }
    if isinstance(obj, (list, tuple)):
        return [convert_numpy_types(item) for item in obj]
    return obj


---
### 13. MAIN APPLICATION CLASS
---

In [15]:
class StreetFoodClassifier:
    """Main application object for street food classification."""

    def __init__(self, config: Config):
        self.config = config
        self.logger = setup_logger()

        # Seed before anything stochastic (data split, model init) happens.
        seed_everything(config.SEED)
        self.device = get_device()

        self.data_manager = DataManager(config)
        self.visualizer = Visualizer(config)

        # Loaders plus class metadata.
        loader_bundle = self.data_manager.create_dataloaders()
        self.train_loader, self.val_loader, self.num_classes, self.class_names = loader_bundle

        # Model, optimizer and trainer.
        self.model_manager = ModelManager(config, self.num_classes, self.device)
        self.model = self.model_manager.create_model()
        self.optimizer = self.model_manager.create_optimizer(self.model)
        self.trainer = Trainer(config, self.model, self.optimizer, self.device)

        # Set once a model has been trained or loaded.
        self.predictor = None

    def _refresh_predictor(self):
        """Rebuild ``self.predictor`` around the current model.

        Returns the validation transform so callers can reuse it.
        """
        _, val_transform = self.data_manager.get_transforms()
        self.predictor = Predictor(
            self.model, val_transform, self.class_names, self.device, self.config
        )
        return val_transform

    def train(self) -> Dict:
        """
        Run the training loop and prepare the predictor.

        Returns:
            The training history returned by ``Trainer.fit``.
        """
        self.logger.info("Starting training...")
        history = self.trainer.fit(self.train_loader, self.val_loader)
        self.logger.info("Training completed!")

        self._refresh_predictor()
        return history

    def load_and_evaluate(self, model_path: str) -> None:
        """
        Load model weights, prepare the predictor and log evaluation metrics.

        The evaluation runs over every image under ``config.DATA_FOLDER``
        using the validation transform.

        Args:
            model_path: Path to the model file.
        """
        self.model = self.model_manager.load_model(self.model, model_path)
        # NOTE(review): self.trainer was built with the model object created
        # in __init__; this relies on load_model returning that same object —
        # confirm.
        val_transform = self._refresh_predictor()

        full_dataset = ImageFolder(root=self.config.DATA_FOLDER, transform=val_transform)
        full_loader = DataLoader(
            full_dataset,
            batch_size=self.config.BATCH_SIZE,
            shuffle=False,
            num_workers=4,
        )

        metrics = self.trainer.evaluate(full_loader)
        self.logger.info(f"Test Results: {metrics}")

    def predict(self, image_path: str) -> str:
        """
        Predict the class of one image.

        Args:
            image_path: Path to the image.

        Returns:
            The predicted class name.

        Raises:
            RuntimeError: If no model has been trained or loaded yet.
        """
        if self.predictor is not None:
            return self.predictor.predict_image(image_path)

        raise RuntimeError("Model not trained or loaded yet!")

---
### 14. BEISPIEL NUTZUNGEN
---

In [16]:
def example_after_training(classifier):
    """Example: store the results right after training and plot them."""

    # 1. Persist train/validation results (the returned paths are not needed here).
    quick_save_results(classifier, "my_resnet18_experiment")

    # 2. Visualize the validation performance immediately.
    fresh_validation = classifier.trainer.evaluate(classifier.val_loader)
    classifier.visualizer.plot_confusion_matrix(
        fresh_validation['labels'],
        fresh_validation['predictions'],
        classifier.class_names,
        title="Current Training Results",
    )

def example_load_and_analyze():
    """Example: load stored results and visualize the first one listed."""

    workflow = EvaluationWorkflow(Config())

    # 1. Look up what has been saved so far.
    stored = workflow.metrics_manager.list_saved_results()
    if not stored:
        return

    # 2. Visualize the first entry (presumably the newest — TODO confirm ordering).
    workflow.load_and_visualize(stored[0]['file'])

def example_evaluate_saved_model():
    """Example: fully re-evaluate a saved model from disk."""

    config = Config()

    # Take the checkpoint name from the config instead of repeating the
    # literal, so the example follows Config.BEST_F1_MODEL_PATH.
    results = evaluate_model_from_file(
        model_path=config.BEST_F1_MODEL_PATH,
        config=config,
        model_name="best_f1_loaded"
    )

    print("Evaluation completed!")
    print(f"Validation Accuracy: {results['validation']['accuracy']:.4f}")
    print(f"Training Accuracy: {results['training']['accuracy']:.4f}")

def example_compare_models():
    """Example: compare up to three stored evaluation results."""

    workflow = EvaluationWorkflow(Config())

    stored = workflow.metrics_manager.list_saved_results()
    if len(stored) < 2:
        # A comparison needs at least two results.
        return

    # Compare the first three entries.
    workflow.compare_models([entry['file'] for entry in stored[:3]])

---
### 15. Main
---