## double

In [None]:
"""ViT + CNN-CBAM + Fusion + Quantum Hybrid Network
ERBMAHE Dataset Classification
Binary Classification: Abnormal vs Normal

MODIFICATIONS:
- Using ORIGINAL 2D-CBAM (Channel + Spatial Attention on 2D feature maps)
- Train whole model from epoch 1 (no staged freezing)
- Keeps Focal Loss, EarlyStopping, CosineAnnealingLR, Quantum layer, everything else.
"""

import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
import seaborn as sns
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Import PyTorch & torchvision
try:
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from torch.utils.data import Dataset, DataLoader
    from torchvision import transforms, models
    print("‚úì PyTorch & torchvision imported successfully")
except Exception as e:
    print(f"‚ùå Error importing PyTorch/torchvision: {e}")
    sys.exit(1)

# Transformers (ViT)
try:
    from transformers import ViTForImageClassification, ViTImageProcessor, ViTModel
    print("‚úì Transformers imported successfully")
except ImportError:
    print("üì¶ Installing Transformers...")
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "transformers"])
    from transformers import ViTForImageClassification, ViTImageProcessor, ViTModel
    print("‚úì Transformers installed successfully")

# PennyLane (quantum)
try:
    import pennylane as qml
    print("‚úì PennyLane imported successfully")
except ImportError:
    print("üì¶ Installing PennyLane...")
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "pennylane"])
    import pennylane as qml
    print("‚úì PennyLane installed successfully")

# Check GPU
if not torch.cuda.is_available():
    raise RuntimeError("CUDA is not available. Please run this script on a machine with CUDA-enabled GPU and proper drivers.")
device = torch.device('cuda')
print(f"üîß Using device: {device}")
print(f"üîß PyTorch version: {torch.__version__}")

# Config
MODEL_NAME = "google/vit-base-patch16-224"
CLASSES = ['Abnormal', 'Normal']
N_QUBITS = 4  # quantum qubits

# ============================================================================
# ORIGINAL 2D-CBAM (Channel Attention + Spatial Attention)
# ============================================================================
class ChannelAttention(nn.Module):
    def __init__(self, in_channels, reduction=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        
        self.fc = nn.Sequential(
            nn.Conv2d(in_channels, in_channels // reduction, 1, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels // reduction, in_channels, 1, bias=False)
        )
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        avg_out = self.fc(self.avg_pool(x))
        max_out = self.fc(self.max_pool(x))
        out = avg_out + max_out
        return self.sigmoid(out)

class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()
        padding = (kernel_size - 1) // 2
        self.conv = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x_cat = torch.cat([avg_out, max_out], dim=1)
        out = self.conv(x_cat)
        return self.sigmoid(out)

class CBAM(nn.Module):
    def __init__(self, in_channels, reduction=16, kernel_size=7):
        super(CBAM, self).__init__()
        self.channel_attention = ChannelAttention(in_channels, reduction)
        self.spatial_attention = SpatialAttention(kernel_size)
    
    def forward(self, x):
        # Channel attention
        x = x * self.channel_attention(x)
        # Spatial attention
        x = x * self.spatial_attention(x)
        return x

# ============================================================================
# CNN + CBAM BRANCH (ResNet18 backbone -> CBAM -> Global Pool)
# ============================================================================
class CNN_CBAM_Branch(nn.Module):
    def __init__(self, pretrained=True):
        super(CNN_CBAM_Branch, self).__init__()
        resnet = models.resnet18(pretrained=pretrained)
        # Extract features up to layer4 (before avgpool)
        # ResNet structure: conv1, bn1, relu, maxpool, layer1, layer2, layer3, layer4, avgpool, fc
        self.backbone = nn.Sequential(*list(resnet.children())[:-2])  # up to layer4 (outputs 512 x 7 x 7)
        
        # Apply CBAM on the 2D feature maps
        self.cbam = CBAM(512, reduction=16, kernel_size=7)
        
        # Global average pooling to get 1D features
        self.global_pool = nn.AdaptiveAvgPool2d(1)
    
    def forward(self, x):
        # x: (B, 3, H, W)
        feat = self.backbone(x)          # (B, 512, 7, 7)
        feat = self.cbam(feat)           # (B, 512, 7, 7) with attention applied
        feat = self.global_pool(feat)    # (B, 512, 1, 1)
        feat = feat.view(feat.size(0), -1)  # (B, 512)
        return feat

# ============================================================================
# Quantum Circuit & Layer
# ============================================================================
class QuantumCircuit:
    def __init__(self, n_qubits=4):
        self.n_qubits = n_qubits
        self.dev = qml.device('default.qubit', wires=n_qubits)
    
    def circuit(self, inputs, weights):
        for i in range(self.n_qubits):
            qml.RX(inputs[i], wires=i)
            qml.RZ(inputs[i], wires=i)
        for i in range(self.n_qubits - 1):
            qml.CRX(weights[i], wires=[i, i+1])
        qml.CRX(weights[self.n_qubits-1], wires=[self.n_qubits-1, 0])
        return [qml.expval(qml.PauliZ(i)) for i in range(self.n_qubits)]
    
    def create_qnode(self, weights):
        @qml.qnode(self.dev, interface='torch')
        def qnode(inputs):
            return self.circuit(inputs, weights)
        return qnode

class QuantumLayer(nn.Module):
    def __init__(self, input_dim, n_qubits=4):
        super(QuantumLayer, self).__init__()
        self.n_qubits = n_qubits
        self.feature_compress = nn.Linear(input_dim, n_qubits)
        self.q_weights = nn.Parameter(torch.randn(n_qubits) * 0.01)
        self.qc = QuantumCircuit(n_qubits)
        self.feature_expand = nn.Linear(n_qubits, input_dim)
        self.skip_alpha = nn.Parameter(torch.tensor(0.1))
    
    def forward(self, x):
        identity = x
        batch_size = x.size(0)
        x_compressed = self.feature_compress(x)  # (B, n_qubits)
        qnode = self.qc.create_qnode(self.q_weights)
        q_out_list = []
        for i in range(batch_size):
            q_input = torch.tanh(x_compressed[i]) * np.pi
            q_result = torch.stack(qnode(q_input))
            q_result = q_result.float()
            q_out_list.append(q_result)
        q_out = torch.stack(q_out_list)  # (B, n_qubits)
        output = self.feature_expand(q_out)  # (B, input_dim)
        output = output + self.skip_alpha * identity
        return output

# ============================================================================
# Fusion Hybrid Model (ViT + CNN-CBAM -> Fusion -> Quantum -> Classifier)
# ============================================================================
class ViTQuantumHybrid(nn.Module):
    def __init__(self, model_name, num_classes=2, n_qubits=4):
        super(ViTQuantumHybrid, self).__init__()
        # ViT backbone
        self.vit = ViTModel.from_pretrained(model_name)
        vit_dim = self.vit.config.hidden_size  # typically 768
        
        # CNN branch with 2D-CBAM
        self.cnn_branch = CNN_CBAM_Branch(pretrained=True)
        cnn_dim = 512
        
        # Fusion projection
        self.fusion_proj = nn.Linear(vit_dim + cnn_dim, vit_dim)
        
        # Quantum enhancement
        self.quantum_layer = QuantumLayer(vit_dim, n_qubits=n_qubits)
        
        # Classifier
        self.classifier = nn.Sequential(
            nn.Linear(vit_dim, vit_dim // 2),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(vit_dim // 2, num_classes)
        )
    
    def forward(self, pixel_values):
        """
        pixel_values: tensor (B, 3, H, W) ‚Äî produced by ViTImageProcessor
        """
        # ViT features
        vit_out = self.vit(pixel_values=pixel_values)
        vit_features = vit_out.last_hidden_state[:, 0]  # (B, vit_dim)

        # CNN features with 2D-CBAM attention
        cnn_features = self.cnn_branch(pixel_values)    # (B, 512)

        # Fuse and project
        fused = torch.cat([vit_features, cnn_features], dim=1)  # (B, vit_dim + 512)
        fused = self.fusion_proj(fused)  # (B, vit_dim)

        # Quantum enhancement
        quantum_features = self.quantum_layer(fused)  # (B, vit_dim)
        
        # Classification
        logits = self.classifier(quantum_features)
        return logits

# ============================================================================
# Focal Loss
# ============================================================================
class FocalLoss(nn.Module):
    def __init__(self, alpha=None, gamma=2.0, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction
    
    def forward(self, inputs, targets):
        ce_loss = F.cross_entropy(inputs, targets, reduction='none')
        p_t = torch.exp(-ce_loss)
        focal_loss = (1 - p_t) ** self.gamma * ce_loss
        
        if self.alpha is not None:
            if isinstance(self.alpha, (float, int)):
                alpha_t = self.alpha
            else:
                alpha_t = self.alpha[targets]
            focal_loss = alpha_t * focal_loss
        
        if self.reduction == 'mean':
            return focal_loss.mean()
        elif self.reduction == 'sum':
            return focal_loss.sum()
        else:
            return focal_loss

# ============================================================================
# Early Stopping
# ============================================================================
class EarlyStopping:
    def __init__(self, patience=10, min_delta=0, mode='max', verbose=True):
        self.patience = patience
        self.min_delta = min_delta
        self.mode = mode
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.best_epoch = 0
    
    def __call__(self, current_score, epoch):
        if self.best_score is None:
            self.best_score = current_score
            self.best_epoch = epoch
            if self.verbose:
                print(f"  ‚úì Initial best score: {current_score:.4f}")
            return False
        
        if self.mode == 'max':
            improved = current_score > (self.best_score + self.min_delta)
        else:
            improved = current_score < (self.best_score - self.min_delta)
        
        if improved:
            self.best_score = current_score
            self.best_epoch = epoch
            self.counter = 0
            if self.verbose:
                print(f"  ‚úì New best score: {current_score:.4f}")
        else:
            self.counter += 1
            if self.verbose:
                print(f"  No improvement. Patience: {self.counter}/{self.patience}")
            if self.counter >= self.patience:
                self.early_stop = True
                if self.verbose:
                    print(f"\n‚ö†Ô∏è Early stopping triggered!")
                    print(f"   Best score: {self.best_score:.4f} at epoch {self.best_epoch}")
        return self.early_stop

# ============================================================================
# DATASET
# ============================================================================
class ERBMAHEDataset(Dataset):
    def __init__(self, dataframe, processor, augment=False):
        self.df = dataframe.reset_index(drop=True)
        self.processor = processor
        self.augment = augment
        self.aug_transform = transforms.Compose([
            transforms.RandomRotation(10),
            transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)),
            transforms.ColorJitter(brightness=0.05, contrast=0.05, saturation=0.0),
            transforms.RandomHorizontalFlip(p=0.5),
        ])
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        img_path = self.df.loc[idx, 'image_path']
        label = self.df.loc[idx, 'class_id']
        image = Image.open(img_path).convert('RGB')
        
        if self.augment:
            image = self.aug_transform(image)
        
        inputs = self.processor(images=image, return_tensors="pt")
        pixel_values = inputs['pixel_values'].squeeze(0)
        return pixel_values, label

# ============================================================================
# TRAIN / VALIDATION FUNCTIONS
# ============================================================================

def train_epoch(model, dataloader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_labels = []
    
    pbar = tqdm(dataloader, desc='Training')
    for pixel_values, labels in pbar:
        pixel_values, labels = pixel_values.to(device), labels.to(device)
        
        optimizer.zero_grad()
        logits = model(pixel_values)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        _, predicted = logits.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())
        
        pbar.set_postfix({
            'loss': f'{running_loss/(pbar.n+1):.4f}',
            'acc': f'{100.*correct/total:.2f}%'
        })
    
    _, _, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='weighted', zero_division=0)
    return running_loss / len(dataloader), 100. * correct / total, f1

def validate(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        pbar = tqdm(dataloader, desc='Validation')
        for pixel_values, labels in pbar:
            pixel_values, labels = pixel_values.to(device), labels.to(device)
            logits = model(pixel_values)
            loss = criterion(logits, labels)
            
            running_loss += loss.item()
            _, predicted = logits.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            
            pbar.set_postfix({
                'loss': f'{running_loss/(pbar.n+1):.4f}',
                'acc': f'{100.*correct/total:.2f}%'
            })
    
    _, _, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='weighted', zero_division=0)
    return running_loss / len(dataloader), 100. * correct / total, f1, all_preds, all_labels

# ============================================================================
# MAIN
# ============================================================================

def main():
    print("="*70)
    print("ViT + CNN-2D-CBAM + Fusion + Quantum Hybrid Network")
    print("ERBMAHE Dataset Classification")
    print("Binary Classification: Abnormal vs Normal")
    print("USING ORIGINAL 2D-CBAM + FOCAL LOSS + QUANTUM ENHANCEMENT")
    print("="*70)

    # Dataset path (update if needed)
    data_path = 'D:/training/archive/ICMR_datasets_ERBMAHE'

    print(f"\nü§ñ Base Model: {MODEL_NAME}")
    print(f"‚öõÔ∏è  Quantum Qubits: {N_QUBITS}")
    print(f"üìä Classes: {CLASSES}")

    print("\nüìÅ Loading dataset...")
    data_list = []
    for class_name in CLASSES:
        class_path = os.path.join(data_path, class_name)
        if os.path.exists(class_path):
            for img_file in os.listdir(class_path):
                if img_file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
                    data_list.append({
                        'image_path': os.path.join(class_path, img_file),
                        'label': class_name,
                        'class_id': CLASSES.index(class_name)
                    })
        else:
            print(f"‚ö†Ô∏è Warning: {class_path} not found!")
    
    df = pd.DataFrame(data_list)
    print(f"üìä Total images: {len(df)}")
    print("\nClass distribution:")
    print(df['label'].value_counts())

    # Class weights for focal loss
    class_counts = df['label'].value_counts().sort_index().values
    total_samples = len(df)
    class_weights = total_samples / (len(CLASSES) * class_counts)
    class_weights = torch.FloatTensor(class_weights).to(device)
    print(f"\n‚öñÔ∏è Class weights for Focal Loss:")
    for i, class_name in enumerate(CLASSES):
        print(f"  {class_name}: {class_weights[i]:.4f}")

    # Splits
    temp_df, test_df = train_test_split(df, test_size=0.10, stratify=df['class_id'], random_state=42)
    train_df, val_df = train_test_split(temp_df, test_size=0.111111, stratify=temp_df['class_id'], random_state=42)
    print(f"\nüìä Dataset split:")
    print(f"  Train: {len(train_df)} ({len(train_df)/len(df)*100:.1f}%)")
    print(f"  Val:   {len(val_df)} ({len(val_df)/len(df)*100:.1f}%)")
    print(f"  Test:  {len(test_df)} ({len(test_df)/len(df)*100:.1f}%)")

    # ViT processor
    print(f"\nüîß Loading ViT processor...")
    processor = ViTImageProcessor.from_pretrained(MODEL_NAME)

    # Datasets & dataloaders
    train_dataset = ERBMAHEDataset(train_df, processor=processor, augment=True)
    val_dataset = ERBMAHEDataset(val_df, processor=processor, augment=False)
    test_dataset = ERBMAHEDataset(test_df, processor=processor, augment=False)
    
    batch_size = 16
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

    # Create model
    print(f"\nü§ñ Creating Hybrid ViT + CNN-2D-CBAM + Fusion + Quantum Model...")
    model = ViTQuantumHybrid(model_name=MODEL_NAME, num_classes=len(CLASSES), n_qubits=N_QUBITS).to(device)

    # Ensure whole model is trainable from epoch 1
    for p in model.parameters():
        p.requires_grad = True

    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    quantum_params = sum(p.numel() for p in model.quantum_layer.parameters())

    print(f"‚úì Total parameters: {total_params:,}")
    print(f"‚úì Trainable parameters: {trainable_params:,}")
    print(f"‚úì Quantum layer parameters: {quantum_params:,}")
    print(f"‚úì Model size: {total_params * 4 / 1024 / 1024:.2f} MB")

    # Criterion
    criterion = FocalLoss(alpha=class_weights, gamma=2.0, reduction='mean')

    # Build optimizer
    def build_optimizer(model):
        groups = []
        # ViT params
        vit_params = [p for p in model.vit.parameters() if p.requires_grad]
        if vit_params:
            groups.append({'params': vit_params, 'lr': 1e-5})
        # CNN branch
        cnn_params = [p for p in model.cnn_branch.parameters() if p.requires_grad]
        if cnn_params:
            groups.append({'params': cnn_params, 'lr': 2e-4})
        # Quantum layer
        q_params = [p for p in model.quantum_layer.parameters() if p.requires_grad]
        if q_params:
            groups.append({'params': q_params, 'lr': 5e-5})
        # Classifier
        clf_params = [p for p in model.classifier.parameters() if p.requires_grad]
        if clf_params:
            groups.append({'params': clf_params, 'lr': 1e-4})
        # Fallback
        if not groups:
            groups = [{'params': [p for p in model.parameters() if p.requires_grad], 'lr': 1e-4}]
        opt = torch.optim.AdamW(groups, weight_decay=0.01)
        return opt

    optimizer = build_optimizer(model)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50, eta_min=1e-6)
    early_stopping = EarlyStopping(patience=10, min_delta=0.001, mode='max', verbose=True)

    num_epochs = 50
    best_val_acc = 0.0

    print(f"\n‚öôÔ∏è Training configuration:")
    print(f"  Epochs: {num_epochs}")
    print(f"  Batch size: {batch_size}")
    print(f"  Training: Whole model from epoch 1")
    print(f"  CBAM Type: Original 2D-CBAM (Channel + Spatial Attention)")
    print(f"  Learning rate (ViT backbone): 1e-5")
    print(f"  Learning rate (CNN backbone): 2e-4")
    print(f"  Learning rate (Quantum layer): 5e-5")
    print(f"  Learning rate (Classifier): 1e-4")
    print(f"  Scheduler: CosineAnnealingLR")
    print(f"  Early stopping patience: {early_stopping.patience}")
    print(f"  Loss function: Focal Loss (gamma=2.0)")

    # History
    history = {
        'train_loss': [], 'train_acc': [], 'train_f1': [],
        'val_loss': [], 'val_acc': [], 'val_f1': []
    }

    # TRAINING LOOP
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        print("-" * 70)

        train_loss, train_acc, train_f1 = train_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc, val_f1, _, _ = validate(model, val_loader, criterion, device)

        scheduler.step()

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['train_f1'].append(train_f1)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        history['val_f1'].append(val_f1)

        current_lr = optimizer.param_groups[0]['lr']
        print(f"\nResults:")
        print(f"  Train - Loss: {train_loss:.4f}, Acc: {train_acc:.2f}%, F1: {train_f1:.4f}")
        print(f"  Val   - Loss: {val_loss:.4f}, Acc: {val_acc:.2f}%, F1: {val_f1:.4f}")
        print(f"  Learning Rate (group0): {current_lr:.2e}")

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), 'vit_quantum_hybrid_2d_cbam_best.pth')
            print(f"  üíæ Best model saved! (Val Acc: {best_val_acc:.2f}%)")

        if early_stopping(val_acc, epoch+1):
            print(f"\nüõë Training stopped early at epoch {epoch+1}")
            print(f"   Best validation accuracy: {early_stopping.best_score:.2f}% at epoch {early_stopping.best_epoch}")
            break

    # TEST EVALUATION
    print("\n" + "="*70)
    print("üìä EVALUATING ON TEST SET")
    print("="*70)
    model.load_state_dict(torch.load('vit_quantum_hybrid_2d_cbam_best.pth'))
    test_loss, test_acc, test_f1, y_pred, y_true = validate(model, test_loader, criterion, device)

    precision, recall, f1, support = precision_recall_fscore_support(y_true, y_pred, average='weighted')

    print(f"\nüìà Test Results:")
    print(f"  Accuracy:  {test_acc:.2f}%")
    print(f"  Precision: {precision:.4f}")
    print(f"  Recall:    {recall:.4f}")
    print(f"  F1 Score:  {f1:.4f}")

    # Per-class metrics
    print("\nüìä Per-Class Performance:")
    p_class, r_class, f1_class, s_class = precision_recall_fscore_support(y_true, y_pred, average=None)
    for i, class_name in enumerate(CLASSES):
        print(f"\n{class_name}:")
        print(f"  Precision: {p_class[i]:.4f}")
        print(f"  Recall:    {r_class[i]:.4f}")
        print(f"  F1 Score:  {f1_class[i]:.4f}")
        print(f"  Support:   {s_class[i]}")

    # Confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=CLASSES, yticklabels=CLASSES)
    plt.title('ViT + CNN-2D-CBAM + Fusion + Quantum - Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    plt.savefig('vit_quantum_2d_cbam_confusion_matrix.png', dpi=150)
    print("\n‚úì Confusion matrix saved")

    # Training history plots
    fig, axes = plt.subplots(1, 3, figsize=(18, 5))
    
    axes[0].plot(history['train_loss'], label='Train', marker='o')
    axes[0].plot(history['val_loss'], label='Val', marker='s')
    axes[0].set_xlabel('Epoch')
    axes[0].set_ylabel('Loss')
    axes[0].set_title('Training and Validation Loss')
    axes[0].legend()
    axes[0].grid(True, alpha=0.3)

    axes[1].plot(history['train_acc'], label='Train', marker='o')
    axes[1].plot(history['val_acc'], label='Val', marker='s')
    axes[1].set_xlabel('Epoch')
    axes[1].set_ylabel('Accuracy (%)')
    axes[1].set_title('Training and Validation Accuracy')
    axes[1].legend()
    axes[1].grid(True, alpha=0.3)

    axes[2].plot(history['train_f1'], label='Train', marker='o')
    axes[2].plot(history['val_f1'], label='Val', marker='s')
    axes[2].set_xlabel('Epoch')
    axes[2].set_ylabel('F1 Score')
    axes[2].set_title('Training and Validation F1 Score')
    axes[2].legend()
    axes[2].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('vit_quantum_2d_cbam_training_history.png', dpi=150)
    print("‚úì Training history saved")

    # Specificity calculation
    tn = cm[0, 0]
    fp = cm[0, 1]
    fn = cm[1, 0]
    tp = cm[1, 1]
    specificity_normal = tn / (tn + fp) if (tn + fp) > 0 else 0
    specificity_abnormal = tp / (tp + fn) if (tp + fn) > 0 else 0
    avg_specificity = (specificity_normal + specificity_abnormal) / 2

    print(f"\nüìä Additional Metrics:")
    print(f"  Specificity (Normal):   {specificity_normal:.4f}")
    print(f"  Specificity (Abnormal): {specificity_abnormal:.4f}")
    print(f"  Average Specificity:    {avg_specificity:.4f}")

    print("\n" + "="*70)
    print("‚úÖ TRAINING COMPLETE")
    print("="*70)
    print(f"\nüìä Final Summary:")
    print(f"  Model: ViT + CNN-2D-CBAM + Fusion + Quantum ({N_QUBITS} qubits)")
    print(f"  CBAM Type: Original 2D-CBAM (Channel + Spatial Attention)")
    print(f"  Dataset: ERBMAHE (Abnormal vs Normal)")
    print(f"  Loss Function: Focal Loss (gamma=2.0)")
    print(f"  Test Accuracy: {test_acc:.2f}%")
    print(f"  Test F1 Score: {test_f1:.4f}")
    print(f"  Best Val Accuracy: {best_val_acc:.2f}%")
    print(f"  Total Parameters: {total_params:,}")
    print(f"  Quantum Parameters: {quantum_params:,}")
    print("\n" + "="*70)

if __name__ == '__main__':
    main()

## KFOLD 2DCBAM

In [1]:
"""ViT + CNN-CBAM + Fusion + Quantum Hybrid Network
ERBMAHE Dataset Classification
Binary Classification: Abnormal vs Normal

METHOD 2 K-FOLD WITH SEPARATE TEST SET:
- 10% holdout test set (never used during training)
- K-Fold cross-validation on remaining 90%
- Validation-based early stopping
- Best fold model evaluated on test set
"""

import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
import seaborn as sns
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Import PyTorch & torchvision
try:
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from torch.utils.data import Dataset, DataLoader
    from torchvision import transforms, models
    print("‚úì PyTorch & torchvision imported successfully")
except Exception as e:
    print(f"‚ùå Error importing PyTorch/torchvision: {e}")
    sys.exit(1)

# Transformers (ViT)
try:
    from transformers import ViTForImageClassification, ViTImageProcessor, ViTModel
    print("‚úì Transformers imported successfully")
except ImportError:
    print("üì¶ Installing Transformers...")
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "transformers"])
    from transformers import ViTForImageClassification, ViTImageProcessor, ViTModel
    print("‚úì Transformers installed successfully")

# PennyLane (quantum)
try:
    import pennylane as qml
    print("‚úì PennyLane imported successfully")
except ImportError:
    print("üì¶ Installing PennyLane...")
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "pennylane"])
    import pennylane as qml
    print("‚úì PennyLane installed successfully")

# Check GPU
if not torch.cuda.is_available():
    raise RuntimeError("CUDA is not available. Please run this script on a machine with CUDA-enabled GPU and proper drivers.")
device = torch.device('cuda')
print(f"üîß Using device: {device}")
print(f"üîß PyTorch version: {torch.__version__}")

# Config
MODEL_NAME = "google/vit-base-patch16-224"
CLASSES = ['Abnormal', 'Normal']
N_QUBITS = 4
N_FOLDS = 5

# ============================================================================
# ORIGINAL 2D-CBAM (Channel Attention + Spatial Attention)
# ============================================================================
class ChannelAttention(nn.Module):
    def __init__(self, in_channels, reduction=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        
        self.fc = nn.Sequential(
            nn.Conv2d(in_channels, in_channels // reduction, 1, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels // reduction, in_channels, 1, bias=False)
        )
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        avg_out = self.fc(self.avg_pool(x))
        max_out = self.fc(self.max_pool(x))
        out = avg_out + max_out
        return self.sigmoid(out)

class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()
        padding = (kernel_size - 1) // 2
        self.conv = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x_cat = torch.cat([avg_out, max_out], dim=1)
        out = self.conv(x_cat)
        return self.sigmoid(out)

class CBAM(nn.Module):
    def __init__(self, in_channels, reduction=16, kernel_size=7):
        super(CBAM, self).__init__()
        self.channel_attention = ChannelAttention(in_channels, reduction)
        self.spatial_attention = SpatialAttention(kernel_size)
    
    def forward(self, x):
        x = x * self.channel_attention(x)
        x = x * self.spatial_attention(x)
        return x

# ============================================================================
# CNN + CBAM BRANCH
# ============================================================================
class CNN_CBAM_Branch(nn.Module):
    def __init__(self, pretrained=True):
        super(CNN_CBAM_Branch, self).__init__()
        resnet = models.resnet18(pretrained=pretrained)
        self.backbone = nn.Sequential(*list(resnet.children())[:-2])
        self.cbam = CBAM(512, reduction=16, kernel_size=7)
        self.global_pool = nn.AdaptiveAvgPool2d(1)
    
    def forward(self, x):
        feat = self.backbone(x)
        feat = self.cbam(feat)
        feat = self.global_pool(feat)
        feat = feat.view(feat.size(0), -1)
        return feat

# ============================================================================
# Quantum Circuit & Layer
# ============================================================================
class QuantumCircuit:
    def __init__(self, n_qubits=4):
        self.n_qubits = n_qubits
        self.dev = qml.device('default.qubit', wires=n_qubits)
    
    def circuit(self, inputs, weights):
        for i in range(self.n_qubits):
            qml.RX(inputs[i], wires=i)
            qml.RZ(inputs[i], wires=i)
        for i in range(self.n_qubits - 1):
            qml.CRX(weights[i], wires=[i, i+1])
        qml.CRX(weights[self.n_qubits-1], wires=[self.n_qubits-1, 0])
        return [qml.expval(qml.PauliZ(i)) for i in range(self.n_qubits)]
    
    def create_qnode(self, weights):
        @qml.qnode(self.dev, interface='torch')
        def qnode(inputs):
            return self.circuit(inputs, weights)
        return qnode

class QuantumLayer(nn.Module):
    def __init__(self, input_dim, n_qubits=4):
        super(QuantumLayer, self).__init__()
        self.n_qubits = n_qubits
        self.feature_compress = nn.Linear(input_dim, n_qubits)
        self.q_weights = nn.Parameter(torch.randn(n_qubits) * 0.01)
        self.qc = QuantumCircuit(n_qubits)
        self.feature_expand = nn.Linear(n_qubits, input_dim)
        self.skip_alpha = nn.Parameter(torch.tensor(0.1))
    
    def forward(self, x):
        identity = x
        batch_size = x.size(0)
        x_compressed = self.feature_compress(x)
        qnode = self.qc.create_qnode(self.q_weights)
        q_out_list = []
        for i in range(batch_size):
            q_input = torch.tanh(x_compressed[i]) * np.pi
            q_result = torch.stack(qnode(q_input))
            q_result = q_result.float()
            q_out_list.append(q_result)
        q_out = torch.stack(q_out_list)
        output = self.feature_expand(q_out)
        output = output + self.skip_alpha * identity
        return output

# ============================================================================
# Fusion Hybrid Model
# ============================================================================
class ViTQuantumHybrid(nn.Module):
    def __init__(self, model_name, num_classes=2, n_qubits=4):
        super(ViTQuantumHybrid, self).__init__()
        self.vit = ViTModel.from_pretrained(model_name)
        vit_dim = self.vit.config.hidden_size
        self.cnn_branch = CNN_CBAM_Branch(pretrained=True)
        cnn_dim = 512
        self.fusion_proj = nn.Linear(vit_dim + cnn_dim, vit_dim)
        self.quantum_layer = QuantumLayer(vit_dim, n_qubits=n_qubits)
        self.classifier = nn.Sequential(
            nn.Linear(vit_dim, vit_dim // 2),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(vit_dim // 2, num_classes)
        )
    
    def forward(self, pixel_values):
        vit_out = self.vit(pixel_values=pixel_values)
        vit_features = vit_out.last_hidden_state[:, 0]
        cnn_features = self.cnn_branch(pixel_values)
        fused = torch.cat([vit_features, cnn_features], dim=1)
        fused = self.fusion_proj(fused)
        quantum_features = self.quantum_layer(fused)
        logits = self.classifier(quantum_features)
        return logits

# ============================================================================
# Focal Loss
# ============================================================================
class FocalLoss(nn.Module):
    def __init__(self, alpha=None, gamma=2.0, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction
    
    def forward(self, inputs, targets):
        ce_loss = F.cross_entropy(inputs, targets, reduction='none')
        p_t = torch.exp(-ce_loss)
        focal_loss = (1 - p_t) ** self.gamma * ce_loss
        if self.alpha is not None:
            if isinstance(self.alpha, (float, int)):
                alpha_t = self.alpha
            else:
                alpha_t = self.alpha[targets]
            focal_loss = alpha_t * focal_loss
        if self.reduction == 'mean':
            return focal_loss.mean()
        elif self.reduction == 'sum':
            return focal_loss.sum()
        else:
            return focal_loss

# ============================================================================
# Early Stopping
# ============================================================================
class EarlyStopping:
    def __init__(self, patience=10, min_delta=0, mode='max', verbose=True):
        self.patience = patience
        self.min_delta = min_delta
        self.mode = mode
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.best_epoch = 0
    
    def __call__(self, current_score, epoch):
        if self.best_score is None:
            self.best_score = current_score
            self.best_epoch = epoch
            if self.verbose:
                print(f"  ‚úì Initial best score: {current_score:.4f}")
            return False
        if self.mode == 'max':
            improved = current_score > (self.best_score + self.min_delta)
        else:
            improved = current_score < (self.best_score - self.min_delta)
        if improved:
            self.best_score = current_score
            self.best_epoch = epoch
            self.counter = 0
            if self.verbose:
                print(f"  ‚úì New best score: {current_score:.4f}")
        else:
            self.counter += 1
            if self.verbose:
                print(f"  No improvement. Patience: {self.counter}/{self.patience}")
            if self.counter >= self.patience:
                self.early_stop = True
                if self.verbose:
                    print(f"\n‚ö†Ô∏è Early stopping triggered!")
                    print(f"   Best score: {self.best_score:.4f} at epoch {self.best_epoch}")
        return self.early_stop

# ============================================================================
# DATASET
# ============================================================================
class ERBMAHEDataset(Dataset):
    def __init__(self, dataframe, processor, augment=False):
        self.df = dataframe.reset_index(drop=True)
        self.processor = processor
        self.augment = augment
        self.aug_transform = transforms.Compose([
            transforms.RandomRotation(10),
            transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)),
            transforms.ColorJitter(brightness=0.05, contrast=0.05, saturation=0.0),
            transforms.RandomHorizontalFlip(p=0.5),
        ])
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        img_path = self.df.loc[idx, 'image_path']
        label = self.df.loc[idx, 'class_id']
        image = Image.open(img_path).convert('RGB')
        if self.augment:
            image = self.aug_transform(image)
        inputs = self.processor(images=image, return_tensors="pt")
        pixel_values = inputs['pixel_values'].squeeze(0)
        return pixel_values, label

# ============================================================================
# TRAIN / VALIDATION FUNCTIONS
# ============================================================================

def train_epoch(model, dataloader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_labels = []
    pbar = tqdm(dataloader, desc='Training')
    for pixel_values, labels in pbar:
        pixel_values, labels = pixel_values.to(device), labels.to(device)
        optimizer.zero_grad()
        logits = model(pixel_values)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = logits.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())
        pbar.set_postfix({
            'loss': f'{running_loss/(pbar.n+1):.4f}',
            'acc': f'{100.*correct/total:.2f}%'
        })
    _, _, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='weighted', zero_division=0)
    return running_loss / len(dataloader), 100. * correct / total, f1

def validate(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_labels = []
    with torch.no_grad():
        pbar = tqdm(dataloader, desc='Validation')
        for pixel_values, labels in pbar:
            pixel_values, labels = pixel_values.to(device), labels.to(device)
            logits = model(pixel_values)
            loss = criterion(logits, labels)
            running_loss += loss.item()
            _, predicted = logits.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            pbar.set_postfix({
                'loss': f'{running_loss/(pbar.n+1):.4f}',
                'acc': f'{100.*correct/total:.2f}%'
            })
    _, _, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='weighted', zero_division=0)
    return running_loss / len(dataloader), 100. * correct / total, f1, all_preds, all_labels

def calculate_metrics(y_true, y_pred):
    """Calculate comprehensive metrics including sensitivity and specificity"""
    cm = confusion_matrix(y_true, y_pred)
    
    # For binary classification
    tn, fp, fn, tp = cm.ravel()
    
    # Sensitivity (Recall) for each class
    sensitivity_class0 = tn / (tn + fn) if (tn + fn) > 0 else 0  # Normal
    sensitivity_class1 = tp / (tp + fp) if (tp + fp) > 0 else 0  # Abnormal
    
    # Specificity for each class
    specificity_class0 = tn / (tn + fp) if (tn + fp) > 0 else 0  # Normal
    specificity_class1 = tp / (tp + fn) if (tp + fn) > 0 else 0  # Abnormal
    
    # Average
    avg_sensitivity = (sensitivity_class0 + sensitivity_class1) / 2
    avg_specificity = (specificity_class0 + specificity_class1) / 2
    
    # Overall metrics
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')
    accuracy = accuracy_score(y_true, y_pred)
    
    return {
        'accuracy': accuracy * 100,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'sensitivity_class0': sensitivity_class0,
        'sensitivity_class1': sensitivity_class1,
        'specificity_class0': specificity_class0,
        'specificity_class1': specificity_class1,
        'avg_sensitivity': avg_sensitivity,
        'avg_specificity': avg_specificity,
        'confusion_matrix': cm
    }

# ============================================================================
# MAIN
# ============================================================================

def main():
    print("="*70)
    print("ViT + CNN-2D-CBAM + Fusion + Quantum Hybrid Network")
    print("ERBMAHE Dataset Classification")
    print("Binary Classification: Abnormal vs Normal")
    print(f"METHOD 2: K-FOLD ({N_FOLDS} folds) WITH SEPARATE TEST SET (10%)")
    print("="*70)

    data_path = 'D:/training/archive/ICMR_datasets_ERBMAHE'

    print(f"\nü§ñ Base Model: {MODEL_NAME}")
    print(f"‚öõÔ∏è  Quantum Qubits: {N_QUBITS}")
    print(f"üìä Classes: {CLASSES}")
    print(f"üîÑ K-Fold: {N_FOLDS} folds on 90% data")
    print(f"üß™ Test Set: 10% holdout")

    print("\nüìÅ Loading dataset...")
    data_list = []
    for class_name in CLASSES:
        class_path = os.path.join(data_path, class_name)
        if os.path.exists(class_path):
            for img_file in os.listdir(class_path):
                if img_file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
                    data_list.append({
                        'image_path': os.path.join(class_path, img_file),
                        'label': class_name,
                        'class_id': CLASSES.index(class_name)
                    })
        else:
            print(f"‚ö†Ô∏è Warning: {class_path} not found!")
    
    df = pd.DataFrame(data_list)
    print(f"üìä Total images: {len(df)}")
    print("\nClass distribution:")
    print(df['label'].value_counts())

    # Split: 90% for k-fold, 10% for final test
    kfold_df, test_df = train_test_split(df, test_size=0.10, stratify=df['class_id'], random_state=42)
    
    print(f"\nüìä Dataset split:")
    print(f"  K-Fold data: {len(kfold_df)} ({len(kfold_df)/len(df)*100:.1f}%)")
    print(f"  Test data:   {len(test_df)} ({len(test_df)/len(df)*100:.1f}%)")

    # Class weights for focal loss (calculated on k-fold data)
    class_counts = kfold_df['label'].value_counts().sort_index().values
    total_samples = len(kfold_df)
    class_weights = total_samples / (len(CLASSES) * class_counts)
    class_weights = torch.FloatTensor(class_weights).to(device)
    print(f"\n‚öñÔ∏è Class weights for Focal Loss:")
    for i, class_name in enumerate(CLASSES):
        print(f"  {class_name}: {class_weights[i]:.4f}")

    # ViT processor
    print(f"\nüîß Loading ViT processor...")
    processor = ViTImageProcessor.from_pretrained(MODEL_NAME)

    # K-Fold setup
    skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=42)
    
    fold_results = []
    all_fold_histories = []
    
    batch_size = 16
    num_epochs = 50

    print(f"\n‚öôÔ∏è Training configuration:")
    print(f"  Max epochs per fold: {num_epochs}")
    print(f"  Batch size: {batch_size}")
    print(f"  Early stopping patience: 10 epochs")
    print(f"  Learning rate (ViT): 1e-5")
    print(f"  Learning rate (CNN): 2e-4")
    print(f"  Learning rate (Quantum): 5e-5")
    print(f"  Learning rate (Classifier): 1e-4")
    print(f"  Scheduler: CosineAnnealingLR")
    print(f"  Loss: Focal Loss (gamma=2.0)")
    print(f"  Strategy: Method 2 with separate test set")

    # ============================================================================
    # K-FOLD CROSS-VALIDATION
    # ============================================================================
    
    for fold_idx, (train_idx, val_idx) in enumerate(skf.split(kfold_df, kfold_df['class_id'])):
        print("\n" + "="*70)
        print(f"FOLD {fold_idx + 1}/{N_FOLDS}")
        print("="*70)
        
        train_df = kfold_df.iloc[train_idx].reset_index(drop=True)
        val_df = kfold_df.iloc[val_idx].reset_index(drop=True)
        
        print(f"\nüìä Fold {fold_idx + 1} split:")
        print(f"  Train:      {len(train_df)} ({len(train_df)/len(kfold_df)*100:.1f}% of k-fold data)")
        print(f"  Validation: {len(val_df)} ({len(val_df)/len(kfold_df)*100:.1f}% of k-fold data)")
        
        train_dataset = ERBMAHEDataset(train_df, processor=processor, augment=True)
        val_dataset = ERBMAHEDataset(val_df, processor=processor, augment=False)
        
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)
        
        print(f"\nü§ñ Creating model for Fold {fold_idx + 1}...")
        model = ViTQuantumHybrid(model_name=MODEL_NAME, num_classes=len(CLASSES), n_qubits=N_QUBITS).to(device)
        
        for p in model.parameters():
            p.requires_grad = True
        
        if fold_idx == 0:
            total_params = sum(p.numel() for p in model.parameters())
            trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
            quantum_params = sum(p.numel() for p in model.quantum_layer.parameters())
            
            print(f"‚úì Total parameters: {total_params:,}")
            print(f"‚úì Trainable parameters: {trainable_params:,}")
            print(f"‚úì Quantum layer parameters: {quantum_params:,}")
            print(f"‚úì Model size: {total_params * 4 / 1024 / 1024:.2f} MB")
        
        def build_optimizer(model):
            groups = []
            vit_params = [p for p in model.vit.parameters() if p.requires_grad]
            if vit_params:
                groups.append({'params': vit_params, 'lr': 1e-5})
            cnn_params = [p for p in model.cnn_branch.parameters() if p.requires_grad]
            if cnn_params:
                groups.append({'params': cnn_params, 'lr': 2e-4})
            q_params = [p for p in model.quantum_layer.parameters() if p.requires_grad]
            if q_params:
                groups.append({'params': q_params, 'lr': 5e-5})
            clf_params = [p for p in model.classifier.parameters() if p.requires_grad]
            if clf_params:
                groups.append({'params': clf_params, 'lr': 1e-4})
            if not groups:
                groups = [{'params': [p for p in model.parameters() if p.requires_grad], 'lr': 1e-4}]
            return torch.optim.AdamW(groups, weight_decay=0.01)
        
        criterion = FocalLoss(alpha=class_weights, gamma=2.0, reduction='mean')
        optimizer = build_optimizer(model)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=1e-6)
        early_stopping = EarlyStopping(patience=10, min_delta=0.001, mode='max', verbose=True)
        
        best_val_acc = 0.0
        fold_history = {
            'train_loss': [], 'train_acc': [], 'train_f1': [],
            'val_loss': [], 'val_acc': [], 'val_f1': []
        }
        
        # Training loop
        for epoch in range(num_epochs):
            print(f"\nFold {fold_idx + 1} - Epoch {epoch+1}/{num_epochs}")
            print("-" * 70)
            
            train_loss, train_acc, train_f1 = train_epoch(model, train_loader, criterion, optimizer, device)
            val_loss, val_acc, val_f1, _, _ = validate(model, val_loader, criterion, device)
            
            scheduler.step()
            
            fold_history['train_loss'].append(train_loss)
            fold_history['train_acc'].append(train_acc)
            fold_history['train_f1'].append(train_f1)
            fold_history['val_loss'].append(val_loss)
            fold_history['val_acc'].append(val_acc)
            fold_history['val_f1'].append(val_f1)
            
            current_lr = optimizer.param_groups[0]['lr']
            print(f"\nResults:")
            print(f"  Train - Loss: {train_loss:.4f}, Acc: {train_acc:.2f}%, F1: {train_f1:.4f}")
            print(f"  Val   - Loss: {val_loss:.4f}, Acc: {val_acc:.2f}%, F1: {val_f1:.4f}")
            print(f"  Learning Rate (group0): {current_lr:.2e}")
            
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(model.state_dict(), f'fold{fold_idx+1}_best.pth')
                print(f"  üíæ Best model saved! (Val Acc: {best_val_acc:.2f}%)")
            
            if early_stopping(val_acc, epoch):
                print(f"\n‚ö†Ô∏è Early stopping at epoch {epoch+1}")
                break
        
        # Final validation evaluation
        print(f"\nüìä EVALUATING FOLD {fold_idx + 1}")
        print("="*70)
        model.load_state_dict(torch.load(f'fold{fold_idx+1}_best.pth'))
        val_loss, val_acc, val_f1, y_pred, y_true = validate(model, val_loader, criterion, device)
        
        metrics = calculate_metrics(y_true, y_pred)
        
        print(f"\nüìà Fold {fold_idx + 1} Validation Results:")
        print(f"  Accuracy:         {metrics['accuracy']:.2f}%")
        print(f"  Precision:        {metrics['precision']:.4f}")
        print(f"  Recall:           {metrics['recall']:.4f}")
        print(f"  F1 Score:         {metrics['f1']:.4f}")
        print(f"  Sensitivity (Normal):   {metrics['sensitivity_class0']:.4f}")
        print(f"  Sensitivity (Abnormal): {metrics['sensitivity_class1']:.4f}")
        print(f"  Specificity (Normal):   {metrics['specificity_class0']:.4f}")
        print(f"  Specificity (Abnormal): {metrics['specificity_class1']:.4f}")
        print(f"  Avg Sensitivity:  {metrics['avg_sensitivity']:.4f}")
        print(f"  Avg Specificity:  {metrics['avg_specificity']:.4f}")
        
        fold_results.append({
            'fold': fold_idx + 1,
            'val_acc': metrics['accuracy'],
            'val_precision': metrics['precision'],
            'val_recall': metrics['recall'],
            'val_f1': metrics['f1'],
            'val_sensitivity': metrics['avg_sensitivity'],
            'val_specificity': metrics['avg_specificity'],
            'best_val_acc': best_val_acc,
            'epochs_trained': len(fold_history['train_loss']),
            'model_path': f'fold{fold_idx+1}_best.pth',
            'y_true': y_true,
            'y_pred': y_pred,
            'metrics': metrics
        })
        
        all_fold_histories.append(fold_history)
        
        # Save confusion matrix
        cm = metrics['confusion_matrix']
        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=CLASSES, yticklabels=CLASSES)
        plt.title(f'Fold {fold_idx + 1} - Confusion Matrix\n(Val Acc: {metrics["accuracy"]:.2f}%)')
        plt.ylabel('True Label')
        plt.xlabel('Predicted Label')
        plt.tight_layout()
        plt.savefig(f'fold{fold_idx+1}_confusion_matrix.png', dpi=150)
        plt.close()
        print(f"‚úì Confusion matrix saved")
        
        # Enhanced visualization for FIRST FOLD
        if fold_idx == 0:
            print(f"\nüìä Creating enhanced visualizations for Fold 1...")
            
            # 1. Training/Validation curves
            fig, axes = plt.subplots(1, 3, figsize=(18, 5))
            epochs_range = range(1, len(fold_history['train_loss']) + 1)
            
            axes[0].plot(epochs_range, fold_history['train_loss'], 'b-o', label='Train', linewidth=2, markersize=4)
            axes[0].plot(epochs_range, fold_history['val_loss'], 'r-s', label='Val', linewidth=2, markersize=4)
            axes[0].set_xlabel('Epoch', fontsize=12)
            axes[0].set_ylabel('Loss', fontsize=12)
            axes[0].set_title('Fold 1 - Loss', fontsize=14, fontweight='bold')
            axes[0].legend(fontsize=10)
            axes[0].grid(True, alpha=0.3)
            
            axes[1].plot(epochs_range, fold_history['train_acc'], 'b-o', label='Train', linewidth=2, markersize=4)
            axes[1].plot(epochs_range, fold_history['val_acc'], 'r-s', label='Val', linewidth=2, markersize=4)
            axes[1].set_xlabel('Epoch', fontsize=12)
            axes[1].set_ylabel('Accuracy (%)', fontsize=12)
            axes[1].set_title('Fold 1 - Accuracy', fontsize=14, fontweight='bold')
            axes[1].legend(fontsize=10)
            axes[1].grid(True, alpha=0.3)
            
            axes[2].plot(epochs_range, fold_history['train_f1'], 'b-o', label='Train', linewidth=2, markersize=4)
            axes[2].plot(epochs_range, fold_history['val_f1'], 'r-s', label='Val', linewidth=2, markersize=4)
            axes[2].set_xlabel('Epoch', fontsize=12)
            axes[2].set_ylabel('F1 Score', fontsize=12)
            axes[2].set_title('Fold 1 - F1 Score', fontsize=14, fontweight='bold')
            axes[2].legend(fontsize=10)
            axes[2].grid(True, alpha=0.3)
            
            plt.tight_layout()
            plt.savefig('fold1_training_curves.png', dpi=150)
            plt.close()
            print("  ‚úì Training curves saved")
            
            # 2. Detailed confusion matrix
            fig, ax = plt.subplots(figsize=(10, 8))
            cm_percent = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] * 100
            annotations = np.empty_like(cm).astype(str)
            for i in range(cm.shape[0]):
                for j in range(cm.shape[1]):
                    annotations[i, j] = f'{cm[i, j]}\n({cm_percent[i, j]:.1f}%)'
            sns.heatmap(cm, annot=annotations, fmt='', cmap='Blues', 
                       xticklabels=CLASSES, yticklabels=CLASSES,
                       cbar_kws={'label': 'Count'}, ax=ax)
            ax.set_title(f'Fold 1 - Detailed Confusion Matrix\nVal Acc: {metrics["accuracy"]:.2f}%', 
                        fontsize=14, fontweight='bold')
            ax.set_ylabel('True Label', fontsize=12)
            ax.set_xlabel('Predicted Label', fontsize=12)
            plt.tight_layout()
            plt.savefig('fold1_confusion_detailed.png', dpi=150)
            plt.close()
            print("  ‚úì Detailed confusion matrix saved")
            
            # 3. Metrics summary
            fig, ax = plt.subplots(figsize=(12, 6))
            metric_names = ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'Sensitivity', 'Specificity']
            metric_values = [
                metrics['accuracy'],
                metrics['precision']*100,
                metrics['recall']*100,
                metrics['f1']*100,
                metrics['avg_sensitivity']*100,
                metrics['avg_specificity']*100
            ]
            colors = ['#3498db', '#2ecc71', '#e74c3c', '#f39c12', '#9b59b6', '#1abc9c']
            bars = ax.bar(metric_names, metric_values, color=colors, alpha=0.7, edgecolor='black', linewidth=1.5)
            for bar, value in zip(bars, metric_values):
                height = bar.get_height()
                ax.text(bar.get_x() + bar.get_width()/2., height,
                       f'{value:.2f}%', ha='center', va='bottom', fontsize=11, fontweight='bold')
            ax.set_ylabel('Score (%)', fontsize=12)
            ax.set_title('Fold 1 - Validation Metrics Summary', fontsize=14, fontweight='bold')
            ax.set_ylim([0, 105])
            ax.grid(True, alpha=0.3, axis='y')
            plt.xticks(rotation=15)
            plt.tight_layout()
            plt.savefig('fold1_metrics_summary.png', dpi=150)
            plt.close()
            print("  ‚úì Metrics summary saved")
    
    # ============================================================================
    # SUMMARY ACROSS FOLDS
    # ============================================================================
    
    print("\n" + "="*70)
    print("üìä K-FOLD CROSS-VALIDATION SUMMARY")
    print("="*70)
    
    accuracies = [r['val_acc'] for r in fold_results]
    precisions = [r['val_precision'] for r in fold_results]
    recalls = [r['val_recall'] for r in fold_results]
    f1_scores = [r['val_f1'] for r in fold_results]
    sensitivities = [r['val_sensitivity'] for r in fold_results]
    specificities = [r['val_specificity'] for r in fold_results]
    
    print(f"\nüìà Cross-Validation Results ({N_FOLDS} folds):")
    print(f"  Accuracy:     {np.mean(accuracies):.2f}% ¬± {np.std(accuracies):.2f}%")
    print(f"  Precision:    {np.mean(precisions):.4f} ¬± {np.std(precisions):.4f}")
    print(f"  Recall:       {np.mean(recalls):.4f} ¬± {np.std(recalls):.4f}")
    print(f"  F1 Score:     {np.mean(f1_scores):.4f} ¬± {np.std(f1_scores):.4f}")
    print(f"  Sensitivity:  {np.mean(sensitivities):.4f} ¬± {np.std(sensitivities):.4f}")
    print(f"  Specificity:  {np.mean(specificities):.4f} ¬± {np.std(specificities):.4f}")
    
    # Find best fold
    best_fold_idx = np.argmax(accuracies)
    best_fold = fold_results[best_fold_idx]
    print(f"\nüèÜ Best Fold: {best_fold['fold']} (Val Acc: {best_fold['val_acc']:.2f}%)")
    
    # ============================================================================
    # EVALUATE BEST MODEL ON TEST SET
    # ============================================================================
    
    print("\n" + "="*70)
    print("üß™ EVALUATING BEST MODEL ON HOLDOUT TEST SET")
    print("="*70)
    
    print(f"\nüìä Loading best model from Fold {best_fold['fold']}...")
    best_model = ViTQuantumHybrid(model_name=MODEL_NAME, num_classes=len(CLASSES), n_qubits=N_QUBITS).to(device)
    best_model.load_state_dict(torch.load(best_fold['model_path']))
    
    test_dataset = ERBMAHEDataset(test_df, processor=processor, augment=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)
    
    test_loss, test_acc, test_f1, test_pred, test_true = validate(best_model, test_loader, criterion, device)
    test_metrics = calculate_metrics(test_true, test_pred)
    
    print(f"\nüìà Test Set Results (Best Model from Fold {best_fold['fold']}):")
    print(f"  Accuracy:         {test_metrics['accuracy']:.2f}%")
    print(f"  Precision:        {test_metrics['precision']:.4f}")
    print(f"  Recall:           {test_metrics['recall']:.4f}")
    print(f"  F1 Score:         {test_metrics['f1']:.4f}")
    print(f"  Sensitivity (Normal):   {test_metrics['sensitivity_class0']:.4f}")
    print(f"  Sensitivity (Abnormal): {test_metrics['sensitivity_class1']:.4f}")
    print(f"  Specificity (Normal):   {test_metrics['specificity_class0']:.4f}")
    print(f"  Specificity (Abnormal): {test_metrics['specificity_class1']:.4f}")
    print(f"  Avg Sensitivity:  {test_metrics['avg_sensitivity']:.4f}")
    print(f"  Avg Specificity:  {test_metrics['avg_specificity']:.4f}")
    
    # Test confusion matrix
    test_cm = test_metrics['confusion_matrix']
    plt.figure(figsize=(10, 8))
    test_cm_percent = test_cm.astype('float') / test_cm.sum(axis=1)[:, np.newaxis] * 100
    test_annotations = np.empty_like(test_cm).astype(str)
    for i in range(test_cm.shape[0]):
        for j in range(test_cm.shape[1]):
            test_annotations[i, j] = f'{test_cm[i, j]}\n({test_cm_percent[i, j]:.1f}%)'
    sns.heatmap(test_cm, annot=test_annotations, fmt='', cmap='Greens', 
               xticklabels=CLASSES, yticklabels=CLASSES,
               cbar_kws={'label': 'Count'})
    plt.title(f'Test Set Confusion Matrix\n(Best Model from Fold {best_fold["fold"]}, Acc: {test_metrics["accuracy"]:.2f}%)', 
             fontsize=14, fontweight='bold')
    plt.ylabel('True Label', fontsize=12)
    plt.xlabel('Predicted Label', fontsize=12)
    plt.tight_layout()
    plt.savefig('test_confusion_matrix.png', dpi=150)
    plt.close()
    print("\n‚úì Test confusion matrix saved")
    
    # Save all results
    results_df = pd.DataFrame(fold_results)
    results_df.to_csv('kfold_results.csv', index=False)
    print("‚úì Results saved to CSV")
    
    print("\n" + "="*70)
    print("‚úÖ TRAINING COMPLETE")
    print("="*70)
    print(f"\nüìä Final Summary:")
    print(f"  Dataset: ERBMAHE (Abnormal vs Normal)")
    print(f"  Strategy: Method 2 K-Fold with 10% Test Set")
    print(f"  K-Fold CV Accuracy: {np.mean(accuracies):.2f}% ¬± {np.std(accuracies):.2f}%")
    print(f"  Test Set Accuracy:  {test_metrics['accuracy']:.2f}%")
    print(f"  Test Set F1 Score:  {test_metrics['f1']:.4f}")
    print("\n" + "="*70)

if __name__ == '__main__':
    main()

‚úì PyTorch & torchvision imported successfully
‚úì Transformers imported successfully
‚úì PennyLane imported successfully
üîß Using device: cuda
üîß PyTorch version: 2.5.1+cu121
ViT + CNN-2D-CBAM + Fusion + Quantum Hybrid Network
ERBMAHE Dataset Classification
Binary Classification: Abnormal vs Normal
METHOD 2: K-FOLD (5 folds) WITH SEPARATE TEST SET (10%)

ü§ñ Base Model: google/vit-base-patch16-224
‚öõÔ∏è  Quantum Qubits: 4
üìä Classes: ['Abnormal', 'Normal']
üîÑ K-Fold: 5 folds on 90% data
üß™ Test Set: 10% holdout

üìÅ Loading dataset...
üìä Total images: 1981

Class distribution:
label
Normal      1270
Abnormal     711
Name: count, dtype: int64

üìä Dataset split:
  K-Fold data: 1782 (90.0%)
  Test data:   199 (10.0%)

‚öñÔ∏è Class weights for Focal Loss:
  Abnormal: 1.3922
  Normal: 0.7802

üîß Loading ViT processor...

‚öôÔ∏è Training configuration:
  Max epochs per fold: 50
  Batch size: 16
  Early stopping patience: 10 epochs
  Learning rate (ViT): 1e-5
  Learning r

Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úì Total parameters: 98,885,421
‚úì Trainable parameters: 98,885,421
‚úì Quantum layer parameters: 6,921
‚úì Model size: 377.22 MB

Fold 1 - Epoch 1/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:10<00:00,  4.12s/it, loss=0.0947, acc=84.56%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:41<00:00,  1.82s/it, loss=0.0537, acc=94.40%]



Results:
  Train - Loss: 0.0947, Acc: 84.56%, F1: 0.8465
  Val   - Loss: 0.0537, Acc: 94.40%, F1: 0.9439
  Learning Rate (group0): 9.99e-06
  üíæ Best model saved! (Val Acc: 94.40%)
  ‚úì Initial best score: 94.3978

Fold 1 - Epoch 2/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:34<00:00,  3.72s/it, loss=0.0611, acc=90.53%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:39<00:00,  1.72s/it, loss=0.1056, acc=89.08%]



Results:
  Train - Loss: 0.0611, Acc: 90.53%, F1: 0.9060
  Val   - Loss: 0.1056, Acc: 89.08%, F1: 0.8860
  Learning Rate (group0): 9.96e-06
  No improvement. Patience: 1/10

Fold 1 - Epoch 3/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:37<00:00,  3.75s/it, loss=0.0442, acc=93.89%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:37<00:00,  1.62s/it, loss=0.0355, acc=95.24%]



Results:
  Train - Loss: 0.0442, Acc: 93.89%, F1: 0.9392
  Val   - Loss: 0.0355, Acc: 95.24%, F1: 0.9529
  Learning Rate (group0): 9.92e-06
  üíæ Best model saved! (Val Acc: 95.24%)
  ‚úì New best score: 95.2381

Fold 1 - Epoch 4/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:44<00:00,  3.83s/it, loss=0.0335, acc=96.63%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:39<00:00,  1.73s/it, loss=0.0424, acc=95.24%]



Results:
  Train - Loss: 0.0335, Acc: 96.63%, F1: 0.9664
  Val   - Loss: 0.0424, Acc: 95.24%, F1: 0.9523
  Learning Rate (group0): 9.86e-06
  No improvement. Patience: 1/10

Fold 1 - Epoch 5/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:46<00:00,  3.86s/it, loss=0.0235, acc=96.42%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:38<00:00,  1.66s/it, loss=0.0526, acc=91.88%]



Results:
  Train - Loss: 0.0235, Acc: 96.42%, F1: 0.9644
  Val   - Loss: 0.0526, Acc: 91.88%, F1: 0.9202
  Learning Rate (group0): 9.78e-06
  No improvement. Patience: 2/10

Fold 1 - Epoch 6/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:38<00:00,  3.76s/it, loss=0.0204, acc=96.98%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:39<00:00,  1.70s/it, loss=0.0495, acc=90.76%]



Results:
  Train - Loss: 0.0204, Acc: 96.98%, F1: 0.9699
  Val   - Loss: 0.0495, Acc: 90.76%, F1: 0.9092
  Learning Rate (group0): 9.68e-06
  No improvement. Patience: 3/10

Fold 1 - Epoch 7/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:39<00:00,  3.77s/it, loss=0.0229, acc=96.56%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:37<00:00,  1.62s/it, loss=0.0196, acc=97.48%]



Results:
  Train - Loss: 0.0229, Acc: 96.56%, F1: 0.9657
  Val   - Loss: 0.0196, Acc: 97.48%, F1: 0.9749
  Learning Rate (group0): 9.57e-06
  üíæ Best model saved! (Val Acc: 97.48%)
  ‚úì New best score: 97.4790

Fold 1 - Epoch 8/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:38<00:00,  3.76s/it, loss=0.0304, acc=96.49%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.58s/it, loss=0.0260, acc=97.76%]



Results:
  Train - Loss: 0.0304, Acc: 96.49%, F1: 0.9651
  Val   - Loss: 0.0260, Acc: 97.76%, F1: 0.9776
  Learning Rate (group0): 9.44e-06
  üíæ Best model saved! (Val Acc: 97.76%)
  ‚úì New best score: 97.7591

Fold 1 - Epoch 9/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:27<00:00,  3.64s/it, loss=0.0210, acc=96.91%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:37<00:00,  1.63s/it, loss=0.0550, acc=95.52%]



Results:
  Train - Loss: 0.0210, Acc: 96.91%, F1: 0.9693
  Val   - Loss: 0.0550, Acc: 95.52%, F1: 0.9546
  Learning Rate (group0): 9.30e-06
  No improvement. Patience: 1/10

Fold 1 - Epoch 10/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:46<00:00,  3.85s/it, loss=0.0081, acc=99.09%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:37<00:00,  1.62s/it, loss=0.0216, acc=98.04%]



Results:
  Train - Loss: 0.0081, Acc: 99.09%, F1: 0.9909
  Val   - Loss: 0.0216, Acc: 98.04%, F1: 0.9803
  Learning Rate (group0): 9.14e-06
  üíæ Best model saved! (Val Acc: 98.04%)
  ‚úì New best score: 98.0392

Fold 1 - Epoch 11/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:41<00:00,  3.80s/it, loss=0.0169, acc=97.96%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:37<00:00,  1.63s/it, loss=0.0297, acc=96.36%]



Results:
  Train - Loss: 0.0169, Acc: 97.96%, F1: 0.9797
  Val   - Loss: 0.0297, Acc: 96.36%, F1: 0.9639
  Learning Rate (group0): 8.97e-06
  No improvement. Patience: 1/10

Fold 1 - Epoch 12/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:38<00:00,  3.76s/it, loss=0.0105, acc=98.53%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.58s/it, loss=0.0176, acc=98.32%]



Results:
  Train - Loss: 0.0105, Acc: 98.53%, F1: 0.9853
  Val   - Loss: 0.0176, Acc: 98.32%, F1: 0.9832
  Learning Rate (group0): 8.78e-06
  üíæ Best model saved! (Val Acc: 98.32%)
  ‚úì New best score: 98.3193

Fold 1 - Epoch 13/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:33<00:00,  3.70s/it, loss=0.0046, acc=99.37%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:37<00:00,  1.63s/it, loss=0.0194, acc=97.48%]



Results:
  Train - Loss: 0.0046, Acc: 99.37%, F1: 0.9937
  Val   - Loss: 0.0194, Acc: 97.48%, F1: 0.9748
  Learning Rate (group0): 8.58e-06
  No improvement. Patience: 1/10

Fold 1 - Epoch 14/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:27<00:00,  3.64s/it, loss=0.0111, acc=98.81%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.57s/it, loss=0.0152, acc=97.48%]



Results:
  Train - Loss: 0.0111, Acc: 98.81%, F1: 0.9881
  Val   - Loss: 0.0152, Acc: 97.48%, F1: 0.9749
  Learning Rate (group0): 8.37e-06
  No improvement. Patience: 2/10

Fold 1 - Epoch 15/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:03<00:00,  3.37s/it, loss=0.0095, acc=98.74%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.60s/it, loss=0.0159, acc=97.76%]



Results:
  Train - Loss: 0.0095, Acc: 98.74%, F1: 0.9874
  Val   - Loss: 0.0159, Acc: 97.76%, F1: 0.9776
  Learning Rate (group0): 8.15e-06
  No improvement. Patience: 3/10

Fold 1 - Epoch 16/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:50<00:00,  3.90s/it, loss=0.0037, acc=99.23%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:37<00:00,  1.62s/it, loss=0.0092, acc=98.60%]



Results:
  Train - Loss: 0.0037, Acc: 99.23%, F1: 0.9923
  Val   - Loss: 0.0092, Acc: 98.60%, F1: 0.9860
  Learning Rate (group0): 7.91e-06
  üíæ Best model saved! (Val Acc: 98.60%)
  ‚úì New best score: 98.5994

Fold 1 - Epoch 17/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:03<00:00,  4.04s/it, loss=0.0015, acc=99.72%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.61s/it, loss=0.0130, acc=98.04%]



Results:
  Train - Loss: 0.0015, Acc: 99.72%, F1: 0.9972
  Val   - Loss: 0.0130, Acc: 98.04%, F1: 0.9805
  Learning Rate (group0): 7.67e-06
  No improvement. Patience: 1/10

Fold 1 - Epoch 18/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:20<00:00,  4.23s/it, loss=0.0142, acc=98.46%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:42<00:00,  1.84s/it, loss=0.0184, acc=95.80%]



Results:
  Train - Loss: 0.0142, Acc: 98.46%, F1: 0.9846
  Val   - Loss: 0.0184, Acc: 95.80%, F1: 0.9583
  Learning Rate (group0): 7.42e-06
  No improvement. Patience: 2/10

Fold 1 - Epoch 19/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:48<00:00,  4.54s/it, loss=0.0046, acc=99.16%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:41<00:00,  1.78s/it, loss=0.0246, acc=97.48%]



Results:
  Train - Loss: 0.0046, Acc: 99.16%, F1: 0.9916
  Val   - Loss: 0.0246, Acc: 97.48%, F1: 0.9748
  Learning Rate (group0): 7.16e-06
  No improvement. Patience: 3/10

Fold 1 - Epoch 20/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:03<00:00,  4.04s/it, loss=0.0111, acc=98.67%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:35<00:00,  1.56s/it, loss=0.0163, acc=96.64%]



Results:
  Train - Loss: 0.0111, Acc: 98.67%, F1: 0.9867
  Val   - Loss: 0.0163, Acc: 96.64%, F1: 0.9666
  Learning Rate (group0): 6.89e-06
  No improvement. Patience: 4/10

Fold 1 - Epoch 21/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:42<00:00,  3.81s/it, loss=0.0026, acc=99.51%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.57s/it, loss=0.0131, acc=98.04%]



Results:
  Train - Loss: 0.0026, Acc: 99.51%, F1: 0.9951
  Val   - Loss: 0.0131, Acc: 98.04%, F1: 0.9804
  Learning Rate (group0): 6.62e-06
  No improvement. Patience: 5/10

Fold 1 - Epoch 22/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:40<00:00,  3.78s/it, loss=0.0080, acc=99.72%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.57s/it, loss=0.0485, acc=97.20%]



Results:
  Train - Loss: 0.0080, Acc: 99.72%, F1: 0.9972
  Val   - Loss: 0.0485, Acc: 97.20%, F1: 0.9718
  Learning Rate (group0): 6.34e-06
  No improvement. Patience: 6/10

Fold 1 - Epoch 23/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:01<00:00,  4.01s/it, loss=0.0399, acc=96.28%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.60s/it, loss=0.0200, acc=96.92%]



Results:
  Train - Loss: 0.0399, Acc: 96.28%, F1: 0.9628
  Val   - Loss: 0.0200, Acc: 96.92%, F1: 0.9694
  Learning Rate (group0): 6.06e-06
  No improvement. Patience: 7/10

Fold 1 - Epoch 24/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:49<00:00,  3.88s/it, loss=0.0064, acc=99.37%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:35<00:00,  1.53s/it, loss=0.0221, acc=97.48%]



Results:
  Train - Loss: 0.0064, Acc: 99.37%, F1: 0.9937
  Val   - Loss: 0.0221, Acc: 97.48%, F1: 0.9749
  Learning Rate (group0): 5.78e-06
  No improvement. Patience: 8/10

Fold 1 - Epoch 25/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:59<00:00,  3.99s/it, loss=0.0026, acc=99.58%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:43<00:00,  1.88s/it, loss=0.0187, acc=97.76%]



Results:
  Train - Loss: 0.0026, Acc: 99.58%, F1: 0.9958
  Val   - Loss: 0.0187, Acc: 97.76%, F1: 0.9777
  Learning Rate (group0): 5.50e-06
  No improvement. Patience: 9/10

Fold 1 - Epoch 26/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:54<00:00,  3.94s/it, loss=0.0027, acc=99.58%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:35<00:00,  1.54s/it, loss=0.0145, acc=98.60%]



Results:
  Train - Loss: 0.0027, Acc: 99.58%, F1: 0.9958
  Val   - Loss: 0.0145, Acc: 98.60%, F1: 0.9860
  Learning Rate (group0): 5.22e-06
  No improvement. Patience: 10/10

‚ö†Ô∏è Early stopping triggered!
   Best score: 98.5994 at epoch 15

‚ö†Ô∏è Early stopping at epoch 26

üìä EVALUATING FOLD 1


Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:35<00:00,  1.56s/it, loss=0.0092, acc=98.60%]



üìà Fold 1 Validation Results:
  Accuracy:         98.60%
  Precision:        0.9862
  Recall:           0.9860
  F1 Score:         0.9860
  Sensitivity (Normal):   0.9695
  Sensitivity (Abnormal): 0.9956
  Specificity (Normal):   0.9922
  Specificity (Abnormal): 0.9825
  Avg Sensitivity:  0.9825
  Avg Specificity:  0.9874
‚úì Confusion matrix saved

üìä Creating enhanced visualizations for Fold 1...
  ‚úì Training curves saved
  ‚úì Detailed confusion matrix saved
  ‚úì Metrics summary saved

FOLD 2/5

üìä Fold 2 split:
  Train:      1425 (80.0% of k-fold data)
  Validation: 357 (20.0% of k-fold data)

ü§ñ Creating model for Fold 2...


Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Fold 2 - Epoch 1/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:40<00:00,  3.78s/it, loss=0.0975, acc=84.49%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:34<00:00,  1.52s/it, loss=0.0555, acc=94.12%]



Results:
  Train - Loss: 0.0975, Acc: 84.49%, F1: 0.8464
  Val   - Loss: 0.0555, Acc: 94.12%, F1: 0.9405
  Learning Rate (group0): 9.99e-06
  üíæ Best model saved! (Val Acc: 94.12%)
  ‚úì Initial best score: 94.1176

Fold 2 - Epoch 2/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:21<00:00,  4.24s/it, loss=0.0583, acc=91.16%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:41<00:00,  1.80s/it, loss=0.1422, acc=87.96%]



Results:
  Train - Loss: 0.0583, Acc: 91.16%, F1: 0.9122
  Val   - Loss: 0.1422, Acc: 87.96%, F1: 0.8730
  Learning Rate (group0): 9.96e-06
  No improvement. Patience: 1/10

Fold 2 - Epoch 3/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:26<00:00,  4.29s/it, loss=0.0434, acc=94.04%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:40<00:00,  1.74s/it, loss=0.0467, acc=95.24%]



Results:
  Train - Loss: 0.0434, Acc: 94.04%, F1: 0.9407
  Val   - Loss: 0.0467, Acc: 95.24%, F1: 0.9515
  Learning Rate (group0): 9.92e-06
  üíæ Best model saved! (Val Acc: 95.24%)
  ‚úì New best score: 95.2381

Fold 2 - Epoch 4/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:21<00:00,  4.24s/it, loss=0.0224, acc=97.12%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:40<00:00,  1.75s/it, loss=0.0288, acc=95.52%]



Results:
  Train - Loss: 0.0224, Acc: 97.12%, F1: 0.9713
  Val   - Loss: 0.0288, Acc: 95.52%, F1: 0.9556
  Learning Rate (group0): 9.86e-06
  üíæ Best model saved! (Val Acc: 95.52%)
  ‚úì New best score: 95.5182

Fold 2 - Epoch 5/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:26<00:00,  4.29s/it, loss=0.0397, acc=94.60%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:41<00:00,  1.82s/it, loss=0.0405, acc=92.44%]



Results:
  Train - Loss: 0.0397, Acc: 94.60%, F1: 0.9461
  Val   - Loss: 0.0405, Acc: 92.44%, F1: 0.9256
  Learning Rate (group0): 9.78e-06
  No improvement. Patience: 1/10

Fold 2 - Epoch 6/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:13<00:00,  4.15s/it, loss=0.0208, acc=97.05%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:39<00:00,  1.74s/it, loss=0.0503, acc=90.76%]



Results:
  Train - Loss: 0.0208, Acc: 97.05%, F1: 0.9706
  Val   - Loss: 0.0503, Acc: 90.76%, F1: 0.9093
  Learning Rate (group0): 9.68e-06
  No improvement. Patience: 2/10

Fold 2 - Epoch 7/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:44<00:00,  3.83s/it, loss=0.0210, acc=97.40%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:27<00:00,  1.19s/it, loss=0.0121, acc=98.04%]



Results:
  Train - Loss: 0.0210, Acc: 97.40%, F1: 0.9741
  Val   - Loss: 0.0121, Acc: 98.04%, F1: 0.9804
  Learning Rate (group0): 9.57e-06
  üíæ Best model saved! (Val Acc: 98.04%)
  ‚úì New best score: 98.0392

Fold 2 - Epoch 8/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:20<00:00,  3.56s/it, loss=0.0086, acc=98.53%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:25<00:00,  1.12s/it, loss=0.0194, acc=98.32%]



Results:
  Train - Loss: 0.0086, Acc: 98.53%, F1: 0.9853
  Val   - Loss: 0.0194, Acc: 98.32%, F1: 0.9832
  Learning Rate (group0): 9.44e-06
  üíæ Best model saved! (Val Acc: 98.32%)
  ‚úì New best score: 98.3193

Fold 2 - Epoch 9/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:40<00:00,  3.78s/it, loss=0.0208, acc=97.82%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:25<00:00,  1.09s/it, loss=0.0182, acc=98.32%]



Results:
  Train - Loss: 0.0208, Acc: 97.82%, F1: 0.9782
  Val   - Loss: 0.0182, Acc: 98.32%, F1: 0.9832
  Learning Rate (group0): 9.30e-06
  No improvement. Patience: 1/10

Fold 2 - Epoch 10/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:49<00:00,  3.88s/it, loss=0.0172, acc=97.47%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:25<00:00,  1.12s/it, loss=0.0159, acc=98.32%]



Results:
  Train - Loss: 0.0172, Acc: 97.47%, F1: 0.9748
  Val   - Loss: 0.0159, Acc: 98.32%, F1: 0.9831
  Learning Rate (group0): 9.14e-06
  No improvement. Patience: 2/10

Fold 2 - Epoch 11/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:49<00:00,  3.88s/it, loss=0.0125, acc=98.39%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:35<00:00,  1.55s/it, loss=0.0139, acc=97.48%]



Results:
  Train - Loss: 0.0125, Acc: 98.39%, F1: 0.9839
  Val   - Loss: 0.0139, Acc: 97.48%, F1: 0.9747
  Learning Rate (group0): 8.97e-06
  No improvement. Patience: 3/10

Fold 2 - Epoch 12/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:55<00:00,  3.95s/it, loss=0.0081, acc=98.95%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.58s/it, loss=0.0112, acc=98.32%]



Results:
  Train - Loss: 0.0081, Acc: 98.95%, F1: 0.9895
  Val   - Loss: 0.0112, Acc: 98.32%, F1: 0.9832
  Learning Rate (group0): 8.78e-06
  No improvement. Patience: 4/10

Fold 2 - Epoch 13/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:42<00:00,  3.81s/it, loss=0.0116, acc=98.60%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.58s/it, loss=0.2706, acc=85.71%]



Results:
  Train - Loss: 0.0116, Acc: 98.60%, F1: 0.9860
  Val   - Loss: 0.2706, Acc: 85.71%, F1: 0.8477
  Learning Rate (group0): 8.58e-06
  No improvement. Patience: 5/10

Fold 2 - Epoch 14/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:44<00:00,  3.83s/it, loss=0.0133, acc=98.53%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:34<00:00,  1.51s/it, loss=0.0077, acc=99.44%]



Results:
  Train - Loss: 0.0133, Acc: 98.53%, F1: 0.9853
  Val   - Loss: 0.0077, Acc: 99.44%, F1: 0.9944
  Learning Rate (group0): 8.37e-06
  üíæ Best model saved! (Val Acc: 99.44%)
  ‚úì New best score: 99.4398

Fold 2 - Epoch 15/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:43<00:00,  3.81s/it, loss=0.0044, acc=99.37%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:35<00:00,  1.56s/it, loss=0.0044, acc=99.72%]



Results:
  Train - Loss: 0.0044, Acc: 99.37%, F1: 0.9937
  Val   - Loss: 0.0044, Acc: 99.72%, F1: 0.9972
  Learning Rate (group0): 8.15e-06
  üíæ Best model saved! (Val Acc: 99.72%)
  ‚úì New best score: 99.7199

Fold 2 - Epoch 16/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:57<00:00,  3.97s/it, loss=0.0081, acc=98.74%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:39<00:00,  1.71s/it, loss=0.0149, acc=97.48%]



Results:
  Train - Loss: 0.0081, Acc: 98.74%, F1: 0.9874
  Val   - Loss: 0.0149, Acc: 97.48%, F1: 0.9750
  Learning Rate (group0): 7.91e-06
  No improvement. Patience: 1/10

Fold 2 - Epoch 17/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:52<00:00,  3.91s/it, loss=0.0094, acc=98.88%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:35<00:00,  1.54s/it, loss=0.0078, acc=98.32%]



Results:
  Train - Loss: 0.0094, Acc: 98.88%, F1: 0.9888
  Val   - Loss: 0.0078, Acc: 98.32%, F1: 0.9832
  Learning Rate (group0): 7.67e-06
  No improvement. Patience: 2/10

Fold 2 - Epoch 18/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:44<00:00,  3.82s/it, loss=0.0021, acc=99.79%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:35<00:00,  1.53s/it, loss=0.0157, acc=97.76%]



Results:
  Train - Loss: 0.0021, Acc: 99.79%, F1: 0.9979
  Val   - Loss: 0.0157, Acc: 97.76%, F1: 0.9776
  Learning Rate (group0): 7.42e-06
  No improvement. Patience: 3/10

Fold 2 - Epoch 19/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:44<00:00,  3.83s/it, loss=0.0047, acc=99.37%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:35<00:00,  1.54s/it, loss=0.0090, acc=99.44%]



Results:
  Train - Loss: 0.0047, Acc: 99.37%, F1: 0.9937
  Val   - Loss: 0.0090, Acc: 99.44%, F1: 0.9944
  Learning Rate (group0): 7.16e-06
  No improvement. Patience: 4/10

Fold 2 - Epoch 20/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:44<00:00,  3.82s/it, loss=0.0036, acc=99.58%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:35<00:00,  1.55s/it, loss=0.0044, acc=99.72%]



Results:
  Train - Loss: 0.0036, Acc: 99.58%, F1: 0.9958
  Val   - Loss: 0.0044, Acc: 99.72%, F1: 0.9972
  Learning Rate (group0): 6.89e-06
  No improvement. Patience: 5/10

Fold 2 - Epoch 21/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:42<00:00,  3.81s/it, loss=0.0061, acc=99.02%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:40<00:00,  1.78s/it, loss=0.2773, acc=77.31%]



Results:
  Train - Loss: 0.0061, Acc: 99.02%, F1: 0.9902
  Val   - Loss: 0.2773, Acc: 77.31%, F1: 0.7762
  Learning Rate (group0): 6.62e-06
  No improvement. Patience: 6/10

Fold 2 - Epoch 22/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:59<00:00,  3.99s/it, loss=0.0123, acc=98.18%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:35<00:00,  1.53s/it, loss=0.0067, acc=98.88%]



Results:
  Train - Loss: 0.0123, Acc: 98.18%, F1: 0.9818
  Val   - Loss: 0.0067, Acc: 98.88%, F1: 0.9888
  Learning Rate (group0): 6.34e-06
  No improvement. Patience: 7/10

Fold 2 - Epoch 23/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:43<00:00,  3.82s/it, loss=0.0019, acc=99.72%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:34<00:00,  1.52s/it, loss=0.0038, acc=99.72%]



Results:
  Train - Loss: 0.0019, Acc: 99.72%, F1: 0.9972
  Val   - Loss: 0.0038, Acc: 99.72%, F1: 0.9972
  Learning Rate (group0): 6.06e-06
  No improvement. Patience: 8/10

Fold 2 - Epoch 24/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:42<00:00,  3.81s/it, loss=0.0021, acc=99.58%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.59s/it, loss=0.0086, acc=97.76%]



Results:
  Train - Loss: 0.0021, Acc: 99.58%, F1: 0.9958
  Val   - Loss: 0.0086, Acc: 97.76%, F1: 0.9777
  Learning Rate (group0): 5.78e-06
  No improvement. Patience: 9/10

Fold 2 - Epoch 25/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:00<00:00,  4.01s/it, loss=0.0064, acc=99.23%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:46<00:00,  2.03s/it, loss=0.0183, acc=98.32%]



Results:
  Train - Loss: 0.0064, Acc: 99.23%, F1: 0.9923
  Val   - Loss: 0.0183, Acc: 98.32%, F1: 0.9832
  Learning Rate (group0): 5.50e-06
  No improvement. Patience: 10/10

‚ö†Ô∏è Early stopping triggered!
   Best score: 99.7199 at epoch 14

‚ö†Ô∏è Early stopping at epoch 25

üìä EVALUATING FOLD 2


Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:44<00:00,  1.93s/it, loss=0.0044, acc=99.72%]



üìà Fold 2 Validation Results:
  Accuracy:         99.72%
  Precision:        0.9972
  Recall:           0.9972
  F1 Score:         0.9972
  Sensitivity (Normal):   1.0000
  Sensitivity (Abnormal): 0.9957
  Specificity (Normal):   0.9922
  Specificity (Abnormal): 1.0000
  Avg Sensitivity:  0.9978
  Avg Specificity:  0.9961
‚úì Confusion matrix saved

FOLD 3/5

üìä Fold 3 split:
  Train:      1426 (80.0% of k-fold data)
  Validation: 356 (20.0% of k-fold data)

ü§ñ Creating model for Fold 3...


Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Fold 3 - Epoch 1/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:24<00:00,  4.27s/it, loss=0.1094, acc=80.43%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:35<00:00,  1.53s/it, loss=0.0661, acc=91.85%]



Results:
  Train - Loss: 0.1094, Acc: 80.43%, F1: 0.8072
  Val   - Loss: 0.0661, Acc: 91.85%, F1: 0.9171
  Learning Rate (group0): 9.99e-06
  üíæ Best model saved! (Val Acc: 91.85%)
  ‚úì Initial best score: 91.8539

Fold 3 - Epoch 2/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:29<00:00,  4.33s/it, loss=0.0538, acc=92.50%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:37<00:00,  1.64s/it, loss=0.0729, acc=91.29%]



Results:
  Train - Loss: 0.0538, Acc: 92.50%, F1: 0.9253
  Val   - Loss: 0.0729, Acc: 91.29%, F1: 0.9133
  Learning Rate (group0): 9.96e-06
  No improvement. Patience: 1/10

Fold 3 - Epoch 3/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:25<00:00,  4.28s/it, loss=0.0517, acc=93.41%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:41<00:00,  1.83s/it, loss=0.0334, acc=95.22%]



Results:
  Train - Loss: 0.0517, Acc: 93.41%, F1: 0.9343
  Val   - Loss: 0.0334, Acc: 95.22%, F1: 0.9525
  Learning Rate (group0): 9.92e-06
  üíæ Best model saved! (Val Acc: 95.22%)
  ‚úì New best score: 95.2247

Fold 3 - Epoch 4/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:23<00:00,  4.26s/it, loss=0.0328, acc=96.35%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.57s/it, loss=0.0409, acc=93.54%]



Results:
  Train - Loss: 0.0328, Acc: 96.35%, F1: 0.9637
  Val   - Loss: 0.0409, Acc: 93.54%, F1: 0.9361
  Learning Rate (group0): 9.86e-06
  No improvement. Patience: 1/10

Fold 3 - Epoch 5/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:49<00:00,  3.88s/it, loss=0.0270, acc=96.42%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:50<00:00,  2.18s/it, loss=0.0316, acc=94.94%]



Results:
  Train - Loss: 0.0270, Acc: 96.42%, F1: 0.9643
  Val   - Loss: 0.0316, Acc: 94.94%, F1: 0.9495
  Learning Rate (group0): 9.78e-06
  No improvement. Patience: 2/10

Fold 3 - Epoch 6/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:57<00:00,  4.64s/it, loss=0.0218, acc=97.34%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:43<00:00,  1.89s/it, loss=0.1592, acc=88.48%]



Results:
  Train - Loss: 0.0218, Acc: 97.34%, F1: 0.9734
  Val   - Loss: 0.1592, Acc: 88.48%, F1: 0.8790
  Learning Rate (group0): 9.68e-06
  No improvement. Patience: 3/10

Fold 3 - Epoch 7/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:46<00:00,  4.52s/it, loss=0.0211, acc=96.70%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:52<00:00,  2.30s/it, loss=0.0321, acc=95.79%]



Results:
  Train - Loss: 0.0211, Acc: 96.70%, F1: 0.9671
  Val   - Loss: 0.0321, Acc: 95.79%, F1: 0.9580
  Learning Rate (group0): 9.57e-06
  üíæ Best model saved! (Val Acc: 95.79%)
  ‚úì New best score: 95.7865

Fold 3 - Epoch 8/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:57<00:00,  4.64s/it, loss=0.0106, acc=98.74%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:42<00:00,  1.85s/it, loss=0.0134, acc=98.60%]



Results:
  Train - Loss: 0.0106, Acc: 98.74%, F1: 0.9874
  Val   - Loss: 0.0134, Acc: 98.60%, F1: 0.9859
  Learning Rate (group0): 9.44e-06
  üíæ Best model saved! (Val Acc: 98.60%)
  ‚úì New best score: 98.5955

Fold 3 - Epoch 9/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:33<00:00,  3.71s/it, loss=0.0145, acc=98.11%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:39<00:00,  1.74s/it, loss=0.0205, acc=96.63%]



Results:
  Train - Loss: 0.0145, Acc: 98.11%, F1: 0.9811
  Val   - Loss: 0.0205, Acc: 96.63%, F1: 0.9662
  Learning Rate (group0): 9.30e-06
  No improvement. Patience: 1/10

Fold 3 - Epoch 10/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:08<00:00,  4.10s/it, loss=0.0115, acc=98.32%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:40<00:00,  1.76s/it, loss=0.0228, acc=97.75%]



Results:
  Train - Loss: 0.0115, Acc: 98.32%, F1: 0.9832
  Val   - Loss: 0.0228, Acc: 97.75%, F1: 0.9776
  Learning Rate (group0): 9.14e-06
  No improvement. Patience: 2/10

Fold 3 - Epoch 11/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:57<00:00,  3.97s/it, loss=0.0191, acc=97.27%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:41<00:00,  1.78s/it, loss=0.0311, acc=96.35%]



Results:
  Train - Loss: 0.0191, Acc: 97.27%, F1: 0.9727
  Val   - Loss: 0.0311, Acc: 96.35%, F1: 0.9632
  Learning Rate (group0): 8.97e-06
  No improvement. Patience: 3/10

Fold 3 - Epoch 12/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:12<00:00,  4.14s/it, loss=0.0049, acc=99.37%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:45<00:00,  1.98s/it, loss=0.1790, acc=90.17%]



Results:
  Train - Loss: 0.0049, Acc: 99.37%, F1: 0.9937
  Val   - Loss: 0.1790, Acc: 90.17%, F1: 0.8974
  Learning Rate (group0): 8.78e-06
  No improvement. Patience: 4/10

Fold 3 - Epoch 13/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:16<00:00,  4.18s/it, loss=0.0191, acc=97.27%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:32<00:00,  1.41s/it, loss=0.0171, acc=97.75%]



Results:
  Train - Loss: 0.0191, Acc: 97.27%, F1: 0.9727
  Val   - Loss: 0.0171, Acc: 97.75%, F1: 0.9777
  Learning Rate (group0): 8.58e-06
  No improvement. Patience: 5/10

Fold 3 - Epoch 14/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:17<00:00,  3.53s/it, loss=0.0069, acc=98.53%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:40<00:00,  1.77s/it, loss=0.0160, acc=98.31%]



Results:
  Train - Loss: 0.0069, Acc: 98.53%, F1: 0.9853
  Val   - Loss: 0.0160, Acc: 98.31%, F1: 0.9832
  Learning Rate (group0): 8.37e-06
  No improvement. Patience: 6/10

Fold 3 - Epoch 15/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:45<00:00,  3.84s/it, loss=0.0178, acc=98.46%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.61s/it, loss=0.0199, acc=96.35%]



Results:
  Train - Loss: 0.0178, Acc: 98.46%, F1: 0.9846
  Val   - Loss: 0.0199, Acc: 96.35%, F1: 0.9634
  Learning Rate (group0): 8.15e-06
  No improvement. Patience: 7/10

Fold 3 - Epoch 16/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:38<00:00,  3.76s/it, loss=0.0236, acc=96.84%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.59s/it, loss=0.0303, acc=96.63%]



Results:
  Train - Loss: 0.0236, Acc: 96.84%, F1: 0.9686
  Val   - Loss: 0.0303, Acc: 96.63%, F1: 0.9665
  Learning Rate (group0): 7.91e-06
  No improvement. Patience: 8/10

Fold 3 - Epoch 17/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:03<00:00,  4.04s/it, loss=0.0056, acc=99.30%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.58s/it, loss=0.0179, acc=98.31%]



Results:
  Train - Loss: 0.0056, Acc: 99.30%, F1: 0.9930
  Val   - Loss: 0.0179, Acc: 98.31%, F1: 0.9831
  Learning Rate (group0): 7.67e-06
  No improvement. Patience: 9/10

Fold 3 - Epoch 18/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:15<00:00,  4.18s/it, loss=0.0034, acc=99.44%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:44<00:00,  1.92s/it, loss=0.0100, acc=98.88%] 



Results:
  Train - Loss: 0.0034, Acc: 99.44%, F1: 0.9944
  Val   - Loss: 0.0100, Acc: 98.88%, F1: 0.9888
  Learning Rate (group0): 7.42e-06
  üíæ Best model saved! (Val Acc: 98.88%)
  ‚úì New best score: 98.8764

Fold 3 - Epoch 19/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:14<00:00,  4.16s/it, loss=0.0054, acc=99.51%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.58s/it, loss=0.0137, acc=98.60%]



Results:
  Train - Loss: 0.0054, Acc: 99.51%, F1: 0.9951
  Val   - Loss: 0.0137, Acc: 98.60%, F1: 0.9860
  Learning Rate (group0): 7.16e-06
  No improvement. Patience: 1/10

Fold 3 - Epoch 20/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:55<00:00,  3.94s/it, loss=0.0040, acc=99.58%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.60s/it, loss=0.1078, acc=95.79%]



Results:
  Train - Loss: 0.0040, Acc: 99.58%, F1: 0.9958
  Val   - Loss: 0.1078, Acc: 95.79%, F1: 0.9573
  Learning Rate (group0): 6.89e-06
  No improvement. Patience: 2/10

Fold 3 - Epoch 21/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:33<00:00,  4.38s/it, loss=0.0056, acc=99.16%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:37<00:00,  1.62s/it, loss=0.0119, acc=98.31%]



Results:
  Train - Loss: 0.0056, Acc: 99.16%, F1: 0.9916
  Val   - Loss: 0.0119, Acc: 98.31%, F1: 0.9831
  Learning Rate (group0): 6.62e-06
  No improvement. Patience: 3/10

Fold 3 - Epoch 22/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:47<00:00,  3.87s/it, loss=0.0011, acc=99.86%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:28<00:00,  1.23s/it, loss=0.0103, acc=98.88%]



Results:
  Train - Loss: 0.0011, Acc: 99.86%, F1: 0.9986
  Val   - Loss: 0.0103, Acc: 98.88%, F1: 0.9888
  Learning Rate (group0): 6.34e-06
  No improvement. Patience: 4/10

Fold 3 - Epoch 23/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:46<00:00,  3.85s/it, loss=0.0060, acc=98.95%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.60s/it, loss=0.0139, acc=98.31%]



Results:
  Train - Loss: 0.0060, Acc: 98.95%, F1: 0.9895
  Val   - Loss: 0.0139, Acc: 98.31%, F1: 0.9832
  Learning Rate (group0): 6.06e-06
  No improvement. Patience: 5/10

Fold 3 - Epoch 24/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:47<00:00,  3.86s/it, loss=0.0026, acc=99.58%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:34<00:00,  1.50s/it, loss=0.0093, acc=98.60%]



Results:
  Train - Loss: 0.0026, Acc: 99.58%, F1: 0.9958
  Val   - Loss: 0.0093, Acc: 98.60%, F1: 0.9859
  Learning Rate (group0): 5.78e-06
  No improvement. Patience: 6/10

Fold 3 - Epoch 25/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:36<00:00,  3.74s/it, loss=0.0011, acc=99.79%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:50<00:00,  2.19s/it, loss=0.0188, acc=98.31%]



Results:
  Train - Loss: 0.0011, Acc: 99.79%, F1: 0.9979
  Val   - Loss: 0.0188, Acc: 98.31%, F1: 0.9832
  Learning Rate (group0): 5.50e-06
  No improvement. Patience: 7/10

Fold 3 - Epoch 26/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:15<00:00,  4.17s/it, loss=0.0033, acc=99.65%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:39<00:00,  1.70s/it, loss=0.0127, acc=98.31%]



Results:
  Train - Loss: 0.0033, Acc: 99.65%, F1: 0.9965
  Val   - Loss: 0.0127, Acc: 98.31%, F1: 0.9831
  Learning Rate (group0): 5.22e-06
  No improvement. Patience: 8/10

Fold 3 - Epoch 27/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:51<00:00,  4.57s/it, loss=0.0025, acc=99.51%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:39<00:00,  1.71s/it, loss=0.0153, acc=98.88%]



Results:
  Train - Loss: 0.0025, Acc: 99.51%, F1: 0.9951
  Val   - Loss: 0.0153, Acc: 98.88%, F1: 0.9887
  Learning Rate (group0): 4.94e-06
  No improvement. Patience: 9/10

Fold 3 - Epoch 28/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [07:13<00:00,  4.81s/it, loss=0.0016, acc=99.65%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:46<00:00,  2.01s/it, loss=0.0114, acc=98.31%]



Results:
  Train - Loss: 0.0016, Acc: 99.65%, F1: 0.9965
  Val   - Loss: 0.0114, Acc: 98.31%, F1: 0.9831
  Learning Rate (group0): 4.66e-06
  No improvement. Patience: 10/10

‚ö†Ô∏è Early stopping triggered!
   Best score: 98.8764 at epoch 17

‚ö†Ô∏è Early stopping at epoch 28

üìä EVALUATING FOLD 3


Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.60s/it, loss=0.0100, acc=98.88%] 



üìà Fold 3 Validation Results:
  Accuracy:         98.88%
  Precision:        0.9889
  Recall:           0.9888
  F1 Score:         0.9888
  Sensitivity (Normal):   0.9769
  Sensitivity (Abnormal): 0.9956
  Specificity (Normal):   0.9922
  Specificity (Abnormal): 0.9868
  Avg Sensitivity:  0.9862
  Avg Specificity:  0.9895
‚úì Confusion matrix saved

FOLD 4/5

üìä Fold 4 split:
  Train:      1426 (80.0% of k-fold data)
  Validation: 356 (20.0% of k-fold data)

ü§ñ Creating model for Fold 4...


Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Fold 4 - Epoch 1/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:01<00:00,  4.02s/it, loss=0.1034, acc=82.82%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:34<00:00,  1.49s/it, loss=0.0595, acc=91.85%]



Results:
  Train - Loss: 0.1034, Acc: 82.82%, F1: 0.8301
  Val   - Loss: 0.0595, Acc: 91.85%, F1: 0.9177
  Learning Rate (group0): 9.99e-06
  üíæ Best model saved! (Val Acc: 91.85%)
  ‚úì Initial best score: 91.8539

Fold 4 - Epoch 2/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:15<00:00,  4.18s/it, loss=0.0546, acc=92.15%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:44<00:00,  1.95s/it, loss=0.0412, acc=92.42%]



Results:
  Train - Loss: 0.0546, Acc: 92.15%, F1: 0.9218
  Val   - Loss: 0.0412, Acc: 92.42%, F1: 0.9252
  Learning Rate (group0): 9.96e-06
  üíæ Best model saved! (Val Acc: 92.42%)
  ‚úì New best score: 92.4157

Fold 4 - Epoch 3/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:26<00:00,  4.30s/it, loss=0.0382, acc=94.95%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:39<00:00,  1.73s/it, loss=0.0311, acc=94.94%]



Results:
  Train - Loss: 0.0382, Acc: 94.95%, F1: 0.9496
  Val   - Loss: 0.0311, Acc: 94.94%, F1: 0.9499
  Learning Rate (group0): 9.92e-06
  üíæ Best model saved! (Val Acc: 94.94%)
  ‚úì New best score: 94.9438

Fold 4 - Epoch 4/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:27<00:00,  4.31s/it, loss=0.0368, acc=94.67%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.60s/it, loss=0.0327, acc=94.66%]



Results:
  Train - Loss: 0.0368, Acc: 94.67%, F1: 0.9470
  Val   - Loss: 0.0327, Acc: 94.66%, F1: 0.9473
  Learning Rate (group0): 9.86e-06
  No improvement. Patience: 1/10

Fold 4 - Epoch 5/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:43<00:00,  3.82s/it, loss=0.0312, acc=96.14%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:33<00:00,  1.47s/it, loss=0.0264, acc=96.63%]



Results:
  Train - Loss: 0.0312, Acc: 96.14%, F1: 0.9615
  Val   - Loss: 0.0264, Acc: 96.63%, F1: 0.9662
  Learning Rate (group0): 9.78e-06
  üíæ Best model saved! (Val Acc: 96.63%)
  ‚úì New best score: 96.6292

Fold 4 - Epoch 6/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:08<00:00,  4.09s/it, loss=0.0197, acc=97.27%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:35<00:00,  1.56s/it, loss=0.0298, acc=94.38%]



Results:
  Train - Loss: 0.0197, Acc: 97.27%, F1: 0.9727
  Val   - Loss: 0.0298, Acc: 94.38%, F1: 0.9446
  Learning Rate (group0): 9.68e-06
  No improvement. Patience: 1/10

Fold 4 - Epoch 7/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:26<00:00,  4.29s/it, loss=0.0168, acc=97.69%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:27<00:00,  1.19s/it, loss=0.0251, acc=95.79%]



Results:
  Train - Loss: 0.0168, Acc: 97.69%, F1: 0.9769
  Val   - Loss: 0.0251, Acc: 95.79%, F1: 0.9582
  Learning Rate (group0): 9.57e-06
  No improvement. Patience: 2/10

Fold 4 - Epoch 8/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:04<00:00,  3.39s/it, loss=0.0151, acc=97.97%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:38<00:00,  1.68s/it, loss=0.0155, acc=96.63%]



Results:
  Train - Loss: 0.0151, Acc: 97.97%, F1: 0.9797
  Val   - Loss: 0.0155, Acc: 96.63%, F1: 0.9665
  Learning Rate (group0): 9.44e-06
  No improvement. Patience: 3/10

Fold 4 - Epoch 9/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:09<00:00,  4.10s/it, loss=0.0078, acc=99.02%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:34<00:00,  1.51s/it, loss=0.0180, acc=97.19%]



Results:
  Train - Loss: 0.0078, Acc: 99.02%, F1: 0.9902
  Val   - Loss: 0.0180, Acc: 97.19%, F1: 0.9721
  Learning Rate (group0): 9.30e-06
  üíæ Best model saved! (Val Acc: 97.19%)
  ‚úì New best score: 97.1910

Fold 4 - Epoch 10/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:23<00:00,  4.26s/it, loss=0.0116, acc=98.25%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:42<00:00,  1.83s/it, loss=0.0203, acc=96.91%]



Results:
  Train - Loss: 0.0116, Acc: 98.25%, F1: 0.9825
  Val   - Loss: 0.0203, Acc: 96.91%, F1: 0.9693
  Learning Rate (group0): 9.14e-06
  No improvement. Patience: 1/10

Fold 4 - Epoch 11/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:25<00:00,  4.29s/it, loss=0.0109, acc=98.32%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:41<00:00,  1.82s/it, loss=0.1993, acc=76.69%]



Results:
  Train - Loss: 0.0109, Acc: 98.32%, F1: 0.9832
  Val   - Loss: 0.1993, Acc: 76.69%, F1: 0.7695
  Learning Rate (group0): 8.97e-06
  No improvement. Patience: 2/10

Fold 4 - Epoch 12/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:16<00:00,  4.19s/it, loss=0.0188, acc=97.34%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:41<00:00,  1.79s/it, loss=0.0305, acc=95.79%]



Results:
  Train - Loss: 0.0188, Acc: 97.34%, F1: 0.9734
  Val   - Loss: 0.0305, Acc: 95.79%, F1: 0.9582
  Learning Rate (group0): 8.78e-06
  No improvement. Patience: 3/10

Fold 4 - Epoch 13/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:17<00:00,  4.19s/it, loss=0.0107, acc=98.60%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:40<00:00,  1.77s/it, loss=0.0409, acc=97.47%]



Results:
  Train - Loss: 0.0107, Acc: 98.60%, F1: 0.9860
  Val   - Loss: 0.0409, Acc: 97.47%, F1: 0.9747
  Learning Rate (group0): 8.58e-06
  üíæ Best model saved! (Val Acc: 97.47%)
  ‚úì New best score: 97.4719

Fold 4 - Epoch 14/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:24<00:00,  4.27s/it, loss=0.0081, acc=98.81%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:42<00:00,  1.84s/it, loss=0.0551, acc=93.54%]



Results:
  Train - Loss: 0.0081, Acc: 98.81%, F1: 0.9881
  Val   - Loss: 0.0551, Acc: 93.54%, F1: 0.9362
  Learning Rate (group0): 8.37e-06
  No improvement. Patience: 1/10

Fold 4 - Epoch 15/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:07<00:00,  4.09s/it, loss=0.0133, acc=97.97%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:41<00:00,  1.82s/it, loss=0.0380, acc=95.79%]



Results:
  Train - Loss: 0.0133, Acc: 97.97%, F1: 0.9797
  Val   - Loss: 0.0380, Acc: 95.79%, F1: 0.9582
  Learning Rate (group0): 8.15e-06
  No improvement. Patience: 2/10

Fold 4 - Epoch 16/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:15<00:00,  4.17s/it, loss=0.0061, acc=99.09%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:40<00:00,  1.77s/it, loss=0.0281, acc=96.35%]



Results:
  Train - Loss: 0.0061, Acc: 99.09%, F1: 0.9909
  Val   - Loss: 0.0281, Acc: 96.35%, F1: 0.9637
  Learning Rate (group0): 7.91e-06
  No improvement. Patience: 3/10

Fold 4 - Epoch 17/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:51<00:00,  3.90s/it, loss=0.0036, acc=99.65%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:37<00:00,  1.61s/it, loss=0.0222, acc=96.63%]



Results:
  Train - Loss: 0.0036, Acc: 99.65%, F1: 0.9965
  Val   - Loss: 0.0222, Acc: 96.63%, F1: 0.9665
  Learning Rate (group0): 7.67e-06
  No improvement. Patience: 4/10

Fold 4 - Epoch 18/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:36<00:00,  3.74s/it, loss=0.0023, acc=99.79%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:37<00:00,  1.62s/it, loss=0.0247, acc=97.19%]



Results:
  Train - Loss: 0.0023, Acc: 99.79%, F1: 0.9979
  Val   - Loss: 0.0247, Acc: 97.19%, F1: 0.9720
  Learning Rate (group0): 7.42e-06
  No improvement. Patience: 5/10

Fold 4 - Epoch 19/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:48<00:00,  3.87s/it, loss=0.0028, acc=99.79%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.57s/it, loss=0.0279, acc=98.03%]



Results:
  Train - Loss: 0.0028, Acc: 99.79%, F1: 0.9979
  Val   - Loss: 0.0279, Acc: 98.03%, F1: 0.9804
  Learning Rate (group0): 7.16e-06
  üíæ Best model saved! (Val Acc: 98.03%)
  ‚úì New best score: 98.0337

Fold 4 - Epoch 20/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:41<00:00,  3.79s/it, loss=0.0138, acc=98.25%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.59s/it, loss=0.0229, acc=95.22%]



Results:
  Train - Loss: 0.0138, Acc: 98.25%, F1: 0.9825
  Val   - Loss: 0.0229, Acc: 95.22%, F1: 0.9528
  Learning Rate (group0): 6.89e-06
  No improvement. Patience: 1/10

Fold 4 - Epoch 21/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:21<00:00,  3.57s/it, loss=0.0069, acc=99.23%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:40<00:00,  1.78s/it, loss=0.0095, acc=98.31%]



Results:
  Train - Loss: 0.0069, Acc: 99.23%, F1: 0.9923
  Val   - Loss: 0.0095, Acc: 98.31%, F1: 0.9832
  Learning Rate (group0): 6.62e-06
  üíæ Best model saved! (Val Acc: 98.31%)
  ‚úì New best score: 98.3146

Fold 4 - Epoch 22/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:01<00:00,  4.01s/it, loss=0.0082, acc=98.81%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:41<00:00,  1.82s/it, loss=0.0115, acc=98.88%]



Results:
  Train - Loss: 0.0082, Acc: 98.81%, F1: 0.9881
  Val   - Loss: 0.0115, Acc: 98.88%, F1: 0.9888
  Learning Rate (group0): 6.34e-06
  üíæ Best model saved! (Val Acc: 98.88%)
  ‚úì New best score: 98.8764

Fold 4 - Epoch 23/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:33<00:00,  4.37s/it, loss=0.0049, acc=99.51%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:42<00:00,  1.83s/it, loss=0.0287, acc=97.75%]



Results:
  Train - Loss: 0.0049, Acc: 99.51%, F1: 0.9951
  Val   - Loss: 0.0287, Acc: 97.75%, F1: 0.9774
  Learning Rate (group0): 6.06e-06
  No improvement. Patience: 1/10

Fold 4 - Epoch 24/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:25<00:00,  4.28s/it, loss=0.0019, acc=99.72%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:42<00:00,  1.85s/it, loss=0.0125, acc=98.60%]



Results:
  Train - Loss: 0.0019, Acc: 99.72%, F1: 0.9972
  Val   - Loss: 0.0125, Acc: 98.60%, F1: 0.9860
  Learning Rate (group0): 5.78e-06
  No improvement. Patience: 2/10

Fold 4 - Epoch 25/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:15<00:00,  4.18s/it, loss=0.0039, acc=99.44%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:41<00:00,  1.81s/it, loss=0.0202, acc=98.03%]



Results:
  Train - Loss: 0.0039, Acc: 99.44%, F1: 0.9944
  Val   - Loss: 0.0202, Acc: 98.03%, F1: 0.9804
  Learning Rate (group0): 5.50e-06
  No improvement. Patience: 3/10

Fold 4 - Epoch 26/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:45<00:00,  3.84s/it, loss=0.0090, acc=98.74%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.60s/it, loss=0.0300, acc=95.22%]



Results:
  Train - Loss: 0.0090, Acc: 98.74%, F1: 0.9874
  Val   - Loss: 0.0300, Acc: 95.22%, F1: 0.9528
  Learning Rate (group0): 5.22e-06
  No improvement. Patience: 4/10

Fold 4 - Epoch 27/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:28<00:00,  3.65s/it, loss=0.0064, acc=99.23%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:30<00:00,  1.33s/it, loss=0.0123, acc=97.75%]



Results:
  Train - Loss: 0.0064, Acc: 99.23%, F1: 0.9923
  Val   - Loss: 0.0123, Acc: 97.75%, F1: 0.9776
  Learning Rate (group0): 4.94e-06
  No improvement. Patience: 5/10

Fold 4 - Epoch 28/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:40<00:00,  3.79s/it, loss=0.0087, acc=99.16%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:34<00:00,  1.52s/it, loss=0.0107, acc=98.60%]



Results:
  Train - Loss: 0.0087, Acc: 99.16%, F1: 0.9916
  Val   - Loss: 0.0107, Acc: 98.60%, F1: 0.9860
  Learning Rate (group0): 4.66e-06
  No improvement. Patience: 6/10

Fold 4 - Epoch 29/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:33<00:00,  3.70s/it, loss=0.0010, acc=99.93%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:34<00:00,  1.51s/it, loss=0.0123, acc=98.31%]



Results:
  Train - Loss: 0.0010, Acc: 99.93%, F1: 0.9993
  Val   - Loss: 0.0123, Acc: 98.31%, F1: 0.9832
  Learning Rate (group0): 4.38e-06
  No improvement. Patience: 7/10

Fold 4 - Epoch 30/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:13<00:00,  4.15s/it, loss=0.0007, acc=100.00%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:34<00:00,  1.49s/it, loss=0.0108, acc=98.60%]



Results:
  Train - Loss: 0.0007, Acc: 100.00%, F1: 1.0000
  Val   - Loss: 0.0108, Acc: 98.60%, F1: 0.9860
  Learning Rate (group0): 4.11e-06
  No improvement. Patience: 8/10

Fold 4 - Epoch 31/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:34<00:00,  3.71s/it, loss=0.0011, acc=99.86%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:33<00:00,  1.45s/it, loss=0.0146, acc=98.31%]



Results:
  Train - Loss: 0.0011, Acc: 99.86%, F1: 0.9986
  Val   - Loss: 0.0146, Acc: 98.31%, F1: 0.9832
  Learning Rate (group0): 3.84e-06
  No improvement. Patience: 9/10

Fold 4 - Epoch 32/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [05:46<00:00,  3.85s/it, loss=0.0006, acc=99.93%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:40<00:00,  1.74s/it, loss=0.0188, acc=98.31%]



Results:
  Train - Loss: 0.0006, Acc: 99.93%, F1: 0.9993
  Val   - Loss: 0.0188, Acc: 98.31%, F1: 0.9832
  Learning Rate (group0): 3.58e-06
  No improvement. Patience: 10/10

‚ö†Ô∏è Early stopping triggered!
   Best score: 98.8764 at epoch 21

‚ö†Ô∏è Early stopping at epoch 32

üìä EVALUATING FOLD 4


Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:36<00:00,  1.58s/it, loss=0.0115, acc=98.88%]



üìà Fold 4 Validation Results:
  Accuracy:         98.88%
  Precision:        0.9889
  Recall:           0.9888
  F1 Score:         0.9888
  Sensitivity (Normal):   0.9769
  Sensitivity (Abnormal): 0.9956
  Specificity (Normal):   0.9922
  Specificity (Abnormal): 0.9868
  Avg Sensitivity:  0.9862
  Avg Specificity:  0.9895
‚úì Confusion matrix saved

FOLD 5/5

üìä Fold 5 split:
  Train:      1426 (80.0% of k-fold data)
  Validation: 356 (20.0% of k-fold data)

ü§ñ Creating model for Fold 5...


Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Fold 5 - Epoch 1/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [06:10<00:00,  4.12s/it, loss=0.0985, acc=83.52%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:35<00:00,  1.55s/it, loss=0.1259, acc=87.36%]



Results:
  Train - Loss: 0.0985, Acc: 83.52%, F1: 0.8369
  Val   - Loss: 0.1259, Acc: 87.36%, F1: 0.8672
  Learning Rate (group0): 9.99e-06
  üíæ Best model saved! (Val Acc: 87.36%)
  ‚úì Initial best score: 87.3596

Fold 5 - Epoch 2/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:51<00:00,  3.24s/it, loss=0.0604, acc=91.51%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:27<00:00,  1.19s/it, loss=0.1301, acc=84.55%]



Results:
  Train - Loss: 0.0604, Acc: 91.51%, F1: 0.9156
  Val   - Loss: 0.1301, Acc: 84.55%, F1: 0.8341
  Learning Rate (group0): 9.96e-06
  No improvement. Patience: 1/10

Fold 5 - Epoch 3/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:24<00:00,  2.94s/it, loss=0.0461, acc=93.90%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:24<00:00,  1.07s/it, loss=0.0414, acc=93.54%]



Results:
  Train - Loss: 0.0461, Acc: 93.90%, F1: 0.9393
  Val   - Loss: 0.0414, Acc: 93.54%, F1: 0.9358
  Learning Rate (group0): 9.92e-06
  üíæ Best model saved! (Val Acc: 93.54%)
  ‚úì New best score: 93.5393

Fold 5 - Epoch 4/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:31<00:00,  3.02s/it, loss=0.0258, acc=96.49%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:25<00:00,  1.10s/it, loss=0.0946, acc=86.80%]



Results:
  Train - Loss: 0.0258, Acc: 96.49%, F1: 0.9650
  Val   - Loss: 0.0946, Acc: 86.80%, F1: 0.8705
  Learning Rate (group0): 9.86e-06
  No improvement. Patience: 1/10

Fold 5 - Epoch 5/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:38<00:00,  3.10s/it, loss=0.0318, acc=96.21%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:28<00:00,  1.23s/it, loss=0.0343, acc=95.22%]



Results:
  Train - Loss: 0.0318, Acc: 96.21%, F1: 0.9623
  Val   - Loss: 0.0343, Acc: 95.22%, F1: 0.9524
  Learning Rate (group0): 9.78e-06
  üíæ Best model saved! (Val Acc: 95.22%)
  ‚úì New best score: 95.2247

Fold 5 - Epoch 6/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:48<00:00,  3.20s/it, loss=0.0288, acc=96.35%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:25<00:00,  1.11s/it, loss=0.0442, acc=95.79%]



Results:
  Train - Loss: 0.0288, Acc: 96.35%, F1: 0.9636
  Val   - Loss: 0.0442, Acc: 95.79%, F1: 0.9572
  Learning Rate (group0): 9.68e-06
  üíæ Best model saved! (Val Acc: 95.79%)
  ‚úì New best score: 95.7865

Fold 5 - Epoch 7/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:27<00:00,  2.97s/it, loss=0.0211, acc=97.41%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:24<00:00,  1.07s/it, loss=0.0334, acc=97.47%]



Results:
  Train - Loss: 0.0211, Acc: 97.41%, F1: 0.9741
  Val   - Loss: 0.0334, Acc: 97.47%, F1: 0.9746
  Learning Rate (group0): 9.57e-06
  üíæ Best model saved! (Val Acc: 97.47%)
  ‚úì New best score: 97.4719

Fold 5 - Epoch 8/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:28<00:00,  2.99s/it, loss=0.0160, acc=97.90%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:24<00:00,  1.07s/it, loss=0.0244, acc=97.19%]



Results:
  Train - Loss: 0.0160, Acc: 97.90%, F1: 0.9790
  Val   - Loss: 0.0244, Acc: 97.19%, F1: 0.9718
  Learning Rate (group0): 9.44e-06
  No improvement. Patience: 1/10

Fold 5 - Epoch 9/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:28<00:00,  2.98s/it, loss=0.0117, acc=98.74%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:24<00:00,  1.08s/it, loss=0.0325, acc=98.31%]



Results:
  Train - Loss: 0.0117, Acc: 98.74%, F1: 0.9874
  Val   - Loss: 0.0325, Acc: 98.31%, F1: 0.9831
  Learning Rate (group0): 9.30e-06
  üíæ Best model saved! (Val Acc: 98.31%)
  ‚úì New best score: 98.3146

Fold 5 - Epoch 10/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:32<00:00,  3.02s/it, loss=0.0122, acc=98.25%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:24<00:00,  1.08s/it, loss=0.0822, acc=93.82%]



Results:
  Train - Loss: 0.0122, Acc: 98.25%, F1: 0.9825
  Val   - Loss: 0.0822, Acc: 93.82%, F1: 0.9370
  Learning Rate (group0): 9.14e-06
  No improvement. Patience: 1/10

Fold 5 - Epoch 11/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:25<00:00,  2.95s/it, loss=0.0193, acc=97.41%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:24<00:00,  1.07s/it, loss=0.0225, acc=98.60%] 



Results:
  Train - Loss: 0.0193, Acc: 97.41%, F1: 0.9741
  Val   - Loss: 0.0225, Acc: 98.60%, F1: 0.9859
  Learning Rate (group0): 8.97e-06
  üíæ Best model saved! (Val Acc: 98.60%)
  ‚úì New best score: 98.5955

Fold 5 - Epoch 12/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:24<00:00,  2.94s/it, loss=0.0106, acc=99.02%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:24<00:00,  1.06s/it, loss=0.0248, acc=98.03%]



Results:
  Train - Loss: 0.0106, Acc: 99.02%, F1: 0.9902
  Val   - Loss: 0.0248, Acc: 98.03%, F1: 0.9803
  Learning Rate (group0): 8.78e-06
  No improvement. Patience: 1/10

Fold 5 - Epoch 13/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:38<00:00,  3.10s/it, loss=0.0091, acc=98.60%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:27<00:00,  1.20s/it, loss=0.0078, acc=98.31%]



Results:
  Train - Loss: 0.0091, Acc: 98.60%, F1: 0.9860
  Val   - Loss: 0.0078, Acc: 98.31%, F1: 0.9831
  Learning Rate (group0): 8.58e-06
  No improvement. Patience: 2/10

Fold 5 - Epoch 14/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:33<00:00,  3.04s/it, loss=0.0101, acc=98.46%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:25<00:00,  1.12s/it, loss=0.0242, acc=98.03%]



Results:
  Train - Loss: 0.0101, Acc: 98.46%, F1: 0.9846
  Val   - Loss: 0.0242, Acc: 98.03%, F1: 0.9802
  Learning Rate (group0): 8.37e-06
  No improvement. Patience: 3/10

Fold 5 - Epoch 15/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:27<00:00,  2.97s/it, loss=0.0118, acc=98.60%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:24<00:00,  1.07s/it, loss=0.0106, acc=98.88%] 



Results:
  Train - Loss: 0.0118, Acc: 98.60%, F1: 0.9860
  Val   - Loss: 0.0106, Acc: 98.88%, F1: 0.9887
  Learning Rate (group0): 8.15e-06
  üíæ Best model saved! (Val Acc: 98.88%)
  ‚úì New best score: 98.8764

Fold 5 - Epoch 16/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:22<00:00,  2.92s/it, loss=0.0095, acc=99.02%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:24<00:00,  1.06s/it, loss=0.0292, acc=97.47%]



Results:
  Train - Loss: 0.0095, Acc: 99.02%, F1: 0.9902
  Val   - Loss: 0.0292, Acc: 97.47%, F1: 0.9745
  Learning Rate (group0): 7.91e-06
  No improvement. Patience: 1/10

Fold 5 - Epoch 17/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:33<00:00,  3.03s/it, loss=0.0086, acc=98.74%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:29<00:00,  1.26s/it, loss=0.0180, acc=98.03%]



Results:
  Train - Loss: 0.0086, Acc: 98.74%, F1: 0.9874
  Val   - Loss: 0.0180, Acc: 98.03%, F1: 0.9803
  Learning Rate (group0): 7.67e-06
  No improvement. Patience: 2/10

Fold 5 - Epoch 18/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:42<00:00,  3.13s/it, loss=0.0052, acc=99.23%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:24<00:00,  1.07s/it, loss=0.0320, acc=98.88%]



Results:
  Train - Loss: 0.0052, Acc: 99.23%, F1: 0.9923
  Val   - Loss: 0.0320, Acc: 98.88%, F1: 0.9887
  Learning Rate (group0): 7.42e-06
  No improvement. Patience: 3/10

Fold 5 - Epoch 19/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:23<00:00,  2.93s/it, loss=0.0038, acc=99.44%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:24<00:00,  1.06s/it, loss=0.0271, acc=98.31%]



Results:
  Train - Loss: 0.0038, Acc: 99.44%, F1: 0.9944
  Val   - Loss: 0.0271, Acc: 98.31%, F1: 0.9831
  Learning Rate (group0): 7.16e-06
  No improvement. Patience: 4/10

Fold 5 - Epoch 20/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:29<00:00,  3.00s/it, loss=0.0073, acc=99.37%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:24<00:00,  1.07s/it, loss=0.0108, acc=99.16%]



Results:
  Train - Loss: 0.0073, Acc: 99.37%, F1: 0.9937
  Val   - Loss: 0.0108, Acc: 99.16%, F1: 0.9916
  Learning Rate (group0): 6.89e-06
  üíæ Best model saved! (Val Acc: 99.16%)
  ‚úì New best score: 99.1573

Fold 5 - Epoch 21/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:24<00:00,  2.94s/it, loss=0.0052, acc=99.51%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:24<00:00,  1.06s/it, loss=0.0179, acc=98.60%]



Results:
  Train - Loss: 0.0052, Acc: 99.51%, F1: 0.9951
  Val   - Loss: 0.0179, Acc: 98.60%, F1: 0.9859
  Learning Rate (group0): 6.62e-06
  No improvement. Patience: 1/10

Fold 5 - Epoch 22/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:18<00:00,  2.87s/it, loss=0.0035, acc=99.72%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:24<00:00,  1.05s/it, loss=0.0158, acc=98.31%]



Results:
  Train - Loss: 0.0035, Acc: 99.72%, F1: 0.9972
  Val   - Loss: 0.0158, Acc: 98.31%, F1: 0.9831
  Learning Rate (group0): 6.34e-06
  No improvement. Patience: 2/10

Fold 5 - Epoch 23/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:22<00:00,  2.92s/it, loss=0.0028, acc=99.51%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:24<00:00,  1.07s/it, loss=0.0264, acc=97.75%]



Results:
  Train - Loss: 0.0028, Acc: 99.51%, F1: 0.9951
  Val   - Loss: 0.0264, Acc: 97.75%, F1: 0.9774
  Learning Rate (group0): 6.06e-06
  No improvement. Patience: 3/10

Fold 5 - Epoch 24/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:19<00:00,  2.89s/it, loss=0.0114, acc=98.60%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:24<00:00,  1.06s/it, loss=0.0169, acc=98.03%]



Results:
  Train - Loss: 0.0114, Acc: 98.60%, F1: 0.9860
  Val   - Loss: 0.0169, Acc: 98.03%, F1: 0.9804
  Learning Rate (group0): 5.78e-06
  No improvement. Patience: 4/10

Fold 5 - Epoch 25/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:31<00:00,  3.02s/it, loss=0.0021, acc=99.72%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:28<00:00,  1.22s/it, loss=0.0164, acc=98.88%] 



Results:
  Train - Loss: 0.0021, Acc: 99.72%, F1: 0.9972
  Val   - Loss: 0.0164, Acc: 98.88%, F1: 0.9887
  Learning Rate (group0): 5.50e-06
  No improvement. Patience: 5/10

Fold 5 - Epoch 26/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:47<00:00,  3.19s/it, loss=0.0020, acc=99.72%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:23<00:00,  1.03s/it, loss=0.0220, acc=98.88%]



Results:
  Train - Loss: 0.0020, Acc: 99.72%, F1: 0.9972
  Val   - Loss: 0.0220, Acc: 98.88%, F1: 0.9887
  Learning Rate (group0): 5.22e-06
  No improvement. Patience: 6/10

Fold 5 - Epoch 27/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:17<00:00,  2.87s/it, loss=0.0031, acc=99.72%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:24<00:00,  1.08s/it, loss=0.0164, acc=98.31%]



Results:
  Train - Loss: 0.0031, Acc: 99.72%, F1: 0.9972
  Val   - Loss: 0.0164, Acc: 98.31%, F1: 0.9831
  Learning Rate (group0): 4.94e-06
  No improvement. Patience: 7/10

Fold 5 - Epoch 28/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:27<00:00,  2.97s/it, loss=0.0017, acc=99.86%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:24<00:00,  1.06s/it, loss=0.0133, acc=98.88%]



Results:
  Train - Loss: 0.0017, Acc: 99.86%, F1: 0.9986
  Val   - Loss: 0.0133, Acc: 98.88%, F1: 0.9888
  Learning Rate (group0): 4.66e-06
  No improvement. Patience: 8/10

Fold 5 - Epoch 29/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:31<00:00,  3.02s/it, loss=0.0016, acc=99.72%]
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:24<00:00,  1.08s/it, loss=0.0290, acc=97.47%]



Results:
  Train - Loss: 0.0016, Acc: 99.72%, F1: 0.9972
  Val   - Loss: 0.0290, Acc: 97.47%, F1: 0.9745
  Learning Rate (group0): 4.38e-06
  No improvement. Patience: 9/10

Fold 5 - Epoch 30/50
----------------------------------------------------------------------


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 90/90 [04:55<00:00,  3.28s/it, loss=0.0016, acc=99.79%] 
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:28<00:00,  1.25s/it, loss=0.0165, acc=98.60%]



Results:
  Train - Loss: 0.0016, Acc: 99.79%, F1: 0.9979
  Val   - Loss: 0.0165, Acc: 98.60%, F1: 0.9860
  Learning Rate (group0): 4.11e-06
  No improvement. Patience: 10/10

‚ö†Ô∏è Early stopping triggered!
   Best score: 99.1573 at epoch 19

‚ö†Ô∏è Early stopping at epoch 30

üìä EVALUATING FOLD 5


Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:25<00:00,  1.11s/it, loss=0.0108, acc=99.16%]



üìà Fold 5 Validation Results:
  Accuracy:         99.16%
  Precision:        0.9916
  Recall:           0.9916
  F1 Score:         0.9916
  Sensitivity (Normal):   0.9921
  Sensitivity (Abnormal): 0.9913
  Specificity (Normal):   0.9844
  Specificity (Abnormal): 0.9956
  Avg Sensitivity:  0.9917
  Avg Specificity:  0.9900
‚úì Confusion matrix saved

üìä K-FOLD CROSS-VALIDATION SUMMARY

üìà Cross-Validation Results (5 folds):
  Accuracy:     99.05% ¬± 0.38%
  Precision:    0.9905 ¬± 0.0037
  Recall:       0.9905 ¬± 0.0038
  F1 Score:     0.9905 ¬± 0.0038
  Sensitivity:  0.9889 ¬± 0.0053
  Specificity:  0.9905 ¬± 0.0029

üèÜ Best Fold: 2 (Val Acc: 99.72%)

üß™ EVALUATING BEST MODEL ON HOLDOUT TEST SET

üìä Loading best model from Fold 2...


Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13/13 [00:19<00:00,  1.47s/it, loss=0.0243, acc=96.48%]


üìà Test Set Results (Best Model from Fold 2):
  Accuracy:         96.48%
  Precision:        0.9656
  Recall:           0.9648
  F1 Score:         0.9650
  Sensitivity (Normal):   0.9324
  Sensitivity (Abnormal): 0.9840
  Specificity (Normal):   0.9718
  Specificity (Abnormal): 0.9609
  Avg Sensitivity:  0.9582
  Avg Specificity:  0.9664

‚úì Test confusion matrix saved
‚úì Results saved to CSV

‚úÖ TRAINING COMPLETE

üìä Final Summary:
  Dataset: ERBMAHE (Abnormal vs Normal)
  Strategy: Method 2 K-Fold with 10% Test Set
  K-Fold CV Accuracy: 99.05% ¬± 0.38%
  Test Set Accuracy:  96.48%
  Test Set F1 Score:  0.9650




