In [1]:
import torch.nn as nn
from copy import deepcopy
from sklearn.model_selection import StratifiedKFold
import torchvision
from torch.utils.data import DataLoader, SubsetRandomSampler, random_split
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score, confusion_matrix, roc_curve
import matplotlib.pyplot as plt
import seaborn as sns
import torch.optim as optim
from torch.nn.functional import softmax

from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, Subset
import os
from PIL import Image
import numpy as np
import torchvision.transforms as transforms

class JetImageDataset(Dataset):
    """Map-style dataset over a flat directory of jet images stored as PNG files.

    The label of each image is derived from its file name: a name containing
    ``"type0"`` is labelled 0 (gluon); every other ``.png`` file is labelled 1 (quark).

    Args:
        root_dir: Directory that directly contains the ``.png`` files.
        transform: Optional callable applied to the PIL image in ``__getitem__``.

    Attributes:
        images: File names (not full paths), in sorted order.
        labels: Integer labels aligned index-for-index with ``images``.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.images = []
        self.labels = []

        # os.listdir() returns entries in arbitrary, platform-dependent order.
        # Sorting pins the index -> file mapping, so index-based splits made with a
        # fixed random_state select the same files on every machine and every run.
        for filename in sorted(os.listdir(root_dir)):
            if filename.endswith(".png"):
                label = 0 if "type0" in filename else 1  # 0 = gluon, 1 = quark
                self.images.append(filename)
                self.labels.append(label)

    def __len__(self):
        """Return the number of PNG files found in ``root_dir``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``; the image is converted to RGB."""
        img_path = os.path.join(self.root_dir, self.images[idx])

        # convert() returns a new, fully loaded image, so the underlying file can be
        # closed right away instead of waiting for garbage collection.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, self.labels[idx]
    

def load_jet_images(data_dir, img_size=299):
    """Build the jet-image dataset and split it into train+validation and test subsets.

    Every image is resized to ``img_size`` x ``img_size``, converted to a tensor and
    normalised with the per-channel mean/std given below. The split is 80/20,
    stratified on the class label and seeded with ``random_state=42``.

    Args:
        data_dir: Directory handed to ``JetImageDataset``.
        img_size: Side length, in pixels, of the square the images are resized to.

    Returns:
        ``(train_val_set, test_set)`` as two ``Subset`` views of one shared dataset.
    """
    preprocessing = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    dataset = JetImageDataset(root_dir=data_dir, transform=preprocessing)

    # Split positions rather than samples so that no image has to be decoded here.
    positions = np.arange(len(dataset))
    class_ids = np.array(dataset.labels)
    kept_positions, held_out_positions = train_test_split(
        positions,
        test_size=0.2,
        stratify=class_ids,
        random_state=42,
    )

    return Subset(dataset, kept_positions), Subset(dataset, held_out_positions)

In [2]:
from sklearn.metrics import roc_curve, auc
def plot_roc_curve(y_true, y_probs, model_name):
    """Draw the ROC curve of a binary classifier and show it.

    Args:
        y_true: True binary labels, one per sample.
        y_probs: Score for the positive class, one per sample.
        model_name: Text used in the legend entry and in the figure title.
    """
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(y_true, y_probs)
    area = auc(false_pos_rate, true_pos_rate)

    _fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(
        false_pos_rate,
        true_pos_rate,
        color='darkorange',
        lw=2,
        label=f'{model_name} (AUC = {area:.2f})',
    )
    # Diagonal = performance of a random classifier.
    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title(f'ROC Curve - {model_name}')
    ax.legend(loc="lower right")
    plt.show()
    
def plot_score_distributions(y_true, y_probs, model_name):
    """Overlay the classifier-score histograms of the two true classes and show them.

    Args:
        y_true: Array of true labels (0 or 1); used to build boolean masks.
        y_probs: Array of classifier scores aligned with ``y_true``.
        model_name: Text used in the figure title.
    """
    # Select the scores first, so a bad input fails before any figure is created.
    class_styles = ((0, 'Gluon', 'blue'), (1, 'Quark', 'red'))
    selections = [
        (y_probs[y_true == class_id], class_name, colour)
        for class_id, class_name, colour in class_styles
    ]

    _fig, ax = plt.subplots(figsize=(8, 6))
    for scores, class_name, colour in selections:
        ax.hist(scores, bins=50, alpha=0.5, label=class_name, color=colour)
    ax.set_xlabel('Classifier Score')
    ax.set_ylabel('Count')
    ax.set_title(f'Score Distributions - {model_name}')
    ax.legend(loc='upper right')
    plt.show()

##### Nuevo modelo incorporado

In [3]:
class EnsembleModel(nn.Module):
    """Two-backbone classifier: ResNet-50 and Inception-v3 features, gated and fused.

    Both pretrained backbones have their final ``fc`` layer replaced by ``nn.Identity``
    so that each one yields a 2048-value feature vector. The two vectors are
    concatenated (4096 values), multiplied element-wise by a sigmoid gate computed from
    that same concatenation, and passed to an MLP head that emits 2 logits.

    Only the trailing parameter tensors of each backbone are trainable: the last 50 of
    ResNet-50 and the last 30 of Inception-v3. Both counts are taken *before* ``fc`` is
    replaced, so the discarded ``fc`` weight and bias are included in them.
    """

    def __init__(self):
        super().__init__()
        self.resnet50 = torchvision.models.resnet50(pretrained=True)
        self.inceptionv3 = torchvision.models.inception_v3(pretrained=True, aux_logits=True)

        # The pretrained ResNet-50 stem (conv1) is kept as loaded. It already accepts
        # 3-channel input, and swapping it for a freshly initialised Conv2d would leave
        # random weights in a layer that the freezing loop below locks, so the frozen
        # backbone would be fed features from an untrained stem.

        # Inception-v3 specifics:
        # disable the model's built-in input re-normalisation ...
        self.inceptionv3.transform_input = False
        # ... and rebuild the first convolution with the same geometry, 3 input channels.
        original_first_conv = self.inceptionv3.Conv2d_1a_3x3.conv
        self.inceptionv3.Conv2d_1a_3x3.conv = nn.Conv2d(
            in_channels=3,
            out_channels=original_first_conv.out_channels,
            kernel_size=original_first_conv.kernel_size,
            stride=original_first_conv.stride,
            padding=original_first_conv.padding,
            bias=False
        )

        with torch.no_grad():
            # Average the pretrained kernels over the input-channel axis and replicate
            # the result on all three channels.
            weights_1ch = original_first_conv.weight.mean(dim=1, keepdim=True)
            self.inceptionv3.Conv2d_1a_3x3.conv.weight = nn.Parameter(weights_1ch.repeat(1, 3, 1, 1))

        # Selective freezing: lock everything ...
        for param in self.resnet50.parameters():
            param.requires_grad = False
        for param in self.inceptionv3.parameters():
            param.requires_grad = False

        # ... then re-enable the trailing parameter tensors of each backbone.
        for param in list(self.resnet50.parameters())[-50:]:
            param.requires_grad = True
        for param in list(self.inceptionv3.parameters())[-30:]:
            param.requires_grad = True

        # Drop the ImageNet classification heads; each backbone now returns features.
        self.resnet50.fc = nn.Identity()  # 2048 features
        self.inceptionv3.fc = nn.Identity()  # 2048 features

        # Gate for feature fusion: one weight in (0, 1) per concatenated feature.
        self.attention = nn.Sequential(
            nn.Linear(2048*2, 512),  # 2048 from ResNet + 2048 from Inception
            nn.ReLU(),
            nn.Linear(512, 2048*2),
            nn.Sigmoid()
        )

        # Classification head.
        self.classifier = nn.Sequential(
            nn.Linear(2048*2, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(0.5),  # high dropout for regularisation
            nn.Linear(1024, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 2)  # two logits (binary task)
        )

    def forward(self, x):
        """Return the 2-class logits for a batch of images.

        Args:
            x: Image batch fed unchanged to both backbones
                (assumes a 3-channel float tensor - TODO confirm against the data loader).
        """
        features_resnet = self.resnet50(x)
        features_inception = self.inceptionv3(x)

        # With aux_logits=True Inception-v3 returns a tuple (main, aux) in training mode.
        if isinstance(features_inception, tuple):
            features_inception = features_inception[0]  # keep the main output

        combined = torch.cat((features_resnet, features_inception), dim=1)
        # Gate values, one per feature: (batch_size, 4096).
        attention_weights = self.attention(combined)

        # Element-wise re-weighting of the concatenated features.
        attended_features = combined * attention_weights

        return self.classifier(attended_features)

###### viejo modelo

In [9]:
class EnsembleModel(nn.Module):
    """Earlier ("old") variant: Inception-v3 logits refined by a small MLP.

    Both backbones are created with a 2-class ``fc`` head, but only the Inception-v3
    output reaches the final ``fc`` stack, whose input width (2) matches a single
    backbone's logits. The ResNet-50 branch is kept as an attribute only.

    NOTE(review): this class has the same name as the attention-fusion ``EnsembleModel``
    defined in an earlier cell, so whichever cell is executed last is the one in effect.
    """

    def __init__(self):
        super(EnsembleModel, self).__init__()
        self.resnet50 = torchvision.models.resnet50(pretrained=True)
        self.resnet50.fc = nn.Linear(2048, 2)  # 2-class head

        self.inceptionv3 = torchvision.models.inception_v3(pretrained=True, aux_logits=True)
        self.inceptionv3.fc = nn.Linear(2048, 2)  # 2-class head

        # Final combination stack; its input width is the 2 logits of one backbone.
        self.fc = nn.Sequential(
            nn.Linear(2, 128),
            nn.ReLU(),
            nn.Linear(128, 2)  # two outputs for binary classification
        )

    def forward(self, x):
        """Return 2-class logits for the image batch ``x``."""
        # The input is fed to Inception-v3 directly. The previous body referenced an
        # undefined `inception_input` and additionally ran both backbones once with
        # the results thrown away.
        inception_outputs = self.inceptionv3(x)
        # In training mode with aux_logits=True the backbone returns (main, aux).
        inception_out = inception_outputs[0] if isinstance(inception_outputs, tuple) else inception_outputs

        return self.fc(inception_out)

In [4]:
def train_model(model, train_loader, criterion, optimizer, device):
    """Train ``model`` for one epoch.

    Args:
        model: Module called as ``model(inputs)``; its output is passed to ``criterion``
            and arg-maxed over dimension 1 to obtain class predictions.
        train_loader: Iterable of ``(inputs, labels)`` tensor batches.
        criterion: Loss callable, ``criterion(outputs, labels)``.
        optimizer: Optimizer over the parameters to update.
        device: Device the batches are moved to.

    Returns:
        ``(mean_batch_loss, accuracy)`` - the loss averaged over batches and the
        fraction of samples classified correctly during the epoch.

    Raises:
        ValueError: If ``train_loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    batch_count = 0
    correct_train = 0
    total_train = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
        batch_count += 1
        correct_train += (outputs.argmax(dim=1) == labels).sum().item()
        total_train += labels.size(0)

    if total_train == 0:
        raise ValueError("train_loader yielded no samples; cannot compute epoch metrics")

    return loss_sum / batch_count, correct_train / total_train


In [5]:
def validate_model_individuals(model, val_loader, criterion, device, target_tpr=0.5):
    """Evaluate ``model`` on a validation loader without updating it.

    Args:
        model: Module to evaluate; if it returns a tuple, the first element is used.
        val_loader: Iterable of ``(inputs, labels)`` tensor batches.
        criterion: Loss callable, ``criterion(outputs, labels)``.
        device: Device the batches are moved to.
        target_tpr: Accepted for backward compatibility; not used by this function.

    Returns:
        ``(mean_batch_loss, accuracy, all_labels, predicted_probs)`` where
        ``all_labels`` is a list with one label per sample and ``predicted_probs`` is a
        1-D NumPy array holding the softmax probability of class index 1.

    Raises:
        ValueError: If ``val_loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    batch_count = 0
    correct_val = 0
    total_val = 0
    all_labels = []
    positive_probs = []

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            # Inception-style models may return (main, aux); keep the main output.
            if isinstance(outputs, tuple):
                outputs = outputs[0]

            # Keep only the probability of the positive class (index 1).
            positive_probs.append(softmax(outputs, dim=1)[:, 1].cpu().numpy())

            loss_sum += criterion(outputs, labels).item()
            batch_count += 1
            correct_val += (outputs.argmax(dim=1) == labels).sum().item()
            total_val += labels.size(0)
            all_labels.extend(labels.cpu().numpy())

    if total_val == 0:
        raise ValueError("val_loader yielded no samples; cannot compute validation metrics")

    predicted_probs = np.concatenate(positive_probs)
    return loss_sum / batch_count, correct_val / total_val, all_labels, predicted_probs

In [5]:
def test_model(models, test_loader, device):
    # Set all models to evaluation mode
    for model in models:
        model.eval()

    all_labels = []
    all_probs = []

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = []

            for model in models:
                output = model(images)
                output = torch.softmax(output, dim=1)  # convert logits to probabilities
                outputs.append(output)

            # Stack outputs and average them across models
            outputs = torch.stack(outputs)  # shape: (n_models, batch_size, n_classes)
            avg_output = torch.mean(outputs, dim=0)  # shape: (batch_size, n_classes)

            all_probs.append(avg_output.cpu())
            all_labels.append(labels.cpu())

    # Concatenate all batches
    all_probs = torch.cat(all_probs, dim=0)
    all_labels = torch.cat(all_labels, dim=0)

    # Predicted classes
    _, predicted = torch.max(all_probs, 1)

    # Accuracy
    correct = (predicted == all_labels).sum().item()
    total = all_labels.size(0)
    accuracy = correct / total

    # AUC Score (only if binary classification)
    try:
        auc_score = roc_auc_score(all_labels.numpy(), all_probs[:, 1].numpy())
    except Exception as e:
        print(f"Warning: Could not compute AUC Score. Reason: {e}")
        auc_score = None

    return accuracy, auc_score, all_labels.numpy().tolist(), all_probs.numpy().tolist()


In [6]:
def validate_ensemble(model, val_loader, criterion, device):
    """Evaluate the ensemble on a validation loader and compute loss, accuracy and AUC.

    Args:
        model: Module to evaluate; if it returns a tuple, the first element is used.
        val_loader: Sized iterable of ``(inputs, labels)`` tensor batches.
        criterion: Loss callable, ``criterion(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(val_loss, val_accuracy, val_auc, all_labels, all_probs)`` where ``val_loss``
        is averaged over batches and ``all_probs`` holds the softmax probability of
        class index 1 for every sample, aligned with ``all_labels``.
    """
    model.eval()
    loss_total = 0.0
    hits = 0
    seen = 0
    all_labels = []
    all_probs = []

    with torch.no_grad():
        for batch_inputs, batch_labels in val_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_inputs)
            # Keep the main output when the model returns (main, aux).
            if isinstance(logits, tuple):
                logits = logits[0]

            loss_total += criterion(logits, batch_labels).item()

            positive_scores = torch.softmax(logits, dim=1)[:, 1]
            predictions = logits.argmax(dim=1)

            hits += (predictions == batch_labels).sum().item()
            seen += batch_labels.size(0)
            all_labels.extend(batch_labels.cpu().numpy())
            all_probs.extend(positive_scores.cpu().numpy())

    val_loss = loss_total / len(val_loader)
    val_accuracy = hits / seen
    val_auc = roc_auc_score(all_labels, all_probs)

    return val_loss, val_accuracy, val_auc, all_labels, all_probs


In [7]:
from torch.cuda.amp import GradScaler, autocast
import time
def cross_validate_ensemble(train_val_set, test_set, k_folds=5, epochs=30, batch_size=64):
    """Stratified k-fold training of ``EnsembleModel`` plus a fold-averaged test run.

    For every fold a fresh ``EnsembleModel`` is trained with AdamW (per-group learning
    rates), label-smoothed cross-entropy, ``ReduceLROnPlateau`` on validation accuracy
    and early stopping on validation accuracy. The weights and metrics of the best
    epoch of each fold are kept. After all folds, the pooled out-of-fold validation
    scores are plotted and the per-fold models are evaluated together on ``test_set``
    through ``test_model``, which averages their softmax outputs.

    Args:
        train_val_set: Dataset used for the k-fold train/validation rotation.
        test_set: Held-out dataset, evaluated once at the end.
        k_folds: Number of stratified folds.
        epochs: Maximum number of epochs per fold (must be >= 1).
        batch_size: Training/validation batch size; the test loader uses twice this.

    Returns:
        ``(avg_train_acc, avg_val_acc, test_accuracy, all_labels, all_probs, test_auc,
        true_labels, test_probs)``. ``test_accuracy`` is a float. ``all_labels`` and
        ``all_probs`` are the pooled validation labels / positive-class scores taken at
        each fold's best epoch. ``true_labels`` and ``test_probs`` are passed through
        from ``test_model`` unchanged.

    Raises:
        ValueError: If ``epochs`` is smaller than 1.
    """
    if epochs < 1:
        raise ValueError("epochs must be at least 1")

    # Labels for stratification. When the set is a Subset of a dataset exposing a
    # `labels` list, read them directly; indexing the dataset instead would decode and
    # transform every image just to throw the pixels away.
    # NOTE(review): assumes `dataset.labels[i]` is the label of `dataset[i]` - this
    # holds for JetImageDataset; confirm for any other dataset type.
    if isinstance(train_val_set, Subset) and hasattr(train_val_set.dataset, "labels"):
        source_labels = train_val_set.dataset.labels
        labels = [source_labels[i] for i in train_val_set.indices]
    else:
        labels = [train_val_set[i][1] for i in range(len(train_val_set))]

    # StratifiedKFold keeps the class proportions in every fold.
    kfold = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"\nTraining divice: {device}")

    if torch.cuda.is_available():
        print(f"\nGPUs availables: {torch.cuda.device_count()}")
        for i in range(torch.cuda.device_count()):
            print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

    # os.cpu_count() may return None; fall back to a single worker in that case.
    n_workers = min(4, os.cpu_count() or 1)

    fold_metrics = {
        'train_acc': [],
        'val_acc': [],
        'models': []  # best-epoch state dict (CPU tensors) of every fold
    }
    all_labels = []
    all_probs = []

    # Early-stopping configuration.
    patience = 5
    min_delta = 0.005

    for fold, (train_ids, val_ids) in enumerate(kfold.split(range(len(train_val_set)), labels)):
        print(f'\n{"="*50}\nTraining on Fold {fold + 1}/{k_folds}\n{"="*50}')

        train_loader = DataLoader(
            train_val_set,
            batch_size=batch_size,
            sampler=SubsetRandomSampler(train_ids),
            num_workers=n_workers,
            pin_memory=True,
            persistent_workers=True
        )

        val_loader = DataLoader(
            train_val_set,
            batch_size=batch_size,
            sampler=SubsetRandomSampler(val_ids),
            num_workers=n_workers,
            pin_memory=True
        )

        model = EnsembleModel()
        if torch.cuda.device_count() > 1:
            print(f"Using {torch.cuda.device_count()} GPUs with DataParallel")
            model = nn.DataParallel(model)
        model.to(device)

        # `.module` exists only on the DataParallel wrapper; going through `core`
        # keeps single-GPU and CPU runs working and gives prefix-free state-dict keys.
        core = model.module if isinstance(model, nn.DataParallel) else model

        optimizer = optim.AdamW([
            {'params': [p for p in core.resnet50.parameters() if p.requires_grad], 'lr': 1e-5},
            {'params': [p for p in core.inceptionv3.parameters() if p.requires_grad], 'lr': 1e-5},
            {'params': core.attention.parameters(), 'lr': 3e-4},
            {'params': core.classifier.parameters(), 'lr': 1e-3}
        ], weight_decay=1e-4)

        # The deprecated `verbose` flag is not passed; reductions are reported below.
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode='max',
            factor=0.5,
            patience=5
        )

        # Loss function with label smoothing.
        criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

        best_val_acc = 0.0
        early_stop_counter = 0
        best_model_state = None
        best_epoch = None  # (train_acc, val_acc, val_labels, val_probs) of the best epoch

        for epoch in range(epochs):
            start_time = time.time()

            train_loss, train_acc = train_model(model, train_loader, criterion, optimizer, device)

            val_loss, val_acc, val_auc, val_labels, val_probs = validate_ensemble(
                model, val_loader, criterion, device
            )

            # Scheduler follows validation accuracy (mode='max').
            lrs_before = [group['lr'] for group in optimizer.param_groups]
            scheduler.step(val_acc)
            lrs_after = [group['lr'] for group in optimizer.param_groups]
            if lrs_after != lrs_before:
                print(f'Learning rates reduced to: {lrs_after}')

            epoch_time = time.time() - start_time

            print(f'Epoch {epoch + 1}/{epochs} | Time: {epoch_time:.2f}s')
            print(f'Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}')
            print(f'Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}')

            if val_acc > best_val_acc + min_delta:
                best_val_acc = val_acc
                early_stop_counter = 0
                # Snapshot on the CPU so the stored folds do not occupy GPU memory.
                best_model_state = {
                    key: value.detach().cpu().clone()
                    for key, value in core.state_dict().items()
                }
                best_epoch = (train_acc, val_acc, val_labels, val_probs)
            else:
                early_stop_counter += 1
                if early_stop_counter >= patience:
                    print(f'Early stopping triggered at epoch {epoch + 1}!')
                    break

        if best_epoch is None:
            # No epoch ever beat the threshold: fall back to the last epoch so the
            # fold still contributes a model and metrics.
            best_val_acc = val_acc
            best_model_state = {
                key: value.detach().cpu().clone()
                for key, value in core.state_dict().items()
            }
            best_epoch = (train_acc, val_acc, val_labels, val_probs)

        # Record the metrics of the epoch whose weights are kept, so that reported
        # numbers, pooled validation scores and the saved model all agree.
        fold_train_acc, fold_val_acc, fold_val_labels, fold_val_probs = best_epoch
        fold_metrics['train_acc'].append(fold_train_acc)
        fold_metrics['val_acc'].append(fold_val_acc)
        fold_metrics['models'].append(best_model_state)

        all_labels.extend(fold_val_labels)
        all_probs.extend(fold_val_probs)

        print(f'\nFold {fold + 1} Results:')
        print(f'Best Val Acc: {best_val_acc:.4f}')

        # Drop the references to this fold's model before the next one is built.
        del model, core, optimizer, scheduler

    y_true = np.array(all_labels)
    y_pred = np.array(all_probs)
    # Plots built from the pooled out-of-fold validation predictions.
    plot_roc_curve(y_true, y_pred, "Val Ensemble")
    plot_score_distributions(y_true, y_pred, "Val Ensemble")

    test_loader = DataLoader(
        test_set,
        batch_size=batch_size*2,
        shuffle=False,
        num_workers=n_workers,
        pin_memory=True
    )

    # test_model expects modules, not state dicts: rebuild one model per fold.
    fold_models = []
    for state in fold_metrics['models']:
        member = EnsembleModel()
        member.load_state_dict(state)
        fold_models.append(member.to(device))

    test_acc, test_auc, true_labels, test_probs = test_model(fold_models, test_loader, device)
    test_accuracy = test_acc

    avg_train_acc = np.mean(fold_metrics['train_acc'])
    avg_val_acc = np.mean(fold_metrics['val_acc'])

    print(f'\n{"="*50}')
    print(f'Cross-Validation Complete')
    print(f'Average Training Accuracy: {avg_train_acc:.4f}')
    print(f'Average Validation Accuracy: {avg_val_acc:.4f}')
    print(f'Average Testing Accuracy: {test_accuracy:.4f}')
    print(f'{"="*50}')

    return avg_train_acc, avg_val_acc, test_accuracy, all_labels, all_probs, test_auc, true_labels, test_probs

In [20]:
from torch.cuda.amp import GradScaler, autocast
import time
# Validación cruzada del modelo de ensamble sin ViT
def cross_validate_ensemble(train_val_set, test_set, k_folds=10, epochs=30, batch_size=256):
    """Earlier k-fold variant: plain ``KFold``, early stopping on validation loss.

    Each fold trains a fresh ``EnsembleModel``, restores the weights of the epoch with
    the lowest validation loss and evaluates that model on ``test_set``.

    NOTE(review): this function has the same name as the stratified variant defined in
    an earlier cell, so whichever cell is executed last is the one that gets called.
    The optimizer setup requires ``EnsembleModel`` to expose ``resnet50``,
    ``inceptionv3``, ``attention`` and ``classifier``.

    Args:
        train_val_set: Dataset used for the k-fold train/validation rotation.
        test_set: Held-out dataset, evaluated after every fold.
        k_folds: Number of folds.
        epochs: Maximum number of epochs per fold (must be >= 1).
        batch_size: Training batch size; validation and test loaders use twice this.

    Returns:
        ``(avg_train_accuracy, avg_val_accuracy, avg_test_accuracy, all_labels,
        all_probs, test_auc, true_all_labels, test_all_probs)``. ``all_labels`` and
        ``all_probs`` pool the validation labels / positive-class probabilities of every
        fold; the last three items come from the final fold's ``test_model`` call only.

    Raises:
        ValueError: If ``epochs`` is smaller than 1.
    """
    if epochs < 1:
        raise ValueError("epochs must be at least 1")

    kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if torch.cuda.is_available():
        print(f"\nGPUs disponibles: {torch.cuda.device_count()}")
        for i in range(torch.cuda.device_count()):
            print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

    # os.cpu_count() may return None; fall back to a single worker in that case.
    n_workers = min(4, os.cpu_count() or 1)

    train_accuracies, val_accuracies, test_accuracies = [], [], []

    # Pooled validation labels and positive-class probabilities of all folds.
    all_labels = []
    all_probs = []

    patience = 2  # epochs without improvement before stopping
    min_delta = 0.001  # minimum decrease in validation loss that counts as improvement

    # The test loader does not depend on the fold, so it is built once.
    test_loader = DataLoader(
        test_set,
        batch_size=batch_size*2,
        shuffle=False,
        num_workers=n_workers,
        pin_memory=True
    )

    # KFold only needs the number of samples; splitting an index array avoids handing
    # the torch dataset itself to scikit-learn.
    for fold, (train_ids, val_ids) in enumerate(kfold.split(np.arange(len(train_val_set)))):
        print(f'\n{"="*50}\nFold {fold + 1}/{k_folds}\n{"="*50}')

        train_loader = DataLoader(
            train_val_set,
            batch_size=batch_size,
            sampler=SubsetRandomSampler(train_ids),
            num_workers=n_workers,
            pin_memory=True,
            persistent_workers=True
        )

        val_loader = DataLoader(
            train_val_set,
            batch_size=batch_size*2,  # larger batches are fine without gradients
            sampler=SubsetRandomSampler(val_ids),
            num_workers=n_workers,
            pin_memory=True
        )

        model = EnsembleModel()
        if torch.cuda.device_count() > 1:
            print(f"Usando {torch.cuda.device_count()} GPUs con DataParallel")
            model = nn.DataParallel(model)
        model.to(device)

        # Sub-modules live on `.module` when the model is wrapped in DataParallel.
        core = model.module if isinstance(model, nn.DataParallel) else model

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.AdamW([
            {'params': [p for p in core.resnet50.parameters() if p.requires_grad], 'lr': 1e-5},
            {'params': [p for p in core.inceptionv3.parameters() if p.requires_grad], 'lr': 1e-5},
            {'params': core.attention.parameters(), 'lr': 3e-4},
            {'params': core.classifier.parameters(), 'lr': 1e-3}
        ], weight_decay=1e-4)
        # The deprecated `verbose` flag is not passed; reductions are reported below.
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode='max',
            factor=0.5,
            patience=3
        )

        # Early-stopping state (reset for every fold).
        best_val_loss = float('inf')
        early_stop_counter = 0
        best_model_weights = None
        best_epoch = None  # (train_acc, val_acc, val_labels, val_probs) of the best epoch

        for epoch in range(epochs):
            start_time = time.time()
            train_loss, train_acc = train_model(model, train_loader, criterion, optimizer, device)

            val_loss, val_acc, val_labels, val_probs = validate_model_individuals(model, val_loader, criterion, device)

            # The plateau scheduler has to see the metric every epoch to be able to act.
            lrs_before = [group['lr'] for group in optimizer.param_groups]
            scheduler.step(val_acc)
            lrs_after = [group['lr'] for group in optimizer.param_groups]
            if lrs_after != lrs_before:
                print(f'Learning rates reduced to: {lrs_after}')

            # Measured after the work, so it covers training plus validation.
            epoch_time = time.time() - start_time

            print(f'Epoch {epoch + 1}/{epochs} |  Time: {epoch_time:.2f}s | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} '
                  f'| Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}')

            if val_loss < best_val_loss - min_delta:
                best_val_loss = val_loss
                early_stop_counter = 0
                # Deep copy: a shallow dict copy would share the live parameter tensors
                # and silently follow later updates, making the restore a no-op.
                best_model_weights = deepcopy(model.state_dict())
                best_epoch = (train_acc, val_acc, val_labels, val_probs)
            else:
                early_stop_counter += 1
                if early_stop_counter >= patience:
                    print(f'Early stopping triggered at epoch {epoch + 1}!')
                    break

        # Restore the best epoch whether or not early stopping fired, and report the
        # metrics that belong to those weights. If no epoch improved on the initial
        # threshold (e.g. NaN loss), the last epoch's weights and metrics are kept.
        if best_model_weights is not None:
            model.load_state_dict(best_model_weights)
            train_acc, val_acc, val_labels, val_probs = best_epoch

        all_labels.extend(val_labels)
        all_probs.extend(val_probs)  # probabilities of class 1

        val_accuracies.append(val_acc)
        train_accuracies.append(train_acc)

        # test_model iterates over its first argument, so wrap the single model.
        test_acc, test_auc, true_all_labels, test_all_probs = test_model([model], test_loader, device)
        test_accuracies.append(test_acc)

    avg_train_accuracy = np.mean(train_accuracies)
    avg_val_accuracy = np.mean(val_accuracies)
    avg_test_accuracy = np.mean(test_accuracies)

    print(f'\nAverage Training Accuracy: {avg_train_accuracy:.4f}')
    print(f'Average Validation Accuracy: {avg_val_accuracy:.4f}')
    print(f'Average Testing Accuracy: {avg_test_accuracy:.4f}')

    return avg_train_accuracy, avg_val_accuracy, avg_test_accuracy, all_labels, all_probs, test_auc, true_all_labels, test_all_probs

In [8]:
data_dir = './data/Jet2Image_g-q_170k'  # Change this to the location of your images

# Load the dataset and run the full cross-validation + test evaluation.
train_val_set, test_set = load_jet_images(data_dir)
Train_accuracy_avg, validation_accuracy_avg, Testing_avg_acc, all_labels, all_probs, test_auc, true_all_labels, test_all_probs  = cross_validate_ensemble(train_val_set, test_set)

y_true = np.array(true_all_labels)
y_pred = np.array(test_all_probs)
# test_model returns one probability per class for every sample, i.e. an (N, 2) array.
# The ROC curve and the score histograms need a single score per sample, so keep only
# the positive-class column (index 1).
if y_pred.ndim == 2:
    y_pred = y_pred[:, 1]

# Plots built from the held-out test predictions.
plot_roc_curve(y_true, y_pred, "Test Ensemble")
plot_score_distributions(y_true, y_pred, "Test Ensemble")

# Persist labels and scores for later comparison between models.
model_name = "ensemble"
np.savez(f'{model_name}_results_g-q.npz', y_true=y_true, y_pred=y_pred)


Dispositivo de entrenamiento: cuda

GPUs disponibles: 2
GPU 0: NVIDIA A100-SXM4-80GB
GPU 1: NVIDIA A100-SXM4-80GB





Training on Fold 1/5




Usando 2 GPUs con DataParallel


