In [34]:
import torch
import torchvision.transforms as transforms
from torchvision import datasets, models
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support
from torchvision.models import vit_b_16
from tqdm import tqdm


# Select the compute device: the GPU when CUDA is available, the CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'Using device: {device}')

Using device: cuda


In [27]:
# Preprocessing shared by both splits: resize every image to 224x224, convert
# it to a tensor, then normalise each channel with the mean/std values commonly
# used for ImageNet-pretrained models.
transformaciones = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Training split: one sub-folder per class, reshuffled every epoch.
carpeta_imagenes_train = "./Datasets/malevis/malevis_train_val_224x224/train"
dataset_train = datasets.ImageFolder(carpeta_imagenes_train, transform=transformaciones)
dataloader_train = DataLoader(dataset_train, batch_size=32, shuffle=True)

# Evaluation split (the "val" folder of the dataset).
carpeta_imagenes_test = "./Datasets/malevis/malevis_train_val_224x224/val"
dataset_test = datasets.ImageFolder(carpeta_imagenes_test, transform=transformaciones)
# Evaluation does not benefit from shuffling; a fixed order keeps the collected
# predictions/labels in a reproducible sequence from run to run.
dataloader_test = DataLoader(dataset_test, batch_size=32, shuffle=False)

In [31]:
# The classifier must emit one score per class folder found in the training set.
num_classes = len(dataset_train.classes)

# Load the pretrained ViT-B/16 backbone.
model = vit_b_16(pretrained=True)

# Swap the first layer of the classification head for a freshly initialised
# linear layer with the same input width and `num_classes` outputs.
head_in_features = model.heads[0].in_features
model.heads[0] = torch.nn.Linear(in_features=head_in_features, out_features=num_classes)

# Move the model to the selected device.
model = model.to(device)

In [37]:
def _evaluate(model, dataloader, device):
    """Score ``model`` on ``dataloader`` without tracking gradients.

    Returns:
        A tuple ``(accuracy, preds, labels)``: ``accuracy`` is the percentage
        of correct predictions, ``preds`` and ``labels`` are flat Python lists
        with the predicted and true class indices in iteration order.
    """
    model.eval()
    correct = 0
    total = 0
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)

            # Predicted class = index of the largest output score along dim 1.
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            all_preds.extend(predicted.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    return 100 * correct / total, all_preds, all_labels


def train_and_evaluate(model, dataloader_train, dataloader_test, criterion, optimizer, num_epochs=60, device=None):
    """Train ``model`` and evaluate it on ``dataloader_test`` after every epoch.

    Args:
        model: Module to optimise; switched between train and eval mode here.
        dataloader_train: Sized iterable of ``(images, labels)`` training batches.
        dataloader_test: Iterable of ``(images, labels)`` evaluation batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        num_epochs: Number of passes over ``dataloader_train``.
        device: Device the batches are moved to. When ``None``, the device of
            the model's first parameter is used (CPU if it has no parameters).

    Returns:
        ``(train_losses, test_accuracies, precision, recall, f1, cm)`` where the
        first two are per-epoch lists (mean batch loss, accuracy in percent)
        and the remaining values are the weighted precision/recall/F1 and the
        confusion matrix of the last evaluation pass.
    """
    if device is None:
        # Follow the model instead of relying on a notebook-level global.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    # Per-epoch history of training loss and evaluation accuracy.
    train_losses = []
    test_accuracies = []
    all_preds, all_labels = [], []

    for epoch in tqdm(range(num_epochs), desc="Training Progress"):
        model.train()
        running_loss = 0.0
        for images, labels in dataloader_train:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Mean of the per-batch losses for this epoch.
        avg_loss = running_loss / len(dataloader_train)
        train_losses.append(avg_loss)

        accuracy, all_preds, all_labels = _evaluate(model, dataloader_test, device)
        test_accuracies.append(accuracy)

        # One summary line per epoch; tqdm.write keeps the progress bar intact.
        tqdm.write(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%')

    if not test_accuracies:
        # No epoch ran (num_epochs <= 0): evaluate the model as given so the
        # summary metrics below are still defined.
        _, all_preds, all_labels = _evaluate(model, dataloader_test, device)

    # Weighted precision, recall and F1 over the last evaluation pass.
    precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='weighted')

    # Confusion matrix over the same predictions.
    cm = confusion_matrix(all_labels, all_preds)

    return train_losses, test_accuracies, precision, recall, f1, cm


In [38]:
# Loss function and optimizer used to fine-tune the model.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

# Run training plus evaluation and keep every returned metric.
(train_losses, test_accuracies, precision, recall, f1, cm) = train_and_evaluate(
    model=model,
    dataloader_train=dataloader_train,
    dataloader_test=dataloader_test,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=60,
)

Training Progress:   0%|          | 0/60 [00:00<?, ?it/s]

Epoch [1/60], Loss: 0.0952


Training Progress:   2%|▏         | 1/60 [03:35<3:31:29, 215.08s/it]

Accuracy: 84.12%
Epoch [1/60], Loss: 0.0952, Accuracy: 84.12%


Training Progress:   2%|▏         | 1/60 [04:19<4:15:03, 259.38s/it]


KeyboardInterrupt: 

In [None]:
# Dump the training history and the final metrics, one value per line.
print(train_losses, test_accuracies, precision, recall, f1, cm, sep="\n")