In [None]:
import os
import random
import shutil
import copy
import time
import torch
import torchvision
from torchvision import datasets, models, transforms
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from tqdm import tqdm
from torch.utils.data import DataLoader, Subset
import math
from torchvision.models import vision_transformer
import timm

In [None]:
# Use the GPU when PyTorch can see one, otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build timm's 'vit_small_patch16_224' with its pretrained weights.
model = timm.create_model('vit_small_patch16_224', pretrained=True)

# Replace the classification head with a fresh 2-output linear layer
# (benign vs. malignant tumours), keeping the input width of the old head.
model.head = nn.Linear(in_features=model.head.in_features, out_features=2)
model = model.to(device)

In [None]:
# Root folder of the dataset (expected to contain one sub-folder per split).
# It is taken from the TFM_DATA_DIR environment variable when that is set, so
# the notebook can run on another machine without editing this cell; otherwise
# the original local path is used.
main_folder = os.environ.get(
    "TFM_DATA_DIR",
    "C:\\Users\\alvaro.rlanceta\\Documents\\tfm\\datasetstfm\\datasets_D",
)

# Side length, in pixels, used by the crop/resize transforms.
input_size = 224

In [None]:
# Per-split preprocessing. Training applies light augmentation (random resized
# crop + random horizontal flip); validation and test only resize and
# centre-crop. Every split ends with the same tensor conversion and
# channel-wise normalization.
def _to_normalized_tensor():
    """Return the ToTensor + Normalize steps shared by every split."""
    return [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]


def _eval_pipeline():
    """Return the resize + centre-crop pipeline used for 'val' and 'test'."""
    return transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        *_to_normalized_tensor(),
    ])


data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(input_size, scale=(0.8, 1.0)),
        transforms.RandomHorizontalFlip(),
        *_to_normalized_tensor(),
    ]),
    'val': _eval_pipeline(),
    'test': _eval_pipeline(),
}

print("Initializing Datasets and Dataloaders...")

In [None]:
# Mini-batch size for training (tune it to the available memory).
batch_size = 8

# Fraction of the train/val images that is actually used (1.0 keeps every
# image) and the seed that makes the random selection repeatable.
subset_fraction = 0.1
subset_seed = 42

# One ImageFolder per split, each with its own preprocessing pipeline.
image_datasets = {
    x: datasets.ImageFolder(os.path.join(main_folder, x), data_transforms[x])
    for x in ['train', 'val', 'test']
}

# Draw the subset indices from a dedicated, seeded generator so the same images
# are selected on every run, without consuming the global RNG stream.
subset_rng = torch.Generator().manual_seed(subset_seed)
num_train_images = len(image_datasets['train'])
num_val_images = len(image_datasets['val'])
subset_indices_train = torch.randperm(num_train_images, generator=subset_rng)[
    :int(subset_fraction * num_train_images)
].tolist()
subset_indices_val = torch.randperm(num_val_images, generator=subset_rng)[
    :int(subset_fraction * num_val_images)
].tolist()

# Restrict each dataset to the selected indices.
train_data_subset = Subset(image_datasets['train'], subset_indices_train)
val_data_subset = Subset(image_datasets['val'], subset_indices_val)

# Training batches are reshuffled every epoch; validation order does not
# influence the metrics, so that loader is left unshuffled.
dataloaders_dict = {
    'train': DataLoader(train_data_subset, batch_size=batch_size, shuffle=True, num_workers=4),
    'val': DataLoader(val_data_subset, batch_size=batch_size, shuffle=False, num_workers=4)
}

In [None]:
# Model training function
def train_model(model, dataloaders, criterion, optimizer, num_epochs=20, device=None):
    """Train ``model`` and restore the weights with the best validation accuracy.

    Each epoch runs a 'train' phase followed by a 'val' phase. The per-epoch
    loss and accuracy of both phases are recorded, and the weights from the
    epoch with the highest validation accuracy are loaded back into ``model``
    before returning.

    Args:
        model: ``nn.Module`` to train; it is modified in place.
        dataloaders: mapping with the keys ``'train'`` and ``'val'``. Each value
            is iterated for ``(inputs, labels)`` batches and must expose a
            sized ``.dataset`` attribute.
        criterion: callable ``criterion(outputs, labels)`` returning the batch
            loss as a scalar tensor.
        optimizer: optimizer that updates ``model``'s parameters.
        num_epochs: number of train/val passes to run.
        device: device the batches are moved to. When ``None`` (default) the
            device of the model's first parameter is used, or the CPU when the
            model has no parameters.

    Returns:
        Tuple ``(model, acc_history, losses)`` where ``acc_history`` and
        ``losses`` are dicts with ``'train'`` and ``'val'`` lists holding one
        float per epoch.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    since = time.time()

    acc_history = {"train": [], "val": []}
    losses = {"train": [], "val": []}

    best_acc = 0.0
    best_model_wts = None

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train', 'val']:
            is_training = phase == 'train'
            # train(True) enables training mode, train(False) is eval mode.
            model.train(is_training)

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Gradients are only tracked during the training phase.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_training:
                        loss.backward()
                        optimizer.step()

                # Weight the batch loss by the batch size so the epoch value is
                # a per-sample average (assumes the criterion returns a batch
                # mean, as nn.CrossEntropyLoss does by default).
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()

            num_samples = len(dataloaders[phase].dataset)
            epoch_loss = running_loss / num_samples
            epoch_acc = running_corrects / num_samples

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # Record both curves; the loss history is what the plots consume.
            losses[phase].append(epoch_loss)
            acc_history[phase].append(epoch_acc)

            # Always snapshot the first validated epoch so there is something
            # to restore even if validation accuracy never rises above 0.
            if phase == 'val' and (best_model_wts is None or epoch_acc > best_acc):
                best_acc = epoch_acc
                # deepcopy: state_dict() returns references to the live tensors.
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # No snapshot exists when num_epochs is 0; keep the current weights then.
    if best_model_wts is not None:
        model.load_state_dict(best_model_wts)
    return model, acc_history, losses



In [None]:
# Training setup: number of epochs, cross-entropy loss, and an Adam optimizer
# over every parameter of the model.
num_epochs = 40
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
import datetime

# Start timestamp, taken immediately before the training run.
start_time = datetime.datetime.now()
model, acc_history, losses = train_model(
    model=model,
    dataloaders=dataloaders_dict,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
)

In [None]:
# Wall-clock time elapsed since start_time was recorded.
end_time = datetime.datetime.now()
duration = end_time - start_time

print(f"Tiempo de ejecución: {duration}")

In [None]:
# Plot the per-epoch loss (left) and accuracy (right) curves for both phases.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

ax1.plot(losses["train"], label="training loss")
ax1.plot(losses["val"], label="validation loss")
# Titles and axis labels let each panel be read on its own.
ax1.set(title="Loss per epoch", xlabel="Epoch", ylabel="Loss")
ax1.legend()

ax2.plot(acc_history["train"],label="training accuracy")
ax2.plot(acc_history["val"],label="val accuracy")
ax2.set(title="Accuracy per epoch", xlabel="Epoch", ylabel="Accuracy")
ax2.legend()

plt.show()

In [None]:
# Evaluate on a random 30% of the test split. The indices come from a
# dedicated, seeded generator so the reported test metrics refer to the same
# images on every run.
test_subset_fraction = 0.3
test_subset_rng = torch.Generator().manual_seed(42)
num_test_images = len(image_datasets['test'])
subset_indices_test = torch.randperm(num_test_images, generator=test_subset_rng)[
    :int(test_subset_fraction * num_test_images)
].tolist()
test_data_subset = torch.utils.data.Subset(image_datasets['test'], subset_indices_test)
# Sample order does not affect the evaluation metrics, so no shuffling here.
test_dataloader = torch.utils.data.DataLoader(test_data_subset, batch_size=batch_size, shuffle=False, num_workers=4)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

def evaluate_model(model, dataloader, device, criterion):
    """Evaluate ``model`` on ``dataloader`` without tracking gradients.

    Args:
        model: module to evaluate; it is switched to eval mode.
        dataloader: iterable of ``(inputs, labels)`` batches exposing a sized
            ``.dataset`` attribute.
        device: device every batch is moved to before the forward pass.
        criterion: callable ``criterion(outputs, labels)`` returning the batch
            loss as a scalar tensor.

    Returns:
        Tuple ``(test_loss, test_acc, conf_matrix, class_report)``: the
        batch-size-weighted loss and the number of correct predictions, both
        divided by ``len(dataloader.dataset)``, followed by the outputs of
        ``confusion_matrix`` and ``classification_report`` for all samples.
    """
    model.eval()

    loss_sum = 0.0
    num_correct = 0
    all_labels, all_preds = [], []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            batch_loss = criterion(outputs, labels)
            # Predicted class = index of the largest output along dim 1.
            preds = outputs.max(dim=1).indices

            loss_sum += batch_loss.item() * inputs.size(0)
            num_correct += (preds == labels).sum().item()

            # Collect labels/predictions on the CPU for the sklearn metrics.
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    dataset_size = len(dataloader.dataset)
    test_loss = loss_sum / dataset_size
    test_acc = num_correct / dataset_size

    return (
        test_loss,
        test_acc,
        confusion_matrix(all_labels, all_preds),
        classification_report(all_labels, all_preds),
    )


In [None]:
# Evaluate on the test loader and print the resulting metrics.
test_loss, test_acc, conf_matrix, class_report = evaluate_model(
    model=model,
    dataloader=test_dataloader,
    device=device,
    criterion=criterion,
)
print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}')
print('Confusion Matrix:\n', conf_matrix)
print('Classification Report:\n', class_report)