Esteban Castañeda Blanco C01795

Israel López Vallecillo C04396

Daniel Lizano Morales C04285

Ariel Solís Monge B97664

In [25]:
import os
import copy
import torch
import numpy as np
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split
import torch.nn as nn
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import precision_score, recall_score, accuracy_score, classification_report

# Importa la clase Encoder desde encoder.py
from encoder import Encoder

In [26]:
unlabeled_set_size = 0.8
labeled_train_absolute_set_size = 0.1
labeled_test_absolute_set_size = round(1 - (labeled_train_absolute_set_size + unlabeled_set_size), 2)

labeled_train_relative_set_size = round((labeled_train_absolute_set_size / (1 - unlabeled_set_size)), 2)
labeled_test_relative_set_size = 1 - labeled_train_relative_set_size


base_dir = os.path.join('Plant_leave_diseases_dataset', 'original')
os.makedirs('best_models', exist_ok=True)
model_save_path = \
    os.path.join('best_models', f'h1_{int(unlabeled_set_size*100)}-{int(labeled_train_absolute_set_size*100)}-{int(labeled_test_absolute_set_size*100)}_classifierA_withEncoder.pth')

In [27]:
# Configuración del dispositivo
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Usando dispositivo: {device}")

# Inicializa el modelo EfficientNet con el Encoder
encoder = Encoder().to(device)

# Cargar pesos preentrenados para el encoder con map_location
encoder.load_state_dict(torch.load('best_models/h1_80-10-10_Encoder.pth', map_location=device))
encoder.eval()

# Congelar los pesos del encoder
for param in encoder.parameters():
    param.requires_grad = False

efficientnet = models.efficientnet_b2().to(device)

# Modifica la primera capa convolucional de EfficientNet
new_in_channels = 1
original_conv1 = efficientnet.features[0][0]

new_conv1 = nn.Conv2d(
    in_channels=new_in_channels,
    out_channels=original_conv1.out_channels,
    kernel_size=original_conv1.kernel_size,
    stride=original_conv1.stride,
    padding=original_conv1.padding,
    bias=original_conv1.bias
).to(device)


Usando dispositivo: mps


In [28]:
with torch.no_grad():
    if new_in_channels == 1:
        new_conv1.weight = nn.Parameter(original_conv1.weight.mean(dim=1, keepdim=True))
    else:
        new_conv1.weight[:, :3] = original_conv1.weight
        if new_in_channels > 3:
            for i in range(3, new_in_channels):
                new_conv1.weight[:, i:i+1] = original_conv1.weight.mean(dim=1, keepdim=True)

# Reemplaza la primera capa convolucional en el modelo
efficientnet.features[0][0] = new_conv1

efficientnet.to(device)

# Verificar dimensiones de salida del encoder
dummy_input = torch.randn(32, 1, 224, 224).to(device)
encoded_output = encoder(dummy_input)[-1]
print("Dimensiones de salida del encoder:", encoded_output.shape)

# Ajustar la capa de clasificación para aceptar la salida del encoder
flattened_dim = encoded_output.numel() // encoded_output.size(0)  # Ajustar la dimensión aplanada
print("Dimensión aplanada esperada:", flattened_dim)

efficientnet.classifier = nn.Sequential(
    nn.Dropout(p=0.2, inplace=True),
    nn.Linear(flattened_dim, 10).to(device),
)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(efficientnet.parameters(), lr=0.001)

# Verificar la dimensión aplanada y la salida de la capa lineal
flattened_output = encoded_output.view(encoded_output.size(0), -1)
print("Dimensión del output aplanado:", flattened_output.shape)
output = efficientnet.classifier(flattened_output)
print("Salida de la capa lineal:", output.shape)

Dimensiones de salida del encoder: torch.Size([1024, 14, 14])
Dimensión aplanada esperada: 196
Dimensión del output aplanado: torch.Size([1024, 196])
Salida de la capa lineal: torch.Size([1024, 10])


In [29]:
# Carga y preprocesa los datos
data_transforms = {
    'all': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5])
    ]),
}

base_dir = 'Plant_leave_diseases_dataset/original'
full_dataset = datasets.ImageFolder(base_dir, transform=data_transforms['all'])

# División de datos de entrenamiento, validación y prueba
indices = list(range(len(full_dataset)))
image_paths = [sample[0] for sample in full_dataset.samples]
labels = [os.path.split(os.path.dirname(path))[-1] for path in image_paths]

unlabeled_set_size = 0.8
labeled_train_absolute_set_size = 0.1
labeled_test_absolute_set_size = round(1 - (labeled_train_absolute_set_size + unlabeled_set_size), 2)

train_val_indices, _ = train_test_split(indices, test_size=unlabeled_set_size, stratify=labels, random_state=42)
train_val_labels = [labels[i] for i in train_val_indices]
train_indices, val_indices = train_test_split(train_val_indices, test_size=labeled_test_absolute_set_size, stratify=train_val_labels, random_state=42)

train_dataset = Subset(full_dataset, train_indices)
val_dataset = Subset(full_dataset, val_indices)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [30]:
def evaluate_model(model, encoder, dataloader, device, load_best_model_path=None):
    if load_best_model_path:
        model.load_state_dict(torch.load(load_best_model_path, map_location=device))
        print(f"Loaded best model from {load_best_model_path}")

    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            encoded_output = encoder(inputs)[-1]
            flattened_output = encoded_output.view(encoded_output.size(0), -1)
            outputs = model.classifier(flattened_output)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)
    
    report = classification_report(all_labels, all_preds, output_dict=True)
    accuracy = report['accuracy']
    precision = {key: value['precision'] for key, value in report.items() if key not in ['accuracy', 'macro avg', 'weighted avg']}
    recall = {key: value['recall'] for key, value in report.items() if key not in ['accuracy', 'macro avg', 'weighted avg']}
    
    return accuracy, precision, recall

In [31]:
def train_model(model, encoder, criterion, optimizer, train_loader, val_loader, device, num_epochs=30, patience=5, save_path='best_model.pth'):
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    patience_counter = 0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch + 1}/{num_epochs}')
        print('-' * 10)

        model.train()
        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in tqdm(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            encoded_output = encoder(inputs)[-1]
            flattened_output = encoded_output.view(encoded_output.size(0), -1)
            outputs = model.classifier(flattened_output)
            print(f'Output shape: {outputs.shape}, Labels shape: {labels.shape}')
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = running_corrects.double() / len(train_loader.dataset)

        print(f'Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

        val_acc, val_precision, val_recall = evaluate_model(model, encoder, val_loader, device)
        print(f'Val Acc: {val_acc:.4f}')
        print(f'Val Precision: {val_precision}')
        print(f'Val Recall: {val_recall}')

        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            patience_counter = 0
            # Guardar el mejor modelo
            torch.save(model.state_dict(), save_path)
            print(f'New best model saved with accuracy: {best_acc:.4f}')
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print("Early stopping")
            break

    model.load_state_dict(best_model_wts)
    return model

In [32]:
trained_model = train_model(efficientnet, encoder, criterion, optimizer, train_loader, val_loader, device, num_epochs=50, patience=5, save_path=model_save_path)


Epoch 1/50
----------


  0%|          | 0/346 [00:00<?, ?it/s]

Output shape: torch.Size([1024, 10]), Labels shape: torch.Size([32])





ValueError: Expected input batch_size (1024) to match target batch_size (32).

In [None]:
best_model_acc, best_model_precision, best_model_recall = evaluate_model(efficientnet, encoder, val_loader, device, load_best_model_path=model_save_path)
print(f'Best Model Accuracy: {best_model_acc:.4f}')
print(f'Best Model Precision per class: {best_model_precision}')
print(f'Best Model Recall per class: {best_model_recall}')