In [1]:
import os
import torch
from torchvision import datasets, transforms
from torchvision.utils import save_image
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split
import torch.nn as nn
from tqdm import tqdm

In [2]:
use_unet = True
if use_unet: import unet as autoencoder
else: import autoencoder2 as autoencoder

In [3]:
unlabeled_set_size = 0.8
labeled_train_absolute_set_size = 0.1
labeled_test_absolute_set_size = round(1 - (labeled_train_absolute_set_size + unlabeled_set_size), 2)

labeled_train_relative_set_size = round((labeled_train_absolute_set_size / (1 - unlabeled_set_size)), 2)
labeled_test_relative_set_size = 1 - labeled_train_relative_set_size

In [4]:
base_original_dir = os.path.join('Plant_leave_diseases_dataset', 'original')
base_noisy_dir = os.path.join('Plant_leave_diseases_dataset', 'with_noise')
os.makedirs('best_models', exist_ok=True)
model_save_path = \
    os.path.join('best_models', f'h1_{int(unlabeled_set_size*100)}-{int(labeled_train_absolute_set_size*100)}-{int(labeled_test_absolute_set_size*100)}_Autoencoder.pth')
noisy_model_save_path = \
    os.path.join('best_models', f'h1_{int(unlabeled_set_size*100)}-{int(labeled_train_absolute_set_size*100)}-{int(labeled_test_absolute_set_size*100)}_DenoisingAutoencoder.pth')
encoder_save_path = \
    os.path.join('best_models', f'h1_{int(unlabeled_set_size*100)}-{int(labeled_train_absolute_set_size*100)}-{int(labeled_test_absolute_set_size*100)}_Encoder.pth')

In [5]:
# Set up of the device
if torch.backends.mps.is_available():
    device = torch.device("mps")#tqm
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Using device: {device}")

Using device: cuda


## Datasets and Dataloaders

In [6]:
data_transforms = {
    'all': transforms.Compose([
        transforms.Resize((224, 224)),  
        # transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
        # transforms.Normalize(mean=0.4726, std=0.1515),
        transforms.Normalize(mean=0.4543, std=0.1757)
    ]),
    'dispersion_calc': transforms.Compose([
        transforms.Resize((224, 224)),  
        # transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor()
    ])
}

In [7]:
full_dataset = datasets.ImageFolder(base_original_dir, transform=data_transforms['all'])
full_noisy_dataset = datasets.ImageFolder(base_noisy_dir, transform=data_transforms['all'])

In [8]:
indices = list(range(len(full_dataset)))
noisy_indices =list(range(len(full_noisy_dataset)))

# Get the directory paths of images
image_paths = [sample[0] for sample in full_dataset.samples]
noisy_image_paths = [sample[0] for sample in full_noisy_dataset.samples]

labels = [os.path.split(os.path.dirname(path))[-1] for path in image_paths]
noisy_labels = [os.path.split(os.path.dirname(path))[-1] for path in noisy_image_paths]

In [9]:
#Obtenemos el 20% de los datos 
val_indices, train_indices = train_test_split(indices, test_size=unlabeled_set_size, stratify=labels, random_state=42)#Obtenemos el 20% de los datos 
noisy_val_indices, noisy_train_indices = train_test_split(noisy_indices, test_size=unlabeled_set_size, stratify=noisy_labels, random_state=42)

#Obtenemos las etiquetas de los datos de entrenamiento
val_labels = [labels[i] for i in val_indices]
noisy_val_labels = [noisy_labels[i] for i in noisy_val_indices]

#dividir el 20% en 10% de entrenamiento y 10% de validación
_, val_indices = train_test_split(val_indices, test_size=labeled_test_relative_set_size, stratify=val_labels, random_state=42)
_, noisy_val_indices = train_test_split(noisy_val_indices, test_size=labeled_test_relative_set_size, stratify=noisy_val_labels, random_state=42)

train_dataset = Subset(full_dataset, train_indices)
noisy_train_dataset = Subset(full_noisy_dataset, noisy_train_indices)
val_dataset = Subset(full_dataset, val_indices)
noisy_val_dataset = Subset(full_dataset, noisy_val_indices)

In [10]:
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=4)
noisy_loader = DataLoader(noisy_train_dataset, batch_size=4, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=4)
noisy_val_loader = DataLoader(noisy_val_dataset, batch_size=4, shuffle=False, num_workers=4)

print(f"Número de imágenes en el conjunto de entrenamiento: {len(train_loader.dataset)}")
print(f"Número de imágenes en el conjunto de entrenamiento: {len(noisy_loader.dataset)}")
print(f"Número de imágenes en el conjunto de validación: {len(val_loader.dataset)}")
print(f"Número de imágenes en el conjunto de validación: {len(noisy_val_loader.dataset)}")

Número de imágenes en el conjunto de entrenamiento: 49189
Número de imágenes en el conjunto de entrenamiento: 49189
Número de imágenes en el conjunto de validación: 6149
Número de imágenes en el conjunto de validación: 6149


In [11]:
# # Initialize variables for mean and std calculation
# mean = 0.0
# std = 0.0
# nb_samples = 0

# for data, _ in train_loader:
#     data = data.to(device)
#     batch_samples = data.size(0)  # number of images in the batch
#     data = data.view(batch_samples, -1)  # flatten the channel and spatial dimensions
#     mean += data.mean(1).sum(0)
#     std += data.std(1).sum(0)
#     nb_samples += batch_samples

# mean /= nb_samples
# std /= nb_samples

# print("Mean:", mean)
# print("Std:", std)

## Function definitions

In [12]:
def train_model(model, criterion, optimizer, train_loader, val_loader, device, num_epochs=10, patience=3):
    best_val_loss = float('inf')
    epochs_no_improve = 0
    
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        
        for inputs, _ in tqdm(train_loader):
            inputs = inputs.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, inputs)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
        
        train_loss = running_loss / len(train_loader)
        
        model.eval()
        val_loss = 0.0
        
        with torch.no_grad():
            for inputs, _ in val_loader:
                inputs = inputs.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, inputs)
                val_loss += loss.item()
        
        val_loss = val_loss / len(val_loader)
        
        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'Train Loss: {train_loss:.4f}')
        print(f'Val Loss: {val_loss:.4f}')

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0
            torch.save(model.state_dict(), model_save_path)
        else:
            epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print("Early stopping triggered!")
            break
    
    return model

In [13]:
def train_denoising_model(model, criterion, optimizer, noisy_loader, original_loader, val_loader, noisy_val_loader, device, num_epochs=10, patience=3):
    best_val_loss = float('inf')
    epochs_no_improve = 0
    
    for epoch in range(num_epochs):
        full_loader = zip(noisy_loader, original_loader)
        model.train()
        running_loss = 0.0
        
        for batch in tqdm(full_loader):
            inputs, _ = batch[0]
            loss_inputs, _ = batch[1]
            inputs = inputs.to(device)
            loss_inputs = loss_inputs.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, loss_inputs)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
        
        train_loss = running_loss / len(train_loader)
        
        model.eval()
        val_loss = 0.0
        
        full_val_loader = zip(noisy_val_loader, val_loader)
        
        with torch.no_grad():
            for batch in full_val_loader:
                inputs, _ = batch[0]
                loss_inputs, _ = batch[1]
                inputs = inputs.to(device)
                loss_inputs = loss_inputs.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, loss_inputs)
                val_loss += loss.item()
        
        val_loss = val_loss / len(val_loader)
        
        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'Train Loss: {train_loss:.4f}')
        print(f'Val Loss: {val_loss:.4f}')

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0
            torch.save(model.state_dict(), noisy_model_save_path)
        else:
            epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print("Early stopping triggered!")
            break
    
    return model

In [14]:
def load_model(model_path, device):
    model = autoencoder.AutoEncoder()
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    return model

In [15]:
def evaluate_model(model, dataloader, device):
    model.eval()
    with torch.no_grad():
        for inputs, _ in dataloader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            inputs[0] = inputs[0] * 0.1757 + 0.4543
            save_image(inputs[0], 'input.jpg')
            outputs[0] = outputs[0] * 0.1757 + 0.4543
            save_image(outputs[0], 'output.jpg')
            break

In [16]:
def evaluate_denoising_model(model, noisy_loader, original_loader, device):
    model.eval()
    full_loader = zip(noisy_loader, original_loader)
    with torch.no_grad():
        for batch in full_loader:
            inputs, _ = batch[0]
            inputs = inputs.to(device)
            outputs = model(inputs)
            # inputs[0] = inputs[0] * 0.1515 + 0.4726
            save_image(inputs[0], 'input.jpg')
            # outputs[0] = outputs[0] * 0.1515 + 0.4726
            save_image(outputs[0], 'output.jpg')
            break

## U-Net Autoencoder

In [17]:
model = autoencoder.AutoEncoder()
model.to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [18]:
trained_model = train_denoising_model(model, criterion, optimizer, noisy_loader, train_loader, val_loader, noisy_val_loader, device, num_epochs=10)

12298it [04:54, 41.80it/s]


Epoch 1/10
Train Loss: 1.0587
Val Loss: 1.0413


12298it [04:41, 43.75it/s]


Epoch 2/10
Train Loss: 1.0377
Val Loss: 1.0410


262it [00:34,  7.60it/s]


KeyboardInterrupt: 

In [None]:
# trained_model = train_model(model, criterion, optimizer, train_loader, val_loader, device, num_epochs=10, patience=3)

100%|██████████| 12298/12298 [05:06<00:00, 40.19it/s]


Epoch 1/10
Train Loss: 0.0728
Val Loss: 0.0109


100%|██████████| 12298/12298 [05:10<00:00, 39.58it/s]


Epoch 2/10
Train Loss: 0.0500
Val Loss: 0.0178


100%|██████████| 12298/12298 [05:12<00:00, 39.40it/s]


Epoch 3/10
Train Loss: 0.0448
Val Loss: 0.0053


100%|██████████| 12298/12298 [05:14<00:00, 39.13it/s]


Epoch 4/10
Train Loss: 0.0412
Val Loss: 0.0128


100%|██████████| 12298/12298 [05:13<00:00, 39.17it/s]


Epoch 5/10
Train Loss: 0.0394
Val Loss: 0.0054


 29%|██▉       | 3601/12298 [01:50<04:26, 32.69it/s]


KeyboardInterrupt: 

In [20]:
model = load_model(noisy_model_save_path, device)
# encoder = model.encoder
# torch.save(encoder.state_dict(), encoder_save_path)

In [21]:
evaluate_denoising_model(model, noisy_loader, train_loader, device)

In [None]:
# evaluate_model(model, val_loader, device=device)