In [3]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import KFold
from torchvision import transforms
import matplotlib.pyplot as plt
from ultralytics import YOLO
from skimage.metrics import peak_signal_noise_ratio as psnr
from skimage.metrics import structural_similarity as ssim
from skimage.metrics import mean_squared_error as mse
import warnings

warnings.filterwarnings("ignore")

torch.manual_seed(2697134)

<torch._C.Generator at 0x1b8d3b29b70>

### Explanation of WatermarkDataset Class
The `WatermarkDataset` class is a custom dataset implementation designed for handling datasets of watermarked images and their corresponding target (clear) images. It leverages a CSV file to retrieve the paths of images stored in a specified root directory, ensuring flexibility in dataset organization. The class resizes images to a given size, converts them to RGB format for consistency, and supports applying optional transformations, such as normalization or augmentation, to both input and target images. It includes a subsampling mechanism that allows users to skip over samples in the dataset, which is particularly useful for working with large datasets and enabling faster iterations during training.

In [5]:
class WatermarkDataset(Dataset):
    def __init__(self, csv_file_path, database_root_dir, image_size, subsample=1, transform=None):
        self.csvFile = pd.read_csv(csv_file_path)
        self.database_root_dir = database_root_dir
        self.image_size = image_size
        self.subsample = subsample
        self.transform = transform

    def __len__(self):
        return len(self.csvFile) // self.subsample

    def __getitem__(self, idx):
        if idx >= len(self):
            raise IndexError("Index out of bounds")
        actual_idx = idx * self.subsample

        input_image_path = os.path.join(self.database_root_dir, self.csvFile.iloc[actual_idx, 1])
        target_image_path = os.path.join(self.database_root_dir, self.csvFile.iloc[actual_idx, 0])

        input_image = Image.open(input_image_path).convert('RGB').resize((self.image_size, self.image_size))
        target_image = Image.open(target_image_path).convert('RGB').resize((self.image_size, self.image_size))

        if self.transform:
            input_image = self.transform(input_image)
            target_image = self.transform(target_image)

        return input_image, target_image

### Explanation of DoubleConv, Down, Up, OutConv, and VAE Classes

The `DoubleConv`, `Down`, `Up`, and `OutConv` classes serve as fundamental components in constructing the encoder-decoder architecture utilized in the `VAE` class. These modules facilitate various stages of feature extraction, downsampling, upsampling, and output generation, forming the backbone of the Variational Autoencoder (VAE) designed specifically for watermark removal.

- **DoubleConv** acts as a core building block that sequentially applies two convolutional layers, each followed by Batch Normalization and ReLU activation. This ensures effective feature extraction with normalization for stable training.
- **Down** performs downsampling using a MaxPooling layer, followed by a `DoubleConv` module for extracting hierarchical features while reducing spatial dimensions.
- **Up** focuses on upsampling the feature maps using a transposed convolution, followed by concatenation with skip connections from the encoder. It then applies a `DoubleConv` to refine the upsampled features, enabling high-resolution reconstruction.
- **OutConv** is responsible for producing the final output of the model by reducing the number of channels and applying a Sigmoid activation. This is particularly suited for pixel-level predictions, such as mask generation or image reconstruction.
- **VAE** integrates these components within an encoder-decoder framework and incorporates a latent space for compact representation. Additionally, it leverages a pre-trained YOLO model to identify watermark regions, using these predictions as an extra input channel to enhance the reconstruction process in masked areas.

The VAE model operates through three primary stages: detecting watermarked regions via YOLO, encoding the input image into a latent representation, and decoding this representation to reconstruct a watermark-free image. This modular design, combining feature extraction, downsampling, and upsampling with guidance from YOLO, enables precise and efficient watermar removal.


In [7]:
# ---------------------------------------------------------
# Define the DoubleConv module: Two convolutional layers with BatchNorm and ReLU
# ---------------------------------------------------------

class DoubleConv(nn.Module):
    """
    A sequential module consisting of two convolutional layers, 
    each followed by BatchNorm and ReLU activation.
    """

    def __init__(self, in_channels, out_channels):
        """
        Initialize the DoubleConv module.

        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
        """
        super(DoubleConv, self).__init__()
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        """
        Forward pass of the DoubleConv module.

        Args:
            x (torch.Tensor): Input tensor of shape (B, C, H, W).
        
        Returns:
            torch.Tensor: Output tensor after two convolutions.
        """
        return self.double_conv(x)


# ---------------------------------------------------------
# Define the Down module: MaxPooling followed by DoubleConv
# ---------------------------------------------------------

class Down(nn.Module):
    """
    A module for downsampling that applies MaxPooling followed by a DoubleConv.
    """

    def __init__(self, in_channels, out_channels):
        """
        Initialize the Down module.

        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
        """
        super(Down, self).__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),  # Downsample by a factor of 2
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        """
        Forward pass of the Down module.

        Args:
            x (torch.Tensor): Input tensor of shape (B, C, H, W).
        
        Returns:
            torch.Tensor: Downsampled tensor after MaxPooling and DoubleConv.
        """
        return self.maxpool_conv(x)


# ---------------------------------------------------------
# Define the Up module: Upsampling followed by DoubleConv
# ---------------------------------------------------------

class Up(nn.Module):
    """
    A module for upsampling that applies ConvTranspose2d for resizing, 
    concatenates with skip connections, and applies DoubleConv.
    """

    def __init__(self, in_channels, out_channels):
        """
        Initialize the Up module.

        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
        """
        super(Up, self).__init__()
        self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
        self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        """
        Forward pass of the Up module.

        Args:
            x1 (torch.Tensor): Tensor from the decoder path (upsampled).
            x2 (torch.Tensor): Tensor from the encoder path (skip connection).
        
        Returns:
            torch.Tensor: Output tensor after upsampling and concatenation.
        """
        # Perform upsampling
        x1 = self.up(x1)

        # Handle size mismatch due to rounding in upsampling
        diffY = x2.size(2) - x1.size(2)
        diffX = x2.size(3) - x1.size(3)
        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2])

        # Concatenate and apply DoubleConv
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)


# ---------------------------------------------------------
# Define the OutConv module: Final output layer with Sigmoid activation
# ---------------------------------------------------------

class OutConv(nn.Module):
    """
    A module for reducing the number of channels to the desired output 
    and applying Sigmoid activation for pixel-wise predictions.
    """

    def __init__(self, in_channels, out_channels):
        """
        Initialize the OutConv module.

        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
        """
        super(OutConv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1),  # 1x1 convolution
            nn.Sigmoid()
        )

    def forward(self, x):
        """
        Forward pass of the OutConv module.

        Args:
            x (torch.Tensor): Input tensor of shape (B, C, H, W).
        
        Returns:
            torch.Tensor: Output tensor with reduced channels.
        """
        return self.conv(x)

# ---------------------------------------------------------
# Define the VAE (Variational Autoencoder) Model
# ---------------------------------------------------------

class VAE(nn.Module):
    """
    A custom Variational Autoencoder (VAE) model for watermark removal. 
    Integrates a YOLO object detection model for mask prediction.
    """

    def __init__(self, in_channels=4, out_channels=3, latent_dim=256, input_size=128, yolo_model_path="yolov8s.pt", device='cuda'):
        """
        Initialize the VAE model.

        Args:
            in_channels (int): Number of input channels (e.g., 4 for RGB + Mask).
            out_channels (int): Number of output channels (e.g., 3 for RGB).
            latent_dim (int): Dimensionality of the latent space.
            input_size (int): Size of the input image (height and width).
            yolo_model_path (str): Path to the pre-trained YOLO model.
            device (str): Device to run the model on ('cuda' or 'cpu').
        """
        super(VAE, self).__init__()
        yolo = YOLO(yolo_model_path)
        object.__setattr__(self, 'yolo_model', yolo)
        self.device = device
        self.input_size = input_size

        # Encoder layers
        self.inc = DoubleConv(in_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        self.down4 = Down(512, 1024)

        # Latent space
        self.fc_mu = nn.Linear(1024 * (input_size // 16) ** 2, latent_dim)
        self.fc_logvar = nn.Linear(1024 * (input_size // 16) ** 2, latent_dim)
        self.fc_dec = nn.Linear(latent_dim, 1024 * (input_size // 16) ** 2)

        # Decoder layers
        self.up1 = Up(1024, 512)
        self.up2 = Up(512, 256)
        self.up3 = Up(256, 128)
        self.up4 = Up(128, 64)
        self.outc = OutConv(64, out_channels)

    def reparameterize(self, mu, logvar):
        """
        Reparameterization trick for the latent space.

        Args:
            mu (torch.Tensor): Mean of the latent distribution.
            logvar (torch.Tensor): Log variance of the latent distribution.
        
        Returns:
            torch.Tensor: Sampled latent vector.
        """
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def create_mask_from_yolo_preds(self, predictions, image_size):
        """
        Create a binary mask from YOLO predictions.

        Args:
            predictions: YOLO detection results.
            image_size (int): Size of the output mask (H, W).
        
        Returns:
            torch.Tensor: Binary mask of shape (1, H, W).
        """
        mask = torch.zeros((1, image_size, image_size))
        for det in predictions:
            boxes = det.boxes
            for box in boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
                mask[:, y1:y2, x1:x2] = 1.0
        return mask

    def forward(self, input_images):
        """
        Forward pass of the VAE model.

        Args:
            input_images (torch.Tensor): Batch of input images.
        
        Returns:
            tuple: (Final output, mean, log variance, predicted masks).
        """
        # YOLO mask prediction
        batch_size = input_images.size(0)
        predicted_masks = []
        for i in range(batch_size):
            img_np = (input_images[i].permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8)
            results = self.yolo_model.predict(img_np, imgsz=self.input_size, verbose=False, device=self.device)
            pmask = self.create_mask_from_yolo_preds(results, self.input_size).to(input_images.device)
            predicted_masks.append(pmask)
        predicted_masks = torch.stack(predicted_masks, dim=0)

        # Combine input with predicted masks
        combined_input = torch.cat([input_images, predicted_masks], dim=1)

        # Encoder
        x1 = self.inc(combined_input)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)

        # Latent space
        x_flat = x5.view(batch_size, -1)
        mu = torch.clamp(self.fc_mu(x_flat), -10, 10)
        logvar = torch.clamp(self.fc_logvar(x_flat), -10, 10)
        z = self.reparameterize(mu, logvar)

        # Decoder
        x_decoded = self.fc_dec(z).view(batch_size, 1024, x5.size(2), x5.size(3))
        x = self.up1(x_decoded, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        reconstruction = self.outc(x)

        # Apply mask blending
        predicted_masks_3 = predicted_masks.expand_as(reconstruction)
        final_output = input_images * (1 - predicted_masks_3) + reconstruction * predicted_masks_3

        return final_output, mu, logvar, predicted_masks

### Explanation of VAELoss Class

The `VAELoss` class is a specialized loss function designed for the Variational Autoencoder (VAE) model to tackle watermark removal tasks. It integrates two key components: reconstruction loss and KL divergence loss, each playing a distinct role in enhancing the model's performance. The reconstruction loss focuses on minimizing pixel-wise differences between the reconstructed images and the target (ground truth) images. To ensure the model pays more attention to watermarked regions, the loss assigns higher weights to these areas using a weight map derived from the difference between the input and target images. This approach allows the model to prioritize the challenging regions affected by watermarks during training.

The second component, KL divergence loss, promotes regularization by encouraging the latent space to follow a standard normal distribution. This helps maintain a compact and stable latent representation, balancing reconstruction accuracy with effective regularization. The impact of KL divergence is controlled through a weighting factor (`kl_factor`), allowing flexibility in adjusting its influence during training. By combining these two components, `VAELoss` effectively guides the VAE model to achieve precise watermark removal while ensuring the latent space remains well-regularized and generalizable. This design makes it an essential component for training the VAE model to handle complex watermark removal challenes.
egularized latent space.


In [9]:
class VAELoss(nn.Module):
    """
    Custom loss function for the VAE model.
    
    The loss combines:
        1. Reconstruction Loss: Measures the difference between reconstructed images and target images,
           with a higher focus on watermarked regions.
        2. KL Divergence Loss: Encourages the latent distribution to match a standard normal distribution.

    Watermarked regions are identified using the predicted mask, and additional weights are applied to 
    emphasize these areas during the reconstruction loss calculation.
    """

    def __init__(self):
        """
        Initialize the VAELoss class.
        """
        super(VAELoss, self).__init__()

    def forward(self, reconstructed_images, target_images, input_images, mu, logvar, predicted_mask, kl_factor):
        """
        Calculate the total loss for the VAE model.

        Args:
            reconstructed_images (torch.Tensor): Reconstructed output images of shape (B, C, H, W).
            target_images (torch.Tensor): Ground truth target images of shape (B, C, H, W).
            input_images (torch.Tensor): Original input images of shape (B, C, H, W).
            mu (torch.Tensor): Mean of the latent space distribution of shape (B, latent_dim).
            logvar (torch.Tensor): Log variance of the latent space distribution of shape (B, latent_dim).
            predicted_mask (torch.Tensor): Predicted binary masks of shape (B, 1, H, W).
            kl_factor (float): Weighting factor for the KL divergence loss.
        
        Returns:
            torch.Tensor: Total loss value (scalar).
        """
        eps = 1e-8  # A small epsilon value to prevent division by zero

        # Step 1: Compute the mask for watermarked regions
        # Convert predicted masks to binary (thresholded at 0.5)
        mask = (predicted_mask > 0.5).float()
        masked_pixels = mask.sum() + eps  # Count the number of masked pixels

        # Step 2: Compute the difference between input and target images
        # This identifies regions likely containing watermarks
        diff_input_target = (input_images - target_images).abs()

        # Step 3: Apply additional weights to watermarked regions
        # Regions with larger differences (greater than 0.1) are given higher weight
        watermark_weight = (diff_input_target > 0.1).float() * 5 + 1  # Threshold of 0.1

        # Step 4: Compute the reconstruction loss
        # Calculate the pixel-wise squared difference between reconstructed and target images
        diff_recon_target = (reconstructed_images - target_images) ** 2

        # Apply the mask and watermark weights to the reconstruction loss
        weighted_diff = diff_recon_target * mask * watermark_weight
        recon_loss = weighted_diff.sum() / masked_pixels  # Normalize by the number of masked pixels

        # Step 5: Compute the KL divergence loss
        # Encourages the latent space distribution to approach a standard normal distribution
        KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) / (reconstructed_images.size(0) + eps)

        # Step 6: Combine the losses
        # The reconstruction loss is weighted higher (by a factor of 5)
        total_loss = recon_loss * 5 + kl_factor * KLD

        return total_loss

### Explanation of `validateModel` Function

The `validateModel` function is responsible for evaluating the Variational Autoencoder (VAE) model's performance on a validation dataset by calculating the average loss over all batches. This function is integral to monitoring the model's generalization ability during training.

The function begins by setting the model to evaluation mode using `model.eval()`. This ensures that layers such as dropout and batch normalization behave correctly, as they are only intended for training. To optimize performance and reduce memory consumption, the function employs the `torch.no_grad()` context, which disables gradient computation during validation.

For each batch in the validation dataset, the function calculates the loss using the provided custom loss function, such as `VAELoss`. This loss typically combines reconstruction loss, which measures how well the model reconstructs input images, and KL divergence loss, which regularizes the latent space. The `kl_factor` argument determines the weight of the KL divergence term, allowing fine-tuned control over its impact.

The function iterates through all batches in the validation DataLoader, accumulating the loss values for each batch. Once all batches have been processed, the average validation loss is computed and returned. This average loss serves as a key metric for assessing the model's performance on unseen data and guiding decisions during hyperparameter tuning and training.

Overall, the `validateModel` function provides an efficient and reliable way to evaluate the VAE model, ensuring that it generalizes well to the validation dataset without requiring additional training or gradien updates.

r tuning.


In [11]:
def validateModel(model, validation_loader, loss_func, device, kl_factor):
    """
    Validate the VAE model on the validation dataset.

    This function evaluates the model's performance on the validation dataset
    by calculating the average validation loss over all batches. The loss is
    computed using the specified loss function, which includes reconstruction
    and KL divergence losses.

    Args:
        model (nn.Module): The VAE model to be validated.
        validation_loader (DataLoader): DataLoader for the validation dataset.
        loss_func (callable): Custom loss function (e.g., VAELoss) for computing the loss.
        device (str): Device to run the validation on ('cuda' or 'cpu').
        kl_factor (float): Weighting factor for the KL divergence loss.

    Returns:
        float: The average validation loss over all batches.
    """
    # Set the model to evaluation mode (disables dropout, batch norm updates, etc.)
    model.eval()

    # List to store the loss for each batch
    val_losses = []

    # Disable gradient computation for validation to save memory and computation
    with torch.no_grad():
        # Iterate over batches in the validation dataset
        for input_images, target_images in validation_loader:
            # Move input and target images to the specified device (e.g., GPU or CPU)
            input_images = input_images.to(device)
            target_images = target_images.to(device)

            # Perform a forward pass through the VAE model
            recon_images, mu, logvar, predicted_masks = model(input_images)

            # Compute the loss using the provided loss function
            # Includes reconstruction loss and KL divergence loss
            loss = loss_func(recon_images, target_images, input_images, mu, logvar, predicted_masks, kl_factor=kl_factor)

            # Append the computed loss for this batch to the list
            val_losses.append(loss.item())

    # Compute and return the average validation loss over all batches
    return np.mean(val_losses)

### Explanation of `trainModel` Function

The `trainModel` function is designed to train the Variational Autoencoder (VAE) model using K-Fold Cross Validation, a robust technique for evaluating model performance. By dividing the dataset into multiple folds, this method ensures that the model is trained and validated on different subsets, improving its ability to generalize.

The function begins by splitting the dataset into `num_folds` parts. For each fold, one subset is used for validation while the remaining subsets are used for training. This iterative process continues until every fold has been used as a validation set, providing a comprehensive evaluation of the model's performance.

The training process uses the `VAELoss` function, which combines reconstruction loss and KL divergence loss. A warm-up period for the KL divergence factor (`kl_factor`) allows the model to focus on reconstruction in the early epochs before gradually incorporating the regularization effect of KL divergence. This dynamic adjustment improves training stability and performance.

To adapt the learning rate dynamically, the function employs a scheduler, `ReduceLROnPlateau`, which reduces the learning rate when validation loss plateaus. This ensures efficient optimization and helps avoid overfitting.

Throughout the training process, the function logs metrics such as average training and validation losses for each epoch. These metrics are stored for visualization and analysis, providing insights into the model's learning behavior. Additionally, the function saves the trained model weights and loss plots in the `outputs` directory for further use.

The `trainModel` function integrates all essential components for effective training and evaluation, ensuring that the VAE model achieves high performance and generalizes well to nseen data.
cross epochs.


In [13]:
def trainModel(model, optim, dataset, n_epochs, mini_batch_size, num_folds, device, model_save_name):
    """
    Train the VAE model using K-Fold Cross Validation.

    This function trains the model over multiple folds, where the dataset is split
    into training and validation subsets for each fold. The training loss and
    validation loss are logged and visualized, and the best-performing model is saved.

    Args:
        model (nn.Module): The VAE model to be trained.
        optim (torch.optim.Optimizer): Optimizer for model training.
        dataset (Dataset): Dataset used for training and validation.
        n_epochs (int): Number of epochs to train the model.
        mini_batch_size (int): Number of samples per batch.
        num_folds (int): Number of folds for K-Fold Cross Validation.
        device (str): Device to train the model on ('cuda' or 'cpu').
        model_save_name (str): Base name for saving the trained model and outputs.

    Returns:
        None
    """
    print(f"Training on device: {device}")

    # Define the custom loss function
    loss_func = VAELoss()

    # Set up the K-Fold Cross Validator
    k_fold = KFold(n_splits=num_folds, shuffle=True, random_state=2697134)

    # Learning rate scheduler to reduce LR on plateau
    scheduler = ReduceLROnPlateau(optim, mode='min', factor=0.1, patience=5, verbose=True)

    # Create an output folder to save model weights and visualizations
    output_folder = "outputs"
    os.makedirs(output_folder, exist_ok=True)

    # Lists to store training and validation losses
    training_losses = []
    val_losses = []

    # Loop through each fold
    for fold, (train_ids, val_ids) in enumerate(k_fold.split(dataset)):
        print(f'\n{model_save_name} Fold {fold + 1}/{num_folds}')

        # Define the train and validation samplers for the current fold
        train_subsampler = SubsetRandomSampler(train_ids)
        val_subsampler = SubsetRandomSampler(val_ids)

        # Create DataLoaders for training and validation
        training_loader = DataLoader(dataset, batch_size=mini_batch_size, sampler=train_subsampler, num_workers=0, pin_memory=True)
        validation_loader = DataLoader(dataset, batch_size=mini_batch_size, sampler=val_subsampler, num_workers=0, pin_memory=True)

        # Store losses for the current fold
        fold_training_losses = []
        fold_val_losses = []

        # Training loop for each epoch
        for epoch in range(n_epochs):
            model.train()  # Set the model to training mode

            # Calculate KL divergence warm-up factor
            warmup_epochs = int(n_epochs * 0.75)
            max_kl_factor = 0.5
            kl_factor = epoch / float(warmup_epochs) if epoch < warmup_epochs else max_kl_factor

            batch_losses = []  # List to store batch losses
            for batch_idx, (input_images, target_images) in enumerate(training_loader):
                # Move data to the specified device
                input_images = input_images.to(device)
                target_images = target_images.to(device)

                # Zero the gradients
                optim.zero_grad()

                # Forward pass through the model
                recon_images, mu, logvar, predicted_masks = model(input_images)

                # Compute the loss
                loss = loss_func(recon_images, target_images, input_images, mu, logvar, predicted_masks, kl_factor=kl_factor)

                # Backward pass and optimization
                loss.backward()
                optim.step()

                # Store the batch loss
                batch_losses.append(loss.item())

            # Calculate and log average training loss for the epoch
            epoch_loss = np.mean(batch_losses)
            fold_training_losses.append(epoch_loss)
            print(f"\tEpoch {epoch + 1}/{n_epochs}")
            print(f"\t\tTraining Loss: {epoch_loss:.6f}")

            # Perform validation and compute validation loss
            val_loss = validateModel(model, validation_loader, loss_func, device, kl_factor=kl_factor)
            fold_val_losses.append(val_loss)
            print(f"\t\tValidation Loss: {val_loss:.6f}")

            # Update the learning rate scheduler
            scheduler.step(val_loss)

        # Append fold losses to global losses
        training_losses.extend(fold_training_losses)
        val_losses.extend(fold_val_losses)

    # Save the final model
    final_model_path = os.path.join(output_folder, f"{model_save_name}_final.pt")
    torch.save(model.state_dict(), final_model_path)
    print(f"\nModel saved as {final_model_path}")

    # Plot and save the training and validation losses
    plt.figure(figsize=(10, 5))
    epochs_total = range(1, len(training_losses) + 1)
    plt.plot(epochs_total, training_losses, label='Training Loss')
    plt.plot(epochs_total, val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title(f'{model_save_name} Training and Validation Losses')
    plt.legend()
    loss_plot_path = os.path.join(output_folder, f"{model_save_name}_loss_plot.png")
    plt.savefig(loss_plot_path)
    plt.close()

### Explanation of `testModel` Function

The `testModel` function is designed to evaluate the Variational Autoencoder (VAE) model on a test dataset, measuring its performance in terms of reconstruction accuracy and perceptual quality. It computes the average test loss, Peak Signal-to-Noise Ratio (PSNR), and Structural Similarity Index Measure (SSIM), which are critical metrics for assessing the quality of reconstructed images.

The function starts by preparing the test dataset, loading images from the specified path, and applying the necessary transformations such as resizing and normalization. The model is then set to evaluation mode using `model.eval()`, ensuring that layers like dropout and batch normalization behave correctly. To optimize memory usage and speed during evaluation, gradient computation is disabled using the `torch.no_grad()` context.

For each batch in the test dataset, the function calculates the test loss using the `VAELoss` function, which combines reconstruction and KL divergence terms. In addition to the loss, the function computes PSNR and SSIM metrics for each image, which evaluate the quality of reconstructed images compared to their ground truth. PSNR measures the signal fidelity, while SSIM assesses the perceptual similarity between images.

After processing all batches, the function calculates the average test loss, PSNR, and SSIM values. These results are printed for immediate analysis and returned as a dictionary for further use, such as logging or comparison with other models. The `testModel` function provides a comprehensive evaluation framework, ensuring that the model's performance is measured accurately on unsen data.

images.


In [15]:
def testModel(model, test_path, transform, mini_batch_size, device, model_save_name, image_size):
    """
    Test the VAE model on a test dataset and compute evaluation metrics.

    This function evaluates the VAE model on the provided test dataset, calculates
    the test loss using the custom loss function, and computes the PSNR and SSIM
    metrics for the reconstructed images.

    Args:
        model (torch.nn.Module): The trained VAE model.
        test_path (str): Path to the test dataset.
        transform (callable): Transformations to apply to the images.
        mini_batch_size (int): Batch size for testing.
        device (str): Device to run the testing on ('cuda' or 'cpu').
        model_save_name (str): Name of the model (for display purposes).
        image_size (int): Image size to use for the test dataset.

    Returns:
        dict: A dictionary containing the average test loss, PSNR, and SSIM.
    """
    # Create the test dataset
    dataset = WatermarkDataset(
        csv_file_path=f"{test_path}/metadata.csv",
        database_root_dir=test_path,
        subsample=1,
        image_size=image_size,
        transform=transform
    )

    # Create a DataLoader for the test dataset
    test_loader = DataLoader(
        dataset,
        batch_size=mini_batch_size,
        shuffle=False,
        num_workers=0,
        pin_memory=True
    )

    # Define the custom loss function
    loss_func = VAELoss()

    # Set the model to evaluation mode
    model.eval()

    # Disable gradient computation for testing
    with torch.no_grad():
        test_losses = []  # List to store test losses
        psnrs = []        # List to store PSNR values
        ssims = []        # List to store SSIM values

        # Iterate over the test dataset
        for input_images, target_images in test_loader:
            # Move data to the specified device
            input_images = input_images.to(device)
            target_images = target_images.to(device)

            # Forward pass through the model
            recon_images, mu, logvar, predicted_masks = model(input_images)

            # Calculate the loss
            loss = loss_func(recon_images, target_images, input_images, mu, logvar, predicted_masks, kl_factor=0.1)
            test_losses.append(loss.item())

            # Move tensors to CPU for metric calculation
            recon_images_np = recon_images.permute(0, 2, 3, 1).cpu().numpy()
            target_images_np = target_images.permute(0, 2, 3, 1).cpu().numpy()

            target_images_np = target_images_np / 255.0
            recon_images_np = recon_images_np / 255.0

            # Calculate PSNR and SSIM for each image in the batch
            for i in range(recon_images_np.shape[0]):
                # PSNR Calculation
                curr_mse = mse(target_images_np[i], recon_images_np[i])
                if curr_mse == 0:
                    cur_psnr = 1.0  # Completely identical images
                else:
                    cur_psnr = psnr(target_images_np[i], recon_images_np[i], data_range=1.0)
                    
                # SSIM Calculation
                min_side = min(target_images_np[i].shape[:2])
                win_size = min(min_side, 7)  # Dynamic win_size
                if win_size % 2 == 0:
                    win_size -= 1  # Ensure odd value for win_size
        
                data_range = target_images_np[i].max() - target_images_np[i].min()
                if data_range == 0:
                    data_range = 1e-5
                    
                cur_ssim = ssim(
                    target_images_np[i],
                    recon_images_np[i],
                    win_size=win_size,
                    data_range=data_range,
                    multichannel=True,
                    channel_axis=-1
                )

                psnrs.append(cur_psnr)
                ssims.append(cur_ssim)

        # Calculate average metrics
        avg_test_loss = np.mean(test_losses)
        avg_psnr = np.mean(psnrs)
        avg_ssim = np.mean(ssims)

        # Print results
        print(f"{model_save_name} Test Results:")
        print(f"\tAverage Test Loss: {avg_test_loss:.4f}")
        print(f"\tAverage PSNR: {avg_psnr:.2f} dB")
        print(f"\tAverage SSIM: {avg_ssim:.4f}")

        # Return the results as a dictionary
        return {
            "test_loss": avg_test_loss,
            "psnr": avg_psnr,
            "ssim": avg_ssim
        }

### Explanation of `initModelWeights` Function

The `initModelWeights` function is responsible for initializing the weights and biases of the layers in the VAE model, ensuring effective training by starting with well-initialized parameters. Proper weight initialization helps prevent issues such as vanishing or exploding gradients, which can hinder the learning process.

The function applies specific initialization techniques tailored to different layer types in the model. For convolutional layers (`nn.Conv2d` and `nn.ConvTranspose2d`), it uses Kaiming Normal (He Initialization) to initialize the weights. This method is particularly suited for layers with ReLU activations, as it maintains a consistent variance of activations across layers. The biases for these layers are set to 0 to ensure no initial offset in the computations.

For batch normalization layers (`nn.BatchNorm2d`), the scale parameter (weight) is initialized to 1, ensuring that the initial normalization preserves the scale of inputs. The shift parameter (bias) is set to 0, providing a neutral starting point for normalization adjustments.

In the case of linear layers (`nn.Linear`), the function employs Xavier Normal initialization for weights. This technique is effective for layers with linear or sigmoid activations, as it maintains a balanced variance of activations across layers. Biases for linear layers are also initialized to 0, ensuring consistency across different layer types.

By handling multiple layer types and applying the most suitable initialization techniques, the `initModelWeights` function provides flexible and robust weight initialization. This flexibility not only stabilizes gradients during training but also ensures that the model starts with parameters optimized for efficientlearning.

.


In [17]:
def initModelWeights(m):
    """
    Initialize the weights of the model layers.

    This function initializes the weights of convolutional, batch normalization,
    and linear layers in the VAE model using appropriate initialization techniques.
    Proper weight initialization helps in faster convergence and avoids issues like
    vanishing or exploding gradients.

    Args:
        m (torch.nn.Module): A module in the VAE model whose weights need to be initialized.

    Returns:
        None: Modifies the weights of the module in place.
    """
    # Check if the module is a 2D convolutional layer or a transposed convolutional layer
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
        # Initialize weights using Kaiming Normal initialization (He initialization)
        nn.init.kaiming_normal_(m.weight, nonlinearity='relu')

        # Initialize biases to 0 if they exist
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)

    # Check if the module is a 2D batch normalization layer
    elif isinstance(m, nn.BatchNorm2d):
        # Initialize the scale parameter (weight) to 1
        nn.init.constant_(m.weight, 1)

        # Initialize the shift parameter (bias) to 0
        nn.init.constant_(m.bias, 0)

    # Check if the module is a linear (fully connected) layer
    elif isinstance(m, nn.Linear):
        # Initialize weights using Xavier Normal initialization
        nn.init.xavier_normal_(m.weight)

        # Initialize biases to 0
        nn.init.constant_(m.bias, 0)

### Explanation of `startTraining` Function

The `startTraining` function orchestrates the complete training and testing workflow for the Variational Autoencoder (VAE) model. It utilizes the best hyperparameters obtained from a previous hyperparameter tuning process to train the model on a training dataset and evaluate its performance on a test dataset. This function ensures an efficient and systematic approach to model development and validation.

The function begins by extracting hyperparameters such as batch size, learning rate, number of epochs, KL divergence weight, and mask weight from the provided `best_config` dictionary. These parameters are crucial for controlling various aspects of training and regularization. 

The training dataset is then prepared by loading it from the specified path and applying necessary transformations, such as resizing and normalization, to standardize the data. The model is initialized with the specified latent dimension and image size, followed by applying a custom weight initialization function to set up the model with well-initialized weights, ensuring stable and efficient training.

For training, the function leverages the `trainModel` function, which employs K-Fold Cross Validation to train and validate the model across multiple folds. This ensures robust performance evaluation and reduces the risk of overfitting. Once training is complete, the model is tested on the test dataset using the `testModel` function, which computes key evaluation metrics such as test loss, Peak Signal-to-Noise Ratio (PSNR), and Structural Similarity Index Measure (SSIM). These metrics provide a comprehensive assessment of the model's reconstruction quality and overall performance.

Finally, the function performs resource cleanup by deleting the model instance and clearing the GPU cache (if applicable) to free up memory for subsequent tasks. The `startTraining` function integrates all essential steps in the model development pipeline, from training to testing, ensuring a seamless workflow for VAE-based watermark remoal tasks.
ettings.
erations.


In [19]:
def startTraining(best_config, transform, initModelWeights, image_size, num_folds, train_path, test_path, device, yolo_model_path, model_save_name):
    """
    Start the training and testing process for the VAE model.

    This function trains the Variational Autoencoder (VAE) model using the best hyperparameter
    configuration found during hyperparameter tuning. It initializes the model, applies
    weight initialization, trains it using K-Fold Cross Validation, and evaluates it
    on a test dataset.

    Args:
        best_config (dict): Dictionary containing the best hyperparameters from hyperparameter tuning.
        transform (callable): Transformations to apply to the dataset (e.g., resizing, normalization).
        initModelWeights (callable): Function to initialize the model weights.
        image_size (int): Size of the input images (height and width).
        num_folds (int): Number of folds for K-Fold Cross Validation.
        train_path (str): Path to the training dataset.
        test_path (str): Path to the test dataset.
        device (str): Device to run the training on ('cuda' or 'cpu').
        model_save_name (str): Name for saving the trained model and outputs.

    Returns:
        None
    """
    # Extract hyperparameters from the best configuration
    mini_batch_size = best_config["batch_size"]
    learning_rate = best_config["lr"]
    n_epochs = best_config["n_epochs"]
    latent_dim = best_config["latent_dim"]

    # Create the training dataset
    dataset = WatermarkDataset(
        csv_file_path=os.path.join(train_path, "metadata.csv"),
        database_root_dir=train_path,
        image_size=image_size,
        subsample=1,
        transform=transform
    )

    # Initialize the VAE model and move it to the specified device
    model = VAE(
        in_channels=4, 
        out_channels=3, 
        latent_dim=latent_dim, 
        input_size=image_size,
        yolo_model_path=yolo_model_path,
        device=device
    ).to(device)

    # Apply weight initialization to the model
    model.apply(initModelWeights)

    # Set up the optimizer (Adam) with the specified learning rate
    optim = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

    # Train the model using K-Fold Cross Validation
    trainModel(
        model=model,
        optim=optim,
        dataset=dataset,
        n_epochs=n_epochs,
        mini_batch_size=mini_batch_size,
        num_folds=num_folds,
        device=device,
        model_save_name=model_save_name
    )

    # Test the model on the test dataset
    testModel(
        model=model,
        test_path=test_path,
        transform=transform,
        mini_batch_size=mini_batch_size,
        device=device,
        model_save_name=model_save_name,
        image_size=image_size
    )

    # Free up memory by deleting the model and clearing the GPU cache (if applicable)
    del model
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

### Definitions

The provided code block sets up the device and training configuration for the model.

First, it checks if CUDA (GPU support) is available. If a GPU is detected, the name of the GPU is printed for confirmation; otherwise, a message is displayed indicating that no GPU is found, meaning the model will run on the CPU, which may result in slower processing. Based on the availability of CUDA, the `device` is set to either GPU or CPU, ensuring compatibility with the hardware.

The image size for the dataset is defined as 128 pixels, which determines the dimensions to which all input images will be resized. The number of folds for K-Fold Cross Validation is set to 5, allowing the model to be validated on multiple splits of the dataset, thereby improving its robustness and generalization.

To preprocess the images, a transformation pipeline is defined. The pipeline includes:
- **`Resize`**: Resizes the input images to a uniform size of 128x128 pixels, ensuring consistency in model input dimensions.
- **`ToTensor`**: Converts images into PyTorch tensors, a format compatible with the model.

These transformations are applied sequentially to ensure that the input data is properly formatted for training. Additionally, the transformations are logged to provide visibility into the preprocessing steps applied to the dataset, which aids in debugging and understanding the data preparaton process.
et.
aining.


In [21]:
# Check for CUDA availability and set the device
if torch.cuda.is_available():
    # Print the name of the GPU if available
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    # Fallback to CPU if no GPU is available
    print("No GPU found. The model will run on CPU, which may be slower.")

# Set the device to CUDA if available, otherwise default to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the image size for resizing input images in the dataset
image_size = 128

# Define the number of folds for K-Fold Cross Validation
num_folds = 5

# Define YOLO best model path
base_path = os.getcwd()  # Çalışma dizini
yolo_model_path = os.path.join(base_path, "yolo_best_model", "best.pt")

best_config = {
    "batch_size": 32,
    "lr": 1e-3,
    "n_epochs": 10,
    "latent_dim": 256
}

# Define transformations for preprocessing images
transform = transforms.Compose([
        transforms.Resize((image_size, image_size)),  # Resize images to the defined size
        transforms.ToTensor()                        # Convert images to PyTorch tensors
    ])

# Log the transformations being applied for debugging purposes
print(f"Applying the following transformations: {transform}")

Using GPU: NVIDIA GeForce RTX 3070 Ti Laptop GPU
Applying the following transformations: Compose(
    Resize(size=(128, 128), interpolation=bilinear, max_size=None, antialias=True)
    ToTensor()
)


### No Logo And Low Opacity Model: Hyperparameter Optimization, Training, and Testing

This code block outlines the comprehensive workflow for training the **No Logo Low Opacity VAE Model**, encompassing hyperparameter optimization, training, and testing.

The process begins by defining the paths for the training and test datasets. These paths are constructed by appending the respective dataset folder names to the current working directory. To ensure the integrity of the workflow, the script checks for the existence of these directories. If any of the specified paths are missing, an error is raised, preventing further execution.

Hyperparameter tuning is then initiated using the `startHyperparameterTuning` function. This function leverages the Hyperband scheduler with Ray Tune to explore various combinations of hyperparameters, including learning rate, batch size, KL divergence weight, latent dimension, and number of epochs. The key inputs to this function are:
- A transformation pipeline (`transform`) for preprocessing images.
- A weight initialization function (`initModelWeights`) to ensure proper model setup.
- The training dataset path (`train_path`) for data preparation.
- The image size (`image_size`) for resizing inputs.
- The computation device (`device`), which is either GPU or CPU, depending on availability.

The hyperparameter optimization process identifies the best configuration, which is then logged and used for model training. Once the optimal parameters are determined, the `startTraining` function is called to train the model using K-Fold Cross Validation. This robust training approach ensures that the model is validated across multiple dataset splits, enhancing its generalization and reducing the risk of overfitting.

During training, the model's performance is evaluated using metrics such as reconstruction loss, Peak Signal-to-Noise Ratio (PSNR), and Structural Similarity Index Measure (SSIM). These metrics provide insights into the quality of the reconstructed images and the overall effectiveness of the model.

After training, the model is tested on a separate test dataset to assess its performance on unseen data. The trained model and its associated results are saved with a descriptive name that includes details such as the image size and number of folds used in cross-validation. This ensures that the model can be easily identified and reused for future tasks.

Finally, the script confirms the successful completion of the hyperparameter optimization, training, and testing workflow, providing a comprehensive solution for watermark removal tasks wth the VAE model.
ning and testing.

### ***IMPORTANT NOTE***
During hyperparameter tuning with Ray Tune, some warnings related to performance bottlenecks may be encountered. While these warnings do not prevent the process from completing, they suggest potential areas for optimizing the tuning workflow to improe efficiecy and scalability.
is needed.


In [21]:
# Define the paths for the training and test datasets
train_path = os.path.join(os.getcwd(), "no_logo_and_low_opacity_watermark_dataset_train")
test_path = os.path.join(os.getcwd(), "no_logo_and_low_opacity_watermark_dataset_test")

# Ensure the dataset paths exist
if not os.path.exists(train_path):
    raise FileNotFoundError(f"Training dataset path not found: {train_path}")
if not os.path.exists(test_path):
    raise FileNotFoundError(f"Test dataset path not found: {test_path}")

# Start the hyperparameter tuning process
print(f"Starting hyperparameter tuning on {device}...")

# Define a descriptive model save name
model_save_name = f"VAE_NoLogoLowOpacity_{image_size}px_{num_folds}folds"

# Start the training process with the best configuration
print("Starting training with the best configuration...")
startTraining(
    best_config=best_config,
    transform=transform,
    initModelWeights=initModelWeights,
    image_size=image_size,
    num_folds=num_folds,
    train_path=train_path,
    test_path=test_path,
    device=device,
    yolo_model_path=yolo_model_path,
    model_save_name=model_save_name
)

print("Training and testing completed successfully.")

Starting hyperparameter tuning on cuda...
Starting training with the best configuration...
Training on device: cuda

VAE_NoLogoLowOpacity_128px_5folds Fold 1/5
	Epoch 1/10
		Training Loss: 0.042826
		Validation Loss: 0.015227
	Epoch 2/10
		Training Loss: 503.681023
		Validation Loss: 125.167428
	Epoch 3/10
		Training Loss: 406.553346
		Validation Loss: 192.531059
	Epoch 4/10
		Training Loss: 380.057882
		Validation Loss: 253.740759
	Epoch 5/10
		Training Loss: 523.751203
		Validation Loss: 2.647768
	Epoch 6/10
		Training Loss: 0.473824
		Validation Loss: 0.851245
	Epoch 7/10
		Training Loss: 0.213039
		Validation Loss: 0.617133
	Epoch 8/10
		Training Loss: 0.063886
		Validation Loss: 0.520381
	Epoch 9/10
		Training Loss: 0.058703
		Validation Loss: 0.548439
	Epoch 10/10
		Training Loss: 0.056998
		Validation Loss: 0.840445

VAE_NoLogoLowOpacity_128px_5folds Fold 2/5
	Epoch 1/10
		Training Loss: 0.016444
		Validation Loss: 0.012533
	Epoch 2/10
		Training Loss: 0.026374
		Validation Loss

### No Logo And High Opacity Model: Hyperparameter Optimization, Training And Test
Below block creates No Logo And High Opacity VAE Model

In [23]:
# Define the paths for the training and test datasets
train_path = os.getcwd() + "/no_logo_and_high_opacity_watermark_dataset_train"
test_path = os.getcwd() + "/no_logo_and_high_opacity_watermark_dataset_test"

# Ensure the dataset paths exist
if not os.path.exists(train_path):
    raise FileNotFoundError(f"Training dataset path not found: {train_path}")
if not os.path.exists(test_path):
    raise FileNotFoundError(f"Test dataset path not found: {test_path}")

# Start the hyperparameter tuning process
print(f"Starting hyperparameter tuning on {device}...")

# Define a descriptive model save name
model_save_name = f"VAE_NoLogoHighOpacity_{image_size}px_{num_folds}folds"

# Start the training process with the best configuration
print("Starting training with the best configuration...")
startTraining(
    best_config=best_config,
    transform=transform,
    initModelWeights=initModelWeights,
    image_size=image_size,
    num_folds=num_folds,
    train_path=train_path,
    test_path=test_path,
    device=device,
    yolo_model_path=yolo_model_path,
    model_save_name=model_save_name
)

print("Training and testing completed successfully.")

Starting hyperparameter tuning on cuda...
Starting training with the best configuration...
Training on device: cuda

VAE_NoLogoHighOpacity_128px_5folds Fold 1/5
	Epoch 1/10
		Training Loss: 0.066940
		Validation Loss: 0.037901
	Epoch 2/10
		Training Loss: 563.287345
		Validation Loss: 82.630151
	Epoch 3/10
		Training Loss: 302.788562
		Validation Loss: 188.976936
	Epoch 4/10
		Training Loss: 493.418668
		Validation Loss: 564.901319
	Epoch 5/10
		Training Loss: 550.126229
		Validation Loss: 194.641072
	Epoch 6/10
		Training Loss: 615.803178
		Validation Loss: 207.624019
	Epoch 7/10
		Training Loss: 690.096535
		Validation Loss: 250.069527
	Epoch 8/10
		Training Loss: 406.334134
		Validation Loss: 218.396873
	Epoch 9/10
		Training Loss: 377.052106
		Validation Loss: 245.135470
	Epoch 10/10
		Training Loss: 334.427801
		Validation Loss: 241.538370

VAE_NoLogoHighOpacity_128px_5folds Fold 2/5
	Epoch 1/10
		Training Loss: 0.040125
		Validation Loss: 0.037932
	Epoch 2/10
		Training Loss: 91.

### Logo And High Opacity Model: Hyperparameter Optimization, Training And Test
Below block creates Logo And High Opacity VAE Model

In [33]:
# Define the paths for the training and test datasets
train_path = os.getcwd() + "/logo_and_high_opacity_watermark_dataset_train"
test_path = os.getcwd() + "/logo_and_high_opacity_watermark_dataset_test"

# Ensure the dataset paths exist
if not os.path.exists(train_path):
    raise FileNotFoundError(f"Training dataset path not found: {train_path}")
if not os.path.exists(test_path):
    raise FileNotFoundError(f"Test dataset path not found: {test_path}")

# Start the hyperparameter tuning process
print(f"Starting hyperparameter tuning on {device}...")

# Define a descriptive model save name
model_save_name = f"VAE_LogoHighOpacity_{image_size}px_{num_folds}folds"

# Start the training process with the best configuration
print("Starting training with the best configuration...")
startTraining(
    best_config=best_config,
    transform=transform,
    initModelWeights=initModelWeights,
    image_size=image_size,
    num_folds=num_folds,
    train_path=train_path,
    test_path=test_path,
    device=device,
    yolo_model_path=yolo_model_path,   model_save_name=model_save_name
)

print("Training and testing completed successfully.")

Starting hyperparameter tuning on cuda...
Starting training with the best configuration...
Training on device: cuda

VAE_LogoHighOpacity_128px_5folds Fold 1/5
	Epoch 1/10
		Training Loss: 0.148569
		Validation Loss: 0.087056
	Epoch 2/10
		Training Loss: 1332.320631
		Validation Loss: 55.069904
	Epoch 3/10
		Training Loss: 315.469157
		Validation Loss: 252.460368
	Epoch 4/10
		Training Loss: 425.071306
		Validation Loss: 373.795685
	Epoch 5/10
		Training Loss: 261.383133
		Validation Loss: 4.742695
	Epoch 6/10
		Training Loss: 0.529162
		Validation Loss: 0.621556
	Epoch 7/10
		Training Loss: 0.207183
		Validation Loss: 0.264693
	Epoch 8/10
		Training Loss: 0.121647
		Validation Loss: 0.151179
	Epoch 9/10
		Training Loss: 0.118230
		Validation Loss: 0.650892
	Epoch 10/10
		Training Loss: 0.109631
		Validation Loss: 0.142096

VAE_LogoHighOpacity_128px_5folds Fold 2/5
	Epoch 1/10
		Training Loss: 0.075088
		Validation Loss: 0.068638
	Epoch 2/10
		Training Loss: 0.081743
		Validation Loss: 