In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import os
from PIL import Image
from torchvision.transforms import ToTensor
import numpy as np

In [3]:
# Define the forward diffusion process with proper broadcasting
def forward_diffusion(x0, noise, t, T):
    """
    Applies the forward diffusion process with proper broadcasting.
    
    Args:
    - x0: Original image tensor (batch_size, channels, height, width)
    - noise: Gaussian noise tensor with the same shape as x0
    - t: Current time step in the diffusion process (1D tensor of size [batch_size])
    - T: Total number of time steps
    
    Returns:
    - xt: Noised image tensor at time step t
    """
    
    alpha = 1 - (t.view(-1, 1, 1, 1) / T)  # broadcasting
    return alpha * x0 + (1 - alpha) * noise

In [4]:
# UNet model for reverse diffusion (denoising)
class SimpleUNet(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(SimpleUNet, self).__init__()
        
        self.encoder = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU()
        )
        
        self.decoder = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, out_channels, kernel_size=3, padding=1),
            nn.Sigmoid()  
        )
    
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [5]:
# Hyperparameters
T = 1000  # Number of time steps
batch_size = 32
learning_rate = 1e-4
epochs = 15
img_size = (128, 128) 

In [6]:
# Define dataset loading function for underwater images
class UnderwaterDataset(torch.utils.data.Dataset):
    def __init__(self, raw_folder, reference_folder, transform=None):
        self.raw_images = [os.path.join(raw_folder, img) for img in os.listdir(raw_folder)]
        self.reference_images = [os.path.join(reference_folder, img) for img in os.listdir(reference_folder)]
        self.transform = transform

    def __len__(self):
        return len(self.raw_images)
    
    def __getitem__(self, idx):
        raw_image = Image.open(self.raw_images[idx]).convert("RGB")
        reference_image = Image.open(self.reference_images[idx]).convert("RGB")
        
        if self.transform:
            raw_image = self.transform(raw_image)
            reference_image = self.transform(reference_image)
        
        return raw_image, reference_image

In [7]:
# Define transformations for underwater images
transform = transforms.Compose([
    transforms.Resize(img_size),
    transforms.ToTensor()
])

In [8]:
# Paths to training and testing dataset folders
train_raw_folder = '/kaggle/input/wec-task-2/Train/Raw'
train_reference_folder = '/kaggle/input/wec-task-2/Train/Reference'
test_raw_folder = '/kaggle/input/wec-task-2/Test/Raw'
test_reference_folder = '/kaggle/input/wec-task-2/Test/Reference'

In [9]:
# Load training and testing datasets
train_dataset = UnderwaterDataset(train_raw_folder, train_reference_folder, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [10]:
# Model, optimizer, loss
model = SimpleUNet(in_channels=3, out_channels=3)  
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

In [11]:
# Training loop
for epoch in range(epochs):
    model.train()
    
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        
        # Apply forward diffusion (add noise)
        noise = torch.randn_like(data)
        t = torch.randint(0, T, (data.shape[0],)).to(device)  # Random time steps
        xt = forward_diffusion(data, noise, t, T)
        
        # Denoising step: predict original image from noisy image
        reconstructed = model(xt)
        loss = criterion(reconstructed, target)
        
        loss.backward()
        optimizer.step()
    
    print(f'Epoch {epoch + 1}/{epochs}, Loss: {loss.item()}')


Epoch 1/15, Loss: 0.06085292622447014
Epoch 2/15, Loss: 0.05511225387454033
Epoch 3/15, Loss: 0.04923778027296066
Epoch 4/15, Loss: 0.04560481384396553
Epoch 5/15, Loss: 0.039505694061517715
Epoch 6/15, Loss: 0.04550008103251457
Epoch 7/15, Loss: 0.045904383063316345
Epoch 8/15, Loss: 0.04249816760420799
Epoch 9/15, Loss: 0.035655662417411804
Epoch 10/15, Loss: 0.047562986612319946
Epoch 11/15, Loss: 0.040187086910009384
Epoch 12/15, Loss: 0.03946516662836075
Epoch 13/15, Loss: 0.03942451253533363
Epoch 14/15, Loss: 0.04312475770711899
Epoch 15/15, Loss: 0.03772418200969696


In [12]:
# Testing and generating enhanced images
test_dataset = UnderwaterDataset(test_raw_folder, test_reference_folder, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [13]:
from skimage.metrics import peak_signal_noise_ratio as psnr
from skimage.metrics import structural_similarity as ssim

# Function to compute MSE
def compute_mse(pred, target):
    return F.mse_loss(pred, target).item()

# Function to compute PSNR and SSIM
def compute_psnr_ssim(pred, target):
    # Convert from PyTorch tensors to NumPy arrays for skimage metrics
    pred = pred.cpu().numpy().transpose(1, 2, 0)  # Convert to HWC
    target = target.cpu().numpy().transpose(1, 2, 0)
    
    # Compute PSNR
    psnr_value = psnr(target, pred, data_range=1.0)
    
    # Compute SSIM (for RGB images, we set multichannel=True)
    ssim_value = ssim(target, pred, multichannel=True, data_range=1.0, win_size=3)
    
    return psnr_value, ssim_value

In [14]:
from PIL import Image
import os
import numpy as np

# Directory to save the enhanced images
output_dir = 'enhanced_images/'
os.makedirs(output_dir, exist_ok=True)

# Save function to convert tensor to image and save
def save_image(tensor, file_path):
    """
    Convert a PyTorch tensor to an image and save it to disk.
    
    Args:
    - tensor: PyTorch tensor of shape (C, H, W)
    - file_path: File path to save the image
    """
    # Convert to numpy array and denormalize (if needed)
    image_np = tensor.cpu().numpy().transpose(1, 2, 0)  # Convert to HWC
    image_np = (image_np * 255).astype(np.uint8)  # Scale from [0, 1] to [0, 255]
    
    # Convert to a PIL image and save
    image_pil = Image.fromarray(image_np)
    image_pil.save(file_path)

# Testing and saving enhanced images
model.eval()
psnr_scores, mse_scores, ssim_scores = [], [], []

with torch.no_grad():
    for batch_idx, (raw_images, reference_images) in enumerate(test_loader):
        raw_images, reference_images = raw_images.to(device), reference_images.to(device)
        
        # Generate enhanced images by denoising
        enhanced_images = model(raw_images)
        
        # Save each enhanced image
        for i in range(enhanced_images.shape[0]):
            enhanced_image = enhanced_images[i]
            reference_image = reference_images[i]
            
            # Save the enhanced image to disk
            save_path = os.path.join(output_dir, f"enhanced_image_{batch_idx}_{i}.png")
            save_image(enhanced_image, save_path)
            
            # Compute MSE
            mse_value = compute_mse(enhanced_image, reference_image)
            mse_scores.append(mse_value)
            
            # Compute PSNR and SSIM
            psnr_value, ssim_value = compute_psnr_ssim(enhanced_image, reference_image)
            psnr_scores.append(psnr_value)
            ssim_scores.append(ssim_value)

# Calculate the average scores
avg_psnr = np.mean(psnr_scores)
avg_mse = np.mean(mse_scores)
avg_ssim = np.mean(ssim_scores)

print(f"Average PSNR: {avg_psnr}")
print(f"Average MSE: {avg_mse}")
print(f"Average SSIM: {avg_ssim}")

Average PSNR: 16.30504430366424
Average MSE: 0.02973171222072683
Average SSIM: 0.4945477907704117
