In [10]:
import torch
import numpy as np
import time
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image
import random
import matplotlib.pyplot as plt
import os
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter

# Set the base path for your project in Kaggle.
# Everything this cell writes lands under this directory: the TensorBoard logs
# and 'best_model.pth' from train_ddpm, and the PNG files from show_images.
base_path = '/kaggle/working/YourProjectFolder'  # Change this to your actual project folder path

class SimpleUNet(nn.Module):
    """Four-stage convolutional encoder/decoder with skip connections.

    The network maps a 3-channel image batch to a 3-channel image batch of the
    same spatial size. Attribute names (``enc1`` .. ``dec4``) are part of the
    checkpoint format and must not be renamed.
    """

    def __init__(self):
        super().__init__()
        # Encoder: the channel count doubles at every stage.
        self.enc1 = self.conv_block(3, 64)
        self.enc2 = self.conv_block(64, 128)
        self.enc3 = self.conv_block(128, 256)
        self.enc4 = self.conv_block(256, 512)

        # Decoder: input widths of dec2/dec3/dec4 include the concatenated skip tensor.
        self.dec1 = self.conv_block(512, 256)
        self.dec2 = self.conv_block(512, 128)
        self.dec3 = self.conv_block(256, 64)
        self.dec4 = nn.Conv2d(128, 3, kernel_size=3, padding=1)

    def conv_block(self, in_ch, out_ch):
        """Return two 3x3 same-padding convolutions, each followed by an in-place ReLU."""
        layers = [
            nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        ]
        return nn.Sequential(*layers)

    def forward(self, x, t):
        """Run the network on ``x``.

        Args:
            x: image batch with 3 channels (first conv expects 3 input channels).
            t: timestep value; accepted so callers can pass it, but the network
                does not read it.

        Returns:
            Tensor with 3 channels and the same height/width as ``x``.
        """
        # Contracting path: keep every stage's output for the skip connections.
        skip1 = self.enc1(x)
        skip2 = self.enc2(F.max_pool2d(skip1, 2))
        skip3 = self.enc3(F.max_pool2d(skip2, 2))
        bottleneck = self.enc4(F.max_pool2d(skip3, 2))

        # Expanding path: upsample to the exact size of the matching skip tensor,
        # so odd intermediate sizes still line up for concatenation.
        up = F.interpolate(bottleneck, size=skip3.shape[2:], mode='nearest')
        up = self.dec1(up)
        up = self.dec2(torch.cat([up, skip3], dim=1))

        up = F.interpolate(up, size=skip2.shape[2:], mode='nearest')
        up = self.dec3(torch.cat([up, skip2], dim=1))

        up = F.interpolate(up, size=skip1.shape[2:], mode='nearest')
        return self.dec4(torch.cat([up, skip1], dim=1))

class ImageDataset(Dataset):
    """Dataset of RGB images read from one folder.

    Every image is converted to RGB and resized so that width and height are
    rounded up to the next multiple of 8.

    Args:
        folder_path: directory containing the main images.
        small_images_folder: directory containing the overlay images, or an
            already-loaded iterable of images (stored as-is in
            ``self.small_images``).
        transform: optional callable applied to each PIL image before it is
            returned from ``__getitem__``.
    """

    # Lower-cased suffixes accepted as image files.
    _IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png')

    def __init__(self, folder_path, small_images_folder, transform=None):
        self.folder_path = folder_path
        self.image_files = self._list_image_files(folder_path)
        if isinstance(small_images_folder, (str, os.PathLike)):
            self.small_images = [
                self._read_image(os.path.join(small_images_folder, name))
                for name in self._list_image_files(small_images_folder)
            ]
        else:
            # Callers that already hold the overlay images may pass them directly.
            self.small_images = list(small_images_folder)
        self.transform = transform

    @classmethod
    def _list_image_files(cls, folder):
        """Return the image file names in ``folder`` in sorted order.

        ``os.listdir`` returns names in arbitrary order; sorting makes dataset
        indices reproducible between runs. Matching ignores case so that
        files such as ``photo.JPG`` are not silently skipped.
        """
        return sorted(
            name for name in os.listdir(folder)
            if name.lower().endswith(cls._IMAGE_SUFFIXES)
        )

    @staticmethod
    def _read_image(path):
        """Load an image fully into memory so the file handle can be closed."""
        with Image.open(path) as handle:
            return handle.copy()

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path = os.path.join(self.folder_path, self.image_files[idx])
        # Force three channels: grayscale or RGBA files would otherwise yield
        # tensors with 1 or 4 channels. convert() also decodes the pixels, so
        # the file can be closed right away.
        with Image.open(img_path) as source:
            image = source.convert('RGB')

        # Resize image to ensure dimensions are divisible by 8
        w, h = image.size
        new_w, new_h = ((w + 7) // 8) * 8, ((h + 7) // 8) * 8
        if (new_w, new_h) != (w, h):
            image = image.resize((new_w, new_h), Image.LANCZOS)

        if self.transform:
            image = self.transform(image)

        return image

def place_image(main_img, small_img):
    """Paste a faint copy of ``small_img`` at a random position on ``main_img``.

    Args:
        main_img: float array scaled to [0, 1] that ``Image.fromarray`` can
            interpret after conversion to 8 bit (assumes H x W x 3 -- TODO
            confirm with callers). Values outside [0, 1] are clipped.
        small_img: PIL image used as the overlay; it is not modified.

    Returns:
        A new float array in [0, 1] holding the composited image.
    """
    # Round and clip before the 8-bit cast: a bare astype(np.uint8) truncates
    # (so k/255 can come back as k-1) and wraps values outside [0, 1].
    scaled = np.asarray(main_img, dtype=np.float64) * 255.0
    main_img_pil = Image.fromarray(np.clip(np.rint(scaled), 0, 255).astype(np.uint8))
    small_img_pil = small_img.convert("RGBA")  # convert() returns a new image

    # Get dimensions
    main_width, main_height = main_img_pil.size

    # Shrink the overlay to at most a quarter of the canvas in each direction
    # (never below one pixel, so very small canvases still work).
    small_img_pil.thumbnail((max(1, main_width // 4), max(1, main_height // 4)), Image.LANCZOS)
    small_width, small_height = small_img_pil.size

    # Generate random position
    x = random.randint(0, main_width - small_width)
    y = random.randint(0, main_height - small_height)

    # Adjust opacity, vectorised over all pixels.
    rgba = np.array(small_img_pil)
    # NOTE(review): only the red channel decides what counts as "white"; this
    # matches the previous per-pixel loop, so e.g. pure red is dropped as well.
    near_white = rgba[..., 0] >= 200
    rgba[..., 3] = 10  # low alpha (0-255) for the pixels that are kept
    rgba[near_white] = (255, 255, 255, 0)  # fully transparent
    small_img_pil = Image.fromarray(rgba)

    # Paste the image onto the main image, using its own alpha channel as mask
    main_img_pil.paste(small_img_pil, (x, y), small_img_pil)

    # Convert back to numpy array
    return np.array(main_img_pil) / 255.0

def add_image_noise(image, small_images, t, max_t, max_images=300):
    """Overlay a timestep-dependent number of small images onto ``image``.

    Args:
        image: numpy array in height x width x channel layout (it is permuted
            to channel-first on return); it is copied, not modified.
        small_images: sequence of overlay images to draw from at random.
        t: current timestep; ``t == 0`` pastes nothing.
        max_t: total number of timesteps, used to scale ``t``.
        max_images: number of overlays that ``t == max_t`` would correspond
            to. Defaults to 300.

    Returns:
        float32 tensor of shape (1, channels, height, width).
    """
    # Number of small images to paste based on timestep
    num_images = int((t / max_t) * max_images)

    noisy_image = image.copy()
    for _ in range(num_images):
        noisy_image = place_image(noisy_image, random.choice(small_images))

    # HWC array -> 1 x C x H x W tensor
    return torch.from_numpy(noisy_image).permute(2, 0, 1).unsqueeze(0).float()

def show_images(noisy_image, denoised_image, title):
    """Save ``denoised_image`` and a side-by-side comparison figure under ``base_path``.

    Two files are written: ``{title}.PNG`` (the denoised image alone) and
    ``{title}_comparison.png`` (noisy on the left, denoised on the right).

    Args:
        noisy_image: image accepted by matplotlib's ``imshow``.
        denoised_image: image with a ``save(path)`` method (e.g. a PIL image)
            that ``imshow`` also accepts.
        title: figure title and file name stem.
    """
    # The target directory may not exist yet.
    os.makedirs(base_path, exist_ok=True)

    denoised_image_path = os.path.join(base_path, f"{title}.PNG")
    denoised_image.save(denoised_image_path)

    fig, (noisy_ax, denoised_ax) = plt.subplots(1, 2, figsize=(10, 5))
    try:
        noisy_ax.imshow(noisy_image)
        noisy_ax.set_title('Noisy Image')
        noisy_ax.axis('off')

        denoised_ax.imshow(denoised_image)
        denoised_ax.set_title('Denoised Image')
        denoised_ax.axis('off')

        fig.suptitle(title)
        fig.savefig(os.path.join(base_path, f"{title}_comparison.png"))
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)

def _denoising_loss(model, clean_batch, clean_array, small_images, t, num_timesteps, device):
    """Noise ``clean_array`` for timestep ``t`` and return the model's MSE against ``clean_batch``."""
    noisy_image = add_image_noise(clean_array, small_images, t, num_timesteps).to(device)
    timestep = torch.tensor([t], dtype=torch.float32, device=device)
    predicted = model(noisy_image, timestep)
    return F.mse_loss(predicted, clean_batch)


def train_ddpm(model, optimizer, train_loader, val_loader, small_images, num_timesteps, num_epochs, device):
    """Train ``model`` to recover clean images from images covered with overlays.

    For every batch and every timestep ``t`` in ``range(num_timesteps)`` the
    clean image is noised with ``add_image_noise`` and the model is optimised
    with an MSE loss against the clean image. After each epoch the same loss is
    evaluated on ``val_loader``; the weights with the lowest validation loss
    are saved to ``best_model.pth`` under ``base_path`` and both losses are
    logged to TensorBoard under ``base_path/logs``.

    Args:
        model: network called as ``model(noisy_image, timestep)``.
        optimizer: optimizer over the model's parameters.
        train_loader: loader yielding image tensors; batches are squeezed on
            dim 0, so a batch size of 1 is expected.
        val_loader: loader for validation, same format as ``train_loader``.
        small_images: overlay images handed to ``add_image_noise``.
        num_timesteps: number of noise levels trained per batch.
        num_epochs: number of passes over ``train_loader``.
        device: torch device (or device string) to run on.

    Raises:
        ValueError: if ``num_timesteps`` is not positive or a loader is empty.
    """
    # Fail before any work is done instead of dividing by zero after an epoch.
    if num_timesteps <= 0:
        raise ValueError("num_timesteps must be a positive integer")
    if len(train_loader) == 0 or len(val_loader) == 0:
        raise ValueError("train_loader and val_loader must each yield at least one batch")

    model.to(device)
    best_val_loss = float('inf')
    total_start_time = time.time()
    model_save_path = os.path.join(base_path, 'best_model.pth')
    writer = SummaryWriter(os.path.join(base_path, 'logs'))

    try:
        for epoch in range(num_epochs):
            epoch_start_time = time.time()
            model.train()
            train_loss = 0

            for batch_idx, batch in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")):
                main_image = batch.to(device)
                # The HWC numpy view of the clean image is the same for every
                # timestep, so build it once per batch.
                clean_array = main_image.cpu().squeeze(0).permute(1, 2, 0).numpy()

                for t in range(num_timesteps):
                    optimizer.zero_grad()
                    loss = _denoising_loss(
                        model, main_image, clean_array, small_images, t, num_timesteps, device
                    )
                    loss.backward()
                    optimizer.step()

                    train_loss += loss.item()

                # Print progress for every batch (loss of the last timestep)
                print(f"Epoch {epoch+1}/{num_epochs}, Batch {batch_idx+1}/{len(train_loader)}, Loss: {loss.item():.4f}")

            # Validation
            model.eval()
            val_loss = 0
            with torch.no_grad():
                for batch in val_loader:
                    main_image = batch.to(device)
                    clean_array = main_image.cpu().squeeze(0).permute(1, 2, 0).numpy()

                    for t in range(num_timesteps):
                        loss = _denoising_loss(
                            model, main_image, clean_array, small_images, t, num_timesteps, device
                        )
                        val_loss += loss.item()

            # Average over every (batch, timestep) pair.
            train_loss /= len(train_loader) * num_timesteps
            val_loss /= len(val_loader) * num_timesteps

            # Log metrics
            writer.add_scalar('Loss/train', train_loss, epoch)
            writer.add_scalar('Loss/val', val_loss, epoch)

            epoch_end_time = time.time()
            epoch_duration = epoch_end_time - epoch_start_time
            total_duration = epoch_end_time - total_start_time
            # Extrapolate from the epoch that just finished.
            estimated_time_left = (num_epochs - epoch - 1) * epoch_duration

            print(f"Epoch {epoch+1}/{num_epochs}")
            print(f"Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
            print(f"Epoch duration: {epoch_duration:.2f} seconds")
            print(f"Total duration: {total_duration:.2f} seconds")
            print(f"Estimated time left: {estimated_time_left:.2f} seconds")

            # Save the best model
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                torch.save(model.state_dict(), model_save_path)

        print(f"Training completed in {time.time() - total_start_time:.2f} seconds")
    finally:
        # Close the SummaryWriter even if training is interrupted.
        writer.close()

def inference(model, device, image_folder, small_images, num_timesteps):
    """Run the noise/denoise loop on every image in ``image_folder`` and save the results.

    For each image the loop walks ``t`` from ``num_timesteps - 1`` down to 0,
    noising the current image with ``add_image_noise`` and replacing it with
    the model's prediction. The input image and the final prediction are then
    passed to ``show_images`` under the title ``inference_result_{i}``.

    Args:
        model: trained network called as ``model(noisy_image, timestep)``.
        device: torch device (or device string) to run on.
        image_folder: directory with the images to process.
        small_images: folder containing the overlay images (preferred), or an
            already-loaded list of PIL images. The value is handed to
            ``ImageDataset`` unchanged, so the list form only works if its
            constructor tolerates a list in place of a folder path.
        num_timesteps: number of noise/denoise iterations per image.
    """
    model.to(device)
    model.eval()

    transform = transforms.Compose([
        transforms.ToTensor(),
    ])
    to_pil = transforms.ToPILImage()

    dataset = ImageDataset(image_folder, small_images, transform=transform)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False)

    if isinstance(small_images, (str, os.PathLike)):
        # A folder was given: load the overlays used for noising from it.
        small_images_folder = small_images
        small_images = [Image.open(os.path.join(small_images_folder, f)) for f in os.listdir(small_images_folder) if f.endswith(('jpg', 'png', 'jpeg'))]

    with torch.no_grad():
        for i, main_image in enumerate(dataloader):
            main_image = main_image.to(device)

            current = main_image
            for t in reversed(range(num_timesteps)):
                current_array = current.cpu().squeeze(0).permute(1, 2, 0).numpy()
                noisy_image = add_image_noise(current_array, small_images, t, num_timesteps).to(device)
                timestep = torch.tensor([t], dtype=torch.float32, device=device)
                # Keep predictions inside the valid pixel range; the 8-bit
                # conversions downstream would otherwise wrap around.
                current = model(noisy_image, timestep).clamp(0.0, 1.0)

            # Convert tensors to PIL Images for saving
            input_image_pil = to_pil(main_image.squeeze(0).cpu())
            denoised_image_pil = to_pil(current.squeeze(0).cpu())

            # show_images(noisy, denoised, title): the model output belongs in
            # the second slot, which is the one saved as '{title}.PNG'.
            show_images(input_image_pil, denoised_image_pil, f'inference_result_{i}')

# Training schedule -- set based on your requirements
num_timesteps = 10
num_epochs = 5

# Input folders (Kaggle datasets) and the output location
train_folder = '/kaggle/input/train-f1'
val_folder = '/kaggle/input/validation1'
small_images_folder = '/kaggle/input/small-images1'
output_dir = os.path.join(base_path, 'output')

# Images are only converted to tensors; no augmentation is applied
transform = transforms.Compose([transforms.ToTensor()])

# Training data: one image per batch, reshuffled every epoch
train_dataset = ImageDataset(train_folder, small_images_folder, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)

# Validation data: one image per batch, fixed order
val_dataset = ImageDataset(val_folder, small_images_folder, transform=transform)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)

# Model and optimizer
model = SimpleUNet()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Overlay images pasted onto the training images as noise
small_images = [
    Image.open(os.path.join(small_images_folder, f))
    for f in os.listdir(small_images_folder)
    if f.endswith(('jpg', 'png', 'jpeg'))
]

# Train on the GPU when one is available
train_ddpm(
    model,
    optimizer,
    train_loader,
    val_loader,
    small_images,
    num_timesteps,
    num_epochs,
    device='cuda' if torch.cuda.is_available() else 'cpu',
)



Epoch 1/5:  20%|██        | 1/5 [01:43<06:52, 103.07s/it]

Epoch 1/5, Batch 1/5, Loss: 0.1768


Epoch 1/5:  40%|████      | 2/5 [03:27<05:11, 103.90s/it]

Epoch 1/5, Batch 2/5, Loss: 0.0143


Epoch 1/5:  60%|██████    | 3/5 [05:11<03:27, 103.78s/it]

Epoch 1/5, Batch 3/5, Loss: 0.0276


Epoch 1/5:  80%|████████  | 4/5 [06:55<01:43, 103.94s/it]

Epoch 1/5, Batch 4/5, Loss: 0.0103


Epoch 1/5: 100%|██████████| 5/5 [08:34<00:00, 102.83s/it]

Epoch 1/5, Batch 5/5, Loss: 0.0188





Epoch 1/5
Train Loss: 0.0749, Val Loss: 0.0331
Epoch duration: 1033.24 seconds
Total duration: 1033.24 seconds
Estimated time left: 4132.96 seconds


Epoch 2/5:  20%|██        | 1/5 [01:39<06:39, 99.96s/it]

Epoch 2/5, Batch 1/5, Loss: 0.0064


Epoch 2/5:  40%|████      | 2/5 [03:18<04:57, 99.01s/it]

Epoch 2/5, Batch 2/5, Loss: 0.0184


Epoch 2/5:  60%|██████    | 3/5 [05:01<03:22, 101.09s/it]

Epoch 2/5, Batch 3/5, Loss: 0.0063


Epoch 2/5:  80%|████████  | 4/5 [06:43<01:41, 101.41s/it]

Epoch 2/5, Batch 4/5, Loss: 0.0087


Epoch 2/5: 100%|██████████| 5/5 [08:21<00:00, 100.32s/it]

Epoch 2/5, Batch 5/5, Loss: 0.0141





Epoch 2/5
Train Loss: 0.0118, Val Loss: 0.0160
Epoch duration: 1013.43 seconds
Total duration: 2046.71 seconds
Estimated time left: 3040.28 seconds


Epoch 3/5:  20%|██        | 1/5 [01:38<06:32, 98.17s/it]

Epoch 3/5, Batch 1/5, Loss: 0.0044


Epoch 3/5:  40%|████      | 2/5 [03:23<05:06, 102.28s/it]

Epoch 3/5, Batch 2/5, Loss: 0.0125


Epoch 3/5:  60%|██████    | 3/5 [05:10<03:29, 104.66s/it]

Epoch 3/5, Batch 3/5, Loss: 0.0055


Epoch 3/5:  80%|████████  | 4/5 [06:59<01:46, 106.38s/it]

Epoch 3/5, Batch 4/5, Loss: 0.0119


Epoch 3/5: 100%|██████████| 5/5 [08:48<00:00, 105.60s/it]

Epoch 3/5, Batch 5/5, Loss: 0.0162





Epoch 3/5
Train Loss: 0.0082, Val Loss: 0.0143
Epoch duration: 1056.48 seconds
Total duration: 3103.27 seconds
Estimated time left: 2112.97 seconds


Epoch 4/5:  20%|██        | 1/5 [01:45<07:02, 105.51s/it]

Epoch 4/5, Batch 1/5, Loss: 0.0061


Epoch 4/5:  40%|████      | 2/5 [03:32<05:19, 106.67s/it]

Epoch 4/5, Batch 2/5, Loss: 0.0114


Epoch 4/5:  60%|██████    | 3/5 [05:21<03:34, 107.46s/it]

Epoch 4/5, Batch 3/5, Loss: 0.0078


Epoch 4/5:  80%|████████  | 4/5 [07:00<01:44, 104.27s/it]

Epoch 4/5, Batch 4/5, Loss: 0.0136


Epoch 4/5: 100%|██████████| 5/5 [08:44<00:00, 104.82s/it]

Epoch 4/5, Batch 5/5, Loss: 0.0044





Epoch 4/5
Train Loss: 0.0070, Val Loss: 0.0125
Epoch duration: 1052.65 seconds
Total duration: 4155.99 seconds
Estimated time left: 1052.65 seconds


Epoch 5/5:  20%|██        | 1/5 [01:49<07:19, 109.79s/it]

Epoch 5/5, Batch 1/5, Loss: 0.0126


Epoch 5/5:  40%|████      | 2/5 [03:32<05:17, 105.90s/it]

Epoch 5/5, Batch 2/5, Loss: 0.0068


Epoch 5/5:  60%|██████    | 3/5 [05:18<03:31, 105.51s/it]

Epoch 5/5, Batch 3/5, Loss: 0.0038


Epoch 5/5:  80%|████████  | 4/5 [06:56<01:42, 102.84s/it]

Epoch 5/5, Batch 4/5, Loss: 0.0053


Epoch 5/5: 100%|██████████| 5/5 [08:34<00:00, 102.89s/it]

Epoch 5/5, Batch 5/5, Loss: 0.0134





Epoch 5/5
Train Loss: 0.0059, Val Loss: 0.0118
Epoch duration: 1010.27 seconds
Total duration: 5166.32 seconds
Estimated time left: 0.00 seconds
Training completed in 5166.38 seconds


Code For Inference: 

In [9]:
import torch
import numpy as np
import time
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image
import random
import matplotlib.pyplot as plt
import os
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter


# Directory where show_images writes '{title}.PNG' and '{title}_comparison.png'.
base_path = '/kaggle/working'
def show_images(noisy_image, denoised_image, title):
    """Save ``denoised_image`` and a side-by-side comparison figure under ``base_path``.

    Two files are written: ``{title}.PNG`` (the denoised image alone) and
    ``{title}_comparison.png`` (noisy on the left, denoised on the right).

    Args:
        noisy_image: image accepted by matplotlib's ``imshow``.
        denoised_image: image with a ``save(path)`` method (e.g. a PIL image)
            that ``imshow`` also accepts.
        title: figure title and file name stem.
    """
    # The target directory may not exist yet.
    os.makedirs(base_path, exist_ok=True)

    denoised_image_path = os.path.join(base_path, f"{title}.PNG")
    denoised_image.save(denoised_image_path)

    fig, (noisy_ax, denoised_ax) = plt.subplots(1, 2, figsize=(10, 5))
    try:
        noisy_ax.imshow(noisy_image)
        noisy_ax.set_title('Noisy Image')
        noisy_ax.axis('off')

        denoised_ax.imshow(denoised_image)
        denoised_ax.set_title('Denoised Image')
        denoised_ax.axis('off')

        fig.suptitle(title)
        fig.savefig(os.path.join(base_path, f"{title}_comparison.png"))
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)



def add_image_noise(image, small_images, t, max_t, max_images=300):
    """Overlay a timestep-dependent number of small images onto ``image``.

    Args:
        image: numpy array in height x width x channel layout (it is permuted
            to channel-first on return); it is copied, not modified.
        small_images: sequence of overlay images to draw from at random.
        t: current timestep; ``t == 0`` pastes nothing.
        max_t: total number of timesteps, used to scale ``t``.
        max_images: number of overlays that ``t == max_t`` would correspond
            to. Defaults to 300.

    Returns:
        float32 tensor of shape (1, channels, height, width).
    """
    # Number of small images to paste based on timestep
    num_images = int((t / max_t) * max_images)

    noisy_image = image.copy()
    for _ in range(num_images):
        noisy_image = place_image(noisy_image, random.choice(small_images))

    # HWC array -> 1 x C x H x W tensor
    return torch.from_numpy(noisy_image).permute(2, 0, 1).unsqueeze(0).float()



class ImageDataset(Dataset):
    """Dataset of RGB images read from one folder.

    Every image is converted to RGB and resized so that width and height are
    rounded up to the next multiple of 8.

    Args:
        folder_path: directory containing the main images.
        small_images_folder: directory containing the overlay images, or an
            already-loaded iterable of images (stored as-is in
            ``self.small_images``).
        transform: optional callable applied to each PIL image before it is
            returned from ``__getitem__``.
    """

    # Lower-cased suffixes accepted as image files.
    _IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png')

    def __init__(self, folder_path, small_images_folder, transform=None):
        self.folder_path = folder_path
        self.image_files = self._list_image_files(folder_path)
        if isinstance(small_images_folder, (str, os.PathLike)):
            self.small_images = [
                self._read_image(os.path.join(small_images_folder, name))
                for name in self._list_image_files(small_images_folder)
            ]
        else:
            # Callers that already hold the overlay images may pass them directly.
            self.small_images = list(small_images_folder)
        self.transform = transform

    @classmethod
    def _list_image_files(cls, folder):
        """Return the image file names in ``folder`` in sorted order.

        ``os.listdir`` returns names in arbitrary order; sorting makes dataset
        indices reproducible between runs. Matching ignores case so that
        files such as ``photo.JPG`` are not silently skipped.
        """
        return sorted(
            name for name in os.listdir(folder)
            if name.lower().endswith(cls._IMAGE_SUFFIXES)
        )

    @staticmethod
    def _read_image(path):
        """Load an image fully into memory so the file handle can be closed."""
        with Image.open(path) as handle:
            return handle.copy()

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path = os.path.join(self.folder_path, self.image_files[idx])
        # Force three channels: grayscale or RGBA files would otherwise yield
        # tensors with 1 or 4 channels. convert() also decodes the pixels, so
        # the file can be closed right away.
        with Image.open(img_path) as source:
            image = source.convert('RGB')

        # Resize image to ensure dimensions are divisible by 8
        w, h = image.size
        new_w, new_h = ((w + 7) // 8) * 8, ((h + 7) // 8) * 8
        if (new_w, new_h) != (w, h):
            image = image.resize((new_w, new_h), Image.LANCZOS)

        if self.transform:
            image = self.transform(image)

        return image

    
def place_image(main_img, small_img):
    """Paste a faint copy of ``small_img`` at a random position on ``main_img``.

    Args:
        main_img: float array scaled to [0, 1] that ``Image.fromarray`` can
            interpret after conversion to 8 bit (assumes H x W x 3 -- TODO
            confirm with callers). Values outside [0, 1] are clipped.
        small_img: PIL image used as the overlay; it is not modified.

    Returns:
        A new float array in [0, 1] holding the composited image.
    """
    # Round and clip before the 8-bit cast: a bare astype(np.uint8) truncates
    # (so k/255 can come back as k-1) and wraps values outside [0, 1].
    scaled = np.asarray(main_img, dtype=np.float64) * 255.0
    main_img_pil = Image.fromarray(np.clip(np.rint(scaled), 0, 255).astype(np.uint8))
    small_img_pil = small_img.convert("RGBA")  # convert() returns a new image

    # Get dimensions
    main_width, main_height = main_img_pil.size

    # Shrink the overlay to at most a quarter of the canvas in each direction
    # (never below one pixel, so very small canvases still work).
    small_img_pil.thumbnail((max(1, main_width // 4), max(1, main_height // 4)), Image.LANCZOS)
    small_width, small_height = small_img_pil.size

    # Generate random position
    x = random.randint(0, main_width - small_width)
    y = random.randint(0, main_height - small_height)

    # Adjust opacity, vectorised over all pixels.
    rgba = np.array(small_img_pil)
    # NOTE(review): only the red channel decides what counts as "white"; this
    # matches the previous per-pixel loop, so e.g. pure red is dropped as well.
    near_white = rgba[..., 0] >= 200
    rgba[..., 3] = 10  # low alpha (0-255) for the pixels that are kept
    rgba[near_white] = (255, 255, 255, 0)  # fully transparent
    small_img_pil = Image.fromarray(rgba)

    # Paste the image onto the main image, using its own alpha channel as mask
    main_img_pil.paste(small_img_pil, (x, y), small_img_pil)

    # Convert back to numpy array
    return np.array(main_img_pil) / 255.0

def inference(model, device, image_folder, small_images_folder, num_timesteps):
    """Run the noise/denoise loop on every image in ``image_folder`` and save the results.

    For each image the loop walks ``t`` from ``num_timesteps - 1`` down to 0,
    noising the current image with ``add_image_noise`` and replacing it with
    the model's prediction. The input image and the final prediction are then
    passed to ``show_images`` under the title ``inference_result_{i}``.

    Args:
        model: trained network called as ``model(noisy_image, timestep)``.
        device: torch device (or device string) to run on.
        image_folder: directory with the images to process.
        small_images_folder: directory with the overlay images used as noise.
        num_timesteps: number of noise/denoise iterations per image.
    """
    model.to(device)
    model.eval()

    transform = transforms.Compose([
        transforms.ToTensor(),
    ])
    to_pil = transforms.ToPILImage()

    dataset = ImageDataset(image_folder, small_images_folder, transform=transform)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False)

    # Load small images here
    small_images = [Image.open(os.path.join(small_images_folder, f)) for f in os.listdir(small_images_folder) if f.endswith(('jpg', 'png', 'jpeg'))]

    with torch.no_grad():
        for i, main_image in enumerate(dataloader):
            main_image = main_image.to(device)

            current = main_image
            for t in reversed(range(num_timesteps)):
                current_array = current.cpu().squeeze(0).permute(1, 2, 0).numpy()
                noisy_image = add_image_noise(current_array, small_images, t, num_timesteps).to(device)
                timestep = torch.tensor([t], dtype=torch.float32, device=device)
                # Keep predictions inside the valid pixel range; the 8-bit
                # conversions downstream would otherwise wrap around.
                current = model(noisy_image, timestep).clamp(0.0, 1.0)

            # Convert tensors to PIL Images for saving
            input_image_pil = to_pil(main_image.squeeze(0).cpu())
            denoised_image_pil = to_pil(current.squeeze(0).cpu())

            # show_images(noisy, denoised, title): the model output belongs in
            # the second slot, which is the one saved as '{title}.PNG'.
            show_images(input_image_pil, denoised_image_pil, f'inference_result_{i}')


class SimpleUNet(nn.Module):
    """Four-stage convolutional encoder/decoder with skip connections.

    The network maps a 3-channel image batch to a 3-channel image batch of the
    same spatial size. Attribute names (``enc1`` .. ``dec4``) are part of the
    checkpoint format and must not be renamed.
    """

    def __init__(self):
        super().__init__()
        # Encoder: the channel count doubles at every stage.
        self.enc1 = self.conv_block(3, 64)
        self.enc2 = self.conv_block(64, 128)
        self.enc3 = self.conv_block(128, 256)
        self.enc4 = self.conv_block(256, 512)

        # Decoder: input widths of dec2/dec3/dec4 include the concatenated skip tensor.
        self.dec1 = self.conv_block(512, 256)
        self.dec2 = self.conv_block(512, 128)
        self.dec3 = self.conv_block(256, 64)
        self.dec4 = nn.Conv2d(128, 3, kernel_size=3, padding=1)

    def conv_block(self, in_ch, out_ch):
        """Return two 3x3 same-padding convolutions, each followed by an in-place ReLU."""
        layers = [
            nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        ]
        return nn.Sequential(*layers)

    def forward(self, x, t):
        """Run the network on ``x``.

        Args:
            x: image batch with 3 channels (first conv expects 3 input channels).
            t: timestep value; accepted so callers can pass it, but the network
                does not read it.

        Returns:
            Tensor with 3 channels and the same height/width as ``x``.
        """
        # Contracting path: keep every stage's output for the skip connections.
        skip1 = self.enc1(x)
        skip2 = self.enc2(F.max_pool2d(skip1, 2))
        skip3 = self.enc3(F.max_pool2d(skip2, 2))
        bottleneck = self.enc4(F.max_pool2d(skip3, 2))

        # Expanding path: upsample to the exact size of the matching skip tensor,
        # so odd intermediate sizes still line up for concatenation.
        up = F.interpolate(bottleneck, size=skip3.shape[2:], mode='nearest')
        up = self.dec1(up)
        up = self.dec2(torch.cat([up, skip3], dim=1))

        up = F.interpolate(up, size=skip2.shape[2:], mode='nearest')
        up = self.dec3(torch.cat([up, skip2], dim=1))

        up = F.interpolate(up, size=skip1.shape[2:], mode='nearest')
        return self.dec4(torch.cat([up, skip1], dim=1))




inference_folder = '/kaggle/input/inference'  # Replace with your inference images folder
small_images_folder = '/kaggle/input/small-images1'
model_path = '/kaggle/input/model-best/best_model (2).pth'

# Set device first so the checkpoint can be mapped onto it
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# map_location lets a checkpoint that was saved from a GPU run load on a
# CPU-only session as well.
model = SimpleUNet()
model.load_state_dict(torch.load(model_path, map_location=device))

# Get small images for adding noise
# (inference() loads its own copy from small_images_folder; this list is kept
# for interactive use in the notebook.)
small_images = [Image.open(os.path.join(small_images_folder, f)) for f in os.listdir(small_images_folder) if f.endswith(('jpg', 'png', 'jpeg'))]

num_timesteps = 10  # Set based on your requirements
num_epochs = 5  # Set based on your requirements

# Run inference
inference(model, device, inference_folder, small_images_folder, num_timesteps)
