<a href="https://colab.research.google.com/github/IYERVKARTHIK/PRODIGY_GA_04/blob/main/PRODIGY_GA_04.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Image-to-Image Translation with cGAN

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
pip install torch torchvision matplotlib

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchvision.utils import save_image
import os

In [7]:
class UNetBlock(nn.Module):
    def __init__(self, in_channels, out_channels, down=True, act='relu', use_dropout=False):
        super().__init__()
        self.down = down
        self.act = nn.ReLU() if act == 'relu' else nn.LeakyReLU(0.2)
        self.conv = nn.Conv2d(in_channels, out_channels, 4, 2, 1, bias=False) if down \
            else nn.ConvTranspose2d(in_channels, out_channels, 4, 2, 1, bias=False)
        self.norm = nn.BatchNorm2d(out_channels)
        self.dropout = nn.Dropout(0.5) if use_dropout else nn.Identity()

    def forward(self, x):
        x = self.conv(x)
        x = self.norm(x)
        x = self.dropout(x)
        return self.act(x)

class GeneratorUNet(nn.Module):
    def __init__(self, in_channels=3, out_channels=3):
        super().__init__()

        self.down1 = UNetBlock(in_channels, 64, act='lrelu', use_dropout=False)
        self.down2 = UNetBlock(64, 128, act='lrelu')
        self.down3 = UNetBlock(128, 256, act='lrelu')
        self.down4 = UNetBlock(256, 512, act='lrelu')
        self.down5 = UNetBlock(512, 512, act='lrelu')
        self.down6 = UNetBlock(512, 512, act='lrelu')
        self.down7 = UNetBlock(512, 512, act='lrelu')
        self.bottleneck = UNetBlock(512, 512, act='relu')

        self.up1 = UNetBlock(512, 512, down=False, use_dropout=True)
        self.up2 = UNetBlock(1024, 512, down=False, use_dropout=True)
        self.up3 = UNetBlock(1024, 512, down=False, use_dropout=True)
        self.up4 = UNetBlock(1024, 512, down=False)
        self.up5 = UNetBlock(1024, 256, down=False)
        self.up6 = UNetBlock(512, 128, down=False)
        self.up7 = UNetBlock(256, 64, down=False)

        self.final = nn.Sequential(
            nn.ConvTranspose2d(128, out_channels, 4, 2, 1),
            nn.Tanh()
        )

    def forward(self, x):
        d1 = self.down1(x)
        d2 = self.down2(d1)
        d3 = self.down3(d2)
        d4 = self.down4(d3)
        d5 = self.down5(d4)
        d6 = self.down6(d5)
        d7 = self.down7(d6)
        bn = self.bottleneck(d7)

        u1 = self.up1(bn)
        u2 = self.up2(torch.cat([u1, d7], 1))
        u3 = self.up3(torch.cat([u2, d6], 1))
        u4 = self.up4(torch.cat([u3, d5], 1))
        u5 = self.up5(torch.cat([u4, d4], 1))
        u6 = self.up6(torch.cat([u5, d3], 1))
        u7 = self.up7(torch.cat([u6, d2], 1))

        return self.final(torch.cat([u7, d1], 1))

In [8]:
class Discriminator(nn.Module):
    def __init__(self, in_channels=3):
        super().__init__()

        def block(in_c, out_c, norm=True):
            layers = [nn.Conv2d(in_c, out_c, 4, 2, 1)]
            if norm:
                layers.append(nn.BatchNorm2d(out_c))
            layers.append(nn.LeakyReLU(0.2))
            return layers

        self.model = nn.Sequential(
            *block(in_channels * 2, 64, norm=False),
            *block(64, 128),
            *block(128, 256),
            *block(256, 512),
            nn.Conv2d(512, 1, 4, 1, 1)  # Patch output
        )

    def forward(self, x, y):
        # x: input image, y: target or generated image
        return self.model(torch.cat([x, y], dim=1))


In [9]:
criterion_GAN = nn.MSELoss()
criterion_L1 = nn.L1Loss()

In [10]:
def train(dataloader, generator, discriminator, g_optimizer, d_optimizer, device):
    generator.train()
    discriminator.train()

    for epoch in range(20):
        for i, (input_image, target_image) in enumerate(dataloader):
            input_image = input_image.to(device)
            target_image = target_image.to(device)

            # -----------------
            # Train Discriminator
            # -----------------
            fake_image = generator(input_image)
            real_pred = discriminator(input_image, target_image)
            fake_pred = discriminator(input_image, fake_image.detach())

            real_loss = criterion_GAN(real_pred, torch.ones_like(real_pred))
            fake_loss = criterion_GAN(fake_pred, torch.zeros_like(fake_pred))
            d_loss = (real_loss + fake_loss) * 0.5

            d_optimizer.zero_grad()
            d_loss.backward()
            d_optimizer.step()

            # -----------------
            # Train Generator
            # -----------------
            fake_pred = discriminator(input_image, fake_image)
            gan_loss = criterion_GAN(fake_pred, torch.ones_like(fake_pred))
            l1 = criterion_L1(fake_image, target_image) * 100

            g_loss = gan_loss + l1

            g_optimizer.zero_grad()
            g_loss.backward()
            g_optimizer.step()

    print(f"Epoch [{epoch}] | D Loss: {d_loss.item():.4f} | G Loss: {g_loss.item():.4f}")
    save_image(fake_image[:4], f"outputs/fake_{epoch}.png", nrow=2, normalize=True)

In [11]:
from PIL import Image
from torch.utils.data import Dataset
import os

class PairedImageDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.input_dir = os.path.join(root_dir, "input")
        self.target_dir = os.path.join(root_dir, "target")
        self.filenames = sorted(os.listdir(self.input_dir))
        self.transform = transform

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        input_path = os.path.join(self.input_dir, self.filenames[idx])
        target_path = os.path.join(self.target_dir, self.filenames[idx])

        input_image = Image.open(input_path).convert("RGB")
        target_image = Image.open(target_path).convert("RGB")

        if self.transform:
            input_image = self.transform(input_image)
            target_image = self.transform(target_image)

        return input_image, target_image


In [24]:
from torchvision import transforms
from torch.utils.data import DataLoader

transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

dataset = PairedImageDataset("/content/drive/MyDrive/Prodigy_Project/Data_set/", transform=transform)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

In [30]:
device = "cuda" if torch.cuda.is_available() else "cpu"
G = GeneratorUNet().to(device)
D = Discriminator().to(device)

g_optimizer = optim.Adam(G.parameters(), lr=2e-4, betas=(0.5, 0.999))
d_optimizer = optim.Adam(D.parameters(), lr=2e-4, betas=(0.5, 0.999))

os.makedirs("outputs", exist_ok=True)
train(dataloader, G, D, g_optimizer, d_optimizer, device)

Epoch [19] | D Loss: 0.1269 | G Loss: 28.1047
