<th><Name></th>

<th><Name></th>

## <font color = green> Import Libraries</font>

In [None]:
#import numpy as np
import os
from tqdm import tqdm
import matplotlib.pyplot as plt
import random
from torch.utils.data import DataLoader

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
!pip install torchsummary
from torchsummary import summary

## <font color = green> Set Paths</font>

In [None]:
# Dataset root. The trailing slash matters: sub-folder names are appended to
# this string by plain concatenation.
home = '/kaggle/input/landscape-image-colorization/landscape Images/'

# Number of entries found in the colour-image folder
total_images = len(os.listdir(f'{home}color'))

## <font color = green> Set up Training and Test Indices </font>
> 80% images are used for training

In [None]:
# Shuffle all image indices with a dedicated, seeded generator so the
# train/test split is the same on every "Restart & Run All". A private
# random.Random instance is used so the global `random` state is not touched.
SPLIT_SEED = 42        # seed for the split only
TRAIN_FRACTION = 0.8   # share of images used for training

split_rng = random.Random(SPLIT_SEED)
random_indices = split_rng.sample(range(total_images), total_images)

# First 80% of the shuffled indices train the model, the rest are held out
train_nums = round(total_images * TRAIN_FRACTION)
train_indices = random_indices[:train_nums]
test_indices = random_indices[train_nums:]
len(train_indices), len(test_indices)

## <font color = green> Encoder Dataset </font>
> Class takes input of training and test indices and creates datasets accordingly
> Image transformations are applied inside the dataset class only

### <font color = orange> Steps </font>
 - read image
 - repeat the grayscale image channel 3 times to create a pseudo-RGB image. The two extra channels are discarded later; this step exists only so the image can be converted from RGB to the LAB color space
 - divide RGB image by 255 to make values between (0,1)
 - rgb2lab() returns L in the range (0, 100), a in the range (-128 (green), 127 (red)) and b in the range (-128 (blue), 127 (yellow)). Hence tensor([0,128,128]) is added to the channels, followed by division by tensor([100,255,255]) to normalize the values.
 - Take L channel from GrayScale image to predict a,b Channel from Color Image


In [None]:
# Import necessary libraries
import os                           # For handling file and directory paths
import pandas as pd                 # For working with tabular data (not used in this code)
from torchvision.io import read_image  # For reading image files as PyTorch tensors
from torch.utils.data import Dataset   # Base class for creating custom datasets in PyTorch
import torch.nn.functional as nnFunctions     # For neural network functions like interpolation
import torch                        # PyTorch library for tensors and operations
from skimage.color import rgb2lab, lab2rgb  # For converting between RGB and LAB color spaces

# Define a custom dataset class for handling grayscale and color images
class EncoderDataset(Dataset):
    """
    Paired grayscale / colour image dataset for LAB colourisation.

    Item ``i`` is built from the files ``<img_dir>gray/<n>.jpg`` and
    ``<img_dir>color/<n>.jpg`` where ``n = indices[i]``. Both images are
    resized, converted to LAB and normalised; the item returned is
    ``(L, ab)``: the single L channel of the grayscale image as input and the
    two a/b channels of the colour image as target.
    """

    def __init__(self, indices, img_dir, transform=None, image_size=(160, 160)):
        """
        Initialize the dataset with image indices, directory paths, and optional transforms.

        Args:
            indices: sequence of image numbers belonging to this split.
            img_dir: base directory (with trailing slash) holding the
                ``gray/`` and ``color/`` sub-folders.
            transform: optional callable applied to the 3-channel grayscale
                input for roughly half of the samples.
            image_size: (height, width) every image is resized to.
        """
        self.img_dir = img_dir                      # Base directory containing images
        self.transform = transform                  # Transformations to apply to images (if any)
        self.img_indices = indices                  # List of image indices
        self.image_size = image_size                # Target (height, width) after resizing
        self.gray_path = img_dir + 'gray/'          # Directory containing grayscale images
        self.color_path = img_dir + 'color/'        # Directory containing color images

    def __len__(self):
        """
        Return the total number of images in the dataset.
        """
        return len(self.img_indices)

    def _load_resized(self, path):
        """
        Read the image at ``path`` and resize it to ``self.image_size``.
        """
        img = read_image(path)
        # interpolate() expects a batch dimension, so add it and drop it again
        img = nnFunctions.interpolate(img.unsqueeze(0), self.image_size)
        return img.squeeze(0)

    @staticmethod
    def _to_normalized_lab(rgb):
        """
        Convert a [C, H, W] RGB tensor with 0-255 values to normalised LAB, [C, H, W].
        """
        # rgb2lab works on [H, W, C] input scaled to 0..1
        lab = torch.tensor(rgb2lab(rgb.permute(1, 2, 0) / 255))
        # Shift a/b by 128 and divide by the channel ranges to normalise
        lab = (lab + torch.tensor([0, 128, 128])) / torch.tensor([100, 255, 255])
        return lab.permute(2, 0, 1)

    def __getitem__(self, idx):
        """
        Retrieve a single item (image and label) by index.

        Returns:
            tuple ``(image, label)``: ``image`` is the L channel with shape
            [1, H, W], ``label`` holds the a and b channels with shape [2, H, W].
        """
        # Translate the dataset position into the file number of this split.
        # Using `idx` directly would ignore the split and make every dataset
        # read the files 0..len-1.
        img_name = str(self.img_indices[idx]) + '.jpg'

        # Grayscale input, duplicated to 3 channels to form a pseudo-RGB image
        image = self._load_resized(self.gray_path + img_name)
        image = image.repeat(3, 1, 1)

        # Apply the optional transform to about half of the samples.
        # NOTE(review): the transform touches the input only, not the label, so
        # geometric transforms (flip/rotation) would misalign input and target.
        random_number = random.random()
        if random_number > 0.5 and self.transform:
            image = self.transform(image)

        # Colour target
        label = self._load_resized(self.color_path + img_name)

        image = self._to_normalized_lab(image)
        label = self._to_normalized_lab(label)

        # Input: L channel only. Target: a and b channels only.
        return image[:1, :, :], label[1:, :, :]


In [None]:
import torch
from torchvision.transforms import transforms

# Augmentation pipeline meant for EncoderDataset's `transform` argument.
# The dataset divides the image by 255 itself after the transform, so the
# pipeline must hand back 0-255 values: PILToTensor keeps that range, whereas
# ToTensor would rescale to [0, 1] and the image would be scaled down twice.
train_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),        # horizontal flipping
    transforms.RandomRotation(degrees=15),    # random rotation by +/- 15 degrees
    # transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # adjust color properties
    transforms.PILToTensor(),
])

# Disabled identity pipeline for the test split, kept for reference. It is
# written as comments so the cell does not display a stray string as output.
# test_transforms = transforms.Compose([
#     transforms.ToPILImage(),
#     transforms.PILToTensor(),
# ])


In [None]:
# One dataset per split; no augmentation transform is passed to either
train_dataset = EncoderDataset(train_indices, home, transform=None)
test_dataset = EncoderDataset(test_indices, home, transform=None)

# Batched loaders; both reshuffle their samples on every pass
train_dataloader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=True)

## <font color = green> Create Datasets </font>

## <font color = green> Sample Image </font>
    - Remember we have 1 color channel input for image and 2 color channel output of label

In [None]:
# Pull one shuffled training batch and look at its first sample
img, label = next(iter(train_dataloader))
sample_image = img[0]
sample_label = label[0]
print(sample_image.shape, sample_label.shape)

In [None]:
# Fetch a fresh training batch and display the shape of a single input sample
img, label = next(iter(train_dataloader))
img[0].size()

## <font color = green> Visualize Image </font>
    Grayscale and color images share the same L (intensity) channel. The first plot shows the grayscale input sample (its L channel). The next two plots show the 'a' and 'b' channels taken from the label (color image). Finally, the L channel of the grayscale image is concatenated with the 'a' and 'b' channels of the color image and converted with lab2rgb() to produce the RGB image shown in the last plot.

In [None]:
# Pick a random sample from the current batch. random.randint includes both
# end points, so the valid range is 0 .. batch_size - 1 (an upper bound equal
# to the batch size would index one past the end).
random_number = random.randint(0, img.size(0) - 1)
sample_image, sample_label = img[random_number], label[random_number]

In [None]:

# Pick a random sample from the current batch; the bound follows the actual
# batch size instead of a hard-coded 15.
random_number = random.randint(0, img.size(0) - 1)
sample_image, sample_label = img[random_number], label[random_number]

# One row of four panels: L input, 'a' target, 'b' target, reconstructed RGB
fig, axes = plt.subplots(1, 4, figsize=(16, 4))

axes[0].imshow(sample_image.permute(1, 2, 0), cmap='gray')
axes[0].set_title('Image - Gray Scale "L" Channel')

ab_hwc = sample_label.permute(1, 2, 0)   # [H, W, 2]
axes[1].imshow(ab_hwc[:, :, 0], cmap='Greens')
axes[1].set_title('Lab Image - "a" Channel')

axes[2].imshow(ab_hwc[:, :, 1], cmap='Blues')
axes[2].set_title('Lab Image - "b" Channel')

# Rebuild the LAB image from L (input) + a, b (label), undo the normalisation
# applied by the dataset and convert to RGB for display.
color_image = torch.cat((sample_image, sample_label), dim=0).permute(1, 2, 0)
color_image = color_image * torch.tensor([100, 255, 255]) - torch.tensor([0, 128, 128])
color_image = lab2rgb(color_image.numpy())
axes[3].imshow(color_image)
axes[3].set_title('RGB Image')
plt.show()

## <font color = green> Convolution Autoencoder </font>
    - Network takes 1 channel input
    - No maxpool layer is added at the encoder step
    - Transpose Convolution takes place in decoder step
    - Decoder outputs are concatenated with encoder output of the same layer
    - Dropout layer added in decoder layer only
    - Final layer CNN is the converging layer which outputs 2 channels

In [None]:
# Import PyTorch modules for defining neural network layers and functions
import torch.nn as nn            # For defining layers like Conv2d, ConvTranspose2d, etc.
import torch.nn.functional as F  # For activation functions like ReLU

# Define the Convolutional Autoencoder class
class ConvAutoencoder(nn.Module):
    """
    Convolutional encoder-decoder with skip connections.

    Takes a 1-channel input and returns a 2-channel map of the same height
    and width. The encoder halves the spatial size three times with stride-2
    convolutions; the decoder doubles it back with transposed convolutions
    and concatenates the encoder activation of matching size after each step.
    """

    def __init__(self):
        super().__init__()

        # ---- Encoder ----
        # 1 -> 64 channels, spatial size preserved
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1)
        # 64 -> 64 channels, stride 2 halves height and width
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=2, padding=1)
        # 64 -> 128 channels, stride 2 halves again
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2, padding=1)
        # 128 -> 256 channels, stride 2 halves again
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1)

        # 2x2 max-pooling; registered but never called in forward()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # ---- Decoder ----
        # 256 -> 128 channels, stride 2 doubles height and width
        self.t_conv1 = nn.ConvTranspose2d(
            in_channels=256, out_channels=128, kernel_size=3, stride=2, padding=1, output_padding=1)
        # input 256 = 128 (decoder) + 128 (skip from conv3); -> 64 channels, doubles size
        self.t_conv2 = nn.ConvTranspose2d(
            in_channels=256, out_channels=64, kernel_size=3, stride=2, padding=1, output_padding=1)
        # input 128 = 64 (decoder) + 64 (skip from conv2); -> 128 channels, doubles size
        self.t_conv3 = nn.ConvTranspose2d(
            in_channels=128, out_channels=128, kernel_size=3, stride=2, padding=1, output_padding=1)
        # input 192 = 128 (decoder) + 64 (skip from conv1); -> 15 channels, size preserved (stride 1)
        self.t_conv4 = nn.ConvTranspose2d(
            in_channels=192, out_channels=15, kernel_size=3, stride=1, padding=1)

        # Dropout with p = 0.2, applied after the first three skip concatenations
        self.dropout = nn.Dropout(p=0.2)

        # input 16 = 15 (decoder) + 1 (original input); -> 2 output channels
        self.converge = nn.Conv2d(in_channels=16, out_channels=2, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        """
        Run the network on ``x`` of shape [N, 1, H, W]; returns [N, 2, H, W].
        """
        # Encoder: keep every activation for the skip connections
        enc1 = F.relu(self.conv1(x))
        enc2 = F.relu(self.conv2(enc1))
        enc3 = F.relu(self.conv3(enc2))
        enc4 = F.relu(self.conv4(enc3))

        # Decoder stage 1: upsample, join with enc3, dropout
        dec = F.relu(self.t_conv1(enc4))
        dec = self.dropout(torch.cat((dec, enc3), dim=1))

        # Decoder stage 2: upsample, join with enc2, dropout
        dec = F.relu(self.t_conv2(dec))
        dec = self.dropout(torch.cat((dec, enc2), dim=1))

        # Decoder stage 3: upsample, join with enc1, dropout
        dec = F.relu(self.t_conv3(dec))
        dec = self.dropout(torch.cat((dec, enc1), dim=1))

        # Decoder stage 4: same-size transposed conv, join with the raw input
        dec = torch.cat((F.relu(self.t_conv4(dec)), x), dim=1)

        # Final convolution + ReLU yields the two output channels
        return F.relu(self.converge(dec))

# Instantiate the model
model = ConvAutoencoder()

# Print the model architecture to verify
print(model)

# Move the model to the GPU when one is available, otherwise keep it on the
# CPU (a hard-coded 'cuda' raises on machines without a GPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)


In [None]:
# Layer-by-layer summary for a single-channel 160x160 input
summary(model, (1, 160, 160))

In [None]:
# Loss: mean squared error between the predicted and target channels
criterion = nn.MSELoss()

# Optimiser: Adam over all model parameters with learning rate 1e-3
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Number of full passes over the training set
n_epochs = 1

# Average loss per epoch, for the training and the test split
train_losses = []
test_losses = []

# Run on whichever device the model already lives on instead of assuming 'cuda'
device = next(model.parameters()).device

for epoch in range(1, n_epochs + 1):  # epochs are numbered 1..n_epochs
    # ---- Training pass ----
    train_loss = 0.0  # sum of per-sample losses for this epoch
    for images, labels in tqdm(train_dataloader):  # tqdm shows a progress bar
        # Cast to float32 (the dtype of the model weights) and move to the device
        images = images.float().to(device)
        labels = labels.float().to(device)

        optimizer.zero_grad()              # clear gradients of the previous step
        outputs = model(images)            # forward pass
        loss = criterion(outputs, labels)  # batch-mean loss
        loss.backward()                    # back-propagate
        optimizer.step()                   # update the parameters

        # criterion returns the batch mean; weight by batch size so the epoch
        # average below is a true per-sample mean
        train_loss += loss.item() * images.size(0)

    train_loss = train_loss / len(train_dataloader.dataset)
    train_losses.append(train_loss)

    # Print the training loss for the current epoch
    print('Epoch: {} \tTraining Loss: {:.6f}'.format(
        epoch,
        train_loss
    ))

    # ---- Evaluation pass ----
    test_loss = 0
    # No gradients are needed for evaluation
    with torch.no_grad():
        model.eval()  # evaluation mode (disables dropout)
        for images, labels in test_dataloader:
            # Same cast as in the training pass: the dataset tensors may be
            # float64, which would not match the float32 model weights
            images = images.float().to(device)
            labels = labels.float().to(device)
            output = model(images)
            loss = criterion(output, labels)
            test_loss += loss.item() * images.size(0)

    model.train()  # back to training mode for the next epoch

    test_loss = test_loss / len(test_dataloader.dataset)
    test_losses.append(test_loss)

    # Print the testing loss for the current epoch
    print("Test Loss: {:.3f}.. ".format(test_loss))


## <font color = green>Visualize Prediction </font>
 - The model trains as expected and produces colors that are not far off, so it can serve as a baseline model.

In [None]:
## Train Data
# Colourise and display 20 training images.
# Inference runs in evaluation mode (dropout off) and without autograd; the
# model's previous train/eval mode is restored afterwards.
device = next(model.parameters()).device   # run where the model lives
was_training = model.training
model.eval()

for _ in range(20):
    # Each fresh iterator over the shuffled loader yields a new random batch;
    # only its first image is used
    test_img, test_label = next(iter(train_dataloader))
    gray = test_img[0].float()              # L channel, [1, H, W]

    # Predict the a/b channels for this single image (batch dimension added)
    with torch.no_grad():
        pred = model(gray.unsqueeze(0).to(device))

    # L (input) + predicted a, b -> full LAB image, [3, H, W]
    lab_pred = torch.cat((gray, pred[0].cpu()), dim=0)

    # Undo the dataset normalisation: scale the channels back to their ranges
    # and shift a/b back around zero; layout becomes [H, W, C]
    lab_pred_inv_scaled = lab_pred.permute(1, 2, 0) * torch.tensor([100, 255, 255]) - torch.tensor([0, 128, 128])

    # Convert the LAB image to RGB for display
    rgb_pred = lab2rgb(lab_pred_inv_scaled.numpy())

    # Left: grayscale input. Right: predicted colour image.
    fig, (ax_gray, ax_pred) = plt.subplots(1, 2, figsize=(10, 5))
    ax_gray.imshow(test_img[0].permute(1, 2, 0), cmap='gray')
    ax_gray.set_title('GrayScale Image')
    ax_pred.imshow(rgb_pred)
    ax_pred.set_title('Predicted Color Image')
    plt.show()

model.train(was_training)


In [None]:
##Test Data
# Colourise and display 20 held-out images.
# Inference runs in evaluation mode (dropout off) and without autograd; the
# model's previous train/eval mode is restored afterwards.
device = next(model.parameters()).device   # run where the model lives
was_training = model.training
model.eval()

for _ in range(20):
    # Each fresh iterator over the shuffled loader yields a new random batch;
    # only its first image is used
    test_img, test_label = next(iter(test_dataloader))
    gray = test_img[0].float()              # L channel, [1, H, W]

    # Predict the a/b channels for this single image (batch dimension added)
    with torch.no_grad():
        pred = model(gray.unsqueeze(0).to(device))

    # L (input) + predicted a, b -> LAB, then undo the dataset normalisation
    lab_pred = torch.cat((gray, pred[0].cpu()), dim=0)
    lab_pred_inv_scaled = lab_pred.permute(1, 2, 0) * torch.tensor([100, 255, 255]) - torch.tensor([0, 128, 128])
    rgb_pred = lab2rgb(lab_pred_inv_scaled.numpy())

    # Left: grayscale input. Right: predicted colour image.
    fig, (ax_gray, ax_pred) = plt.subplots(1, 2, figsize=(10, 5))
    ax_gray.imshow(test_img[0].permute(1, 2, 0), cmap='gray')
    ax_gray.set_title('GrayScale Image')
    ax_pred.imshow(rgb_pred)
    ax_pred.set_title('Predicted Color Image')
    plt.show()

model.train(was_training)