In [1]:
import os
import numpy as np
import cv2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, UpSampling2D, InputLayer, Activation
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from sklearn.model_selection import train_test_split
import tensorflow as tf
from PIL import Image
from tqdm import tqdm
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, UpSampling2D, InputLayer
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split





### Drive Dataset Processing

In [2]:
# Count images in the train folder
num_train_images = len([f for f in os.listdir('dataset/Train') if f.endswith(('jpg', 'jpeg', 'png'))])
num_test_images = len([f for f in os.listdir('dataset/Test') if f.endswith(('jpg', 'jpeg', 'png'))])
print(f"Number of training images: {num_train_images}")
print(f"Number of testing images: {num_test_images}")

Number of training images: 9196
Number of testing images: 486


In [3]:
train_folder = 'dataset/train'

# Get a list of all image files in the folder
image_files = [f for f in os.listdir(train_folder) if f.endswith(('jpg', 'jpeg', 'png'))]

# Initialize a set to store unique sizes
unique_sizes = set()

# Loop through each image to get its dimensions
for image_file in image_files:
    image_path = os.path.join(train_folder, image_file)
    with Image.open(image_path) as img:
        size = img.size
        unique_sizes.add(size)

# Print the number of different image sizes
print(f"Different image sizes found: {len(unique_sizes)}")
for size in unique_sizes:
    print(f"Size: {size[0]}x{size[1]}")


Different image sizes found: 1
Size: 256x256


Our images size is 256x256 but since we wont to concatenate it with cifar dataset and train the model on the concatenated dataset, we need to downsize it to same size as cifar datasey (32x32)

In [4]:
# Resize Train Local Images from 256x256 to 32x32 to be same as Cifar dataset
resized_images = []

for image_file in os.listdir('dataset/Train'):
    if image_file.endswith(('jpg', 'jpeg', 'png')):  
        image_path = os.path.join('dataset/Train', image_file)
        with Image.open(image_path) as img:
            resized_img = img.resize((100, 100))
            resized_img.save(os.path.join('dataset/Train_resized', image_file))

Know doing the same process with the test data

In [5]:
# Resize Test Local Images from 256x256 to 32x32 to be same as Cifar dataset
resized_images = []
test_folder = 'dataset/Test'
# Loop through each image in the dataset folder
for image_file in os.listdir(test_folder):
    if image_file.endswith(('jpg', 'jpeg', 'png')):  # Make sure to filter image files
        image_path = os.path.join(test_folder, image_file)
        
        # Open the image and resize it to 32x32
        with Image.open(image_path) as img:
            resized_img = img.resize((100, 100))
            resized_img.save(os.path.join('dataset/Test_resized', image_file))

Now, we convert images to LAB format with, Normalize L-channel values for each image to [0, 1] (for CNN input) and  Normalize A and B channel to [-1, 1].

In [6]:
import os
import cv2
import numpy as np
from tqdm import tqdm

def load_lab_dataset(folder, batch_size=32):
    l_channel_list = []
    ab_channel_list = []
    image_files = [f for f in os.listdir(folder) if f.endswith(('jpg', 'jpeg', 'png'))]

    for i in tqdm(range(0, len(image_files), batch_size), desc="Processing images"):
        batch_files = image_files[i:i + batch_size]
        batch_l = []
        batch_ab = []

        for image_file in batch_files:
            image_path = os.path.join(folder, image_file)

            # Load image with OpenCV
            img = cv2.imread(image_path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)  # Convert to LAB

            # Extract L (grayscale input) and AB (color output)
            l_channel, a_channel, b_channel = cv2.split(img)

            # Normalize L channel to [0, 1]
            l_channel = l_channel.astype('float32') / 255.0

            # Normalize A & B channels to [-1, 1] (original range is -128 to 127)
            a_channel = (a_channel.astype('float32') - 128) / 128.0
            b_channel = (b_channel.astype('float32') - 128) / 128.0

            # Stack A & B channels together
            ab_channels = np.stack([a_channel, b_channel], axis=-1)

            # Append to batch lists
            batch_l.append(l_channel)
            batch_ab.append(ab_channels)

        # Append batch to dataset lists
        l_channel_list.extend(batch_l)
        ab_channel_list.extend(batch_ab)

    # Convert lists to NumPy arrays
    l_channel_array = np.array(l_channel_list)  # Shape: (num_samples, 256, 256)
    ab_channel_array = np.array(ab_channel_list)  # Shape: (num_samples, 256, 256, 2)

    return l_channel_array, ab_channel_array

# Load dataset in smaller batches to avoid MemoryError
batch_size = 32  # Adjust based on your memory capacity
x_train_l, y_train_ab = load_lab_dataset('dataset/Train_resized', batch_size=batch_size)
x_test_l, y_test_ab = load_lab_dataset('dataset/Test_resized', batch_size=batch_size)
# Save training data
np.save('x_train_l.npy', x_train_l)
np.save('y_train_ab.npy', y_train_ab)

# Save test data
np.save('x_test_l.npy', x_test_l)
np.save('y_test_ab.npy', y_test_ab)
# Print dataset shapes
print(f"Training Input Shape (L Channel): {x_train_l.shape}")  
print(f"Training Output Shape (AB Channels): {y_train_ab.shape}")  
print(f"Test Input Shape (L Channel): {x_test_l.shape}")  
print(f"Test Output Shape (AB Channels): {y_test_ab.shape}")  

Processing images:   0%|          | 0/288 [00:00<?, ?it/s]

Processing images: 100%|██████████| 288/288 [02:07<00:00,  2.26it/s]
Processing images: 100%|██████████| 16/16 [00:07<00:00,  2.14it/s]


Training Input Shape (L Channel): (9196, 100, 100)
Training Output Shape (AB Channels): (9196, 100, 100, 2)
Test Input Shape (L Channel): (486, 100, 100)
Test Output Shape (AB Channels): (486, 100, 100, 2)


### Cifar-10 Dataset Processing

In [8]:

# Load the CIFAR-10 dataset
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()

num_train_images = X_train.shape[0]
num_test_images = X_test.shape[0]

# Get the image size (height, width, channels)
image_height, image_width, image_channels = X_train.shape[1], X_train.shape[2], X_train.shape[3]

# Print the number of images and their size
print(f"Number of training images: {num_train_images}")
print(f"Number of test images: {num_test_images}")
print(f"Image size: {image_height}x{image_width} with {image_channels} channels")


Number of training images: 50000
Number of test images: 10000
Image size: 32x32 with 3 channels


In [10]:
def convert_to_lab(dataset):
    """
    Convert CIFAR-10 dataset from RGB to LAB format.
    - L channel: grayscale input.
    - AB channels: color output.
    """
    l_channel_list = []
    ab_channel_list = []

    for img in dataset:
        # Convert to LAB color space
        lab_img = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)

        # Extract L (grayscale input) and AB (color output)
        l_channel, a_channel, b_channel = cv2.split(lab_img)

        # Normalize L channel to [0, 1]
        l_channel = l_channel.astype('float32') / 255.0

        # Normalize A & B channels to [-1, 1]
        a_channel = (a_channel.astype('float32') - 128) / 128.0
        b_channel = (b_channel.astype('float32') - 128) / 128.0

        # Stack A & B channels together
        ab_channels = np.stack([a_channel, b_channel], axis=-1)

        # Append to dataset lists
        l_channel_list.append(l_channel)
        ab_channel_list.append(ab_channels)

    # Convert lists to NumPy arrays
    l_channel_array = np.array(l_channel_list)  
    ab_channel_array = np.array(ab_channel_list)  

    return l_channel_array, ab_channel_array

# Convert training and test data to LAB format
x_train_l_c, y_train_ab_c = convert_to_lab(X_train)
x_test_l_c, y_test_ab_c = convert_to_lab(X_test)

# Save arrays as .npy files
np.save("x_train_l_c.npy", x_train_l_c)
np.save("y_train_ab_c.npy", y_train_ab_c)
np.save("x_test_l_c.npy", x_test_l_c)
np.save("y_test_ab_c.npy", y_test_ab_c)

# Print dataset shapes
print(f"Training Input Shape (L Channel): {x_train_l_c.shape}")  
print(f"Training Output Shape (AB Channels): {y_train_ab_c.shape}")  
print(f"Test Input Shape (L Channel): {x_test_l_c.shape}")  
print(f"Test Output Shape (AB Channels): {y_test_ab_c.shape}") 


Training Input Shape (L Channel): (50000, 32, 32)
Training Output Shape (AB Channels): (50000, 32, 32, 2)
Test Input Shape (L Channel): (10000, 32, 32)
Test Output Shape (AB Channels): (10000, 32, 32, 2)


### Modeling

In [53]:

# Concatenate training data
x_train = np.concatenate([x_train_l, x_train_l_c], axis=0)
y_train = np.concatenate([y_train_ab, y_train_ab_c], axis=0)

# Concatenate test data
x_test = np.concatenate([x_test_l, x_test_l_c], axis=0)
y_test= np.concatenate([y_test_ab, y_test_ab_c], axis=0)

# Print the new dataset shapes
print(f"Combined Training Input Shape (L Channel): {x_train.shape}")  # (total_samples, height, width)
print(f"Combined Training Output Shape (AB Channels): {y_train.shape}")  # (total_samples, height, width, 2)
print(f"Combined Test Input Shape (L Channel): {x_test.shape}")  # (total_samples, height, width)
print(f"Combined Test Output Shape (AB Channels): {y_test.shape}")  # (total_samples, height, width, 2)





Combined Training Input Shape (L Channel): (59196, 32, 32)
Combined Training Output Shape (AB Channels): (59196, 32, 32, 2)
Combined Test Input Shape (L Channel): (10486, 32, 32)
Combined Test Output Shape (AB Channels): (10486, 32, 32, 2)


In [None]:

# Ensure L channel has the correct shape for CNN input
#x_train = x_train[..., np.newaxis]  
#x_test = x_test[..., np.newaxis]

# Split training data into train and validation sets
#x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.1, random_state=42)

# Define the CNN model for colorization
model = Sequential([
    InputLayer(input_shape=(x_train.shape[1], x_train.shape[2], 1)),
  
    # Convolutional layers
    Conv2D(64, (3,3), activation='relu', padding='same'),
    Conv2D(128, (3,3), activation='relu', padding='same'),
    Conv2D(256, (3,3), activation='relu', padding='same'),

    # Downsampling
    Conv2D(512, (3,3), activation='relu', padding='same', strides=2),
    Conv2D(512, (3,3), activation='relu', padding='same'),
    Conv2D(256, (3,3), activation='relu', padding='same'),

    # Upsampling
    UpSampling2D((2,2)),
    Conv2D(128, (3,3), activation='relu', padding='same'),
    Conv2D(64, (3,3), activation='relu', padding='same'),

    # Output: 2-channel AB prediction
    Conv2D(2, (3,3), activation='tanh', padding='same')  # tanh to output [-1,1] range
])

# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])

# Train the model with validation split
history = model.fit(x_train, y_train, epochs=5, batch_size=200, validation_data=(x_val, y_val))


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
  5/267 [..............................] - ETA: 36:21 - loss: 0.0096 - accuracy: 0.6099

KeyboardInterrupt: 

In [None]:
# Evaluate on test set
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_acc}")