<div class="alert alert-block alert-warning">

# **LoadComicData_lab.ipynb**
***
### **This notebook loads comic book images and prepares them for training a neural network in the LAB color space. Each image is turned into a pair: a grayscale input (the L channel) and a color target (the A and B channels).**


In [None]:
import os
import glob
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

### Point the Kaggle client to the API key directory

In [None]:
# Tell the Kaggle client where to look for its credentials.
# Raw string: '\A' is not a valid escape sequence, so the plain literal emits a
# SyntaxWarning on Python 3.12+; r'...' keeps the exact same value without it.
# Replace the placeholder with the folder that contains your key file.
os.environ['KAGGLE_CONFIG_DIR'] = r'..\API Key here'

### Download the dataset — **Warning: it's 2 GB**

In [None]:
# Announce the download up front; the next line produces no output of its own until the CLI starts.
print("Downloading dataset from Kaggle...")
# IPython shell escape: runs the Kaggle CLI, which downloads the dataset into
# the local `comic_dataset` folder (-p) and extracts the archive (--unzip).
# NOTE(review): presumably relies on the credentials located via KAGGLE_CONFIG_DIR — confirm.
!kaggle datasets download -d cenkbircanoglu/comic-books-classification -p comic_dataset --unzip

### Process images into grayscale/color pairs and split them into training and test sets.

In [None]:
from sklearn.model_selection import train_test_split

# Tunable settings for this cell.
MAX_IMAGES = 2000        # cap on the number of files processed (bounds run time and memory)
IMAGE_SIZE = (256, 256)  # (width, height) every image is resized to
TEST_FRACTION = 0.2      # share of the pairs held out for testing
SPLIT_SEED = 42          # fixed seed so the train/test split is reproducible

input_images = []
output_images = []

# glob returns files in an OS-dependent order; sorting makes the MAX_IMAGES
# subset the same on every machine and every run.
image_paths = sorted(glob.glob('comic_dataset/**/*.jpg', recursive=True))
print(f"Found {len(image_paths)} images.")

for path in tqdm(image_paths[:MAX_IMAGES]):
    color_img = cv2.imread(path)
    if color_img is None:
        # cv2.imread signals an unreadable file by returning None; skip it.
        continue
    color_img = cv2.resize(color_img, IMAGE_SIZE)
    # For 8-bit images OpenCV stores L, A and B each in the 0..255 range.
    lab_img = cv2.cvtColor(color_img, cv2.COLOR_BGR2LAB)

    # Input grayscale (L), scaled to [0, 1]; the 0:1 slice keeps a trailing channel axis.
    l_channel = (lab_img[..., 0:1] / 255.0).astype(np.float32)
    # Output color (AB), scaled to [-1, 1].
    ab_channels = (((lab_img[..., 1:] / 255.0) * 2.0) - 1.0).astype(np.float32)

    input_images.append(l_channel)
    output_images.append(ab_channels)

# Fail early with a clear message instead of an obscure error from the split below.
if not input_images:
    raise RuntimeError(
        "No images could be loaded from 'comic_dataset/'. "
        "Check that the dataset was downloaded and unzipped.")

X = np.array(input_images)
Y = np.array(output_images)


print(f"Input shape: {X.shape}, Output shape: {Y.shape}")

# Split to avoid testing on same data the model is trained on. 20% (0.2) goes to testing.
# The stacked arrays are split (not the Python lists) so the four results are
# ndarrays, consistent with X and Y.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=TEST_FRACTION, random_state=SPLIT_SEED)

### Save processed data

In [None]:
# Output folder for the prepared LAB arrays; created on demand, reused if present.
lab_dir = "../Data/prepared_data/LAB"
os.makedirs(lab_dir, exist_ok=True)

# One .npy file per split: grayscale inputs and color targets, for train and test.
for file_name, split in (
    ("comic_input_grayscale_train.npy", X_train),
    ("comic_output_color_train.npy", y_train),
    ("comic_input_grayscale_test.npy", X_test),
    ("comic_output_color_test.npy", y_test),
):
    np.save(f"{lab_dir}/{file_name}", split)

print(f"LAB data saved to {lab_dir}/")

### Display processed data to confirm that the preprocessing works.

In [None]:
def lab_to_rgb(X_gray, Y_ab):
    """Rebuild an RGB image from a normalized L channel and normalized AB channels.

    Undoes the normalization used when the pairs were created
    (L: ``v / 255``; A, B: ``(v / 255) * 2 - 1`` on OpenCV's 8-bit LAB values)
    and converts the result to RGB with OpenCV's float LAB conversion.

    Parameters
    ----------
    X_gray : array-like
        L channel scaled to [0, 1], with H*W elements (e.g. shape (H, W) or
        (H, W, 1)).
    Y_ab : array-like, shape (H, W, 2)
        A and B channels scaled to [-1, 1].

    Returns
    -------
    np.ndarray
        Float32 RGB image of shape (H, W, 3), clipped to [0, 1].

    Raises
    ------
    ValueError
        If ``X_gray`` cannot be reshaped to the spatial size of ``Y_ab``.
    """
    ab_norm = np.asarray(Y_ab, dtype=np.float32)
    # Take the spatial size from the AB array instead of assuming 256x256.
    l_norm = np.asarray(X_gray, dtype=np.float32).reshape(ab_norm.shape[:2])

    # Float LAB in OpenCV uses L in [0, 100]; 8-bit L was L * 255 / 100.
    L = l_norm * 100
    # 8-bit A/B were (value + 128), so the exact inverse of (v / 255) * 2 - 1
    # is ((y + 1) * 127.5) - 128 = y * 127.5 - 0.5, not y * 128.
    ab = ab_norm * 127.5 - 0.5

    lab = np.concatenate([L[..., np.newaxis], ab], axis=-1).astype(np.float32)

    rgb = cv2.cvtColor(lab, cv2.COLOR_Lab2RGB)
    # Out-of-gamut LAB values can land slightly outside [0, 1]; clip for display.
    return np.clip(rgb, 0, 1)


# Show up to 10 pairs; fewer if fewer images were loaded, so X[i] never goes out of range.
num_samples = min(10, len(X))

if num_samples == 0:
    print("No samples available to display.")
else:
    # Top row: grayscale inputs. Bottom row: the same samples converted back to RGB.
    # squeeze=False keeps `axes` two-dimensional even when only one sample is shown.
    fig, axes = plt.subplots(2, num_samples, figsize=(12, 4), squeeze=False)

    for i in range(num_samples):
        gray_ax = axes[0, i]
        gray_ax.imshow(X[i].squeeze(), cmap='gray')
        gray_ax.set_title("Grayscale")
        gray_ax.axis('off')

        color_ax = axes[1, i]
        # lab_to_rgb already returns values clipped to [0, 1].
        color_ax.imshow(lab_to_rgb(X[i], Y[i]))
        color_ax.set_title("LAB→RGB")
        color_ax.axis('off')

    fig.tight_layout()
    plt.show()

In [None]:
# Sanity check on the held-out split: first test pair, input next to its reconstruction.
fig, (ax_gray, ax_color) = plt.subplots(1, 2)

ax_gray.imshow(X_test[0].squeeze(), cmap='gray')
ax_gray.set_title("Grayscale Input")
ax_gray.axis('off')

# lab_to_rgb returns an image already clipped to [0, 1], so it can be shown directly.
rgb = lab_to_rgb(X_test[0], y_test[0])
ax_color.imshow(rgb)
ax_color.set_title("Color Output (LAB→RGB)")
ax_color.axis('off')

plt.show()

In [None]:
# Sanity check on the training split: first training pair, input next to its reconstruction.
fig, (ax_gray, ax_color) = plt.subplots(1, 2)

ax_gray.imshow(X_train[0].squeeze(), cmap='gray')
ax_gray.set_title("Grayscale Input")
ax_gray.axis('off')

# lab_to_rgb returns an image already clipped to [0, 1], so it can be shown directly.
rgb = lab_to_rgb(X_train[0], y_train[0])
ax_color.imshow(rgb)
ax_color.set_title("Color Output (LAB→RGB)")
ax_color.axis('off')

plt.show()

