<div class="alert alert-block alert-success">

# **LoadComicData_hsv.ipynb**
***
### **This notebook loads comic book images for HSV neural network training. Each image is stored as a pair: a grayscale input and its color target.**


In [None]:
import os
import glob
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

### Load the Kaggle API key

In [None]:
# Placeholder path - replace it with your own location.
# NOTE(review): presumably this should be the folder that contains kaggle.json - confirm against the Kaggle CLI docs.
# Raw string so the Windows-style backslash is never interpreted as an escape sequence.
os.environ['KAGGLE_CONFIG_DIR'] = r'..\API Key here'

### Download the dataset - **Warning: it's 2 GB**

In [None]:
# Runs the Kaggle CLI through IPython's `!` shell escape to download the
# comic-books-classification dataset into the local `comic_dataset` folder (`-p`)
# and unzip it (`--unzip`). Re-running this cell issues the download command again.
print("Downloading dataset from Kaggle...")
!kaggle datasets download -d cenkbircanoglu/comic-books-classification -p comic_dataset --unzip

### Process images into grayscale/color pairs and split them into training and test sets

In [None]:
from sklearn.model_selection import train_test_split

# Tunable preprocessing settings.
MAX_IMAGES = 2000        # Adjustable limit on how many images are loaded
IMAGE_SIZE = (256, 256)  # Target size passed to cv2.resize for every image

input_images = []
output_images = []

# Sort the paths: glob yields them in filesystem-dependent order, so without
# sorting the "first MAX_IMAGES" subset (and therefore the split) is not reproducible.
image_paths = sorted(glob.glob('comic_dataset/**/*.jpg', recursive=True))
print(f"Found {len(image_paths)} images.")

for path in tqdm(image_paths[:MAX_IMAGES]):
    color_img = cv2.imread(path)
    if color_img is None:
        # Nothing could be read from this path - skip it.
        continue
    color_img = cv2.resize(color_img, IMAGE_SIZE)
    hsv_img = cv2.cvtColor(color_img, cv2.COLOR_BGR2HSV)

    # Network input: the V (brightness) channel, kept as a trailing axis of size 1.
    v_channel = (hsv_img[..., 2:3] / 255.0).astype(np.float32)

    # Network target: hue and saturation, each scaled to [0, 1].
    hs_channels = np.empty_like(hsv_img[..., 0:2], dtype=np.float32)
    hs_channels[..., 0] = hsv_img[..., 0] / 179.0                  # Hue in [0, 1]
    hs_channels[..., 1] = hsv_img[..., 1] / 255.0                  # Sat in [0, 1]

    input_images.append(v_channel)
    output_images.append(hs_channels)

if not input_images:
    # Fail early with a clear message instead of an obscure error further down.
    raise RuntimeError(
        "No readable .jpg images were found under comic_dataset/ - download the dataset first.")

# Stack once, after the loop, rather than rebuilding the full arrays for every image.
X = np.array(input_images)
Y = np.array(output_images)
print(f"Input shape: {X.shape}, Output shape: {Y.shape}")

# Split to avoid testing on the same data the model is trained on. 20% (0.2) goes to testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42)

### Save processed data

In [None]:
# Target directory for the prepared arrays; created if it is missing.
hsv_out_dir = "../Data/prepared_data/HSV"
os.makedirs(hsv_out_dir, exist_ok=True)

# Write each split to its own .npy file: the training pair first, then the test pair.
for file_stem, split_data in (
    ("comic_input_grayscale_train", X_train),
    ("comic_output_color_train", y_train),
    ("comic_input_grayscale_test", X_test),
    ("comic_output_color_test", y_test),
):
    np.save(f"{hsv_out_dir}/{file_stem}.npy", split_data)

print("HSV data saved to ../Data/prepared_data/HSV/")

### Display processed data to confirm the preprocessing works

In [None]:
def hsv_to_rgb_img(hs_channels, v_channel):
    """Rebuild an 8-bit RGB image from normalized hue/saturation and value channels.

    Parameters
    ----------
    hs_channels : numpy.ndarray
        Array whose last axis holds hue at index 0 and saturation at index 1,
        both scaled to [0, 1] (hue is rescaled by 179, saturation by 255).
    v_channel : numpy.ndarray
        Value channel scaled to [0, 1]; singleton axes are squeezed away before
        it is stacked with hue and saturation.

    Returns
    -------
    numpy.ndarray
        The result of ``cv2.cvtColor(..., cv2.COLOR_HSV2RGB)`` on the rebuilt
        uint8 HSV image.
    """
    def _to_uint8(channel, max_value):
        # Round to nearest instead of truncating: float32 round-off can leave a
        # value just below the original integer, and a plain cast would drop it
        # by one level. Clip so out-of-range inputs cannot wrap around in uint8.
        scaled = np.asarray(channel) * max_value
        return np.clip(np.rint(scaled), 0, max_value).astype(np.uint8)

    H = _to_uint8(hs_channels[..., 0], 179)
    S = _to_uint8(hs_channels[..., 1], 255)
    V = _to_uint8(np.squeeze(v_channel), 255)

    hsv_img = np.stack([H, S, V], axis=-1)
    rgb_img = cv2.cvtColor(hsv_img, cv2.COLOR_HSV2RGB)
    return rgb_img

num_samples = 10
# Never request more previews than there are loaded images (avoids an IndexError
# when fewer than num_samples images were processed).
n_show = min(num_samples, len(X))

# Top row: grayscale inputs; bottom row: the matching color reconstruction.
# squeeze=False keeps `axes` two-dimensional even when only one column is shown.
fig, axes = plt.subplots(2, n_show, figsize=(12, 4), squeeze=False)

for i in range(n_show):
    gray_ax = axes[0, i]
    gray_ax.imshow(X[i].squeeze(), cmap='gray')
    gray_ax.set_title("Grayscale")
    gray_ax.axis('off')

    color_ax = axes[1, i]
    rgb = hsv_to_rgb_img(Y[i], X[i])
    color_ax.imshow(rgb)
    color_ax.set_title("HSV→RGB")
    color_ax.axis('off')

fig.tight_layout()
plt.show()

In [None]:
# Show the first test sample: grayscale input on the left, recolored image on the right.
fig, (gray_ax, color_ax) = plt.subplots(1, 2)

gray_ax.imshow(X_test[0].squeeze(), cmap='gray')
gray_ax.set_title("Grayscale Input")
gray_ax.axis('off')

rgb = hsv_to_rgb_img(y_test[0], X_test[0])
color_ax.imshow(rgb)
color_ax.set_title("Color Output (HSV→RGB)")
color_ax.axis('off')

plt.show()

In [None]:
# Show the first training sample: grayscale input on the left, recolored image on the right.
fig, (gray_ax, color_ax) = plt.subplots(1, 2)

gray_ax.imshow(X_train[0].squeeze(), cmap='gray')
gray_ax.set_title("Grayscale Input")
gray_ax.axis('off')

rgb = hsv_to_rgb_img(y_train[0], X_train[0])
color_ax.imshow(rgb)
color_ax.set_title("Color Output (HSV→RGB)")
color_ax.axis('off')

plt.show()

