<div class="alert alert-block alert-info">

# **LoadComicData.ipynb**
***
### **This notebook loads comic book images for neural network training. Each image is stored as a pair: one color version and one grayscale version.**


In [None]:
import os
import glob
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

### Load the Kaggle API key

In [None]:
# KAGGLE_CONFIG_DIR must name the *directory* that contains kaggle.json, not the
# file itself: the Kaggle client reads "<KAGGLE_CONFIG_DIR>/kaggle.json".
# Replace the placeholder below with that directory (use a raw string or
# forward slashes for Windows paths so backslashes are not treated as escapes).
os.environ['KAGGLE_CONFIG_DIR'] = 'API Key here'

### Download the dataset - **Warning: it's 2 GB**

In [None]:
print("Downloading dataset from Kaggle...")
!kaggle datasets download -d cenkbircanoglu/comic-books-classification -p comic_dataset --unzip

### Process images into grayscale/color pairs and split the data into training and test sets.

In [None]:
from sklearn.model_selection import train_test_split

# Tunable settings for this cell.
MAX_IMAGES = 2000        # Adjustable limit on how many images are loaded
IMAGE_SIZE = (256, 256)  # Target size handed to cv2.resize
TEST_FRACTION = 0.2      # Share of the samples held out for testing
SPLIT_SEED = 42          # Fixed seed so the train/test split is repeatable

input_images = []   # grayscale network inputs, one (H, W, 1) array per image
output_images = []  # color network targets, one (H, W, 3) array per image

# glob returns paths in arbitrary order; sorting makes the selected subset
# (and therefore the seeded split below) the same on every run.
image_paths = sorted(glob.glob('comic_dataset/**/*.jpg', recursive=True))
print(f"Found {len(image_paths)} images.")

for path in tqdm(image_paths[:MAX_IMAGES]):
    color_img = cv2.imread(path)  # OpenCV loads images in BGR channel order
    if color_img is None:
        # cv2.imread returns None for files it cannot decode; skip those.
        continue
    color_img = cv2.resize(color_img, IMAGE_SIZE)
    gray_img = cv2.cvtColor(color_img, cv2.COLOR_BGR2GRAY)
    gray_img = np.expand_dims(gray_img, axis=-1)  # add an explicit channel axis

    # Scale the 8-bit pixel values to the range [0, 1].
    input_images.append(gray_img / 255.0)
    output_images.append(color_img / 255.0)

if not input_images:
    raise ValueError(
        "No readable .jpg images were found under comic_dataset/ - "
        "run the download cell first."
    )

# Build the arrays once, after the loop: converting inside the loop would
# re-copy every image loaded so far on each iteration.
# Note: Y keeps OpenCV's BGR channel order.
X = np.array(input_images)
Y = np.array(output_images)

print(f"Input shape: {X.shape}, Output shape: {Y.shape}")

# Split to avoid testing on the same data the model is trained on.
# TEST_FRACTION (20%) of the samples goes to testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=TEST_FRACTION, random_state=SPLIT_SEED)

### Save processed data

In [None]:
# Save the train/test arrays as .npy files.
# np.save does not create missing folders, so make sure the target exists first.
os.makedirs("prepared_data", exist_ok=True)
np.save("prepared_data/comic_input_grayscale_train.npy", X_train)
np.save("prepared_data/comic_output_color_train.npy", y_train)
np.save("prepared_data/comic_input_grayscale_test.npy", X_test)
np.save("prepared_data/comic_output_color_test.npy", y_test)
print("Data saved to prepared_data/")

### Display processed data to confirm everything is working.

In [None]:
# Preview grid: grayscale inputs on the top row, matching color targets below.
# Cap the count at the number of loaded images so indexing never runs past the data.
num_samples = min(10, len(X))
plt.figure(figsize=(12, 4))
for i in range(num_samples):
    plt.subplot(2, num_samples, i + 1)
    plt.imshow(X[i].squeeze(), cmap='gray')  # drop the channel axis for display
    plt.title("Grayscale")
    plt.axis('off')

    plt.subplot(2, num_samples, i + 1 + num_samples)
    # Y was loaded with cv2.imread (BGR order) while matplotlib expects RGB,
    # so reverse the channel axis for display; the stored data is left untouched.
    plt.imshow(Y[i][..., ::-1])
    plt.title("Color")
    plt.axis('off')
plt.tight_layout()
plt.show()