In [2]:
# FINAL DATASET

import kagglehub

# Download the latest version of the "t91-image-dataset" through kagglehub.
# `path` is the local directory holding the downloaded copy; the cells below
# read the images from it.
path = kagglehub.dataset_download("ll01dm/t91-image-dataset")

# Show where the files ended up so the location can be checked by eye.
print("Path to dataset files:", path)

Path to dataset files: C:\Users\simon\.cache\kagglehub\datasets\ll01dm\t91-image-dataset\versions\1


In [3]:
import os
import numpy as np
import cv2

def analyze_dataset(dir_path):
    """
    Print the image count and the mean width/height for a directory tree.

    Every file below ``dir_path`` is handed to ``cv2.imread``. Files that
    OpenCV cannot decode, or that raise while being loaded, are ignored.

    Args:
        dir_path: Root directory that is scanned recursively.

    Returns:
        None. The summary (or a "no images" notice) is written to stdout.
    """
    widths = []
    heights = []

    for folder, _, names in os.walk(dir_path):
        for name in names:
            candidate = os.path.join(folder, name)
            try:
                image = cv2.imread(candidate)
                if image is None:
                    # imread signals "not a decodable image" with None.
                    continue
                height, width = image.shape[:2]
            except Exception:
                # Best effort: an unreadable file must not stop the scan.
                continue
            widths.append(width)
            heights.append(height)

    n_images = len(widths)
    if not n_images:
        print(f"No images found in {dir_path!r}")
        return

    mean_width = sum(widths) / n_images
    mean_height = sum(heights) / n_images
    print(f"Dataset: {dir_path}")
    print(f"Number of images: {n_images}")
    print(f"Average width: {mean_width:.2f}")
    print(f"Average height: {mean_height:.2f}")


# Report the image count and average dimensions of the downloaded dataset.
analyze_dataset(path)

Dataset: C:\Users\simon\.cache\kagglehub\datasets\ll01dm\t91-image-dataset\versions\1
Number of images: 182
Average width: 264.12
Average height: 203.58


In [6]:
# Factor by which each image side is divided when building the
# low-resolution copy; it is also embedded in the output folder name below.
downscale_factor = 2

def resize_images(input_dir, output_dir, downscale_factor):
    """
    Write a downscaled copy of every readable image under ``input_dir``.

    The directory layout below ``input_dir`` is mirrored under ``output_dir``
    and each image keeps its file name. Files that OpenCV cannot decode are
    skipped silently. Images that fail while being resized or written are
    reported on stdout and then skipped, so one bad file never aborts the run.

    Args:
        input_dir: Root directory that is scanned recursively for images.
        output_dir: Root directory for the resized copies (created if missing).
        downscale_factor: Positive number each side is divided by. The result
            is truncated to an integer and never drops below 1 pixel.

    Raises:
        ValueError: If ``downscale_factor`` is not greater than zero.
    """
    # Validate up front. A zero factor used to raise ZeroDivisionError inside
    # the per-file try block, where it was swallowed, so the call "succeeded"
    # without writing a single image. A negative factor collapsed every image
    # to 1x1 through the max(1, ...) clamp.
    if not downscale_factor > 0:
        raise ValueError(
            f"downscale_factor must be greater than 0, got {downscale_factor!r}"
        )

    os.makedirs(output_dir, exist_ok=True)

    for root, _, files in os.walk(input_dir):
        if not files:
            # Only directories that contain files get a mirrored sub-directory.
            continue

        # The mirrored sub-directory depends only on `root`, so resolve and
        # create it once per directory instead of once per file.
        output_subdir = os.path.join(output_dir, os.path.relpath(root, input_dir))
        os.makedirs(output_subdir, exist_ok=True)

        for fname in files:
            input_fp = os.path.join(root, fname)
            output_fp = os.path.join(output_subdir, fname)

            try:
                img = cv2.imread(input_fp)
                if img is None:
                    # Not an image (or not decodable): nothing to resize.
                    continue
                h, w = img.shape[:2]
                new_w = max(1, int(w / downscale_factor))
                new_h = max(1, int(h / downscale_factor))
                # NOTE(review): OpenCV's resize documentation recommends
                # INTER_AREA when shrinking. INTER_LINEAR is kept so that
                # regenerated datasets match the ones already produced.
                img_resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
                # imwrite reports some failures through its return value
                # rather than by raising, so check it explicitly.
                if not cv2.imwrite(output_fp, img_resized):
                    print(f"Could not write resized image: {output_fp}")
            except Exception as exc:
                # Best effort: report the problem and carry on with the rest.
                print(f"Skipping {input_fp}: {exc}")
                continue

# Root folder (a relative path) under which derived datasets are stored.
datasets_folder = "datasets"

# The downscale factor is embedded in the folder name ("...-x2" for factor 2).
output_path = os.path.join(datasets_folder, f"t91-image-dataset-x{downscale_factor}")

# Build the low-resolution copy of the downloaded dataset.
resize_images(path, output_path, downscale_factor)
print("Resized images saved to:", output_path)


Resized images saved to: datasets\t91-image-dataset-x2


In [None]:
def rgb_to_ycbcr(img):
    """
    Convert an RGB image to OpenCV's YCrCb color space.

    Mind the channel order: OpenCV emits Y, Cr, Cb, so chroma-red comes
    before chroma-blue despite this function's name. Callers that index the
    chroma planes must treat channel 1 as Cr and channel 2 as Cb.

    Args:
        img: RGB image as numpy array (H, W, 3) with values in [0, 255]

    Returns:
        Array (H, W, 3) with channels ordered (Y, Cr, Cb). For 8-bit input
        OpenCV applies its full-range (JPEG-style) transform with a chroma
        offset of 128, so all three channels span [0, 255]. This is not the
        studio-range 16-235 / 16-240 encoding.
    """
    # OpenCV converts RGB input directly, so no intermediate BGR copy is needed.
    return cv2.cvtColor(img, cv2.COLOR_RGB2YCrCb)

def ycbcr_to_rgb(img):
    """
    Convert an image in OpenCV's YCrCb color space back to RGB.

    The input must use OpenCV's channel order (Y, Cr, Cb), not (Y, Cb, Cr),
    despite this function's name. This is the layout OpenCV's
    ``COLOR_*2YCrCb`` conversions produce.

    Args:
        img: Image as numpy array (H, W, 3) with channels ordered (Y, Cr, Cb)

    Returns:
        RGB image as numpy array (H, W, 3) with values in [0, 255]
    """
    # OpenCV converts straight to RGB, so no intermediate BGR copy is needed.
    return cv2.cvtColor(img, cv2.COLOR_YCrCb2RGB)

def extract_y_channel(img_ycbcr):
    """
    Return the luminance plane (channel 0) of a 3-channel image.

    The Y plane is what the neural network consumes and predicts.

    Args:
        img_ycbcr: Image as numpy array (H, W, 3) whose first channel is Y

    Returns:
        Y channel as a 2-D numpy array (H, W). It is a view into
        ``img_ycbcr``, not a copy.
    """
    luma_index = 0
    luma_plane = img_ycbcr[:, :, luma_index]
    return luma_plane

def reconstruct_rgb(y_channel_hr, img_lr, target_size):
    """
    Reconstruct full RGB image by combining high-res Y channel from the network
    with interpolated chroma channels from the low-resolution image.

    Args:
        y_channel_hr: High-resolution Y channel output from network (H, W) or (H, W, 1).
            Assumed to be on the same 0-255 scale as the 8-bit chroma planes;
            rescale first if the network emits values in [0, 1].
        img_lr: Low-resolution RGB image (h, w, 3)
        target_size: Target size (width, height) for the output image

    Returns:
        Reconstructed high-resolution RGB image (H, W, 3), dtype uint8

    Raises:
        ValueError: If the Y channel's shape does not match ``target_size``.
    """
    # rgb_to_ycbcr goes through OpenCV, whose channel order is Y, Cr, Cb.
    img_lr_ycrcb = rgb_to_ycbcr(img_lr)
    cr_lr = img_lr_ycrcb[:, :, 1]
    cb_lr = img_lr_ycrcb[:, :, 2]

    # Upsample both chroma planes to the high-res size with bilinear interpolation.
    cr_hr = cv2.resize(cr_lr, target_size, interpolation=cv2.INTER_LINEAR)
    cb_hr = cv2.resize(cb_lr, target_size, interpolation=cv2.INTER_LINEAR)

    # Drop a trailing singleton channel axis so Y is a plain (H, W) plane.
    y_hr = np.asarray(y_channel_hr)
    if y_hr.ndim == 3 and y_hr.shape[2] == 1:
        y_hr = y_hr[:, :, 0]

    if y_hr.shape != cr_hr.shape:
        raise ValueError(
            f"Y channel shape {y_hr.shape} does not match target_size "
            f"(width, height) = {tuple(target_size)}"
        )

    # Network outputs are usually floats and may overshoot [0, 255]. A bare
    # astype(uint8) truncates and wraps such values (256 -> 0, -1 -> 255),
    # which shows up as speckle artifacts, so round and clip first.
    y_hr = np.clip(np.rint(y_hr.astype(np.float64)), 0, 255).astype(np.uint8)

    # Reassemble in OpenCV's (Y, Cr, Cb) order, which ycbcr_to_rgb expects.
    img_hr_ycrcb = np.stack([y_hr, cr_hr, cb_hr], axis=2).astype(np.uint8)

    # Convert back to RGB
    img_hr_rgb = ycbcr_to_rgb(img_hr_ycrcb)

    return img_hr_rgb


# Example usage with OpenCV:
# 1. Convert high-res training image to YCbCr and extract Y channel
#    img_hr_bgr = cv2.imread("high_res.png")
#    img_hr_rgb = cv2.cvtColor(img_hr_bgr, cv2.COLOR_BGR2RGB)
#    img_hr_ycbcr = rgb_to_ycbcr(img_hr_rgb)
#    y_hr = extract_y_channel(img_hr_ycbcr)  # This is the ground truth for training
#
# 2. Convert low-res input to YCbCr and extract Y channel for network input
#    h, w = img_hr_rgb.shape[:2]
#    img_lr_rgb = cv2.resize(img_hr_rgb, (w//2, h//2), interpolation=cv2.INTER_LINEAR)
#    img_lr_ycbcr = rgb_to_ycbcr(img_lr_rgb)
#    y_lr = extract_y_channel(img_lr_ycbcr)  # This is the input to the network
#
# 3. After network inference, reconstruct RGB from Y channel output
#    y_output = model.predict(y_lr)  # Network output (high-res Y channel)
#    img_reconstructed = reconstruct_rgb(y_output, img_lr_rgb, (w, h))
