### 1. Read in target pictures

In [27]:
from pathlib import Path

def read_images(
    directory: Path = Path("./"),
    suffixes: tuple[str, ...] = (),
) -> list[Path]:
    """
    Return a sorted list of the files directly inside `directory`
    (non-recursive), excluding any .ipynb files.

    Args:
        directory: Folder to scan. A plain string is accepted as well.
        suffixes: Optional allowlist of file extensions, e.g.
            (".jpg", ".png"). Matching is case-insensitive and the leading
            dot is optional. When empty (the default) every non-notebook
            file is returned.

    Returns:
        Absolute paths of the matching files, sorted.

    Raises:
        FileNotFoundError / NotADirectoryError: propagated from
            Path.iterdir() when `directory` cannot be listed.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(suffixes, str):
        suffixes = (suffixes,)
    wanted = {
        s.lower() if s.startswith(".") else f".{s.lower()}"
        for s in suffixes
    }

    directory = Path(directory).resolve()
    paths = []
    for p in directory.iterdir():
        if not p.is_file():
            continue
        suffix = p.suffix.lower()
        if suffix == ".ipynb":
            continue
        # An empty allowlist means "no extension filtering".
        if wanted and suffix not in wanted:
            continue
        paths.append(p)
    return sorted(paths)

In [28]:
# Collect the input file paths from the current working directory
# (read_images defaults to "./").
targets: list[Path] = read_images()

### 2. Image processing

Pad to a square and resize to 512x512 for ControlNet

In [29]:
import cv2
from matplotlib import pyplot as plt
import numpy as np

In [30]:
def to_512(img_path: str, interpolation=cv2.INTER_AREA):
    """
    Load an image, pad it to a square canvas (keeping the original centered),
    then resize to 512x512.

    The file is read with cv2.IMREAD_UNCHANGED, so the channel layout of the
    file is preserved: grayscale files stay 2-D, and an alpha channel is kept
    when present.

    Args:
        img_path (str): Path to the input image. pathlib.Path objects are
            accepted as well; the value is converted with str() before it
            is handed to OpenCV.
        interpolation (int): OpenCV interpolation flag:
            - cv2.INTER_AREA for downscaling,
            - cv2.INTER_CUBIC / cv2.INTER_LINEAR for upscaling.

    Returns:
        np.ndarray: The 512x512 image, with the same number of channels as
        the loaded file (2-D gray, or 3/4 channels in OpenCV channel order).

    Raises:
        FileNotFoundError: If OpenCV could not load the file.
    """
    # Load image (preserve alpha if present)
    img = cv2.imread(str(img_path), cv2.IMREAD_UNCHANGED)
    if img is None:
        raise FileNotFoundError(f"Could not load image at '{img_path}'")

    h, w = img.shape[:2]
    if h == w:
        # Already square: nothing to pad
        square = img
    else:
        # Side of the square canvas and the padding needed on each axis
        size = max(h, w)
        top = (size - h) // 2
        left = (size - w) // 2
        # The far side takes the remainder so odd differences still add up
        bottom = size - h - top
        right = size - w - left

        # Grayscale files load as 2-D arrays and have no channel axis, so
        # the channel count cannot be read from shape[2] unconditionally.
        channels = 1 if img.ndim == 2 else img.shape[2]
        # Pad with zeros in every channel (black; fully transparent if alpha)
        border_color = [0] * channels

        square = cv2.copyMakeBorder(
            img,
            top, bottom, left, right,
            borderType=cv2.BORDER_CONSTANT,
            value=border_color
        )

    # Resize square to 512x512
    return cv2.resize(square, (512, 512), interpolation=interpolation)

Convert to greyscale for input into edge detection code

In [None]:
def to_grayscale(img):
    """
    Convert img to a 2-D (HxW) single-channel grayscale image, handling:
     - already gray (HxW, returned unchanged)
     - already gray with a channel axis (HxWx1, returned as an HxW view)
     - BGR (HxWx3)
     - BGRA (HxWx4)

    Raises:
        ValueError: if img is not 2-D/3-D or has an unsupported channel count.
    """
    if img.ndim not in (2, 3):
        raise ValueError(f"Unexpected image dimensions: {img.ndim}")

    if img.ndim == 2:
        # already gray
        return img

    chans = img.shape[2]
    if chans == 1:
        # Drop the singleton channel axis so the result is always 2-D;
        # downstream code treats any 3-D array as a colour image.
        return img[:, :, 0]
    elif chans == 3:
        # BGR → GRAY
        return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    elif chans == 4:
        # BGRA → GRAY
        return cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
    else:
        raise ValueError(f"Unexpected channel count: {chans}")

Use CLAHE to boost details such as wood panelling

In [32]:
def CLAHE(img, clip_limit: float = 2.0, tile_grid_size: tuple[int, int] = (8, 8)):
    """
    Use CLAHE to boost contrast in detailed areas of the image.
    Standard histogram equalisation is too noisy.

    Args:
        img: Single-channel grayscale image to enhance.
        clip_limit: Contrast-limiting threshold, passed to
            cv2.createCLAHE as clipLimit.
        tile_grid_size: Grid of tiles that are equalised independently,
            passed to cv2.createCLAHE as tileGridSize.

    Returns:
        The contrast-enhanced image produced by the CLAHE object's apply().
    """
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
    return clahe.apply(img)

Perform Prewitt edge detection to create the corresponding training sketch

In [33]:
def prewitt_edge_detection(image, threshold):
    """
    Build a thresholded Prewitt edge map of an image.

    The horizontal and vertical Prewitt responses are combined into a
    gradient magnitude, min-max scaled to 0-255 as 8-bit values, and every
    pixel whose scaled value does not exceed `threshold` is set to 0.

    Args:
        image: numpy array - the input image. A 3-D array is treated as a
            colour image in RGB channel order and converted to grayscale;
            any other array is used as-is.
        threshold: int - scaled magnitudes must be strictly greater than
            this value to be kept.

    Returns:
        numpy.ndarray: 8-bit edge image; kept pixels hold their scaled
        magnitude, all others are 0.
    """
    # Collapse colour input to a single channel before filtering
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) if image.ndim == 3 else image

    # Prewitt masks: column differences (x) and row differences (y)
    kernel_x = np.array([[1, 0, -1]] * 3)
    kernel_y = np.array([[1] * 3, [0] * 3, [-1] * 3])

    # Filter in float64 so negative responses are not clipped
    grad_x, grad_y = (
        cv2.filter2D(gray, cv2.CV_64F, kernel) for kernel in (kernel_x, kernel_y)
    )

    # Gradient magnitude from the two directional responses
    magnitude = np.sqrt(np.square(grad_x) + np.square(grad_y))

    # Stretch to the 0-255 range and convert to 8-bit
    scaled = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U)

    # Zero out everything at or below the threshold
    return np.where(scaled > threshold, scaled, np.zeros_like(scaled))

Create the training dataset

In [34]:
# Preview the bare file name of the first target; Path.name works with
# either path separator, unlike splitting the string on a backslash.
targets[0].name

'01.jpg'

In [None]:
# Read each image from the image path list
# Perform the following steps:
#   1. Pad and resize to 512x512 to fit ControlNet
#   2. Greyscale
#   3. CLAHE to boost local contrast whilst preserving fine details
#   4. Prewitt edge detection
# The inverted edge map is written as the sketch (input) and the resized
# image as its target; both are saved as PNG under the source file's stem.
# Note: sources that share a stem (e.g. 01.jpg and 01.png) overwrite each other.
sketch_dir = Path("./sketch")
image_dir = Path("./image")

# cv2.imwrite does not create missing folders, so make sure they exist
for out_dir in (sketch_dir, image_dir):
    out_dir.mkdir(parents=True, exist_ok=True)

# Threshold applied to the 0-255 edge magnitudes
edge_threshold = 5

for image in targets:
    resized = to_512(str(image))
    grey = to_grayscale(resized)
    enhanced = CLAHE(grey)
    edges = prewitt_edge_detection(enhanced, edge_threshold)

    # turn into sketch (dark lines on a white background)
    sketch = cv2.bitwise_not(edges)

    # file name without directory or extension, whatever the separator or
    # original image format
    filename = Path(image).stem

    outputs = (
        (sketch_dir / f"{filename}.png", sketch),   # input file
        (image_dir / f"{filename}.png", resized),   # output target pair
    )
    for out_path, data in outputs:
        # imwrite signals failure through its return value rather than raising
        if not cv2.imwrite(str(out_path), data):
            raise OSError(f"Could not write '{out_path}'")
    