In [1]:
import pathlib
import shutil
from PIL import Image
import tensorflow as tf
from datetime import datetime, timedelta

2025-04-09 13:40:25.282119: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-09 13:40:25.284720: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-09 13:40:25.292326: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744198825.305355   57875 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744198825.309512   57875 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1744198825.319224   57875 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linkin

# Import the dataset

In [2]:
dataset_name = "facades"

In [3]:
_URL = f'http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/{dataset_name}.tar.gz'

path_to_zip = tf.keras.utils.get_file(
    fname=f"{dataset_name}.tar.gz",
    origin=_URL,
    extract=True)

path_to_zip  = pathlib.Path(path_to_zip)

PATH = path_to_zip/dataset_name
print(str(PATH))

/home/anna/.keras/datasets/facades_extracted/facades


# Resize pictures to desired ratio

In [4]:
# Desired dimensions for the target images:
TARGET_WIDTH = 256      # Width is set to 128 pixels.
TARGET_HEIGHT = 128     # Height remains 256 pixels.

# Global variables used by the date generator.
global_date_counter = 0
base_date = datetime(2020, 1, 1)


In [5]:


def get_unique_date():
    """
    Returns a unique date string in the format YYYY-MM-DD based on a global counter.
    Each call advances the date by one day from the base date.
    """
    global global_date_counter
    unique_date = (base_date + timedelta(days=global_date_counter)).strftime("%Y-%m-%d")
    global_date_counter += 1
    return unique_date

def clear_output_directory(output_dir):
    """
    Remove all contents within the output directory.
    If the directory exists, it is removed entirely and then re-created.
    """
    output_path = pathlib.Path(output_dir)
    if output_path.exists():
        shutil.rmtree(output_path)
    output_path.mkdir(parents=True, exist_ok=True)
    print(f"Cleared and recreated output directory: {output_path.resolve()}")

def process_image(image_path, target_folder, red_folder, green_folder, blue_folder):
    """
    Process a single image:
      - Open the image and convert it to RGB.
      - Split the image horizontally: the left half is the target image and the right half is the input.
      - Resize each output image to 256x128.
      - Generate one unique date for the image and use it for all channel outputs.
      - Save the images with a naming pattern:
          <parent_folder>_<YYYY-MM-DD>.jpg
    """
    try:
        img = Image.open(image_path).convert("RGB")
    except Exception as e:
        print(f"Error opening {image_path}: {e}")
        return

    width, height = img.size
    half_width = width // 2

    # Crop the left half as the target image, and the right half as the input image.
    target_img = img.crop((0, 0, half_width, height))
    input_img = img.crop((half_width, 0, width, height))

    # Resize the images to 256x128.
    target_resized = target_img.resize((TARGET_WIDTH, TARGET_HEIGHT))
    
    # For the input image, split into red, green, blue channels and resize them.
    r_channel, g_channel, b_channel = input_img.split()
    red_resized   = r_channel.resize((TARGET_WIDTH, TARGET_HEIGHT))
    green_resized = g_channel.resize((TARGET_WIDTH, TARGET_HEIGHT))
    blue_resized  = b_channel.resize((TARGET_WIDTH, TARGET_HEIGHT))
    
    # Get the name of the immediate parent folder (e.g., "test", "train", or "val").
    parent_folder = image_path.parent.name
    img_type = "png"
    unique_date = get_unique_date()
    
    # Define output file paths with parent folder prefix.
    target_path = target_folder / f"scaled-facade_{unique_date}.{img_type}"
    red_path    = red_folder / f"scaled-facade_{unique_date}.{img_type}"
    green_path  = green_folder / f"scaled-facade_{unique_date}.{img_type}"
    blue_path   = blue_folder / f"scaled-facade_{unique_date}.{img_type}"
    
    target_resized.save(target_path)
    red_resized.save(red_path)
    green_resized.save(green_path)
    blue_resized.save(blue_path)
    
    print(f"Processed: {image_path}")

def process_images_in_directory(source_dir, output_dir):
    """
    Recursively process all image files (jpg, jpeg, png) from all folders in source_dir.
    Saves outputs into output_dir under:
      - target_images/  : Resized target images.
      - input_red/      : Grayscale images from the red channel.
      - input_green/    : Grayscale images from the green channel.
      - input_blue/     : Grayscale images from the blue channel.
    """

    clear_output_directory(output_dir)

    source_path = pathlib.Path(source_dir)
    
    # Define output directories.
    target_folder = pathlib.Path(output_dir) / "target"
    red_folder = pathlib.Path(output_dir) / "red"
    green_folder = pathlib.Path(output_dir) / "green"
    blue_folder = pathlib.Path(output_dir) / "blue"
    
    # Create output directories if they do not exist.
    for folder in (target_folder, red_folder, green_folder, blue_folder):
        folder.mkdir(parents=True, exist_ok=True)
    
    # Valid image file extensions (modify if needed).
    valid_extensions = {".jpg", ".jpeg", ".png"}
    
    # Recursively find all image files.
    image_files = [p for p in source_path.rglob("*") if p.suffix.lower() in valid_extensions]
    
    print(f"Scanning directory: {source_path}")
    print(f"Found {len(image_files)} image files.")
    # For debugging: list found image paths.
    for p in image_files:
        print(p)
    
    if not image_files:
        print("No image files found. Please verify that the source directory contains images with valid extensions.")
    
    for image_path in image_files:
        process_image(image_path, target_folder, red_folder, green_folder, blue_folder)


In [6]:
source_directory = str(PATH)
output_directory = "/home/anna/msc_oppgave/fish-forecast/facades_dataset"

process_images_in_directory(source_directory, output_directory)

Cleared and recreated output directory: /home/anna/msc_oppgave/fish-forecast/facades_dataset
Scanning directory: /home/anna/.keras/datasets/facades_extracted/facades
Found 606 image files.
/home/anna/.keras/datasets/facades_extracted/facades/test/37.jpg
/home/anna/.keras/datasets/facades_extracted/facades/test/14.jpg
/home/anna/.keras/datasets/facades_extracted/facades/test/12.jpg
/home/anna/.keras/datasets/facades_extracted/facades/test/88.jpg
/home/anna/.keras/datasets/facades_extracted/facades/test/30.jpg
/home/anna/.keras/datasets/facades_extracted/facades/test/73.jpg
/home/anna/.keras/datasets/facades_extracted/facades/test/5.jpg
/home/anna/.keras/datasets/facades_extracted/facades/test/15.jpg
/home/anna/.keras/datasets/facades_extracted/facades/test/45.jpg
/home/anna/.keras/datasets/facades_extracted/facades/test/35.jpg
/home/anna/.keras/datasets/facades_extracted/facades/test/28.jpg
/home/anna/.keras/datasets/facades_extracted/facades/test/77.jpg
/home/anna/.keras/datasets/facad