In [None]:
# src/preprocessing_image
"""
Preprocess image data to prepare for training image recognition models.

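Function list:
-------
1. preprocess_image: Load images from multiple folders and assign corresponding labels.
2. image_to_array: Convert the train/test output of preprocess_image into NumPy arrays.
3. image_data_normalizer: Rescale image arrays to the 0..1 or -1..+1 range.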
"""
import os
import tensorflow as tf
import numpy as np

# pulling variables from config
import json

# Load the project configuration once, at import time. The path is relative,
# so it is resolved against the current working directory of the process that
# imports this module.
with open('config.json') as f:
    cfg = json.load(f)

# Image dimensions from the "data_parameters" section of the config; they are
# used as the default resize target of preprocess_image.
img_rows = cfg["data_parameters"]["img_rows"]
img_cols = cfg["data_parameters"]["img_cols"]

def preprocess_image(paths_and_labels, image_size=(img_rows,img_cols)):
    """
    Load JPEG images from multiple folders and assign corresponding labels.

    Every entry directly inside each folder whose name ends in ``.jpg`` or
    ``.jpeg`` (case-insensitive) is read, decoded as a 3-channel JPEG and
    resized to ``image_size``. All other entries are skipped. Entries are
    visited in sorted name order so that the returned list is reproducible
    across runs and machines (``os.listdir`` itself makes no ordering
    guarantee).

    Parameters
    ----------
    paths_and_labels : list of tuples
        Each tuple = (folder_path, label). The label is attached unchanged to
        every image loaded from that folder.
    image_size : tuple
        Resize target (height, width). Defaults to the values read from the
        config file when this module was imported.

    Returns
    -------
    data : list of (image_tensor, label)
        One pair per loaded image, grouped by folder in the order the folders
        were given.

    Example usage:
    -------
    train_folder = [
        (train_hot_dog_path, "hot_dog"),
        (train_not_hot_dog_path, "not_hot_dog")
    ]
    train_data = preprocess_image(train_folder, image_size=(50, 50))
    """
    jpeg_suffixes = ('.jpeg', '.jpg')
    data = []
    for folder_path, label in paths_and_labels:
        # sorted(): os.listdir returns names in arbitrary order.
        for file in sorted(os.listdir(folder_path)):
            if not file.lower().endswith(jpeg_suffixes):  # case-insensitive
                continue
            img_path = os.path.join(folder_path, file)
            img = tf.io.read_file(img_path)
            img = tf.image.decode_jpeg(img, channels=3)
            img = tf.image.resize(img, image_size)
            data.append((img, label))
    return data

def _samples_to_arrays(samples):
    """Split (image, label) pairs into an image array and a label array."""
    pairs = list(samples)
    if not pairs:
        # zip(*[]) yields nothing to unpack, so an empty split is handled
        # explicitly and produces two empty arrays.
        return np.array([]), np.array([])
    images, labels = zip(*pairs)
    return np.array(images), np.array(labels)


def image_to_array(train_data:list,test_data:list):
    """
    Convert lists of (image, label) pairs into NumPy arrays.

    Parameters
    ---------
    train_data : list
        Training samples as (image, label) pairs (output of preprocess_image).
    test_data : list
        Testing samples as (image, label) pairs (output of preprocess_image).

    Return
    ------
    Dict with keys 'x_train', 'y_train', 'x_test', 'y_test'. The x arrays
    hold the images stacked along a new first axis, and the y arrays hold
    the labels in the same order. An empty input list yields empty arrays
    for that split instead of raising.
    """
    x_train, y_train = _samples_to_arrays(train_data)
    x_test, y_test = _samples_to_arrays(test_data)

    return {'x_train': x_train,
            'y_train': y_train,
            'x_test': x_test,
            'y_test': y_test}

def image_data_normalizer(x_train,x_test,scale_type):
    """
    Normalize (rescale) image arrays to prepare them for model training.

    Both arrays are first copied to float32, so the caller's arrays are never
    modified, and then rescaled with the same method.

    Parameters
    ---------
    x_train: training data in form of arrays (output of image_to_array)
    x_test: testing data in form of arrays (output of image_to_array)
    scale_type: method of scaling (options: "0-1" and "-1-+1")
        "0-1"   divides by 255
        "-1-+1" subtracts 127.5 and divides by 127.5

    Return
    -------
    Dict with keys 'x_train' and 'x_test' holding the float32 arrays. For an
    unrecognized scale_type a message is printed and the arrays are returned
    converted to float32 but otherwise unscaled.
    """
    # astype always returns a new array, so the in-place ops below are safe.
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')

    if scale_type == "0-1":
        x_train /= 255
        x_test /= 255
    elif scale_type == "-1-+1":
        # Train and test must be scaled identically; otherwise the model is
        # evaluated on inputs in a different range than it was trained on.
        x_train = (x_train - 127.5) / 127.5
        x_test = (x_test - 127.5) / 127.5
    else:
        print("unregconized scale type")

    return {'x_train': x_train,
            'x_test': x_test}

