# 1. Imports and Setup

In this cell, we import all the necessary libraries and set up the working environment.


In [None]:
# Import necessary libraries
import os
import json
import numpy as np
import pandas as pd
import seaborn as sns
from PIL import Image

# TensorFlow and Keras modules
import tensorflow as tf
from tensorflow.keras import layers, models, Input, Model, regularizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, CSVLogger, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator, image_dataset_from_directory
from tensorflow.keras.layers import Conv2D, MaxPooling2D, UpSampling2D, Concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy

# scikit-learn for data splitting
from sklearn.model_selection import train_test_split

2024-07-28 10:07:57.338679: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# Function to load configuration from a JSON file
def load_config(filename: str = "config-Unet.json") -> dict:
    """
    Loads the configuration from a JSON file.

    Parameters:
    - filename (str): The path to the configuration file.

    Returns:
    - dict: Configuration parameters loaded from the JSON file.

    Raises:
    - OSError: If the file cannot be opened (e.g. it does not exist).
    - json.JSONDecodeError: If the file content is not valid JSON.
    """
    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # default locale encoding.
    with open(filename, 'r', encoding='utf-8') as f:
        return json.load(f)

# Load the configuration once, using the default file name ("config-Unet.json").
# Later cells read their settings from config["unet"]["training"].
config = load_config()

In [None]:
# Image paths
# NOTE(review): data_dir is a machine-specific absolute path; it has to be edited
# before this notebook can run on another machine.
data_dir = "/media/neurone-pc6/Volume/Michele/Prog_GAF_Michele/pythonProject/data/GADF"
label_0_folder = os.path.join(data_dir, "Label_0")
label_1_folder = os.path.join(data_dir, "Label_1")

# Directory paths for saving metadata, results, and models
# NOTE(review): these relative directories are not created in this cell — confirm
# they exist before training, since logger_path below points inside results_dir.
metadata_dir = 'metadata'
results_dir = 'results'
model_dir = 'models'

# Paths for logger and model saving
logger_path = os.path.join(results_dir, 'training_u-net_32x32_gadf.log')
model_save_path = os.path.join(model_dir, 'u-net_gadf.h5')


In [None]:
# Training parameters
# Batch size and epoch count come from the "unet" -> "training" section of the config.
BATCH_SIZE = config["unet"]["training"]["batch_size"]
# NOTE(review): recent TensorFlow releases also expose this as tf.data.AUTOTUNE.
AUTOTUNE = tf.data.experimental.AUTOTUNE
# (height, width, channels); the same 32x32x1 shape is hard-coded in
# load_and_preprocess_image.
INPUT_SHAPE = (32, 32, 1)
EPOCHS = config["unet"]["training"]["epochs"]


# Define steps per epoch and validation steps based on dataset cardinality
# NOTE(review): train_ds and val_ds are only created in the later "Dataset Creation"
# cell, so running the cells top-to-bottom raises NameError here. Neither constant
# is passed to model.fit in the training cell — consider moving or removing them.
STEPS_PER_EPOCH = tf.data.experimental.cardinality(train_ds).numpy()
VALIDATION_STEPS = tf.data.experimental.cardinality(val_ds).numpy()

# Define optimizer, loss, and metrics from configuration
# The optimizer is resolved by class name, with the learning rate taken from the config.
OPTIMIZER = tf.keras.optimizers.get({
    "class_name": config["unet"]["training"]["optimizer"],
    "config": {
        "learning_rate": config["unet"]["training"]["learning_rate"]
    }
})

# LOSS and METRICS are forwarded unchanged to model.compile by train_unet_model.
LOSS = config["unet"]["training"]["loss"]
METRICS = config["unet"]["training"]["metrics"]

# Configure callbacks for training
# Every callback takes its monitored quantity and thresholds from the config.
early_stopping = EarlyStopping(
    monitor=config["unet"]["training"]["early_stopping"]["monitor"],
    patience=config["unet"]["training"]["early_stopping"]["patience"],
    restore_best_weights=config["unet"]["training"]["early_stopping"]["restore_best_weights"]
)

# The checkpoint file path comes from the config and is independent of
# model_save_path, which is used for the final save.
model_checkpoint = ModelCheckpoint(
    filepath=config["unet"]["training"]["model_checkpoint"]["filepath"],
    monitor=config["unet"]["training"]["model_checkpoint"]["monitor"],
    save_best_only=config["unet"]["training"]["model_checkpoint"]["save_best_only"]
)

reduce_lr = ReduceLROnPlateau(
    monitor=config["unet"]["training"]["lr_scheduler"]["monitor"],
    factor=config["unet"]["training"]["lr_scheduler"]["factor"],
    patience=config["unet"]["training"]["lr_scheduler"]["patience"]
)

# Per-epoch training log written to logger_path (inside results_dir).
csv_logger = CSVLogger(logger_path)


# 2. Utility Functions

In this section, we define the utility functions for building the DataFrame of image paths and labels and for loading and preprocessing the images.


In [None]:
# Function to create a pandas DataFrame with image paths and labels
def create_dataframe(label_0_folder: str, label_1_folder: str) -> pd.DataFrame:
    """
    Creates a DataFrame containing image paths and labels.

    Every entry of `label_0_folder` is listed first (label 0), followed by every
    entry of `label_1_folder` (label 1). Within each folder the entries are
    sorted by name: `os.listdir` returns them in arbitrary order, and a stable
    row order is required for a seeded train/validation/test split to be
    reproducible across runs and machines.

    Parameters:
    - label_0_folder (str): Directory containing images for label 0.
    - label_1_folder (str): Directory containing images for label 1.

    Returns:
    - pd.DataFrame: A DataFrame with columns 'image_path' and 'label'.

    Raises:
    - OSError: If either directory cannot be listed.
    """
    rows = []
    # The position of the folder in the tuple is its integer class label.
    for label, folder in enumerate((label_0_folder, label_1_folder)):
        rows.extend(
            (os.path.join(folder, filename), label)
            for filename in sorted(os.listdir(folder))
        )

    return pd.DataFrame(rows, columns=["image_path", "label"])

# Function to load and preprocess an image
def load_and_preprocess_image(image_path: tf.Tensor, label: int) -> tuple:
    """
    Loads an image stored as a NumPy file and returns it as both input and target.

    The array is read with `np.load`, cast to float32, rescaled with
    `(image + 1) / 2`, and given a trailing channel axis when it is 2-D.

    Parameters:
    - image_path (tf.Tensor): Tensor containing the path to the image file.
    - label (int): Label associated with the image. It is accepted so the function
      can be mapped over (path, label) datasets, but it is not used.

    Returns:
    - tuple: `(image, image)` — the preprocessed image twice. The label is NOT
      returned; presumably the duplicate serves as the reconstruction target
      for the network (TODO confirm against the configured loss).
    """
    def _load_image(image_path):
        # tf.numpy_function hands the path over as bytes, hence the decode.
        image = np.load(image_path.decode('utf-8')).astype(np.float32)
        # Normalize image values from [-1, 1] to [0, 1]
        # (assumes the stored values lie in [-1, 1] — not checked here)
        image = (image + 1) / 2.0
        if image.ndim == 2:  # If the image is grayscale
            image = np.expand_dims(image, axis=-1)  # Add channel dimension
        return image

    image = tf.numpy_function(_load_image, [image_path], tf.float32)
    # The static shape is hard-coded to 32x32x1 here.
    image.set_shape([32, 32, 1])  # Explicitly set shape for TensorFlow compatibility
    return image, image

# 3. U-Net Model Creation

In this section, we define the U-Net model with its encoder and decoder blocks.


In [None]:
def unet(input_shape: tuple[int, int, int]) -> Model:
    """
    Builds a small U-Net-style encoder/decoder network.

    The encoder has two conv-conv-pool stages (32 and 64 filters) followed by a
    128-filter bottleneck. The decoder upsamples twice, concatenating the
    matching encoder features at each level, and ends in a 1x1 sigmoid
    convolution with a single output channel.

    Parameters:
    - input_shape (tuple[int, int, int]): Shape of the input image (height, width, channels).

    Returns:
    - Model: The Keras model. It is NOT compiled here; the caller must compile it.
    """
    def double_conv(tensor, filters):
        # Two stacked 3x3 ReLU convolutions with 'same' padding.
        for _ in range(2):
            tensor = Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)
        return tensor

    def up_and_merge(tensor, skip, filters):
        # Upsample by 2, apply a 2x2 convolution, then join with the skip features.
        tensor = UpSampling2D(size=(2, 2))(tensor)
        tensor = Conv2D(filters, (2, 2), activation='relu', padding='same')(tensor)
        return Concatenate()([tensor, skip])

    inputs = Input(shape=input_shape)

    # Encoder
    enc1 = double_conv(inputs, 32)
    down1 = MaxPooling2D(pool_size=(2, 2))(enc1)

    enc2 = double_conv(down1, 64)
    down2 = MaxPooling2D(pool_size=(2, 2))(enc2)

    bottleneck = double_conv(down2, 128)

    # Decoder
    dec2 = double_conv(up_and_merge(bottleneck, enc2, 64), 64)
    dec1 = double_conv(up_and_merge(dec2, enc1, 32), 32)

    outputs = Conv2D(1, (1, 1), activation='sigmoid')(dec1)

    # Create model
    return Model(inputs=[inputs], outputs=[outputs])


# 4. Dataset Creation

In this section, we create the training, validation, and test datasets from the previously created dataframe.


In [None]:
# Creation of the pandas DataFrame with image paths and labels
df = create_dataframe(label_0_folder, label_1_folder)
df["label"] = df["label"].astype(np.float32)  # Convert labels to float32 for compatibility with TensorFlow

# Split into training, validation, and test sets
# 80% of the rows go to training; the remaining 20% is halved into validation and
# test (10% each). The seed is fixed and no `stratify` argument is passed, so class
# proportions may differ slightly between the splits.
train_df, temp_df = train_test_split(df, test_size=0.2, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# Function to create a TensorFlow dataset from a DataFrame
def create_tf_dataset(df: pd.DataFrame, batch_size: int, autotune: int) -> tf.data.Dataset:
    """
    Builds a batched, prefetched TensorFlow dataset from a DataFrame of image paths.

    Each (path, label) row is passed through `load_and_preprocess_image`; the
    elements are then grouped into batches and prefetched. No shuffling is
    applied, so elements keep the DataFrame's row order.

    Parameters:
    - df (pd.DataFrame): DataFrame with columns 'image_path' and 'label'.
    - batch_size (int): Number of samples per batch.
    - autotune (int): Value used both for the number of parallel map calls and
      for the prefetch buffer size.

    Returns:
    - tf.data.Dataset: The batched and prefetched dataset.
    """
    paths = df["image_path"].values
    labels = df["label"].values

    dataset = tf.data.Dataset.from_tensor_slices((paths, labels))
    dataset = dataset.map(load_and_preprocess_image, num_parallel_calls=autotune)
    dataset = dataset.batch(batch_size)
    return dataset.prefetch(autotune)

# Create TensorFlow datasets for training, validation, and testing
# create_tf_dataset applies no shuffle step, so each dataset iterates in the row
# order of its DataFrame.
train_ds = create_tf_dataset(train_df, BATCH_SIZE, AUTOTUNE)
val_ds = create_tf_dataset(val_df, BATCH_SIZE, AUTOTUNE)
test_ds = create_tf_dataset(test_df, 1, AUTOTUNE)  # Batch size of 1 for test set


# 5. Model Training and Evaluation

We will now train the U-Net model and evaluate it on the test data.


In [None]:
def train_unet_model(model: Model, train_ds: tf.data.Dataset, val_ds: tf.data.Dataset, 
                     epochs: int, optimizer: tf.keras.optimizers.Optimizer, 
                     loss: str, metrics: list[str], callbacks: list[tf.keras.callbacks.Callback]) -> tf.keras.callbacks.History:
    """
    Compiles the given model and fits it on the training dataset.

    Parameters:
    - model (Model): The U-Net model to train; it is compiled in place.
    - train_ds (tf.data.Dataset): The training dataset.
    - val_ds (tf.data.Dataset): The validation dataset, evaluated during fitting.
    - epochs (int): Number of epochs to train the model.
    - optimizer (tf.keras.optimizers.Optimizer): The optimizer passed to `compile`.
    - loss (str): The loss function passed to `compile`.
    - metrics (list[str]): Metrics passed to `compile`.
    - callbacks (list[tf.keras.callbacks.Callback]): Callbacks passed to `fit`.

    Returns:
    - tf.keras.callbacks.History: The history object returned by `model.fit`.
    """
    # Compilation happens here, so callers hand in an uncompiled model.
    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

    fit_options = {
        "epochs": epochs,
        "validation_data": val_ds,
        "callbacks": callbacks,
    }
    return model.fit(train_ds, **fit_options)

# Build the U-Net for the configured input shape. The model is not compiled here:
# train_unet_model calls model.compile before fitting.
model = unet(INPUT_SHAPE)

# Train the U-Net model using the defined function
# The optimizer, loss, metrics and all four callbacks come from the setup cell.
history = train_unet_model(
    model=model,
    train_ds=train_ds,
    val_ds=val_ds,
    epochs=EPOCHS,
    optimizer=OPTIMIZER,
    loss=LOSS,
    metrics=METRICS,
    callbacks=[early_stopping, model_checkpoint, reduce_lr, csv_logger]
)


Epoch 1/30


2024-07-24 18:35:05.374863: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype float and shape [128000]
	 [[{{node Placeholder/_1}}]]
2024-07-24 18:35:05.375038: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype float and shape [128000]
	 [[{{node Placeholder/_1}}]]
2024-07-24 18:35:05.976153: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8600
2024-07-24 18:35:06.054256: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory




2024-07-24 19:17:36.344561: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype float and shape [16000]
	 [[{{node Placeholder/_1}}]]
2024-07-24 19:17:36.344969: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype float and shape [16000]
	 [[{{node Placeholder/_1}}]]


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


# 7. Test Evaluation and Saving Results

Evaluate the model on the test set and save the results.


In [None]:
def evaluate_and_save_results(model: Model, test_ds: tf.data.Dataset, metrics: list[str], results_dir: str) -> None:
    """
    Evaluates the model on the test dataset, prints the results and saves them to a file.

    The same report is printed and written: one "Test Loss" line followed by one
    line per entry of `metrics`, labelled with that metric's own name. The
    second value is not assumed to be an accuracy.

    Parameters:
    - model (Model): The trained model to evaluate.
    - test_ds (tf.data.Dataset): The test dataset.
    - metrics (list[str]): Names of the metrics the model was compiled with, in
      the same order; used to label the values returned by `model.evaluate`.
    - results_dir (str): Directory to save the evaluation results. It is created
      if it does not exist.
    """
    # Evaluate the model on the test dataset
    test_results = model.evaluate(test_ds)

    # Keras returns a bare scalar (the loss) when the model has no metrics;
    # normalise to a sequence so the indexing below works in both cases.
    if not isinstance(test_results, (list, tuple)):
        test_results = [test_results]

    # Build the report once so the console and the file cannot disagree.
    report_lines = [f"Test Loss: {test_results[0]}"]
    report_lines.extend(
        f"Test {metric}: {value}"
        for metric, value in zip(metrics or [], test_results[1:])
    )

    # Print the results to the console
    for line in report_lines:
        print(line)

    # Save the results to a text file
    if results_dir:
        os.makedirs(results_dir, exist_ok=True)
    test_results_path = os.path.join(results_dir, 'test_results_u-net_32x32_gadf.txt')
    with open(test_results_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(report_lines) + "\n")

# Evaluate the model and save the results
# Uses test_ds (built with batch size 1); the report file is written inside results_dir.
evaluate_and_save_results(model, test_ds, METRICS, results_dir)

2024-07-24 20:21:01.362260: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype float and shape [16000]
	 [[{{node Placeholder/_1}}]]
2024-07-24 20:21:01.362473: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype float and shape [16000]
	 [[{{node Placeholder/_1}}]]


Test Loss: 9.261943887395319e-06
Test Accuracy: 9.261943887395319e-06


# 6. Model Saving

After training, we save the trained model.


In [None]:
def save_model(model: Model, model_save_path: str) -> None:
    """
    Writes the model to disk and reports where it was stored.

    Parameters:
    - model (Model): The trained model to save.
    - model_save_path (str): Destination path handed to `model.save`.
    """
    model.save(model_save_path)

    # Confirm the destination on the console.
    confirmation = f"Model saved to {model_save_path}"
    print(confirmation)

# Save the trained model to model_save_path (defined in the setup cell)
save_model(model, model_save_path)

Model saved to models/u-net_32x32.h5
