## Modello

In [None]:
from keras import layers
import tensorflow as tf


class SpatialAttention(layers.Layer):
    """
    Spatial attention layer that re-weights every spatial location of a feature map.

    The input is reduced along its last axis with a mean and a max, the two
    single-channel maps are concatenated, passed through an intermediate
    ReLU convolution and then through a single-filter sigmoid convolution.
    The resulting map (values in [0, 1]) multiplies the input element-wise,
    broadcasting over the last axis.

    Assumes channels-last 4D inputs (batch, height, width, channels), which is
    what the Conv2D sub-layers expect by default.

    Args:
        kernel_size: Kernel size of the final convolution that produces the
            single-channel attention map. Defaults to 7.
        intermediate_filters: Number of filters of the intermediate
            convolution. Defaults to 32.
        intermediate_kernel_size: Kernel size of the intermediate
            convolution. Defaults to 5.
        **kwargs: Forwarded to `layers.Layer` (e.g. `name`).
    """

    def __init__(self, kernel_size=7, intermediate_filters=32,
                 intermediate_kernel_size=5, **kwargs):
        super().__init__(**kwargs)
        self.kernel_size = kernel_size
        self.intermediate_filters = intermediate_filters
        self.intermediate_kernel_size = intermediate_kernel_size

        # Intermediate conv: learns features from the concatenated avg/max maps.
        self.intermediate_conv = layers.Conv2D(
            filters=self.intermediate_filters,
            kernel_size=self.intermediate_kernel_size,
            padding='same',
            activation='relu',
            name='attention_intermediate_conv'
        )
        # Final conv: collapses those features into one attention value per location.
        self.final_conv = layers.Conv2D(
            filters=1,
            kernel_size=self.kernel_size,
            padding='same',
            activation='sigmoid',  # keeps the attention weights in [0, 1]
            use_bias=False,
            name='attention_final_conv'
        )

    def build(self, input_shape):
        """
        Creates the weights of both sub-convolutions.

        The sub-layers are built explicitly so that their variables exist as
        soon as this layer is built, instead of appearing lazily on first call.

        Args:
            input_shape: Shape of the incoming feature map; only the last
                dimension is replaced when deriving the sub-layer shapes.
        """
        leading_dims = tuple(input_shape)[:-1]
        # The intermediate conv sees the 2-channel [avg, max] stack.
        if not self.intermediate_conv.built:
            self.intermediate_conv.build(leading_dims + (2,))
        # padding='same' keeps the spatial dims, only the channel count changes.
        if not self.final_conv.built:
            self.final_conv.build(leading_dims + (self.intermediate_filters,))
        super().build(input_shape)

    def call(self, inputs):
        """
        Applies spatial attention to the input feature map.

        Args:
            inputs: Feature map tensor; the last axis is treated as channels.

        Returns:
            Tensor with the same shape as `inputs`, scaled location-wise by
            the learned attention map.
        """
        # Summarise the channel axis with two complementary statistics.
        avg_pool = tf.reduce_mean(inputs, axis=-1, keepdims=True)
        max_pool = tf.reduce_max(inputs, axis=-1, keepdims=True)
        # Stateless concat: avoids instantiating a new Keras layer on every call.
        pooled = tf.concat([avg_pool, max_pool], axis=-1)

        features = self.intermediate_conv(pooled)
        attention_map = self.final_conv(features)

        # (…, 1) attention map broadcasts across all channels of the input.
        return inputs * attention_map

    def get_config(self):
        """Returns the constructor arguments needed to re-create this layer."""
        config = super().get_config()
        config.update({
            "kernel_size": self.kernel_size,
            "intermediate_filters": self.intermediate_filters,
            "intermediate_kernel_size": self.intermediate_kernel_size,
        })
        return config

In [None]:
import tensorflow as tf
from keras import layers, regularizers, models

from src.Models.AttentionLayer import \
    SpatialAttention  # Assuming this import path is correct and SpatialAttention is defined elsewhere


# --- Age Prediction Model with Attention ---
class AgePredictionModel:
    """
    Builds a Keras regression model that predicts bone age from a single-channel image.

    The architecture is a five-block CNN backbone, followed by a
    `SpatialAttention` layer, a flatten, three Dense/BatchNorm/Dropout stages
    and a one-unit output clipped to non-negative values with a ReLU.
    The only model input is the image, so no other patient attribute
    (e.g. gender) is used.

    Attributes are plain instance fields:
        img_size: (height, width) of the expected input images.
        model: The (uncompiled until `compile_model` is called) Keras model.
    """

    # L2 weight-decay factor shared by every Conv2D and Dense layer.
    _L2_FACTOR = 1e-4

    def __init__(self, img_size=(128, 128)):
        """
        Initializes the AgePredictionModel and builds its Keras graph.

        Args:
            img_size (tuple, optional): The target dimensions (height, width) for input images.
                                        Defaults to (128, 128). Every backbone block halves both
                                        dimensions, so five blocks shrink them by a factor of 32.
        """
        self.img_size = img_size
        # Build the Keras model graph immediately upon initialization
        self.model = self._build_model()

    @classmethod
    def _conv_block(cls, x, filters: int, block_id: int, name_prefix: str = ''):
        """Applies (Conv3x3 -> BatchNorm -> ReLU) twice, then a 2x2 max-pool with stride 2."""
        for sub in ('a', 'b'):
            x = layers.Conv2D(filters, (3, 3), padding='same',
                              kernel_regularizer=regularizers.l2(cls._L2_FACTOR),
                              name=f'{name_prefix}conv{block_id}{sub}')(x)
            x = layers.BatchNormalization(name=f'{name_prefix}bn{block_id}{sub}')(x)
            x = layers.Activation('relu', name=f'{name_prefix}relu{block_id}{sub}')(x)
        return layers.MaxPooling2D((2, 2), strides=(2, 2),
                                   name=f'{name_prefix}pool{block_id}')(x)

    @classmethod
    def _dense_block(cls, x, units: int, dropout_rate: float, block_id: int):
        """Applies Dense(relu) -> BatchNorm -> Dropout, named by `block_id`."""
        x = layers.Dense(units, activation='relu', name=f'dense{block_id}',
                         kernel_regularizer=regularizers.l2(cls._L2_FACTOR))(x)
        x = layers.BatchNormalization(name=f'bn_dense{block_id}')(x)
        return layers.Dropout(dropout_rate, name=f'dropout{block_id}')(x)

    def _create_cnn_branch(self, input_tensor: tf.Tensor, name_prefix: str) -> tf.Tensor:
        """
        Constructs the CNN backbone used for feature extraction.

        The branch consists of five sequential blocks, each made of two
        Conv -> BatchNorm -> ReLU stages followed by MaxPooling. Filter counts
        are 32, 64, 128, 256 and 256.

        Args:
            input_tensor (tf.Tensor): The input tensor to the CNN branch (e.g., image input).
            name_prefix (str): Prefix used for the layer names of blocks 1-4.

        Returns:
            tf.Tensor: The output tensor from the final MaxPooling layer of this CNN branch.
        """
        x = input_tensor
        # Blocks 1-4: filter count doubles at every block.
        for block_id, filters in enumerate((32, 64, 128, 256), start=1):
            x = self._conv_block(x, filters, block_id, name_prefix=f'{name_prefix}_')

        # Block 5 stays at 256 filters.
        # NOTE: its layers keep the original unprefixed names ('conv5a', 'bn5a', ...,
        # 'pool5') so existing checkpoints still match by layer name. As a consequence
        # this branch can only be instantiated once per model; prefix these names
        # before adding a second branch.
        return self._conv_block(x, 256, 5)

    def _build_model(self) -> models.Model:
        """
        Constructs the complete Keras model for bone age prediction.

        The model integrates the CNN backbone, a Spatial Attention layer,
        and a multi-layered regression head.

        Returns:
            models.Model: The assembled, not yet compiled, Keras model.
                          Call `compile_model` before training.
        """
        # Input shape is (height, width, channels) with a single (grayscale) channel.
        prep_input = layers.Input(shape=(*self.img_size, 1), name='prep_input')

        # Feature extraction followed by spatial re-weighting of the feature map.
        prep_features = self._create_cnn_branch(prep_input, 'prep')
        attended_prep_features = SpatialAttention(name='attention_prep')(prep_features)

        # Flatten the attended features to prepare for the fully connected layers.
        x = layers.Flatten(name='flatten_features')(attended_prep_features)

        # --- Regression Head: (units, dropout rate) per fully connected stage ---
        head_spec = ((512, 0.4), (256, 0.4), (128, 0.3))
        for block_id, (units, dropout_rate) in enumerate(head_spec, start=1):
            x = self._dense_block(x, units, dropout_rate, block_id)

        # Linear one-unit output, then ReLU so that predictions are never negative.
        output_linear = layers.Dense(1, name='age_output_linear',
                                     kernel_regularizer=regularizers.l2(self._L2_FACTOR))(x)
        output = layers.Activation('relu', name='age_output_relu')(output_linear)

        return models.Model(inputs=prep_input, outputs=output, name='AgePredictionModel')

    def compile_model(self, learning_rate: float = 0.0005):
        """
        Compiles the Keras model with an Adam optimizer and MAE as loss and metric.

        Args:
            learning_rate (float, optional): The initial learning rate for the Adam optimizer.
                                             Defaults to 0.0005.
        """
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

        # MAE is also registered as a metric so that it is logged under the
        # 'mae' / 'val_mae' keys in addition to 'loss' / 'val_loss'.
        self.model.compile(optimizer=optimizer, loss='mae', metrics=['mae'])

In [None]:
import tensorflow as tf
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from typing import Optional, Tuple  # Import for type hinting


def train_model(model: tf.keras.Model,
                train_dataset: tf.data.Dataset,
                epochs: int = 100,
                validation_dataset: Optional[tf.data.Dataset] = None,
                model_save_path: str = 'best_age_prediction_model.keras') -> Tuple[
    tf.keras.Model, tf.keras.callbacks.History]:
    """
    Trains the provided Keras model with checkpointing, early stopping and LR decay.

    All three callbacks monitor the same quantity: 'val_mae' when a validation
    dataset is supplied, otherwise the training 'mae'. The model must therefore
    have been compiled with an 'mae' metric.

    Args:
        model (tf.keras.Model): The already compiled Keras model to be trained.
        train_dataset (tf.data.Dataset): The TensorFlow Dataset for training, passed
                                         unchanged to `model.fit`.
        epochs (int, optional): The maximum number of training epochs. Training
                                may stop earlier due to EarlyStopping. Defaults to 100.
        validation_dataset (tf.data.Dataset, optional): Dataset passed to `model.fit` as
                                                        `validation_data`. Defaults to None.
        model_save_path (str, optional): The file path where the best performing model
                                         (lowest monitored MAE) will be saved.
                                         Defaults to 'best_age_prediction_model.keras'.

    Returns:
        Tuple[tf.keras.Model, tf.keras.callbacks.History]: A tuple containing:
            - tf.keras.Model: The same model instance that was passed in. EarlyStopping is
                              configured with `restore_best_weights=True`.
            - tf.keras.callbacks.History: The object returned by `model.fit`.
    """
    print(f"\nStarting model training for {epochs} epochs...")

    # Single source of truth for the monitored metric. An explicit None check is
    # used so the decision never depends on the truthiness of the dataset object.
    monitor = 'val_mae' if validation_dataset is not None else 'mae'

    callbacks = [
        # Persist the model whenever the monitored MAE reaches a new minimum.
        ModelCheckpoint(
            filepath=model_save_path,
            monitor=monitor,
            save_best_only=True,
            mode='min',  # lower MAE is better
            verbose=1
        ),
        # Stop after 10 epochs without improvement and roll back to the best weights.
        EarlyStopping(
            monitor=monitor,
            patience=10,
            mode='min',
            verbose=1,
            restore_best_weights=True
        ),
        # Halve the learning rate after 5 stagnant epochs, never going below 1e-6.
        ReduceLROnPlateau(
            monitor=monitor,
            factor=0.5,
            patience=5,
            min_lr=1e-6,
            mode='min',
            verbose=1
        )
    ]

    # Training always starts from epoch 0; resuming from a later epoch would
    # require passing a different `initial_epoch` here.
    history = model.fit(
        train_dataset,
        epochs=epochs,
        validation_data=validation_dataset,
        callbacks=callbacks,
        initial_epoch=0
    )
    print("\nModel training completed.")
    return model, history

In [1]:
# HYPERPARAMETERS:
learning_rate = 0.0005
img_sizes = 256
epochs = 100

# NOTE(review): `train_dataset`, `val_dataset` and `model_save_path` are not defined
# in this cell; they are presumably created by an earlier notebook cell — confirm.

model_builder = AgePredictionModel(img_size=(img_sizes, img_sizes))
# Compile the newly created model with the specified learning rate.
model_builder.compile_model(learning_rate=learning_rate)
model_to_train = model_builder.model
print("\nCreating and compiling a new model for training.")

# --- Model Training Execution ---
print("\nStarting training:")
# Stays None if training fails, so later cells can test for success.
# (A bare `return` is a SyntaxError at notebook/module level.)
trained_model = None
try:
    # `train_model` runs the fit loop with its checkpoint / early-stopping callbacks.
    trained_model, history = train_model(
        model=model_to_train,
        train_dataset=train_dataset,
        validation_dataset=val_dataset,
        epochs=epochs,
        model_save_path=model_save_path
    )
except Exception as e:
    # Top-level boundary of the notebook cell: report the failure with full context.
    print(f"Error during model training: {e}")
    import traceback
    traceback.print_exc()  # Print the full stack trace for debugging
else:
    # Optional: plot_training_history(history, save_path='training_history_plots.png')
    print(f"Training history keys: {history.history.keys()}")
    print(f"Model trained and saved to '{model_save_path}'.")

NameError: name 'AgePredictionModel' is not defined