Imports
-------

In [7]:
from typing import List, Tuple, Union
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models
import sklearn.model_selection as sk

Data generation
---------------

In [95]:
def generate_gaussian_data(
    num_samples: int,
    input_length: int,
    num_gaussian: Union[int, Tuple[int, int]] = (1, 5),
    amplitude_range: Tuple[float, float] = (1, 5),
    center_range: Tuple[float, float] = (32, 96),
    width_range: Tuple[float, float] = (5, 20),
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    Generate a dataset of 1-D signals, each the sum of several Gaussian peaks.

    Parameters
    ----------
    num_samples : int
        Number of slices to generate.
    input_length : int
        Length of each slice.
    num_gaussian : int or tuple of int
        If int, fixed number of Gaussian peaks per slice.
        If tuple (or list), (min_peaks, max_peaks): the number of peaks of each
        slice is drawn uniformly from this inclusive range.
    amplitude_range : tuple of float
        (low, high) bounds used to draw the amplitude of each peak.
    center_range : tuple of float
        (low, high) bounds used to draw the center position of each peak.
    width_range : tuple of float
        (low, high) bounds used to draw the standard deviation of each peak.

    Returns
    -------
    X_train : numpy.ndarray
        Array of slices with Gaussian peaks, shape (num_samples, input_length, 1).
    amplitudes : numpy.ndarray
        Amplitude of every peak of each slice, sorted in descending order,
        shape (num_samples, max_peaks). Unused entries are filled with zeros.
    num_peaks : numpy.ndarray
        Array with the number of peaks in each slice, shape (num_samples,).
    peak_positions : numpy.ndarray
        Center of every peak of each slice, shape (num_samples, max_peaks),
        stored in the same order as ``amplitudes`` so that column ``k`` of both
        arrays describes the same peak. Unused entries are filled with zeros.
    """
    if isinstance(num_gaussian, (tuple, list)):
        min_peaks, max_peaks = num_gaussian
    else:
        min_peaks = max_peaks = num_gaussian

    max_peaks = max(max_peaks, 1)  # Ensure the label arrays have at least one column
    X_train = np.zeros((num_samples, input_length, 1))
    amplitudes = np.zeros((num_samples, max_peaks))
    num_peaks = np.zeros(num_samples, dtype=int)
    peak_positions = np.zeros((num_samples, max_peaks))

    # Sample grid 0, 1, ..., input_length - 1 on which the peaks are evaluated
    x = np.arange(input_length, dtype=float)

    for i in range(num_samples):
        # Random draws are made in a fixed order (count, then amplitude/center/
        # width per peak) so a seeded run always produces the same signals.
        count = np.random.randint(min_peaks, max_peaks + 1)
        num_peaks[i] = count

        slice_curve = np.zeros(input_length)
        peaks = []  # (amplitude, center) pairs of this slice

        for _ in range(count):
            amplitude = np.random.uniform(*amplitude_range)
            center = np.random.uniform(*center_range)
            width = np.random.uniform(*width_range)

            # Gaussian curve: y = A * exp(-((x - center)^2) / (2 * width^2))
            slice_curve += amplitude * np.exp(-((x - center) ** 2) / (2 * width ** 2))
            peaks.append((amplitude, center))

        # Sort amplitude and center together: sorting the amplitudes alone
        # would break the correspondence between the two label arrays.
        peaks.sort(key=lambda peak: peak[0], reverse=True)

        X_train[i, :, 0] = slice_curve
        if peaks:
            amplitudes[i, :count] = [amplitude for amplitude, _ in peaks]
            peak_positions[i, :count] = [center for _, center in peaks]

    return X_train, amplitudes, num_peaks, peak_positions

Model generation
----------------

In [None]:
def build_model(input_length):
    """
    Build a 1-D convolutional network with a single regression output.

    The network stacks three ``Conv1D`` layers (32, 64 and 128 filters, with
    batch normalization after the first one), flattens the feature maps and
    feeds them through dense layers ending in one linear unit named
    ``"num_peaks"``.

    Parameters
    ----------
    input_length : int
        Length of the input signal (e.g., 128). The model expects inputs of
        shape (input_length, 1).

    Returns
    -------
    tensorflow.keras.Model
        The constructed (uncompiled) Keras model with one output, ``num_peaks``.
    """
    signal_in = layers.Input(shape=(input_length, 1))

    # Convolutional feature extractor; every convolution shares these settings
    conv_options = dict(kernel_size=3, activation="relu", padding="same")
    features = layers.Conv1D(filters=32, **conv_options)(signal_in)
    features = layers.BatchNormalization()(features)
    for filter_count in (64, 128):
        features = layers.Conv1D(filters=filter_count, **conv_options)(features)

    # Collapse (length, channels) into one vector for the dense part
    features = layers.Flatten()(features)

    # Shared dense trunk
    for unit_count in (64, 32):
        features = layers.Dense(unit_count, activation="relu")(features)

    # Head dedicated to the "num_peaks" output
    head = layers.Dense(32, activation="relu")(features)
    num_peaks_output = layers.Dense(1, activation="linear", name="num_peaks")(head)

    return models.Model(inputs=signal_in, outputs=[num_peaks_output])

Utils
-----

In [96]:
def plot_model_performance(history, model, validation_data, num_examples=5):
    """
    Plot training history and visualize validation cases for a trained model.

    The first subplot shows the training (and, when recorded, validation) loss
    per epoch. Each following subplot shows one randomly chosen validation
    signal, with the expected and predicted values in its legend.

    Parameters
    ----------
    history : tensorflow.keras.callbacks.History
        The training history object from model.fit().
    model : tensorflow.keras.Model
        The trained Keras model.
    validation_data : tuple
        Tuple containing validation inputs and expected outputs:
        (X_val, y_val).
    num_examples : int, optional
        Number of validation cases to visualize. Default is 5. Values larger
        than the number of validation samples are reduced to that number.
    """
    plt.close('all')

    # Unpack validation data
    X_val, y_val = validation_data

    # Sampling is done without replacement, so never request more examples
    # than there are validation samples.
    num_examples = max(0, min(num_examples, len(X_val)))
    num_rows = num_examples + 1

    # squeeze=False keeps `axes` a 2-D array even when there is a single row
    fig, axes = plt.subplots(num_rows, 1, figsize=(12, num_rows * 3), squeeze=False)
    axes = axes[:, 0]

    loss_ax = axes[0]
    loss_ax.plot(history.history['loss'], label='Training Loss')
    if 'val_loss' in history.history:  # absent when fit() had no validation data
        loss_ax.plot(history.history['val_loss'], label='Validation Loss')
    loss_ax.set_xlabel('Epochs')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend()

    # Iterate over the sampled indices themselves (not their enumeration
    # counter), so the displayed cases really are the randomly drawn ones.
    indices = np.random.choice(len(X_val), num_examples, replace=False)
    for ax, idx in zip(axes[1:], indices):
        input_signal = X_val[idx, :, 0]

        true_num_peaks = y_val[idx]
        predicted_num_peaks = model.predict(X_val[idx:idx + 1])[0]

        ax.plot(input_signal, label=f"GD: {true_num_peaks} -- Pred: {predicted_num_peaks}")

        ax.legend()

    plt.tight_layout()
    plt.show()

Script
------

In [94]:
# Generate 300 slices of length 128, each containing 1 or 2 Gaussian peaks.
# width_range=(5, 5) gives every peak the same standard deviation of 5.
gaussian_slices, amplitudes, num_peaks, positions = generate_gaussian_data(
    num_samples=300,
    input_length=128,
    num_gaussian=(1, 2),
    amplitude_range=(1, 5),
    center_range=(20, 100),
    width_range=(5, 5)
)

# Hold out 20% of the samples for validation; the fixed random_state makes the
# split reproducible. All four arrays are split with the same permutation.
slices_train, slices_validation, amplitudes_train, amplitudes_validation, num_peaks_train, num_peaks_validation, positions_train, positions_validation = sk.train_test_split(
    gaussian_slices, amplitudes, num_peaks, positions, test_size=0.2, random_state=42
)


# input_length must match the length of the generated slices
model = build_model(input_length=128)


# The peak count is treated as a regression target: MSE loss, MAE reported.
# The 'num_peaks' key refers to the name of the model's output layer.
model.compile(
    optimizer='adam',
    loss={'num_peaks': 'mse',},
    metrics={'num_peaks': ['mae'],}
)

# Only the peak-count labels are used for training; the amplitude and position
# arrays produced above are not fed to the model here.
history = model.fit(
    slices_train,
    num_peaks_train,
    validation_data=(slices_validation, num_peaks_validation),
    epochs=120,
    batch_size=16
)

# Uncomment the magic below to open the figure in a separate Qt window
# %matplotlib qt
plot_model_performance(history, model, (slices_validation, num_peaks_validation) , num_examples=5)

Epoch 1/120
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 2.0088 - mae: 1.0217 - val_loss: 1.9137 - val_mae: 1.3050
Epoch 2/120
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.1634 - mae: 0.3333 - val_loss: 1.8301 - val_mae: 1.2751
Epoch 3/120
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.1221 - mae: 0.2820 - val_loss: 1.8163 - val_mae: 1.2701
Epoch 4/120
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.1241 - mae: 0.2867 - val_loss: 1.5969 - val_mae: 1.1845
Epoch 5/120
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0955 - mae: 0.2466 - val_loss: 1.5343 - val_mae: 1.1584
Epoch 6/120
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0724 - mae: 0.2118 - val_loss: 1.4638 - val_mae: 1.1292
Epoch 7/120
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: