### _Setup_

In [None]:
# Reset memory
# IPython magic: clear the interactive namespace; -f skips the confirmation prompt.
%reset -f

In [None]:
# Install correct package versions
# Installs TensorFlow with its CUDA extras, then removes the existing numpy/pandas
# and reinstalls them with NumPy pinned below 2.0 (bypassing the pip cache).
# NOTE(review): the NumPy pin is presumably needed for compatibility with the
# installed TensorFlow / TFP builds — confirm.
# NOTE(review): tensorflow_probability, optuna, scikit-learn and matplotlib are
# imported below but not installed here — assumes they are already available.
!pip install "tensorflow[and-cuda]"
!pip uninstall numpy pandas -y
!pip install "numpy<2.0" pandas --upgrade --no-cache-dir

In [None]:
# Packages
from typing import Union, List, Tuple, Dict, Any
import time
import warnings
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold, StratifiedKFold, StratifiedShuffleSplit, train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers, initializers, optimizers, callbacks
from tensorflow.keras.layers import Layer
import tensorflow_probability as tfp
import optuna
import matplotlib.pyplot as plt

tfpl = tfp.layers
tfd = tfp.distributions

In [None]:
# GPU check
# Prints the physical GPU devices TensorFlow can see (an empty list means none were found).
print("Available GPUs:", tf.config.list_physical_devices('GPU'))

In [None]:
# Data
# Load the full dataset. Later cells read the columns "Variety", "Scan Date Year"
# and "Brix (Position)", and treat every column from the first float-parsable
# column name onwards as spectral features.
df = pd.read_csv('data.csv')

### _Functions_

In [None]:
def find_col_index_of_spectra(
    df: pd.DataFrame
) -> int:
    """
    Locate the position of the first spectral column in a DataFrame.

    A column counts as spectral when its label can be parsed by float()
    (e.g., "730.5", "731.0").

    Parameters:
        df : Input DataFrame

    Returns:
        Zero-based position of the first column whose label parses as a float,
        or -1 when no such column exists.
    """
    def _parses_as_float(label) -> bool:
        # Labels that float() rejects (text, None, tuples, ...) are metadata columns
        try:
            float(label)
        except (ValueError, TypeError):
            return False
        return True

    # First matching position wins; fall back to the -1 sentinel
    return next(
        (
            position
            for position, label in enumerate(df.columns)
            if _parses_as_float(label)
        ),
        -1,
    )

def split_train_test(
    df: pd.DataFrame,
    test_variety: str,
    test_season: int,
    variety_test_year: int = 2024
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Split a DataFrame into one training set and two test sets:

    - Variety test set: Variety == test_variety AND Year == variety_test_year
    - Season test set : Year == test_season, restricted to training varieties

    The training set drops every row of the test variety (in all years, not only
    variety_test_year) and every row of the test season, so it never overlaps
    with either test set.

    Parameters:
        df                : Full pandas DataFrame with "Variety" and
                            "Scan Date Year" columns
        test_variety      : Variety held out for the variety test set
        test_season       : Year held out for the season test set
        variety_test_year : Year from which the variety test set is drawn
                            (defaults to 2024, the previously hard-coded value)

    Returns:
        df_train        : Training set
        df_test_variety : Test set for the held-out variety in variety_test_year
        df_test_season  : Test set for the held-out season (filtered by train varieties)
    """
    # Row masks shared by the three selections below
    is_test_variety = df["Variety"] == test_variety
    is_test_season = df["Scan Date Year"] == test_season

    # Variety test set: held-out variety, restricted to the chosen year
    df_test_variety = df[
        is_test_variety &
        (df["Scan Date Year"] == variety_test_year)
    ]

    # Training set: neither the held-out variety nor the held-out season
    df_train = df[~is_test_variety & ~is_test_season]

    # Season test set: held-out season, limited to varieties the model has seen
    train_varieties = df_train["Variety"].unique()
    df_test_season = df[is_test_season]
    df_test_season = df_test_season[
        df_test_season["Variety"].isin(train_varieties)
    ]

    return df_train, df_test_variety, df_test_season

def split_x_y(
    df: pd.DataFrame,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Split a DataFrame into X (spectral features) and y (target) arrays.

    Relies on find_col_index_of_spectra() to locate the first spectral column;
    that column and every column after it are used as features.

    Parameters:
        df : Input DataFrame containing both metadata and spectral data.

    Returns:
        x : NumPy array of shape (n_samples, n_spectral_features)
        y : NumPy array of shape (n_samples, 1) containing Brix values

    Raises:
        ValueError : If no column name can be interpreted as a float, i.e. the
                     DataFrame has no spectral columns.
    """
    # The helper returns -1 when nothing matches; slicing with -1 would silently
    # pick the last column as "spectra", so fail loudly instead.
    first_spectral_idx = find_col_index_of_spectra(df)
    if first_spectral_idx < 0:
        raise ValueError(
            "No spectral columns found: no column name could be converted to float."
        )

    # Spectral columns run from the first float-like label to the end of the frame
    spectra_cols = list(df.columns[first_spectral_idx:])

    # Define the target column (kept as a list so y stays 2-D)
    target_cols = ['Brix (Position)']

    # Extract feature and target arrays
    x = df[spectra_cols].values
    y = df[target_cols].values

    return x, y

def take_subset(
    df: pd.DataFrame, 
    n_subset: int,
    random_state: int
) -> pd.DataFrame:
    """
    Draw a Brix-stratified sample of n_subset rows from the DataFrame.

    Stratification uses up to 10 quantile bins of 'Brix (Position)'
    (duplicate bin edges are dropped). When the DataFrame has n_subset rows
    or fewer, an unmodified copy of the whole DataFrame is returned
    (original index preserved).

    Parameters:
        df           : Input DataFrame with 'Brix (Position)' column
        n_subset     : Desired subset size
        random_state : Random seed for reproducibility

    Returns:
        Copy of df, or a stratified subset of df with a fresh 0..n-1 index
    """
    # Nothing to sample when the request covers the whole frame
    if len(df) <= n_subset:
        return df.copy()

    # Quantile bins of the target act as the stratification labels
    brix_bins = pd.qcut(df["Brix (Position)"], q=10, labels=False, duplicates='drop')

    # One stratified shuffle; its "train" side is the subset we keep
    sampler = StratifiedShuffleSplit(
        n_splits=1,
        train_size=n_subset,
        random_state=random_state
    )
    split_iter = sampler.split(df, brix_bins)
    keep_positions = next(split_iter)[0]

    # Positional selection, then renumber the rows
    subset = df.iloc[keep_positions]
    return subset.reset_index(drop=True)

def create_train_val_split(
    df: pd.DataFrame,
    validation_size: float,
    random_state: int
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Produce a Brix-stratified train/validation split of a DataFrame.

    Rows are stratified on up to 10 quantile bins of the 'Brix (Position)'
    column (duplicate bin edges are dropped).

    Parameters:
        df              : Input DataFrame
        validation_size : Proportion of validation samples (0 < float < 1)
        random_state    : Seed for reproducibility

    Returns:
        df_train, df_val : Stratified training and validation DataFrames,
                           each with a fresh 0..n-1 index
    """
    # Stratification labels: quantile bin of each row's Brix value
    brix_bins = pd.qcut(df["Brix (Position)"], q=10, labels=False, duplicates="drop")

    # train_test_split yields [train, validation] for a single input
    parts = train_test_split(
        df,
        test_size=validation_size,
        random_state=random_state,
        stratify=brix_bins
    )

    # Renumber the rows of both parts before handing them back
    train_part, val_part = (part.reset_index(drop=True) for part in parts)
    return train_part, val_part

def rmse_loss(
    y_true, 
    y_pred
):
    """
    Root Mean Squared Error (RMSE) for use as a training loss.

    Parameters:
        y_true : Tensor of true target values
        y_pred : Tensor of predicted values

    Returns:
        RMSE over all elements, as a scalar Tensor
    """
    # Element-wise residuals -> mean of squares -> square root
    residual = y_pred - y_true
    mean_squared = tf.reduce_mean(tf.square(residual))
    return tf.sqrt(mean_squared)

def rmse_metric(
    y_true, 
    y_pred
):
    """
    Root Mean Squared Error (RMSE) for use as a reported performance metric.

    Parameters:
        y_true : Tensor of true target values
        y_pred : Tensor of predicted values

    Returns:
        RMSE over all elements, as a scalar Tensor
    """
    # Same formula as the loss: sqrt(mean((prediction - truth)^2))
    squared_error = tf.square(y_pred - y_true)
    return tf.sqrt(tf.reduce_mean(squared_error))

class FusedLassoPrior(tfd.Distribution):
    """
    Custom Fused Lasso prior distribution for use in Bayesian neural networks.

    Combines:
    - L1 regularization (lasso) to encourage sparsity
    - Fused penalty to encourage smoothness across adjacent coefficients

    Only an unnormalized log-density is implemented (no sampling), which is
    sufficient for use as a penalty through a registered KL function.
    """

    def __init__(
        self,
        shape,
        lambda_l1,
        lambda_fused,
        validate_args=False,
        allow_nan_stats=True,
        fused_axis=0
    ):
        """
        Initialize the fused lasso distribution.

        Parameters:
            shape           : Shape of the distribution's support
            lambda_l1       : Strength of L1 regularization
            lambda_fused    : Strength of fused (difference) penalty
            validate_args   : Whether to validate distribution arguments
            allow_nan_stats : Whether to allow NaNs in statistical outputs
            fused_axis      : Axis along which neighbouring coefficients are
                              compared. Defaults to 0, the kernel-position axis
                              of a Conv1D kernel [kernel_size, in_channels, filters].
        """
        super().__init__(
            dtype=tf.float32,
            reparameterization_type=tfd.NOT_REPARAMETERIZED,
            validate_args=validate_args,
            allow_nan_stats=allow_nan_stats
        )
        self.lambda_l1 = lambda_l1
        self.lambda_fused = lambda_fused
        self.fused_axis = fused_axis
        self._shape = tf.TensorShape(shape)

    def _log_prob(self, value):
        """
        Compute the unnormalized log-probability of a sample.

        Applies:
        - L1 penalty: sum of absolute values
        - Fused penalty: sum of absolute differences between elements that are
          adjacent along `fused_axis`

        Differencing along the last axis (the previous behaviour) compares
        filters, not kernel positions; with a single filter that axis has
        length 1 and the fused penalty was always zero.
        """
        value = tf.convert_to_tensor(value)
        l1_term = tf.reduce_sum(tf.abs(value))

        # Pair every element with its predecessor along the fused axis.
        # A length-1 axis yields empty index ranges and hence a zero penalty.
        length = tf.shape(value)[self.fused_axis]
        successors = tf.gather(value, tf.range(1, length), axis=self.fused_axis)
        predecessors = tf.gather(value, tf.range(0, length - 1), axis=self.fused_axis)
        fused_term = tf.reduce_sum(tf.abs(successors - predecessors))

        return -self.lambda_l1 * l1_term - self.lambda_fused * fused_term

    def _batch_shape(self):
        # A single distribution over the whole tensor: scalar batch shape,
        # matching the scalar returned by _log_prob
        return tf.TensorShape([])

    def _event_shape(self):
        # Event shape is the full shape of the variable
        return self._shape

def fused_lasso_prior_fn(lambda_l1, lambda_fused):
    """
    Build a prior factory with the fused lasso strengths baked in.

    The returned callable follows the prior-function signature that Bayesian
    layers (e.g. tfp.layers.DenseVariational) call with
    (dtype, shape, name, trainable, add_variable_fn) and produces a
    FusedLassoPrior for the requested weight shape.

    Parameters:
        lambda_l1    : Strength of the L1 sparsity penalty
        lambda_fused : Strength of the fused smoothness penalty

    Returns:
        fn : A callable that returns a FusedLassoPrior when given a shape
    """

    def fn(
        dtype=tf.float32,
        shape=None,
        name=None,
        trainable=True,
        add_variable_fn=None
    ):
        # Only `shape` is used; the remaining arguments exist to satisfy the
        # layer's calling convention
        if shape is not None:
            return FusedLassoPrior(
                shape=shape,
                lambda_l1=lambda_l1,
                lambda_fused=lambda_fused
            )

        # Without a shape there is no support to define the prior over
        raise ValueError("Shape must be provided to construct the FusedLassoPrior.")

    return fn

def bcnn_model(
    input_shape: int,
    kernel_size: int,
    dropout_rate: float,
    l2_strength: float,
    learning_rate: float,
    random_state: int,
    kl_scale: float,
    lambda_l1: float,
    lambda_fused: float
) -> tf.keras.Model:
    """
    Builds and compiles a Bayesian CNN using a fused lasso prior on the convolutional weights.

    Architecture: Reshape -> Bayesian Conv1D (1 filter, "same" padding, ELU)
    -> Dropout -> Flatten -> Dense(36) -> Dropout -> Dense(18) -> Dropout
    -> Dense(12) -> Dense(1, linear).

    Parameters:
        input_shape   : Number of input features (spectral length).
        kernel_size   : Size of the 1D convolutional kernel.
        dropout_rate  : Dropout rate for regularization.
        l2_strength   : L2 regularization strength applied to Dense layers.
        learning_rate : Learning rate for the Adam optimizer.
        random_state  : Seed used for the Dense layers' weight initialization.
        kl_scale      : Scale factor for KL divergence loss.
        lambda_l1     : Weight of L1 sparsity term in fused lasso prior.
        lambda_fused  : Weight of smoothness term in fused lasso prior.

    Returns:
        model : A compiled Keras model ready for training and evaluation.
    """
    # Define kernel regularizer and initializer (shared by all Dense layers)
    kernel_reg  = regularizers.l2(l2_strength)
    kernel_init = initializers.HeNormal(seed=random_state)

    # Both the kernel and bias divergences are down-weighted by kl_scale
    def scaled_kl(q, p, _):
        return tfd.kl_divergence(q, p) * kl_scale

    # Build sequential model
    model = models.Sequential([
        # Input layer: Reshape flat vector to (length, 1) for Conv1D
        tf.keras.Input(shape=(input_shape,)),
        layers.Reshape((input_shape, 1)),

        # Bayesian Conv1D with fused lasso prior
        tfpl.Convolution1DReparameterization(
            filters=1,
            kernel_size=kernel_size,
            padding="same",
            activation="elu",
            kernel_posterior_fn=tfpl.default_mean_field_normal_fn(),
            kernel_prior_fn=fused_lasso_prior_fn(lambda_l1=lambda_l1, lambda_fused=lambda_fused),
            kernel_divergence_fn=scaled_kl,
            bias_posterior_fn=tfpl.default_mean_field_normal_fn(is_singular=False),
            bias_prior_fn=tfpl.default_multivariate_normal_fn,
            bias_divergence_fn=scaled_kl,
        ),

        # Dropout after convolution
        layers.Dropout(dropout_rate),

        # Flatten for Dense layers
        layers.Flatten(),

        # Dense regression head with dropout and L2 regularization
        layers.Dense(36, activation="elu", kernel_initializer=kernel_init, kernel_regularizer=kernel_reg),
        layers.Dropout(dropout_rate),
        layers.Dense(18, activation="elu", kernel_initializer=kernel_init, kernel_regularizer=kernel_reg),
        layers.Dropout(dropout_rate),
        layers.Dense(12, activation="elu", kernel_initializer=kernel_init, kernel_regularizer=kernel_reg),

        # Output layer (regression target)
        layers.Dense(1, activation="linear", kernel_initializer=kernel_init, kernel_regularizer=kernel_reg),
    ])

    # Compile with the module-level RMSE functions instead of duplicated
    # lambdas, so the metric is reported under a descriptive name
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=rmse_loss,
        metrics=[rmse_metric]
    )

    return model

def train_bcnn(
    x_train: np.ndarray,
    y_train: np.ndarray,
    input_shape: int,
    kernel_size: int,
    dropout_rate: float,
    l2_strength: float,
    random_state: int,
    kl_scale: float,
    batch_size: int,
    epochs: int,
    patience_reduce_lr: int,
    patience_early_stop: int,
    min_lr: float,
    x_val: np.ndarray,
    y_val: np.ndarray,
    lambda_l1: float,
    lambda_fused: float,
    verbose: int = 0
) -> Tuple[tf.keras.Model, tf.keras.callbacks.History]:
    """
    Build and fit a Bayesian CNN (BCNN) with a fused lasso prior.

    The learning rate is not a parameter: it is derived from the batch size as
    0.01 * batch_size / 256. When both x_val and y_val are given, the callbacks
    watch 'val_loss'; otherwise no validation is run and they watch 'loss'.

    Parameters:
        x_train            : Training feature matrix
        y_train            : Training target vector
        input_shape        : Number of features per input sample
        kernel_size        : Size of the convolutional kernel
        dropout_rate       : Dropout rate for regularization
        l2_strength        : L2 regularization strength for dense layers
        random_state       : Random seed passed to the model builder
        kl_scale           : KL divergence scale for Bayesian regularization
        batch_size         : Number of samples per gradient update
        epochs             : Maximum number of training epochs
        patience_reduce_lr : Patience for reducing LR on plateau
        patience_early_stop: Patience for early stopping
        min_lr             : Minimum learning rate during ReduceLROnPlateau
        x_val              : Validation features, or None
        y_val              : Validation targets, or None
        lambda_l1          : L1 penalty coefficient in fused lasso prior
        lambda_fused       : Fused penalty coefficient in fused lasso prior
        verbose            : Keras verbosity mode (0, 1, or 2)

    Returns:
        model   : Trained BCNN Keras model
        history : Keras training history object
    """
    # Linear scaling rule: learning rate grows proportionally with batch size
    scaled_learning_rate = 0.01 * batch_size / 256

    model = bcnn_model(
        input_shape=input_shape,
        kernel_size=kernel_size,
        dropout_rate=dropout_rate,
        l2_strength=l2_strength,
        learning_rate=scaled_learning_rate,
        random_state=random_state,
        kl_scale=kl_scale,
        lambda_l1=lambda_l1,
        lambda_fused=lambda_fused
    )

    # Validation is only used when both halves of the validation set exist
    has_validation = x_val is not None and y_val is not None
    monitor_metric = "val_loss" if has_validation else "loss"
    validation_data = (x_val, y_val) if has_validation else None

    # Halve the learning rate on plateaus, down to min_lr
    lr_scheduler = callbacks.ReduceLROnPlateau(
        monitor=monitor_metric,
        factor=0.5,
        patience=patience_reduce_lr,
        min_lr=min_lr,
        verbose=0
    )
    # Stop when the monitored loss stalls and roll back to the best weights
    early_stopper = callbacks.EarlyStopping(
        monitor=monitor_metric,
        patience=patience_early_stop,
        restore_best_weights=True,
        verbose=0
    )

    history = model.fit(
        x_train,
        y_train,
        validation_data=validation_data,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[lr_scheduler, early_stopper],
        verbose=verbose
    )

    return model, history

def perform_optuna_hyperparameter_optimization(
    x_train: np.ndarray,
    y_train: np.ndarray,
    x_val: np.ndarray,
    y_val: np.ndarray,
    input_shape: int,
    random_state: int,
    epochs: int,
    patience_reduce_lr: int,
    patience_early_stop: int,
    min_lr: float,
    kernel_size_range: Tuple[int, int],
    batch_size_list: list,
    dropout_range: Tuple[float, float],
    l2_range: Tuple[float, float],
    kl_range: Tuple[float, float],
    lasso_range: Tuple[float, float],
    timeout_time: float
) -> Tuple['optuna.study.Study', float, dict]:
    """
    Runs Optuna hyperparameter optimization for a Bayesian CNN with fused lasso priors.

    The learning rate is not searched; it is derived from the sampled batch size
    (0.01 * batch_size / 256) inside train_bcnn and only logged here.

    Parameters:
        x_train            : Training features
        y_train            : Training targets
        x_val              : Validation features
        y_val              : Validation targets
        input_shape        : Number of input features
        random_state       : Seed for the model initializers and the Optuna sampler
        epochs             : Maximum number of training epochs
        patience_reduce_lr : Patience for ReduceLROnPlateau
        patience_early_stop: Patience for EarlyStopping
        min_lr             : Minimum learning rate
        kernel_size_range  : Tuple (min, max) for kernel size
        batch_size_list    : List of possible batch sizes
        dropout_range      : Tuple (min, max) for dropout rate
        l2_range           : Tuple (min, max) for L2 regularization (log scale)
        kl_range           : Tuple (min, max) for KL divergence weight (log scale)
        lasso_range        : Tuple (min, max) for lambda_l1 and lambda_fused (log scale)
        timeout_time       : Maximum search time for Optuna (in seconds)

    Returns:
        study          : Optuna study object
        best_val_rmse  : Best validation RMSE found
        best_params    : Dictionary of best hyperparameters

    Raises:
        ValueError : If no trial completed successfully (failing trials are
                     caught and skipped during the search).
    """

    def objective(trial):
        # Drop Keras/TF global state left by earlier trials so memory does not
        # accumulate over a long-running search
        tf.keras.backend.clear_session()

        # Sample hyperparameters from the defined search space
        kernel_size   = trial.suggest_int("kernel_size", kernel_size_range[0], kernel_size_range[1])
        batch_size    = trial.suggest_categorical("batch_size", batch_size_list)
        dropout_rate  = trial.suggest_float("dropout_rate", dropout_range[0], dropout_range[1])
        l2_strength   = trial.suggest_float("l2_strength", l2_range[0], l2_range[1], log=True)
        kl_scale      = trial.suggest_float("kl_scale", kl_range[0], kl_range[1], log=True)
        lambda_l1     = trial.suggest_float("lambda_l1", lasso_range[0], lasso_range[1], log=True)
        lambda_fused  = trial.suggest_float("lambda_fused", lasso_range[0], lasso_range[1], log=True)

        # Mirrors the linear scaling rule applied in train_bcnn (logging only)
        learning_rate = 0.01 * batch_size / 256

        # Log selected trial parameters
        print(f"\n[Optuna Trial {trial.number}] Hyperparameters:")
        print(f"  kernel_size   = {kernel_size}")
        print(f"  batch_size    = {batch_size}")
        print(f"  dropout_rate  = {dropout_rate:.4f}")
        print(f"  l2_strength   = {l2_strength:.2e}")
        print(f"  learning_rate = {learning_rate:.2e}")
        print(f"  kl_scale      = {kl_scale:.2e}")
        print(f"  lambda_l1     = {lambda_l1:.2e}")
        print(f"  lambda_fused  = {lambda_fused:.2e}")

        # Train BCNN model with sampled parameters (history is not needed here)
        model, _ = train_bcnn(
            x_train=x_train,
            y_train=y_train,
            x_val=x_val,
            y_val=y_val,
            input_shape=input_shape,
            kernel_size=kernel_size,
            dropout_rate=dropout_rate,
            l2_strength=l2_strength,
            random_state=random_state,
            kl_scale=kl_scale,
            lambda_l1=lambda_l1,
            lambda_fused=lambda_fused,
            batch_size=batch_size,
            epochs=epochs,
            patience_reduce_lr=patience_reduce_lr,
            patience_early_stop=patience_early_stop,
            min_lr=min_lr,
            verbose=0
        )

        # Compute validation RMSE for evaluation
        y_pred = model.predict(x_val, batch_size=batch_size, verbose=0).flatten()
        y_true = y_val.flatten()
        val_rmse = float(np.sqrt(mean_squared_error(y_true, y_pred)))
        print(f"  Validation RMSE: {val_rmse:.5f}")
        return val_rmse

    # Seed the sampler so the sequence of suggested hyperparameters is repeatable
    sampler = optuna.samplers.TPESampler(seed=random_state)

    # Create and run the Optuna study
    study = optuna.create_study(direction="minimize", sampler=sampler)
    study.optimize(
        objective,
        timeout=timeout_time,
        show_progress_bar=True,
        catch=(Exception,)  # Ensures optimization continues on error
    )

    # Because failing trials are swallowed above, the study can end without a
    # single usable result; report that explicitly
    completed_trials = [
        t for t in study.trials
        if t.state == optuna.trial.TrialState.COMPLETE
    ]
    if not completed_trials:
        raise ValueError(
            "Hyperparameter search finished without any completed trial; "
            "inspect the failed trials in the study for the underlying errors."
        )

    best_val_rmse = study.best_value
    best_params = study.best_trial.params

    return study, best_val_rmse, best_params

def test_bcnn(
    model: tf.keras.Model,
    x_test_data: np.ndarray,
    y_test_data: np.ndarray,
    batch_size: int,
    num_monte_carlo: int
) -> tuple:
    """
    Score a trained Bayesian CNN on a hold-out test set with Monte Carlo averaging.

    The model predicts the test set num_monte_carlo times; the per-sample mean
    of those passes is the final prediction. Metrics are printed and a parity
    plot is shown.

    Parameters:
        model            : Trained BCNN model
        x_test_data      : Test features
        y_test_data      : Test targets
        batch_size       : Batch size used during prediction
        num_monte_carlo  : Number of forward passes for uncertainty estimation

    Returns:
        test_rmsep              : Root mean squared error of prediction
        test_r2                 : R² score
        test_practical_accuracy: % predictions within ±20% of observed values
        df_predictions          : DataFrame containing MC predictions, observed and averaged predictions
    """

    # === Monte Carlo Predictions ===
    # One flattened prediction vector per stochastic forward pass
    mc_columns = [
        model.predict(x_test_data, batch_size=batch_size, verbose=0).flatten()
        for _ in range(num_monte_carlo)
    ]

    # Arrange as [n_samples, num_monte_carlo]
    mc_matrix = np.stack(mc_columns, axis=1)

    # === Create Prediction DataFrame ===
    pass_names = [f"mc_pass_{k}" for k in range(1, num_monte_carlo + 1)]
    df_predictions = pd.DataFrame(mc_matrix, columns=pass_names)

    # === Evaluation ===
    observed = y_test_data.flatten()
    averaged = df_predictions.mean(axis=1).values  # mean over the MC passes only

    # Keep observed and averaged values next to the raw passes for inspection
    df_predictions["observed"] = observed
    df_predictions["predicted"] = averaged

    # Compute metrics
    mse = mean_squared_error(observed, averaged)
    test_rmsep = float(np.sqrt(mse))
    test_r2 = float(r2_score(observed, averaged))
    relative_error = np.abs(averaged - observed) / np.abs(observed)
    within_tolerance = relative_error <= 0.2
    test_practical_accuracy = float(within_tolerance.mean() * 100.0)

    # === Reporting ===
    print(f"Test RMSEP: {test_rmsep:.4f}")
    print(f"Test R²: {test_r2:.4f}")
    print(f"Practical accuracy (±20%): {test_practical_accuracy:.1f}%")

    # ---------- Parity Plot ----------
    lowest, highest = observed.min(), observed.max()
    plt.figure(figsize=(8, 6))
    plt.scatter(observed, averaged, alpha=0.7, label="Test Data")
    plt.plot([lowest, highest], [lowest, highest], "k--", lw=2, label="Ideal")
    plt.xlabel("Observed")
    plt.ylabel("Predicted")
    plt.title("Observed vs. Predicted on Test Set (BCNN)")
    plt.legend()
    plt.grid(True)
    plt.show()

    return test_rmsep, test_r2, test_practical_accuracy, df_predictions

@tfd.RegisterKL(tfd.Independent, FusedLassoPrior)
def kl_independent_fused_lasso(q, p, **kwargs):
    """
    KL surrogate registered for the pair (tfd.Independent posterior, FusedLassoPrior).

    Rather than a true KL divergence, this returns the negative prior
    log-probability evaluated at the posterior mean, so it behaves as a
    fused lasso penalty on the mean weights.

    Parameters:
        q : tfd.Independent
            The approximate posterior distribution.
        p : FusedLassoPrior
            The custom fused lasso prior distribution.
        **kwargs : dict
            Extra keyword arguments passed by TFP's KL interface (unused here).

    Returns:
        kl_divergence : tf.Tensor
            Negative log-probability of q's mean under p.
    """
    # Point estimate of the posterior, scored under the prior
    posterior_mean = q.mean()
    prior_log_density = p.log_prob(posterior_mean)
    return -prior_log_density

### _Parameters_

In [None]:
# Parameters
# Alias of the loaded DataFrame; not referenced again in the visible cells.
DF                              = df
# Seed passed to the subset sampling, the train/validation split, the model
# builder and the inference-time sampling.
RANDOM_STATE                    = 27

# Data splitting: subset size drawn from the training pool, validation fraction,
# and the variety / season (year) held out as test sets.
N_SUBSET                        = 22960
VALIDATION_SIZE                 = 0.1
TEST_VARIETY                    = "TestVariety"
TEST_SEASON                     = 2025

# Callback settings: patience (in epochs) for ReduceLROnPlateau and
# EarlyStopping, and the learning-rate floor.
PATIENCE_CALLBACK_REDUCE_LR     = 25
PATIENCE_CALLBACK_EARLY_STOP    = 50
MIN_LR                          = 1e-6

# Optuna search space as (min, max) tuples, plus the categorical batch sizes.
# LASSO_RANGE is used for both lambda_l1 and lambda_fused.
KERNEL_SIZE_RANGE               = (3, 1025)
BATCH_SIZE_OPTIONS              = [32, 64, 128, 256, 512, 1024]     
DROPOUT_RANGE                   = (0.01, 0.4)
L2_RANGE                        = (1e-6, 1e-2)
KL_SCALE_RANGE                  = (1e-6, 1e-2)
LASSO_RANGE                     = (1e-4, 10)
# Search time budget in seconds (72 hours).
TIMEOUT_TIME                    = 60 * 60 * 72

# Epoch caps: TRAIN_EPOCHS during the hyperparameter search, TEST_EPOCHS for the
# final model; NUM_MONTE_CARLO is the number of forward passes per evaluation.
TRAIN_EPOCHS                    = 250
TEST_EPOCHS                     = 1000
NUM_MONTE_CARLO                 = 100

In [None]:
# === Hold out the variety and season test sets ===
df_train_all, df_test_variety, df_test_season = split_train_test(
    df, test_variety=TEST_VARIETY, test_season=TEST_SEASON
)

# === Stratified subset of the training pool (used for the search only) ===
df_subset = take_subset(df_train_all, n_subset=N_SUBSET, random_state=RANDOM_STATE)

# === Stratified train/validation split of the subset ===
df_train, df_val = create_train_val_split(
    df=df_subset, validation_size=VALIDATION_SIZE, random_state=RANDOM_STATE
)

# === Feature (x) and target (y) arrays for every partition ===
x_train_all, y_train_all = split_x_y(df_train_all)
x_train, y_train = split_x_y(df_train)
x_val, y_val = split_x_y(df_val)
x_test_variety, y_test_variety = split_x_y(df_test_variety)
x_test_season, y_test_season = split_x_y(df_test_season)

# === Hyperparameter search on the train/validation subset ===
study, best_val_rmse, best_params = perform_optuna_hyperparameter_optimization(
    x_train=x_train, y_train=y_train,
    x_val=x_val, y_val=y_val,
    input_shape=x_train.shape[1],
    random_state=RANDOM_STATE,
    epochs=TRAIN_EPOCHS,
    patience_reduce_lr=PATIENCE_CALLBACK_REDUCE_LR,
    patience_early_stop=PATIENCE_CALLBACK_EARLY_STOP,
    min_lr=MIN_LR,
    kernel_size_range=KERNEL_SIZE_RANGE,
    batch_size_list=BATCH_SIZE_OPTIONS,
    dropout_range=DROPOUT_RANGE,
    l2_range=L2_RANGE,
    kl_range=KL_SCALE_RANGE,
    lasso_range=LASSO_RANGE,
    timeout_time=TIMEOUT_TIME,
)

print("Best hyperparameters found:")
for k, v in best_params.items():
    print(f"  {k:<15} = {v}")

# === Final model: best hyperparameters, full training pool, no validation ===
# Without validation data the callbacks monitor the training loss.
bcnn_trained, _ = train_bcnn(
    x_train=x_train_all, y_train=y_train_all,
    x_val=None, y_val=None,
    input_shape=x_train_all.shape[1],
    random_state=RANDOM_STATE,
    epochs=TEST_EPOCHS,
    patience_reduce_lr=PATIENCE_CALLBACK_REDUCE_LR,
    patience_early_stop=PATIENCE_CALLBACK_EARLY_STOP,
    min_lr=MIN_LR,
    kernel_size=best_params["kernel_size"],
    batch_size=best_params["batch_size"],
    dropout_rate=best_params["dropout_rate"],
    l2_strength=best_params["l2_strength"],
    kl_scale=best_params["kl_scale"],
    lambda_l1=best_params["lambda_l1"],
    lambda_fused=best_params["lambda_fused"],
)


# === Test on the test sets ===
# Held-out variety: Monte Carlo evaluation with the tuned batch size
rmsep_variety, r2_variety, acc_variety, df_predictions_variety = test_bcnn(
    model=bcnn_trained,
    x_test_data=x_test_variety,
    y_test_data=y_test_variety,
    batch_size=best_params["batch_size"],
    num_monte_carlo=NUM_MONTE_CARLO
)
# The summary lines previously ended in "%s", which printed a stray "s"
print(f"VARIETY: RMSEP={rmsep_variety:.3f}, R2={r2_variety:.3f}, ACC(±20%)={acc_variety:.1f}%")

# Held-out season: same evaluation on the season test set
rmsep_season, r2_season, acc_season, df_predictions_season = test_bcnn(
    model=bcnn_trained,
    x_test_data=x_test_season,
    y_test_data=y_test_season,
    batch_size=best_params["batch_size"],
    num_monte_carlo=NUM_MONTE_CARLO
)
print(f"SEASON:  RMSEP={rmsep_season:.3f}, R2={r2_season:.3f}, ACC(±20%)={acc_season:.1f}%")

# Construct the results (one row per test set)
results = {
    "Test Set": ["VARIETY", "SEASON"],
    "RMSEP": [rmsep_variety, rmsep_season],
    "R2": [r2_variety, r2_season],
    "Accuracy (±20%)": [acc_variety, acc_season]
}

# Create a DataFrame
df_results = pd.DataFrame(results)

### _Inference Time Analysis_

In [None]:
def get_inference_sample_set(
    df_variety: pd.DataFrame,
    df_season: pd.DataFrame,
    random_state: int,
    sample_size: int = 1000
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Combine two test sets (variety and season), sample rows randomly, and return X and y arrays.

    Sampling is without replacement. If the combined test set holds fewer than
    sample_size rows, all of its rows are used instead of raising an error.

    Parameters:
        df_variety   : DataFrame for variety-based test set
        df_season    : DataFrame for season-based test set
        random_state : Random seed for reproducibility
        sample_size  : Maximum number of rows to sample from combined test set

    Returns:
        x_sample : NumPy array of shape (n_sampled, n_features) with spectral features
        y_sample : NumPy array of shape (n_sampled, 1) with corresponding Brix values
    """
    # Combine the two test sets
    df_combined = pd.concat([df_variety, df_season], axis=0)

    # DataFrame.sample refuses to draw more rows than exist (replace=False),
    # so cap the request at the available row count
    n_rows = min(sample_size, len(df_combined))

    # Randomly sample rows from the combined test set
    df_sample = df_combined.sample(
        n=n_rows,
        random_state=random_state
    )

    # Split into X and y arrays
    x_sample, y_sample = split_x_y(df_sample)

    return x_sample, y_sample

def test_bcnn_inference_time(
    model: tf.keras.Model,
    x_test: np.ndarray,
    num_monte_carlo: int
) -> float:
    """
    Measure average one-by-one inference time of a Bayesian CNN model
    using Monte Carlo sampling over all test samples.

    Each sample is fed to the model individually (batch of one) and timed over
    num_monte_carlo consecutive forward passes.

    NOTE(review): the passes run with training=True, which also activates the
    Dropout layers, whereas test_bcnn predicts through model.predict — confirm
    which mode is meant to be timed.

    Parameters:
        model            : Trained Bayesian CNN model
        x_test           : Test feature matrix
        num_monte_carlo  : Number of Monte Carlo forward passes per sample

    Returns:
        avg_inference_time_ms : Average inference time per sample in milliseconds
                                (NaN when x_test contains no samples)
    """
    per_sample_seconds = []

    for x in x_test:
        x_input = np.expand_dims(x, axis=0)  # shape: (1, n_features)

        # perf_counter is monotonic and high-resolution; time.time() can jump
        # with wall-clock adjustments
        start = time.perf_counter()

        for _ in range(num_monte_carlo):
            # .numpy() pulls the result to the host, so the pass has finished
            # before the timer is read
            model(x_input, training=True).numpy()

        per_sample_seconds.append(time.perf_counter() - start)

    if per_sample_seconds:
        avg_inference_time_ms = float(np.mean(per_sample_seconds) * 1000)  # Convert to ms
    else:
        # No samples: keep the NaN result without NumPy's empty-mean warning
        avg_inference_time_ms = float("nan")

    print(f"Average BCNN inference time: {avg_inference_time_ms:.3f} ms/sample "
          f"(with {num_monte_carlo} MC passes)")

    return avg_inference_time_ms


In [None]:
# === Draw a random sample from both test sets for the timing run ===
x_inference_time, y_inference_time = get_inference_sample_set(
    df_test_variety, df_test_season, random_state=RANDOM_STATE
)

# === Average per-sample inference time of the final model ===
bcnn_time_ms = test_bcnn_inference_time(
    model=bcnn_trained, x_test=x_inference_time, num_monte_carlo=NUM_MONTE_CARLO
)