In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, LSTM, TimeDistributed, Reshape, Bidirectional, Conv1D, Concatenate, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras import backend as K
from scipy.spatial.distance import cdist
import pickle
import gc
import xarray as xr 
import sys


In [None]:
## Load data from previous model runs
# Need pairs of parameters and modelled MB and snowlines.
# Try with modelled MB for now!
#
# `'win' in sys.platform` would also be true on macOS ("darwin"), so test the prefix instead.
if sys.platform.startswith('win'):
    data_root = "E:/OneDrive/PhD/PhD/Data/Hintereisferner/"
else:
    data_root = "/mnt/C4AEBBABAEBB9500/OneDrive/PhD/PhD/Data/Hintereisferner/"
path = data_root + "COSIPY/MiscTests/LHS/"

# The snowline observations are needed by the following cells on every platform
# (previously they were only read in the non-Windows branch, leaving `tsla` undefined on Windows).
# NOTE(review): the Windows location is assumed to mirror the Linux layout below the OneDrive root - confirm.
tsla = pd.read_csv(data_root + "Climate/snowlines/HEF-snowlines-1999-2010_manual_filtered.csv")

# params = pd.read_csv(path+"cosipy_synthetic_params_lhs.csv", index_col=0)
params = pd.read_csv(path+"LHS-narrow_1D20m_1999_2010_fullprior.csv", index_col=0)
params

In [None]:
# Evaluation period. The model configuration starts with a spin-up year, so the
# period used here starts one year later.
time_start_dt = pd.to_datetime("2000-01-01")
time_end_dt = pd.to_datetime("2009-12-31")
print("Start date:", time_start_dt)
print("End date:", time_end_dt)

# Work on a copy so that `tsla` stays untouched and this cell can be re-run.
tsla_true_obs = tsla.copy()
tsla_true_obs['LS_DATE'] = pd.to_datetime(tsla_true_obs['LS_DATE'])
in_period = (tsla_true_obs['LS_DATE'] > time_start_dt) & (tsla_true_obs['LS_DATE'] <= time_end_dt)
# Explicit .copy(): the columns assigned below must be written to an independent frame,
# not to a slice of the unfiltered one (avoids pandas' chained-assignment warning).
tsla_true_obs = tsla_true_obs.loc[in_period].copy().set_index('LS_DATE')

# Normalize the standard deviation by the glacier elevation range
elev_range = tsla_true_obs['glacier_DEM_max'] - tsla_true_obs['glacier_DEM_min']
tsla_true_obs['SC_stdev'] = tsla_true_obs['SC_stdev'] / elev_range

# Model resolution (20 m) expressed in the same normalized units, using the first row's elevation range
thres_unc = 20 / elev_range.iloc[0]
print(thres_unc)

## Set observational uncertainty to at least the model resolution (20 m); keep it where it is larger
sc_norm = np.where(tsla_true_obs['SC_stdev'] < thres_unc, thres_unc, tsla_true_obs['SC_stdev'])
tsla_true_obs['SC_stdev'] = sc_norm

In [None]:
# Plot the TSL_normalized column of the filtered snowline observations (frame is indexed by LS_DATE)
tsla_true_obs[['TSL_normalized']].plot()

In [None]:
## Load albedo reference
# (old file for the previous version - adjusted version coming)
# Pick the data root per platform, like the other loading cells do, instead of
# always using the Linux mount point.
# NOTE(review): the Windows location is assumed to mirror the Linux layout below the OneDrive root - confirm.
if sys.platform.startswith('win'):
    albobs_dir = "E:/OneDrive/PhD/PhD/Data/Hintereisferner/Climate/"
else:
    albobs_dir = "/mnt/C4AEBBABAEBB9500/OneDrive/PhD/PhD/Data/Hintereisferner/Climate/"
albobs = xr.open_dataset(albobs_dir + "HEF_processed_HRZ-20CC-filter_albedos.nc")
# Make sure the time axis is ascending
albobs = albobs.sortby("time")
albobs


In [None]:
## Remove the parameter columns that are not used further on
redundant_params = [
    'center_snow_transfer',
    'spread_snow_transfer',
    'roughness_fresh_snow',
    'roughness_firn',
    'aging_factor_roughness',
]
params = params.drop(columns=redundant_params)
# Keep an untouched copy of the reduced parameter table
params_full = params.copy()
params

In [7]:
## randomly select n samples
#size = 500
#params = params_full.sample(n=500, random_state=77)
#params

In [None]:
# One histogram per parameter column (subplots=True) to show that the samples
# really are covering the full parameter space
params[["rrr_factor", "alb_ice", "alb_snow", "alb_firn", "albedo_aging", "albedo_depth", "roughness_ice"]].plot.hist(subplots=True)

In [None]:
"""
import numpy as np
time_normalized = time / 365.0  # Assuming time is in days of the year
time_sin = np.sin(2 * np.pi * time_normalized)
time_cos = np.cos(2 * np.pi * time_normalized)
X_train_scaled = np.concatenate([X_train_scaled, time_sin, time_cos], axis=1)
"""
# Cyclic day-of-year encoding for the snowline observation dates, so that the
# emulator can be given a time dependency during training.
doy = tsla_true_obs.index.dayofyear
# Map day-of-year onto the unit circle (sin, cos) with a fixed 365-day period
# (day 366 of a leap year therefore lands slightly past one full cycle).
time_sin = np.sin(2 * np.pi * doy / 365)
time_cos = np.cos(2 * np.pi * doy / 365)
time_features = np.stack([time_sin, time_cos], axis=-1)  # Shape: (len(doy), 2); build_emulator's snowline input expects 58 steps
time_features
#params['DOY'] = doy

### Repeat for albedo
# Same encoding, this time for the time stamps of the albedo reference dataset
doy_alb = albobs.time.dt.dayofyear.data
# Map day-of-year onto the unit circle (sin, cos)
time_sin_alb = np.sin(2 * np.pi * doy_alb / 365)
time_cos_alb = np.cos(2 * np.pi * doy_alb / 365)
time_features_alb = np.stack([time_sin_alb, time_cos_alb], axis=-1)  # Shape: (len(doy_alb), 2); build_emulator's albedo input expects 98 steps
time_features_alb
#params['DOY'] = doy

In [None]:
# Load the simulated albedo series: one simulation file per parameter set (row of `params`).
# The resulting list has the same order as the rows of `params`.

# The directory does not change between iterations, so resolve it once.
# (`'win' in sys.platform` would also match macOS "darwin"; test the prefix instead.)
if sys.platform.startswith('win'):
    alb_dir = "E:/OneDrive/PhD/PhD/Data/Hintereisferner/Output/albedo_files/LHS/alb_only/"
else:
    alb_dir = "/mnt/C4AEBBABAEBB9500/OneDrive/PhD/PhD/Data/Hintereisferner/Output/albedo_files/LHS/alb_only/"

list_sim_alb = []
n_runs = len(params)

for pos, (i, r) in enumerate(params.iterrows()):
    # Report progress by position, so it is independent of the index labels
    # and of the number of parameter sets actually present.
    if pos % 250 == 0:
        print(f"Processing file {pos}/{n_runs}")

    # The file names contain the parameter values rounded to 4 decimals
    rrr_factor = round(r['rrr_factor'],4)
    alb_ice = round(r['alb_ice'],4)
    alb_snow = round(r['alb_snow'],4)
    alb_firn = round(r['alb_firn'],4)
    alb_aging = round(r['albedo_aging'],4)
    alb_depth = round(r['albedo_depth'],4)
    roughness_ice = round(r['roughness_ice'], 4)

    filename = f"HEF_COSMO_1D20m_1999_2010_HORAYZON_IntpPRES_LHS-narrow_19990101-20091231_RRR-{rrr_factor}_{alb_snow}_{alb_ice}_{alb_firn}_{alb_aging}_{alb_depth}_0.24_{roughness_ice}_4.0_0.0026_num2.nc"

    # load_dataset reads the file into memory and closes it again, so no file
    # handle is kept open per parameter set. `sim_alb` stays usable afterwards
    # (the next cell reads the series length from the last one).
    sim_alb = xr.load_dataset(alb_dir + filename)

    #sort by time
    #sim_alb = sim_alb.sel(time=albobs.time)
    #sim_alb = sim_alb.sortby("time")
    list_sim_alb.append(sim_alb.ALBEDO_weighted.data)

In [None]:
# One row per simulation, one column per albedo time step (alb1 ... albN);
# N is taken from the last simulation that was loaded.
n_alb_steps = len(sim_alb.ALBEDO_weighted.data)
alb_columns = [f'alb{step}' for step in range(1, n_alb_steps + 1)]
df_albedo = pd.DataFrame(list_sim_alb, columns=alb_columns)
df_albedo

In [None]:
## Select albedo data
# drop index from dataframe - since we loop over it, dfs are aligned
params.reset_index(drop=True, inplace=True)
params

In [None]:
# Put parameters and simulated albedo series side by side (column-wise concat, matched on the index)
params_merged = pd.concat([params, df_albedo], axis=1)
params_merged 

In [17]:
# Independent copy of the merged table; this is the frame the training loop samples from
params_subset = params_merged.copy()

In [None]:
# Display the albedo reference dataset again for inspection
albobs

In [19]:
def build_emulator(seed=None, n_params=7, n_snowline_steps=58, n_albedo_steps=98, n_time_features=2):
    """Build and compile the multi-output emulator network.

    The model has three inputs and three outputs:

    * ``mass_balance_input`` (``n_params``,) -> ``mass_balance_output`` (1,)
      through a small dense network.
    * ``snowlines_input`` (``n_snowline_steps``, ``n_params + n_time_features``)
      -> ``snowlines_output`` (``n_snowline_steps``,) through two stacked
      bidirectional LSTMs and a per-time-step dense head with sigmoid output.
    * ``alb_input`` (``n_albedo_steps``, ``n_params + n_time_features``)
      -> ``alb_output`` (``n_albedo_steps``,) with the same architecture as the
      snowline branch (separate weights).

    Parameters
    ----------
    seed : int or None
        If given, seeds the Keras/TensorFlow random state and enables
        deterministic ops before any layer is created.
    n_params : int
        Number of (scaled) model parameters per sample. Default 7.
    n_snowline_steps : int
        Number of snowline time points per sample. Default 58.
    n_albedo_steps : int
        Number of albedo time points per sample. Default 98.
    n_time_features : int
        Number of time features appended to the parameters at every time
        point of the two sequence inputs. Default 2 (sin/cos).

    Returns
    -------
    tensorflow.keras.Model
        Compiled model (Adam, lr=1e-3; MSE loss for mass balance, Huber loss
        for snowlines and albedo; one RootMeanSquaredError metric per output).
    """
    if seed is not None:
        tf.keras.utils.set_random_seed(seed)
        tf.config.experimental.enable_op_determinism()

    # Every time point of a sequence input carries the parameters plus the time features
    n_seq_features = n_params + n_time_features

    mass_balance_input = Input(shape=(n_params,), name="mass_balance_input")
    snowlines_input = Input(shape=(n_snowline_steps, n_seq_features), name="snowlines_input")
    alb_input = Input(shape=(n_albedo_steps, n_seq_features), name="alb_input")

    # NOTE: layers are created in a fixed order on purpose - with a seed set, the
    # order of creation can influence the initial weights.

    # Shared Layers for Mass Balance
    shared_mb = Dense(64, activation='relu')(mass_balance_input)
    shared_mb = Dense(128, activation='relu')(shared_mb)
    shared_mb = Dropout(0.1)(shared_mb)

    # Mass Balance Branch
    mb_branch = Dense(64, activation='relu')(shared_mb)
    mb_output = Dense(1, name="mass_balance_output")(mb_branch)

    # Shared Layers for Snowlines
    # LSTM Layers (Stacked)
    lstm_out = Bidirectional(LSTM(64, return_sequences=True))(snowlines_input)
    lstm_out = Bidirectional(LSTM(64, return_sequences=True))(lstm_out)

    # Shared Layers for Albedo
    # LSTM Layers (Stacked)
    lstm_alb = Bidirectional(LSTM(64, return_sequences=True))(alb_input)
    lstm_alb = Bidirectional(LSTM(64, return_sequences=True))(lstm_alb)

    # Multi-Scale CNN (Inception Style) - currently disabled
    #conv1 = Conv1D(filters=64, kernel_size=2, padding="same", activation="relu")(lstm_alb) #3 months, 6 months, 12 months
    #conv2 = Conv1D(filters=64, kernel_size=3, padding="same", activation="relu")(lstm_out)

    # Combine Multi-Scale Features
    #x = Concatenate()([conv1, conv2])
    #x = BatchNormalization()(x)

    #shared_sl = LSTM(64, return_sequences=True)(snowlines_input)
    shared_sl = Dense(128, activation='relu')(lstm_out)

    # Snowlines Branch
    sl_branch = Dense(64, activation='relu')(shared_sl)
    #sl_branch = Dropout(0.1)(sl_branch)  # Add Dropout here for snowlines branch
    snowlines_output = Dense(1, activation='sigmoid')(sl_branch)  # Predict one value per time point
    # Drop the trailing singleton axis: (batch_size, n_snowline_steps, 1) -> (batch_size, n_snowline_steps)
    snowlines_output = Reshape((n_snowline_steps,), name="snowlines_output")(snowlines_output)

    ### Albedo
    #shared_alb = Dense(128, activation='relu')(conv1)
    shared_alb = Dense(128, activation='relu')(lstm_alb)

    # Albedo Branch
    alb_branch = Dense(64, activation='relu')(shared_alb)
    #alb_branch = Dropout(0.1)(alb_branch)  # Add Dropout here for albedo branch
    alb_output = Dense(1, activation='sigmoid')(alb_branch)  # Predict one value per time point
    # Drop the trailing singleton axis: (batch_size, n_albedo_steps, 1) -> (batch_size, n_albedo_steps)
    alb_output = Reshape((n_albedo_steps,), name="alb_output")(alb_output)

    # Combine
    model = Model(inputs=[mass_balance_input, snowlines_input, alb_input],
                  outputs=[mb_output, snowlines_output, alb_output])

    loss_type = "huber"
    # Compile the model with a combined (weighted sum) loss function
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss={
            'mass_balance_output': "mse",
            'snowlines_output': loss_type,
            'alb_output': loss_type
        },
        loss_weights={
            'mass_balance_output': 1.0,  # You can tune this
            'snowlines_output': 1.0,      # You can tune this
            'alb_output': 1.0
        },
        # One RMSE metric per output, in the order of `outputs` above
        metrics=[keras.metrics.RootMeanSquaredError(),keras.metrics.RootMeanSquaredError(),keras.metrics.RootMeanSquaredError()]
    )

    return model

In [20]:
from scipy.stats import gaussian_kde
from sklearn.metrics import r2_score, mean_absolute_error, root_mean_squared_error
# Global matplotlib default: use font size 20 for all figures created from here on
plt.rcParams.update({'font.size': 20})

def plot_loss(history, loss_var):
    """
    Draw the training and validation loss curves of the three model outputs
    (mass balance, snowlines, albedo) side by side in one figure.

    Parameters:
    history: History object returned by the Keras model.fit() method; its
             `history` dict must contain the per-output loss entries
             ('<output>_loss' and 'val_<output>_loss').
    loss_var: Text used as y-axis label of every panel (e.g. the loss name).
    """
    # (train key, validation key, train label, validation label, panel title)
    panels = [
        ('mass_balance_output_loss', 'val_mass_balance_output_loss',
         'Train Mass Balance Loss', 'Val Mass Balance Loss', 'Mass Balance Loss'),
        ('snowlines_output_loss', 'val_snowlines_output_loss',
         'Train Snowlines Loss', 'Val Snowlines Loss', 'Snowlines Loss'),
        ('alb_output_loss', 'val_alb_output_loss',
         'Train Albedo Loss', 'Val Albedo Loss', 'Albedo Loss'),
    ]

    plt.figure(figsize=(14, 6))

    curves = history.history
    for column, (train_key, val_key, train_label, val_label, title) in enumerate(panels, start=1):
        plt.subplot(1, 3, column)
        plt.plot(curves[train_key], label=train_label)
        plt.plot(curves[val_key], label=val_label)
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(loss_var)
        plt.legend()

    plt.tight_layout()
    plt.show()


def plot_scatter(y_train, y_pred, savefig=False):
    """Score predictions against reference values, overall and per reference quartile.

    Despite its name this function currently draws nothing: the scatter plot
    was disabled, only the metrics are computed, and the per-bin RMSE values
    are printed.

    Parameters
    ----------
    y_train : array-like
        Reference values. Flattened before use.
    y_pred : array-like
        Predicted values with the same number of elements as ``y_train``.
        Flattened before use.
    savefig : bool
        Accepted for backward compatibility; has no effect while plotting is
        disabled.

    Returns
    -------
    tuple
        ``(mae, r2, rmse, rmse_by_bin)`` where the first three are computed
        over all values and ``rmse_by_bin`` maps a bin label to the RMSE of the
        samples whose reference value falls into that quantile bin.
    """
    # Flatten everything; all metrics below are order independent, so no
    # shuffling (and no consumption of the global NumPy random state) is needed.
    y_expected = np.asarray(y_train).reshape(-1)
    y_predicted = np.asarray(y_pred).reshape(-1)

    mae = mean_absolute_error(y_expected, y_predicted)
    r2 = r2_score(y_expected, y_predicted)
    rmse = root_mean_squared_error(y_expected, y_predicted)

    # Quantile edges (4 bins). Identical edges (many equal reference values)
    # are merged, because pd.cut rejects duplicate bin edges.
    default_labels = ['Q1 (lowest)', 'Q2', 'Q3', 'Q4 (highest)']
    edges = np.unique(np.quantile(y_expected, [0.0, 0.25, 0.5, 0.75, 1.0]))
    if len(edges) == len(default_labels) + 1:
        bin_labels = default_labels
    else:
        bin_labels = [f"Q{i + 1}" for i in range(len(edges) - 1)]

    # RMSE per bin. Uses its own variable name so that the overall `rmse`
    # returned below is not overwritten.
    rmse_by_bin = {}
    if len(edges) >= 2:  # fewer than two edges: all reference values are equal, nothing to bin
        bin_indices = pd.cut(y_expected, bins=edges, labels=bin_labels, include_lowest=True)
        for label in bin_labels:
            mask = np.asarray(bin_indices == label)
            if mask.any():
                rmse_by_bin[label] = np.sqrt(np.mean((y_predicted[mask] - y_expected[mask]) ** 2))

    # Print results
    for label, bin_rmse in rmse_by_bin.items():
        print(f"{label}: RMSE = {bin_rmse:.4f}")

    return (mae, r2, rmse, rmse_by_bin)

In [None]:
# Sample-size experiment: for every training-set size in `sample_sizes`, train the
# emulator `num_repeats` times (different train/validation split and seed per
# repeat), score it on the validation split and save model + predictions.
# `results[n]` collects one metrics dict per repeat.
sample_sizes = [300, 400, 500, 1000, 1500, 2000]
num_repeats = 10

# Elevation range used to convert normalized snowline errors to metres
max_glacier_elev = 3700.0
min_glacier_elev = 2440.0

# Output directory (resolved once; `'win' in sys.platform` would also match "darwin")
if sys.platform.startswith('win'):
    out_dir = "E:/OneDrive/PhD/PhD/Data/Hintereisferner/COSIPY/NN_test/"
else:
    out_dir = "/mnt/C4AEBBABAEBB9500/OneDrive/PhD/PhD/Data/Hintereisferner/COSIPY/NN_test/"


def maximin_subset(X, n_samples, rng=None):
    """Greedy maximin selection: pick rows of X that maximize the minimum distance
    to the rows selected so far.

    X: 2-D array (rows = candidates). n_samples: number of rows to select
    (clamped to the number of rows, so no row is returned twice).
    rng: optional numpy Generator used for the random starting row.
    Returns the list of selected row positions in selection order.
    """
    if rng is None:
        rng = np.random.default_rng()
    n_samples = min(n_samples, X.shape[0])

    first = int(rng.integers(X.shape[0]))
    selected = [first]
    # Distance of every candidate to its nearest selected row; updated
    # incrementally instead of recomputing all pairwise distances each step.
    min_dist = cdist(X[[first]], X)[0]
    min_dist[first] = -np.inf  # never pick a row twice
    while len(selected) < n_samples:
        next_idx = int(np.argmax(min_dist))
        selected.append(next_idx)
        min_dist = np.minimum(min_dist, cdist(X[[next_idx]], X)[0])
        min_dist[next_idx] = -np.inf
    return selected


def _append_time_features(X, time_feats):
    """Repeat the parameter rows of X (n, p) for every time point and append the
    (unscaled) time features (t, k): returns an array of shape (n, t, p + k)."""
    time_feats = np.asarray(time_feats)
    repeated = np.repeat(X[:, None, :], time_feats.shape[0], axis=1)
    tiled = np.tile(time_feats, (X.shape[0], 1, 1))
    return np.concatenate([repeated, tiled], axis=-1)


def _regression_metrics(y_true, y_pred):
    """MAE, RMSE and R2 over all (flattened) values."""
    y_true, y_pred = np.ravel(y_true), np.ravel(y_pred)
    return {
        "MAE": mean_absolute_error(y_true, y_pred),
        "RMSE": root_mean_squared_error(y_true, y_pred),
        "R2": r2_score(y_true, y_pred),
    }


def _rmse_by_quantile(y_true, y_pred):
    """RMSE per quartile bin of the reference values (duplicate bin edges are merged)."""
    y_true, y_pred = np.ravel(y_true), np.ravel(y_pred)
    edges = np.unique(np.quantile(y_true, [0.0, 0.25, 0.5, 0.75, 1.0]))  # ensures unique edges
    if len(edges) < 2:
        return {}
    labels = [f"Q{i+1}" for i in range(len(edges) - 1)]
    bins = pd.cut(y_true, bins=edges, labels=labels, include_lowest=True)
    rmse_by_bin = {}
    for label in labels:
        mask = np.asarray(bins == label)
        if mask.any():
            rmse_by_bin[label] = np.sqrt(np.mean((y_pred[mask] - y_true[mask]) ** 2))
    return rmse_by_bin


# Column groups do not change between runs
list_sims = [x for x in params_subset.columns if 'sim' in x]
list_albs = [x for x in params_subset.columns
             if 'alb' in x and x not in ['alb_ice','alb_snow','alb_firn','albedo_aging','albedo_depth']]
features_to_drop = ['mb'] + list_sims + list_albs

results = {}

for n in sample_sizes:
    print(f"Starting NN generation with {n} samples.")
    results[n] = []
    for repeat in range(num_repeats):
        print("N. repetetion: ", repeat)
        seed = repeat  # Different seed per run

        # Random train/validation split of the full data set
        train_dataset, validation_dataset = train_test_split(params_subset.index,
                                                    train_size=0.8,
                                                    test_size=0.2, random_state=seed)

        df_train = params_subset.loc[train_dataset]
        df_validation = params_subset.loc[validation_dataset]

        # Fit the scaler on the parameters of the whole training pool
        df_train_X = df_train.drop(features_to_drop, axis=1)
        scaler = StandardScaler()
        scaler.fit(df_train_X.values)
        X_train_scaled = scaler.transform(df_train_X.values)  # Shape: (n_pool, n_params)

        # Maximin sampling in the scaled parameter space, i.e. maximize the minimum
        # distance between the selected points. Seeded so that a run is reproducible.
        idx = maximin_subset(X_train_scaled, n, rng=np.random.default_rng(seed))
        if len(idx) < n:
            print(f"Warning: only {len(idx)} training samples available for requested size {n}.")
        n_samples = n

        df_subset = df_train.iloc[idx].reset_index(drop=True)

        # Inputs and targets of the selected subset
        X_train = X_train_scaled[idx]
        df_train_y_mb = df_subset[['mb']].values
        df_train_y_tsla = df_subset[list_sims].values
        df_train_y_alb = df_subset[list_albs].values

        # Repeat the input features for every time point and add the time features (unscaled)
        X_train_with_time = _append_time_features(X_train, time_features)          # Snowlines
        X_train_with_time_alb = _append_time_features(X_train, time_features_alb)  # Albedo

        # Same for the validation data (scaled with the training scaler)
        df_validation_X = df_validation.drop(features_to_drop, axis=1)
        df_validation_y_mb = df_validation[['mb']].values
        df_validation_y_tsla = df_validation[list_sims].values
        df_validation_y_alb = df_validation[list_albs].values

        X_validation = scaler.transform(df_validation_X.values)  # Shape: (n_validation, n_params)
        X_validation_with_time = _append_time_features(X_validation, time_features)
        X_validation_with_time_alb = _append_time_features(X_validation, time_features_alb)

        print(X_train.shape)
        print(df_train_y_mb.shape)
        print(df_train_y_tsla.shape)
        print(df_train_y_alb.shape)

        train_inputs = {
            'mass_balance_input': X_train,
            'snowlines_input': X_train_with_time,
            'alb_input': X_train_with_time_alb
        }
        train_targets = {
            'mass_balance_output': df_train_y_mb,
            'snowlines_output': df_train_y_tsla,
            'alb_output': df_train_y_alb
        }
        validation_inputs = {
            'mass_balance_input': X_validation,
            'snowlines_input': X_validation_with_time,
            'alb_input': X_validation_with_time_alb
        }
        validation_targets = {
            'mass_balance_output': df_validation_y_mb,
            'snowlines_output': df_validation_y_tsla,
            'alb_output': df_validation_y_alb
        }

        # Build and train
        model = build_emulator(seed=seed)
        # Early Stopping and Learning Rate Scheduler
        early_stopping = EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True, verbose=1)
        lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=20, min_lr=1e-6, verbose=1)
        # Fit the model
        history = model.fit(
            x=train_inputs,
            y=train_targets,
            validation_data=(validation_inputs, validation_targets),
            epochs=300,
            batch_size=32,
            callbacks=[early_stopping, lr_scheduler],
            verbose=0
        )

        # Evaluate the model. The per-output metric compiled into the model is
        # RootMeanSquaredError, so the last three values are RMSEs.
        loss, mb_loss, sl_loss, alb_loss, mb_rmse, sl_rmse, alb_rmse = model.evaluate(
            x=validation_inputs,
            y=validation_targets,
            verbose=0
        )

        print(f"Total Loss: {loss}")
        print(f"Mass Balance Loss: {mb_loss}, RMSE: {mb_rmse}")
        print(f"Snowlines Loss: {sl_loss}, RMSE: {sl_rmse}")
        print(f"Albedo Loss: {alb_loss}, RMSE: {alb_rmse}")

        #plot_loss(history, "huber")

        predictions = model.predict(validation_inputs)
        predicted_mass_balance = predictions[0]  # Shape: (N, 1)
        predicted_snowlines = predictions[1]     # Shape: (N, number of snowline time points)
        predicted_albedos = predictions[2]       # Shape: (N, number of albedo time points)

        ## Mass balance
        # plot_scatter also prints the RMSE per quartile of the reference values.
        # The overall RMSE is computed directly from the predictions here.
        mae_mb, r2_mb, _, rmse_by_bin_mb = plot_scatter(df_validation_y_mb, predicted_mass_balance, savefig=False)
        rmse_mb = root_mean_squared_error(df_validation_y_mb.flatten(), predicted_mass_balance.flatten())

        ## Snowlines (normalized); per-quartile RMSE is reported in metres
        metrics_tsla = _regression_metrics(df_validation_y_tsla, predicted_snowlines)
        for label, rmse in _rmse_by_quantile(df_validation_y_tsla, predicted_snowlines).items():
            rmse_m = rmse * (max_glacier_elev - min_glacier_elev)
            print(f"{label}: RMSE = {rmse_m:.4f}")

        ## Albedo
        metrics_alb = _regression_metrics(df_validation_y_alb, predicted_albedos)
        for label, rmse in _rmse_by_quantile(df_validation_y_alb, predicted_albedos).items():
            print(f"{label}: RMSE = {rmse:.4f}")

        metrics_dict = {
            "MB": {"MAE": mae_mb, "RMSE": rmse_mb, "R2": r2_mb},
            "TSLA": metrics_tsla,
            "ALB": metrics_alb,
        }
        print(metrics_dict)
        results[n].append(metrics_dict)

        experiment_data = {
            "n_samples": n_samples,              # Requested sample size
            "train_data": X_train,               # Scaled training inputs (numpy array)
            "test_data": X_validation,           # Scaled validation inputs (numpy array)
            "predicted_mb": predictions[0],      # Shape: (N, 1)
            "predicted_tsla": predictions[1],    # Shape: (N, number of snowline time points)
            "predicted_alb": predictions[2],     # Shape: (N, number of albedo time points)
            "metrics": metrics_dict,             # MAE, RMSE, R2 per target
        }

        ## Save to file (same naming on every platform, one file pair per size and repeat)
        run_tag = f"{n_samples}v{repeat}"
        model.save(f"{out_dir}model_{run_tag}_samples.keras")
        with open(f"{out_dir}model_experiment_{run_tag}_samples.pkl", "wb") as f:
            pickle.dump(experiment_data, f)

        # Free model memory before the next run (metrics stay referenced by `results`)
        del model
        del history
        del metrics_dict
        del experiment_data
        K.clear_session()
        gc.collect()