In [1]:
import sys
import os
import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Dense, Layer, Dropout

print("Successfully imported libraries!")

Successfully imported libraries!


In [2]:
# List available GPUs
gpus = tf.config.list_physical_devices('GPU')
print("GPUs Available:", gpus)

# Check if TensorFlow will place operations on the GPU
print("TensorFlow Version:", tf.__version__)

# Run a quick test
with tf.device('/GPU:0'):
    a = tf.random.normal([1000, 1000])
    b = tf.random.normal([1000, 1000])
    c = tf.matmul(a, b)
    print("Test computation done on GPU")

GPUs Available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
TensorFlow Version: 2.16.1


2025-11-08 12:53:37.043741: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:00:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-11-08 12:53:44.115302: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:00:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-11-08 12:53:44.115613: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:00:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-11-08 12:53:44.120765: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:00:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-11-08 12:53:44.121109: I external/local_xla/xla/stream_executor

Test computation done on GPU


In [3]:
# Get the absolute path of the current script's directory
current_dir = os.path.dirname(os.path.abspath("transformer0.ipynb"))

# Get the absolute path of the parent directory (project_folder)
parent_dir = os.path.dirname(current_dir)

# Add the parent directory to the Python path
sys.path.append(parent_dir)

# Now you can import from GetXY.py
from GetXY import x_train, y_train, x_val, y_val

early_stopping = tf.keras.callbacks.EarlyStopping(
    patience=5,
    min_delta=0.001,
    restore_best_weights=True,
    monitor='mse'
)
# ... rest of your code
print("Successfully imported variables!")

3 + 4 - -4
2543
11.0

Expressions not in x:
2 + -1 - 2
True
1457
-1.0
15
-4.0
[-5.   1.   1.   0.5  0.5  0.5  0.5  0.5  0.5  0.5  0.5  0.5  0.5  0.5
  0.5]
Successfully imported variables!


In [4]:
#add a cls token at the beginning of x_train and x_val
pad_value = 15
x_train = np.pad(x_train, ((0, 0), (1, 0)), 'constant', constant_values=pad_value)
x_val = np.pad(x_val, ((0, 0), (1, 0)), 'constant', constant_values=pad_value)

In [5]:
#defining the positional encoder modelled after the formula in the paper that was cited. (generated by gemini)
def posEncoding(max_seq_len, d_model):
    # Create a matrix of angles according to the formula
    angle_rads = get_angles(np.arange(max_seq_len)[:, np.newaxis],
                          np.arange(d_model)[np.newaxis, :],
                          d_model)
    
    # Apply sine to even indices in the array; 2i
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
    
    # Apply cosine to odd indices in the array; 2i+1
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
    
    # Add a batch dimension
    pos_encoding = angle_rads[np.newaxis, ...]
    
    return tf.cast(pos_encoding, dtype=tf.float32)

def get_angles(pos, i, d_model):
    angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
    return pos * angle_rates

In [6]:
#defining the point-wise FNN
#d_ff = 2048 #(original transformer size)
def point_wise_fnn(d_model, d_ff):
    return tf.keras.Sequential([
        Dense(d_ff, activation = "relu", kernel_initializer = "glorot_uniform", bias_initializer = "zeros"),
        Dense(d_model, kernel_initializer = "glorot_uniform", bias_initializer = "zeros")
    ])

In [7]:
#scaled dot-product attention
class MH_Attention(Layer):
    def __init__(self, d_model, num_heads):
        super().__init__()
        #for the split_heads function:
        self.num_heads = num_heads
        self.d_model = d_model
        
        assert d_model % self.num_heads == 0
        self.depth = d_model // self.num_heads

        #for the call function:
        #This allows the model to learn the best way to project the input embeddings. (linear projection)
        self.wq = Dense(d_model, kernel_initializer = "glorot_uniform", bias_initializer = "zeros")
        self.wk = Dense(d_model, kernel_initializer = "glorot_uniform", bias_initializer = "zeros")
        self.wv = Dense(d_model, kernel_initializer = "glorot_uniform", bias_initializer = "zeros")

        #it's important to initialize this aswell as the ones above here, so that the model saves the previous weights and is able to learn.
        self.finalDense = Dense(d_model, kernel_initializer = "glorot_uniform", bias_initializer = "zeros")
        
    def SDP_Attention(self, q, k, v, mask):
        matmul_qk = tf.matmul(q, k, transpose_b=True) #calculate the dotproduct, between the query and a transposed key.
        d_k = tf.shape(k)[-1] #read the dimensionality of the key tensor (here d_model/num_heads = depth)
        d_k = tf.cast(d_k, tf.float32) #convert to float type
        scaled_qk = matmul_qk / tf.math.sqrt(d_k) #scale for purposes discussed in their paper.        

        if mask is not None:
            scaled_qk += (mask * -1e9) #masking to a big negative number
        
        softmaxed_qk = tf.nn.softmax(scaled_qk, axis = -1) #apply softmax function (axis = -1) for softmaxing all the different keys. The last entry is the number of keys (not the dimensionality of them, like it was befre.)
        output = tf.matmul(softmaxed_qk, v) #multiply the attention-weights with the values corresponding to the keys, in respect to the query.
        return output, softmaxed_qk
        
    def split_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth)) #splits up the x data which is gonna be q, k, or v, into the individual heads. effectively adding a dimension (self.num_heads), after splitting up self.d_model
        return tf.transpose(x, perm =[0,2,1,3]) #reorganizes the dimensions into the expected order (batch_size, num_heads, seq_len, depth(the new d_model "fractions"))

    def call(self, q, k ,v, mask = None):
        batch_size = tf.shape(q)[0]

        #(linear projection)
        q = self.wq(q)
        k = self.wk(k)
        v = self.wv(v)

        #split them all up into the individual heads. (add a dimension basically)
        q = self.split_heads(q , batch_size)
        k = self.split_heads(k , batch_size)
        v = self.split_heads(v , batch_size)

        sdp_attention, attention_weights = self.SDP_Attention(q,k,v, mask = mask) #applies the sdp-attention to all of them. sdp_attention at the end has a shape of: (batch_size, num_heads, seq_len, depth)
        
        sdp_attention = tf.transpose(sdp_attention, perm=[0, 2, 1, 3]) #swap the 2nd and 3rd dimensions
        combined_attention = tf.reshape(sdp_attention, (batch_size, -1, self.d_model)) #combine back the two last dimnensions (num_heads and depth) into the original d_model

        output = self.finalDense(combined_attention)
        return output, attention_weights

In [8]:
class EncodingLayer(Layer):
    def __init__(self, d_model, num_heads, d_ff, rate):
        super().__init__()
        #define all the components of a Layer so the model will learn them properly here.
        self.mha = MH_Attention(d_model, num_heads)
        self.fnn = point_wise_fnn(d_model, d_ff)

        #initiate the 2 normalizations
        self.norm1 = tf.keras.layers.LayerNormalization()
        self.norm2 = tf.keras.layers.LayerNormalization()

        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

        
    def call(self,x, training, mask = None):
        mha_out, attention_weights = self.mha(x,x,x,mask = mask) #for self-attention: q,k,v = x
        mha_out = self.dropout1(mha_out, training = training) #they apply a small dropout of 0.1 after every residual step in the paper.

        norm_out = self.norm1(x + mha_out) #first, add the vectors, then normalize them.

        fnn_out = self.fnn(norm_out) #2nd sub-layer with fnn
        fnn_out = self.dropout2(fnn_out, training = training) #again apply drop out

        norm2_out = self.norm2(norm_out + fnn_out) #again add and norm

        return norm2_out

In [9]:
class Encoder(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, num_layers, d_ff, rate):
        super().__init__()

        self.d_model = d_model
        self.num_layers = num_layers #amount of encoding layers
        self.layers = [EncodingLayer(d_model, num_heads, d_ff, rate) for i in range(num_layers)] #define multiple diffferent encoding layers here.

        self.dropout = Dropout(rate)
            
    def call(self, x, training, mask = None):
        x = self.dropout(x, training = training) #we want to drop out before the first layer
        for i in range(self.num_layers):
            x = self.layers[i](x, training = training, mask = mask)
        return x

In [10]:
class Transformer(tf.keras.Model):
    def __init__(self, embedding_layer, d_model, max_seq_len, num_heads, num_layers, d_ff, rate):
        super().__init__()
        self.embedding = embedding_layer
        self.d_model = d_model
        self.pos_enc = posEncoding(max_seq_len, d_model)
        self.Encoder = Encoder(d_model, num_heads, num_layers, d_ff, rate)
        self.dropout = tf.keras.layers.Dropout(rate)
        self.finalDense = Dense(1, activation = "linear", kernel_initializer = "glorot_uniform", bias_initializer = "zeros")
        
    def call(self, x, training, mask = None):
        seq_len = tf.shape(x)[1]
        x = tf.expand_dims(x, axis=-1) #add a dimension to x
        x = self.embedding(x)
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32)) #scale with √d_model
        x += self.pos_enc[:, :seq_len, :]
        
        out_Encoder = self.Encoder(x, training = training, mask = mask)

        output = out_Encoder[:,0,:] #pooling: to the first token.
        output = self.dropout(output, training = training) #another dropout

        final = self.finalDense(output) #now we can reduce back to a single neuron. This is the opposite of what we did in the embedding layer.

        return final
        

In [11]:
# Define a custom learning rate schedule class with warmup and cosine decay
class WarmupCosineDecay(tf.keras.optimizers.schedules.LearningRateSchedule):
    """
    A custom learning rate schedule that implements a linear warmup
    followed by a cosine decay.
    """
    def __init__(self, peak_lr, warmup_steps, decay_steps, alpha=0.0, name=None):
        super().__init__()
        self.peak_lr = peak_lr
        self.warmup_steps = warmup_steps
        self.decay_steps = decay_steps
        self.alpha = alpha
        self.name = name

    def __call__(self, step):
        with tf.name_scope(self.name or "WarmupCosineDecay"):
            # Ensure step is a float for calculations
            step = tf.cast(step, tf.float32)
            
            # --- 1. Warmup Phase ---
            # Linearly increase the learning rate from 0 to peak_lr
            warmup_lr = self.peak_lr * (step / self.warmup_steps)

            # --- 2. Cosine Decay Phase ---
            # Define the cosine decay schedule
            cosine_decay_schedule = tf.keras.optimizers.schedules.CosineDecay(
                initial_learning_rate=self.peak_lr,
                decay_steps=self.decay_steps,
                alpha=self.alpha
            )
            # Calculate the learning rate for the decay phase.
            # Note: The 'step' for the cosine part must be relative to its start.
            decay_lr = cosine_decay_schedule(step - self.warmup_steps)

            # --- 3. Choose the correct phase ---
            # Use tf.where to select the learning rate based on the current step
            learning_rate = tf.where(
                step < self.warmup_steps,
                warmup_lr,
                decay_lr
            )
            return learning_rate

    def get_config(self):
        return {
            "peak_lr": self.peak_lr,
            "warmup_steps": self.warmup_steps,
            "decay_steps": self.decay_steps,
            "alpha": self.alpha,
            "name": self.name
        }




In [12]:
import keras_tuner
from tensorflow.keras import backend as K
def build_model(hp):
    K.clear_session()
    # A smaller configuration to reduce overfitting
    # Ensure compatibility
    num_heads = hp.Choice('num_heads', [2, 4, 8])  # Powers of 2 work well
    d_model = hp.Choice('d_model', [32, 64, 128])   # Also powers of 2
    # This guarantees d_model % num_heads == 0
    num_layers = hp.Int('num_layers', 2, 6)
    d_ff = hp.Choice('d_ff', [128, 256, 512, 1024])   # Multiples that work well
    if hp.Boolean("dropout"):
        dropout_rate = 0.05
    else: 
        dropout_rate = 0
    peak_lr = hp.Float("peak learning rate", min_value = 1e-7, max_value = 1e-2, sampling="log")

    embedding_layer = Dense(d_model, kernel_initializer = "glorot_uniform", bias_initializer = "zeros")
    batch_size = 32
    num_epochs = 25
    max_seq_len = 16
    warmup_epochs = 3
    

    
    transformer_model = Transformer(
        embedding_layer = embedding_layer, 
        d_model = d_model,
        max_seq_len = max_seq_len,
        num_heads = num_heads,
        num_layers = num_layers,
        d_ff = d_ff,
        rate = dropout_rate
    )


        # Calculate steps based on your data
    # IMPORTANT: Use the actual length of your training data for this calculation
    steps_per_epoch = len(x_train) // batch_size
    warmup_steps = warmup_epochs * steps_per_epoch
    decay_steps = (num_epochs - warmup_epochs) * steps_per_epoch
    
    # Create an instance of our new scheduler
    lr_schedule = WarmupCosineDecay(
        peak_lr=peak_lr,
        warmup_steps=warmup_steps,
        decay_steps=decay_steps,
        alpha=0.1 # This means the LR will decay to 10% of peak_lr
    )

    transformer_model.compile(
        optimizer=tf.keras.optimizers.AdamW(
            learning_rate=lr_schedule,
            weight_decay = 4e-3,
            beta_1=0.85,  
            beta_2=0.999,  # Primary recommendation: lower this
            clipnorm=1.0
        ),
        loss='mse'
    )
    return transformer_model

build_model(keras_tuner.HyperParameters())

<Transformer name=transformer, built=False>

In [13]:
tuner = keras_tuner.BayesianOptimization(
    hypermodel=build_model,
    objective="val_loss",
    max_trials=50,
    executions_per_trial=1,
    overwrite=False,
    directory="2ndTuner",
    project_name="tuner_2",
)

Reloading Tuner from 2ndTuner/tuner_2/tuner0.json


In [14]:
num_epochs = 25
batch_size = 32

train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(len(x_train)).batch(batch_size)
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(batch_size)

tuner.search(train_dataset, epochs = num_epochs, validation_data = (val_dataset), verbose = 1, callbacks = [])

In [15]:
tuner.results_summary()

Results summary
Results in 2ndTuner/tuner_2
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 38 summary
Hyperparameters:
num_heads: 8
d_model: 64
num_layers: 6
d_ff: 512
dropout: False
peak learning rate: 0.000335475909883661
Score: 0.04216651991009712

Trial 44 summary
Hyperparameters:
num_heads: 8
d_model: 64
num_layers: 6
d_ff: 256
dropout: False
peak learning rate: 0.0003474250203708101
Score: 0.045826565474271774

Trial 47 summary
Hyperparameters:
num_heads: 8
d_model: 64
num_layers: 6
d_ff: 256
dropout: False
peak learning rate: 0.0002954682078714795
Score: 0.047226257622241974

Trial 32 summary
Hyperparameters:
num_heads: 8
d_model: 64
num_layers: 6
d_ff: 256
dropout: False
peak learning rate: 0.00022857874505977143
Score: 0.0673523098230362

Trial 02 summary
Hyperparameters:
num_heads: 8
d_model: 64
num_layers: 6
d_ff: 512
dropout: False
peak learning rate: 0.00044234303994600396
Score: 0.07036179304122925

Trial 29 summary
Hyperparameters:
num_heads: 8

In [16]:
# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters()[0]

In [17]:
def build_best_model(hp, num_epochs):
    # A smaller configuration to reduce overfitting
    # Ensure compatibility
    num_heads = hp.Choice('num_heads', [2, 4, 8])  # Powers of 2 work well
    d_model = hp.Choice('d_model', [32, 64, 128])   # Also powers of 2
    # This guarantees d_model % num_heads == 0
    num_layers = hp.Int('num_layers', 2, 6)
    d_ff = hp.Choice('d_ff', [64, 128, 256, 512])   # Multiples that work well
    if hp.Boolean("dropout"):
        dropout_rate = 0.2 
    else: 
        dropout_rate = 0
    peak_lr = hp.Float("peak learning rate", min_value = 1e-7, max_value = 1e-2, sampling="log")

    embedding_layer = Dense(d_model, kernel_initializer = "glorot_uniform", bias_initializer = "zeros")
    batch_size = 32
    num_epochs = num_epochs
    max_seq_len = 16
    warmup_epochs = np.floor(num_epochs/10) + 1
    

    
    transformer_model = Transformer(
        embedding_layer = embedding_layer, 
        d_model = d_model,
        max_seq_len = max_seq_len,
        num_heads = num_heads,
        num_layers = num_layers,
        d_ff = d_ff,
        rate = dropout_rate
    )


        # Calculate steps based on your data
    # IMPORTANT: Use the actual length of your training data for this calculation
    steps_per_epoch = len(x_train) // batch_size
    warmup_steps = warmup_epochs * steps_per_epoch
    decay_steps = (num_epochs - warmup_epochs) * steps_per_epoch
    
    # Create an instance of our new scheduler
    lr_schedule = WarmupCosineDecay(
        peak_lr=peak_lr,
        warmup_steps=warmup_steps,
        decay_steps=decay_steps,
        alpha=0.1 # This means the LR will decay to 10% of peak_lr
    )
    return transformer_model, lr_schedule
num_epochs_best_model = 200


In [18]:
from FNN1_1 import baseline_deviation, baeline_out_deviation, baseline_long_deviation, baseline_relError, absSum
baseline_out_deviation = baeline_out_deviation
from GetXY import x_test, y_test, out_x_test, out_y_test, long_x_test, long_y_test, outsideExpr, absSum

Epoch 1/200


I0000 00:00:1762602833.474125   44228 service.cc:145] XLA service 0xfffe6c0056f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1762602833.474192   44228 service.cc:153]   StreamExecutor device (0): Orin, Compute Capability 8.7
2025-11-08 12:53:53.524710: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-11-08 12:53:53.780674: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 90300


[1m26/60[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m0s[0m 6ms/step - loss: 26.8351

I0000 00:00:1762602834.877694   44228 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 29ms/step - loss: 23.3327 - val_loss: 19.0927
Epoch 2/200
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 16.9646 - val_loss: 16.0995
Epoch 3/200
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 14.9996 - val_loss: 14.6730
Epoch 4/200
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 13.4185 - val_loss: 12.8001
Epoch 5/200
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 11.3125 - val_loss: 10.2111
Epoch 6/200
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 8.4715 - val_loss: 6.8789
Epoch 7/200
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 5.1637 - val_loss: 3.6190
Epoch 8/200
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 2.5662 - val_loss: 1.8084
Epoch 9/200
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━

  relativeError = np.where(np.array(y_test) != 0, deviation.flatten() / np.abs(np.array(y_test)), deviation.flatten())


In [19]:
pad_value = 15
x_test = np.pad(x_test, ((0, 0), (1, 0)), 'constant', constant_values=pad_value)
out_x_test = np.pad(out_x_test, ((0, 0), (1, 0)), 'constant', constant_values=pad_value)
long_x_test = np.pad(long_x_test, ((0, 0), (1, 0)), 'constant', constant_values=pad_value)

x_test_dataset = tf.data.Dataset.from_tensor_slices(x_test).batch(batch_size)
out_x_test_dataset = tf.data.Dataset.from_tensor_slices(out_x_test).batch(batch_size)
long_x_test_dataset = tf.data.Dataset.from_tensor_slices(long_x_test).batch(batch_size)

In [20]:
#debuggng: 
# Add a custom callback to track the best epoch
class BestEpochTracker(tf.keras.callbacks.Callback):
    def __init__(self):
        self.best_val_loss = float('inf')
        self.best_epoch = 0
        
    def on_epoch_end(self, epoch, logs=None):
        val_loss = logs.get('val_loss')
        if val_loss is not None and val_loss < self.best_val_loss:
            self.best_val_loss = val_loss
            self.best_epoch = epoch + 1
            print(f"New best validation loss: {val_loss:.4f} at epoch {self.best_epoch}")

best_tracker = BestEpochTracker()

In [21]:
n_bootstrap = 5
count = 0
bootstrap_predsInRange = []
bootstrap_predsOutRange = []
bootstrap_predsLongRange = []

for i in range(n_bootstrap):
    early_stopping = tf.keras.callbacks.EarlyStopping(
        patience=20,
        min_delta=0.001,
        restore_best_weights=True,
        monitor='val_loss',
        mode="min", 
        verbose=1
    )
    tf.random.set_seed(i * 12345)  # Different seed each iteration
    best_model, lr_schedule = build_best_model(best_hps, num_epochs_best_model)
    best_model.compile(
        optimizer=tf.keras.optimizers.AdamW(
            learning_rate=lr_schedule,
            weight_decay = 4e-3,
            beta_1=0.85,  
            beta_2=0.999,  # Primary recommendation: lower this
            clipnorm=1.0
        ),
        loss='mse',
        metrics=['mse']
    )
    best_model.fit(
        train_dataset, # Pass the TensorFlow Dataset
        validation_data=val_dataset, # Pass the TensorFlow Dataset
        epochs=num_epochs_best_model,
        callbacks=[early_stopping, best_tracker],
        verbose = 0
    )
    
    bootstrap_predsInRange.append(best_model.predict(x_test_dataset))
    bootstrap_predsOutRange.append(best_model.predict(out_x_test_dataset))
    bootstrap_predsLongRange.append(best_model.predict(long_x_test_dataset))
    print(f"Done: {count}")
    count += 1

bootstrap_predsInRange = np.array(bootstrap_predsInRange)
bootstrap_predsOutRange = np.array(bootstrap_predsOutRange)
bootstrap_predsLongRange = np.array(bootstrap_predsLongRange)














New best validation loss: 23.9213 at epoch 1
New best validation loss: 20.6732 at epoch 2
New best validation loss: 18.0401 at epoch 3
New best validation loss: 16.4750 at epoch 4
New best validation loss: 13.2050 at epoch 6
New best validation loss: 10.8947 at epoch 7
New best validation loss: 10.2258 at epoch 8
New best validation loss: 9.0360 at epoch 9
New best validation loss: 4.8992 at epoch 10
New best validation loss: 4.2583 at epoch 11
New best validation loss: 3.7782 at epoch 13
New best validation loss: 3.2629 at epoch 15
New best validation loss: 2.4269 at epoch 16
New best validation loss: 1.8531 at epoch 23
New best validation loss: 1.4773 at epoch 26
New best validation loss: 1.1669 at epoch 27
New best validation loss: 0.8779 at epoch 29
New best validation loss: 0.6763 at epoch 38
New best validation loss: 0.5501 at epoch 40
New best validation loss: 0.2854 at epoch 43
New best validation loss: 0.2807 at epoch 44
New best validation loss: 0.1864 at epoch 47
New best va






[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 149ms/step
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 122ms/step
Done: 0
Epoch 197: early stopping
Restoring model weights from the end of the best epoch: 177.
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 103ms/step
Done: 1
New best validation loss: 0.0040 at epoch 121
New best validation loss: 0.0036 at epoch 126
New best validation loss: 0.0034 at epoch 127
New best validation loss: 0.0030 at epoch 130
New best validation loss: 0.0029 at epoch 134
New best validation loss: 0.0027 at epoch 137
Epoch 150: early stopping
Restoring model weights from the end of the best epoch: 130.
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 72ms/step
[1m64/64[0m 

In [22]:
best_model.summary()

In [23]:
bootstrap_predsInRange.shape

(5, 1457, 1)

In [24]:
predsInRange = []
predsOutRange = []
predsLongRange = []

for i in range(len(x_test)):
    counter = 0
    for j in range(n_bootstrap):
        counter += bootstrap_predsInRange[j][i]
    predsInRange.append(counter/n_bootstrap)

for i in range(len(out_x_test)):
    counter = 0
    for j in range(n_bootstrap):
        counter += bootstrap_predsOutRange[j][i]
    predsOutRange.append(counter/n_bootstrap)

for i in range(len(long_x_test)):
    counter = 0
    for j in range(n_bootstrap):
        counter += bootstrap_predsLongRange[j][i]
    predsLongRange.append(counter/n_bootstrap)

In [25]:
reldiffInRange = []
diffInRange = []
safe_y_test = np.where(np.isclose(y_test,0.0), 1.0, y_test)

for i in range(len(y_test)):
    diffInRange.append(abs(y_test[i] - predsInRange[i]))
    reldiffInRange.append(abs(y_test[i] - predsInRange[i])/abs(safe_y_test[i]))
print(len(diffInRange))
print("MAE in Range: ", np.mean(diffInRange))
print("MRE in Range: ", np.mean(reldiffInRange))

diffLongRange = []
for i in range(200, 300):
    diffLongRange.append(np.array(np.abs(long_y_test[i]) - np.array(predsLongRange[i])))
    
NEEDdiffLongRange = []
for i in range(len(long_y_test)):
    NEEDdiffLongRange.append(np.array(np.abs(long_y_test[i]) - np.array(predsLongRange[i])))
print("MAE longer Expressions: ", np.mean(NEEDdiffLongRange))

diffOutRange = []
for i in range(len(out_y_test)):
    diffOutRange.append(abs(out_y_test[i] - predsOutRange[i]))
safe_out_y_test = np.where(out_y_test == 0, 1, out_y_test)
diff_out_relError = []
for i in range(len(out_y_test)):
    diff_out_relError.append(abs(diffOutRange[i] / safe_out_y_test[i]))
print("MAE out Range: ", np.mean(diffOutRange))
print("MRE out Range: ", np.mean(diff_out_relError))

1457
MAE in Range:  0.024938123
MRE in Range:  0.009930147
MAE longer Expressions:  5.4132934
MAE out Range:  4.4533553
MRE out Range:  0.4833237


In [26]:
placeholder = absSum(outsideExpr)
diffOutRange = []
indices_with_placeholder_22 = [i for i, val in enumerate(placeholder) if val == 22] 

for i in indices_with_placeholder_22:
    diffOutRange.append(np.abs(out_y_test[i]-predsOutRange[i]))


In [27]:
meanDiff_InRange = np.mean(diffInRange)
meanDiff_OutRange = np.mean(diffOutRange)
meanDiff_LongRange = np.mean(diffLongRange)
meanDiff_OutRelRange = np.mean(diff_out_relError)



In [28]:
benchmark = 0
benchmark += baseline_deviation / (meanDiff_InRange**2) / 4
print(baseline_deviation / (meanDiff_InRange**2) / 4)

benchmark += baseline_out_deviation / (meanDiff_OutRange**2) / 4
print(baseline_out_deviation / (meanDiff_OutRange**2) / 4)

benchmark += baseline_long_deviation / (meanDiff_LongRange**2) / 4
print(baseline_long_deviation / (meanDiff_LongRange**2) / 4)

benchmark += baseline_relError / (meanDiff_OutRelRange**2) / 4
print(baseline_relError / (meanDiff_OutRelRange**2) / 4)

print(f"Benchmark: {benchmark}")

12.986259147312428
0.08068347951950057
0.08940818917870592
0.04266547454277558
Benchmark: 13.19901629055341


In [29]:
#this was generated based on the FNN6.py script, which was made by myself. (Gemini 2.5 Pro)

import numpy as np
from scipy.stats import ttest_1samp

# Initialize lists to store metrics for each bootstrap run
MAEinRange_list = []
MREinRange_list = []
MAEoutRange_list = []
MREoutRange_list = []
MAElongRange_list = []
benchmarks_list = []


# Loop through the predictions from each bootstrap run
for i in range(n_bootstrap):
    # Extract predictions for the current run
    current_preds_in_range = bootstrap_predsInRange[i]
    current_preds_out_range = bootstrap_predsOutRange[i]
    current_preds_long_range = bootstrap_predsLongRange[i]

    # --- In-Range Metrics ---
    diffInRange = np.abs(y_test - current_preds_in_range.flatten())
    safe_y_test = np.where(np.isclose(y_test, 0.0), 1.0, y_test)
    reldiffInRange = diffInRange / np.abs(safe_y_test)
    
    mean_mae_in_range = np.mean(diffInRange)
    mean_mre_in_range = np.mean(reldiffInRange)
    MAEinRange_list.append(mean_mae_in_range)
    MREinRange_list.append(mean_mre_in_range)

    # --- Out-of-Range Metrics ---
    diffOutRange = np.abs(out_y_test - current_preds_out_range.flatten())
    safe_out_y_test = np.where(np.isclose(out_y_test, 0.0), 1.0, out_y_test)
    reldiffOutRange = diffOutRange / np.abs(safe_out_y_test)

    mean_mae_out_range = np.mean(diffOutRange)
    mean_mre_out_range = np.mean(reldiffOutRange)
    MAEoutRange_list.append(mean_mae_out_range)
    MREoutRange_list.append(mean_mre_out_range)

    # --- Long-Range Metrics ---
    diffLongRange = np.abs(long_y_test - current_preds_long_range.flatten())
    mean_mae_long_range = np.mean(diffLongRange)
    MAElongRange_list.append(mean_mae_long_range)
    
    # --- Benchmark Calculation ---
    # This part replicates the specific slicing and filtering from FNN6.py for the benchmark score
    
    # Specific filter for out-of-range benchmark
    placeholder = absSum(outsideExpr)
    indices_with_placeholder_22 = [i for i, val in enumerate(placeholder) if val == 22]
    diffOutRange_for_benchmark = []
    for idx in indices_with_placeholder_22:
        diffOutRange_for_benchmark.append(np.abs(out_y_test[idx] - current_preds_out_range[idx]))
    
    meanDiff_OutRange_for_benchmark = np.mean(diffOutRange_for_benchmark)

    # Specific slice for long-range benchmark
    diffLongRange_for_benchmark = []
    for j in range(200, 300):
         diffLongRange_for_benchmark.append(np.abs(long_y_test[j] - current_preds_long_range[j]))

    meanDiff_LongRange_for_benchmark = np.mean(diffLongRange_for_benchmark)

    # Calculate the benchmark score for the current run
    benchmark = 0
    benchmark += baseline_deviation / (mean_mae_in_range**2) / 4
    benchmark += baseline_out_deviation / (meanDiff_OutRange_for_benchmark**2) / 4
    benchmark += baseline_long_deviation / (meanDiff_LongRange_for_benchmark**2) / 4
    benchmark += baseline_relError / (mean_mre_out_range**2) / 4
    benchmarks_list.append(benchmark)

# --- Statistical Analysis and Final Output ---

# Perform one-sample t-test against a population mean of 1
# Note: A t-test is meaningful only if n_bootstrap > 1
if n_bootstrap > 1:
    stats1, p_value1 = ttest_1samp(MAEinRange_list, popmean=1)
    stats2, p_value2 = ttest_1samp(MREinRange_list, popmean=1)
    stats3, p_value3 = ttest_1samp(MAEoutRange_list, popmean=1)
    stats4, p_value4 = ttest_1samp(MREoutRange_list, popmean=1)
    stats5, p_value5 = ttest_1samp(MAElongRange_list, popmean=1)
    stats6, p_value6 = ttest_1samp(benchmarks_list, popmean=1)

    print(f"MAE in Range P-value: {p_value1}") #please ignore this, it is clear to me now, that it is irrelevant.
    print(f"MRE in Range P-value: {p_value2}") #please ignore this, it is clear to me now, that it is irrelevant.
    print(f"MAE out Range P-value: {p_value3}") #please ignore this, it is clear to me now, that it is irrelevant.
    print(f"MRE out Range P-value: {p_value4}") #please ignore this, it is clear to me now, that it is irrelevant.
    print(f"MAE long Range P-value: {p_value5}") #please ignore this, it is clear to me now, that it is irrelevant.
    print(f"benchmark P-value: {p_value6}\n")
else:
    print("Cannot calculate p-values with n_bootstrap=1. Run more bootstraps for statistical tests.\n")


# Print average metrics across all runs
print(f"Average MAE in Range: {np.mean(MAEinRange_list)}")
print(f"Average MRE in Range: {np.mean(MREinRange_list)}")
print(f"Average MAE out Range: {np.mean(MAEoutRange_list)}")
print(f"Average MRE out Range: {np.mean(MREoutRange_list)}")
print(f"Average MAE long Range: {np.mean(MAElongRange_list)}")
print(f"Average benchmark: {np.mean(benchmarks_list)}\n")

# Print the lists of metrics
print(f"MAE in Range List: {MAEinRange_list}")
print(f"MRE in Range List: {MREinRange_list}")
print(f"MAE out Range List: {MAEoutRange_list}")
print(f"MRE out Range List: {MREoutRange_list}")
print(f"MAE long Range List: {MAElongRange_list}")
print(f"Benchmark List: {benchmarks_list}")

MAE in Range P-value: 5.00519322048704e-09
MRE in Range P-value: 1.9537125367354873e-10
MAE out Range P-value: 0.0002616112273514388
MRE out Range P-value: 0.00013747628247760853
MAE long Range P-value: 2.547903472507344e-05
benchmark P-value: 0.02109364586612576

Average MAE in Range: 0.04838372725499928
Average MRE in Range: 0.01988274135367271
Average MAE out Range: 4.58534299405319
Average MRE out Range: 0.5102148462367032
Average MAE long Range: 3.928956553852186
Average benchmark: 4.326895973969607

MAE in Range List: [0.05162703826468676, 0.05274510079088404, 0.040619104882023395, 0.03374445467596706, 0.06318293766143515]
MRE in Range List: [0.021830857794752904, 0.023007002138480944, 0.016116786574885866, 0.012863879925814892, 0.025595180334428952]
MAE out Range List: [5.573665441385401, 4.676484657378751, 4.700748730589112, 4.096332878155408, 3.879483262757276]
MRE out Range List: [0.6262075386697508, 0.49619005194976695, 0.5363418809540272, 0.46616528704043964, 0.426169472569