In [1]:
# Let's run inference on the test data here. Should we do all test blocks at once? I think it would make sense to evaluate on all blocks together since this is a deep learning model. Maybe we can do both: block-wise and over the whole test dataset. The notebook presents both methods, first block-wise and then all together.

In [2]:
import numpy as np
import os
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score # these can't be used on 3D matrices (they only work up to 2D), so the arrays are flattened before use below
from scipy.stats import pearsonr # this is for 1D vectors

2025-06-10 20:28:24.862958: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-10 20:28:24.898752: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-06-10 20:28:24.898777: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-06-10 20:28:24.899583: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-06-10 20:28:24.905403: I tensorflow/core/platform/cpu_feature_guar

In [3]:
# import the trained seq_2_seq model here
seq_2_seq_generic_model = tf.keras.models.load_model("models/seq_2_seq_generic_model.keras")



2025-06-10 20:28:26.672044: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31134 MB memory:  -> device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:86:00.0, compute capability: 7.0


In [4]:
# validation data location
validation_data_loc = "seq_2_seq_valid_data"

In [5]:
os.listdir(validation_data_loc)

['block_0204_extracted_input_features.npy',
 'block_0204_extracted_target_features.npy']

In [6]:
# load validation data
validation_features = np.load(os.path.join(validation_data_loc, 'block_0204_extracted_input_features.npy'))

In [7]:
validation_features.shape

(910, 13, 32)

In [8]:
# get the predictions for validation data
preds_for_valid_data = seq_2_seq_generic_model.predict(validation_features)



In [9]:
preds_for_valid_data.shape

(910, 7, 32)

In [10]:
# sanity check - check with the mean on script 2
preds_for_valid_data.mean()

0.3711441

In [11]:
# Let's compute the performance metrics for the validation data

In [12]:
# load the true validation data
valid_true = np.load(os.path.join(validation_data_loc, 'block_0204_extracted_target_features.npy'))

In [13]:
valid_true.shape

(910, 7, 32)

In [14]:
# get the metrics with tensorflow

# convert the true and predicted matrices into tensors
y_true_tensor_valid = tf.constant(valid_true, dtype = tf.float32)
y_pred_tensor_valid = tf.constant(preds_for_valid_data, dtype = tf.float32)

In [15]:
y_true_tensor_valid.shape, y_pred_tensor_valid.shape

(TensorShape([910, 7, 32]), TensorShape([910, 7, 32]))

In [16]:
# define the MAE metric
mae_metric_valid = tf.keras.metrics.MeanAbsoluteError()

In [17]:
# update the state of the metric
mae_metric_valid.update_state(y_true_tensor_valid, y_pred_tensor_valid)

<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=6370.0>

In [18]:
# Get the result
mae_valid = mae_metric_valid.result().numpy()

In [19]:
mae_valid # This value matches the one seen during training in script 2

0.15943222

In [20]:
# We can also use numpy to get this value directly
np.mean(np.abs(valid_true - preds_for_valid_data))

0.15943222

In [21]:
# Also get the rmse?

# define the MSE metric
mse_metric_valid = tf.keras.metrics.MeanSquaredError()

In [22]:
# update the state of the metric
mse_metric_valid.update_state(y_true_tensor_valid, y_pred_tensor_valid)

<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=6370.0>

In [23]:
# get the result
mse_valid = mse_metric_valid.result().numpy()
mse_valid

0.1067442

In [24]:
# therefore rmse
rmse_valid = np.sqrt(mse_valid)
rmse_valid

0.32671732

In [25]:
# Do this the numpy way?
np.sqrt(np.mean((valid_true - preds_for_valid_data)**2))

0.32671732

In [26]:
# also let's get the pearsonr?

valid_true_flat = valid_true.flatten()
valid_preds_flat = preds_for_valid_data.flatten()

In [27]:
valid_true_flat.shape, valid_preds_flat.shape

((203840,), (203840,))

In [28]:
pearsonr(valid_true_flat, valid_preds_flat)

PearsonRResult(statistic=0.8564887666318884, pvalue=0.0)

In [29]:
# let's do r2 score
r2_score(valid_true_flat, valid_preds_flat)

0.7335276201093235

In [30]:
mean_absolute_error(valid_true_flat, valid_preds_flat)

0.15943222

In [31]:
mean_squared_error(valid_true_flat, valid_preds_flat)

0.1067442

In [32]:
# Do this with the test data?

In [33]:
# Let's do it both ways. While I'm sure we only need the second way, let's do it the individual block-wise way too.

1. Block-wise

In [34]:
# Let's maybe define a function to compute all the metrics given the features and target matrices

In [35]:
# test it out using one block

In [36]:
test_data_loc = 'seq_2_seq_test_data'

In [37]:
all_preprocessed_test_files = os.listdir(test_data_loc)

In [38]:
all_preprocessed_test_files.sort()

In [39]:
block_0103_features = all_preprocessed_test_files[0]
block_0103_features

'block_0103_extracted_input_features.npy'

In [40]:
block_0103_targets = all_preprocessed_test_files[1]
block_0103_targets

'block_0103_extracted_target_features.npy'

In [41]:
block_0103_features.split(".")[0][-14:]

'input_features'

In [42]:
def predictions_and_metrics_for_test_data(test_data_loc, test_block_name, model=None):
    """Predict one test block with the seq-2-seq model and score the predictions.

    The block's two files are looked up by name inside ``test_data_loc``: both
    start with ``test_block_name + "_"`` and their stems (the text before the
    first ".") end in ``input_features`` / ``target_features`` respectively.

    Parameters
    ----------
    test_data_loc : str
        Folder that holds the saved ``.npy`` files.
    test_block_name : str
        Block identifier, e.g. ``'block_0103'``. Identifiers of any length work.
    model : optional
        Object exposing ``predict(features)``. When omitted, the notebook-level
        ``seq_2_seq_generic_model`` is used.

    Returns
    -------
    tuple
        ``(mae, mse, rmse, pearson_r, r2)``, all computed on the flattened
        target and prediction arrays.

    Raises
    ------
    FileNotFoundError
        If the folder holds no input-feature or no target-feature file for
        the requested block.
    """
    if model is None:
        # fall back to the model loaded at the top of the notebook
        model = seq_2_seq_generic_model

    all_contents_in_test_folder = os.listdir(test_data_loc)
    targets_file = _find_block_file(all_contents_in_test_folder, test_block_name, 'target_features', test_data_loc)
    features_file = _find_block_file(all_contents_in_test_folder, test_block_name, 'input_features', test_data_loc)

    # load the features and the matching targets
    test_features = np.load(os.path.join(test_data_loc, features_file))
    test_targets = np.load(os.path.join(test_data_loc, targets_file))
    # get predictions from the model
    test_preds = model.predict(test_features)
    print(test_targets.shape, test_preds.shape)

    # the sklearn / scipy metrics below are fed 1D vectors, so flatten both arrays
    test_flat_targets = test_targets.flatten()
    test_flat_preds = test_preds.flatten()
    print(test_flat_targets.shape, test_flat_preds.shape)

    # compute the metrics
    mae = mean_absolute_error(test_flat_targets, test_flat_preds)
    mse = mean_squared_error(test_flat_targets, test_flat_preds)
    rmse = np.sqrt(mse)
    pearson_corr = pearsonr(test_flat_targets, test_flat_preds)
    r2_value = r2_score(test_flat_targets, test_flat_preds)

    # pearsonr returns (statistic, pvalue); only the statistic is reported
    return mae, mse, rmse, pearson_corr[0], r2_value


def _find_block_file(file_names, test_block_name, stem_suffix, folder):
    """Return the file of ``test_block_name`` whose stem ends with ``stem_suffix``."""
    # match on "<block>_" so the identifier may have any length and a shorter
    # identifier can never pick up files that belong to a different block
    block_prefix = test_block_name + "_"
    matches = sorted(
        name for name in file_names
        if name.startswith(block_prefix) and name.split(".")[0].endswith(stem_suffix)
    )
    if not matches:
        raise FileNotFoundError(
            f"no '*{stem_suffix}' file found for block '{test_block_name}' in '{folder}'"
        )
    # sorted() keeps the choice deterministic should several files match
    return matches[0]

In [43]:
# Try this with block 0103
blk_0103_mae, blk_0103_mse, blk_0103_rmse, blk_0103_r, blk_0103_r2 = predictions_and_metrics_for_test_data(test_data_loc, 'block_0103')

(910, 7, 32) (910, 7, 32)
(203840,) (203840,)


In [44]:
print(blk_0103_mae, blk_0103_mse, blk_0103_rmse, blk_0103_r, blk_0103_r2)

0.15601262 0.099959336 0.31616345 0.8633312172366329 0.744471232665102


In [45]:
0.15601262, 0.099959336, 0.31616345, 0.8633312172366329, 0.744471232665102

(0.15601262, 0.099959336, 0.31616345, 0.8633312172366329, 0.744471232665102)

In [46]:
# Do this for the rest of the blocks

Block 0104

In [47]:
blk_0104_mae, blk_0104_mse, blk_0104_rmse, blk_0104_r, blk_0104_r2 = predictions_and_metrics_for_test_data(test_data_loc, 'block_0104')

(910, 7, 32) (910, 7, 32)
(203840,) (203840,)


In [48]:
blk_0104_mae, blk_0104_mse, blk_0104_rmse, blk_0104_r, blk_0104_r2

(0.14248763, 0.089720406, 0.29953364, 0.8945826753075663, 0.797020548338786)

Block 0105

In [49]:
blk_0105_mae, blk_0105_mse, blk_0105_rmse, blk_0105_r, blk_0105_r2 = predictions_and_metrics_for_test_data(test_data_loc, 'block_0105')

(910, 7, 32) (910, 7, 32)
(203840,) (203840,)


In [50]:
blk_0105_mae, blk_0105_mse, blk_0105_rmse, blk_0105_r, blk_0105_r2

(0.14917079, 0.09445538, 0.30733594, 0.8857926481740191, 0.7789711134835442)

Block 0106

In [51]:
blk_0106_mae, blk_0106_mse, blk_0106_rmse, blk_0106_r, blk_0106_r2 = predictions_and_metrics_for_test_data(test_data_loc, 'block_0106')

(910, 7, 32) (910, 7, 32)
(203840,) (203840,)


In [52]:
blk_0106_mae, blk_0106_mse, blk_0106_rmse, blk_0106_r, blk_0106_r2

(0.13985209, 0.086053915, 0.29334947, 0.8962599964327399, 0.7995956046679267)

Block 0201

In [53]:
blk_0201_mae, blk_0201_mse, blk_0201_rmse, blk_0201_r, blk_0201_r2 = predictions_and_metrics_for_test_data(test_data_loc, 'block_0201')

(910, 7, 32) (910, 7, 32)
(203840,) (203840,)


In [54]:
blk_0201_mae, blk_0201_mse, blk_0201_rmse, blk_0201_r, blk_0201_r2

(0.1577407, 0.104686856, 0.32355347, 0.8573014277182002, 0.7342941224679029)

Block 0202

In [55]:
blk_0202_mae, blk_0202_mse, blk_0202_rmse, blk_0202_r, blk_0202_r2 = predictions_and_metrics_for_test_data(test_data_loc, 'block_0202')

(910, 7, 32) (910, 7, 32)
(203840,) (203840,)


In [56]:
blk_0202_mae, blk_0202_mse, blk_0202_rmse, blk_0202_r, blk_0202_r2

(0.16074643, 0.105793566, 0.32525924, 0.8396027613871067, 0.7019627172187048)

Block 0205

In [57]:
blk_0205_mae, blk_0205_mse, blk_0205_rmse, blk_0205_r, blk_0205_r2 = predictions_and_metrics_for_test_data(test_data_loc, 'block_0205')

(910, 7, 32) (910, 7, 32)
(203840,) (203840,)


In [58]:
blk_0205_mae, blk_0205_mse, blk_0205_rmse, blk_0205_r, blk_0205_r2

(0.14196524, 0.08925282, 0.2987521, 0.8967382270371432, 0.79953003272503)

Block 0206

In [59]:
blk_0206_mae, blk_0206_mse, blk_0206_rmse, blk_0206_r, blk_0206_r2 = predictions_and_metrics_for_test_data(test_data_loc, 'block_0206')

(910, 7, 32) (910, 7, 32)
(203840,) (203840,)


In [60]:
blk_0206_mae, blk_0206_mse, blk_0206_rmse, blk_0206_r, blk_0206_r2

(0.17179392, 0.12505318, 0.3536286, 0.8297091886667658, 0.6875895694401083)

Block 0302

In [61]:
blk_0302_mae, blk_0302_mse, blk_0302_rmse, blk_0302_r, blk_0302_r2 = predictions_and_metrics_for_test_data(test_data_loc, 'block_0302')

(910, 7, 32) (910, 7, 32)
(203840,) (203840,)


In [62]:
blk_0302_mae, blk_0302_mse, blk_0302_rmse, blk_0302_r, blk_0302_r2

(0.15553218, 0.10348547, 0.32169157, 0.8578087340483902, 0.7354855608118599)

Block 0303

In [63]:
blk_0303_mae, blk_0303_mse, blk_0303_rmse, blk_0303_r, blk_0303_r2 = predictions_and_metrics_for_test_data(test_data_loc, 'block_0303')

(910, 7, 32) (910, 7, 32)
(203840,) (203840,)


In [64]:
blk_0303_mae, blk_0303_mse, blk_0303_rmse, blk_0303_r, blk_0303_r2

(0.14828412, 0.09245285, 0.3040606, 0.8664186237762366, 0.7506468068597054)

Block 0304

In [65]:
blk_0304_mae, blk_0304_mse, blk_0304_rmse, blk_0304_r, blk_0304_r2 = predictions_and_metrics_for_test_data(test_data_loc, 'block_0304')

(910, 7, 32) (910, 7, 32)
(203840,) (203840,)


In [66]:
blk_0304_mae, blk_0304_mse, blk_0304_rmse, blk_0304_r, blk_0304_r2

(0.14681453, 0.09236957, 0.30392364, 0.877196116722412, 0.7688358809118414)

Block 0305

In [67]:
blk_0305_mae, blk_0305_mse, blk_0305_rmse, blk_0305_r, blk_0305_r2 = predictions_and_metrics_for_test_data(test_data_loc, 'block_0305')

(910, 7, 32) (910, 7, 32)
(203840,) (203840,)


In [68]:
blk_0305_mae, blk_0305_mse, blk_0305_rmse, blk_0305_r, blk_0305_r2

(0.16429992, 0.112498455, 0.33540788, 0.8447891176604796, 0.7134830023075813)

Block 0306

In [69]:
blk_0306_mae, blk_0306_mse, blk_0306_rmse, blk_0306_r, blk_0306_r2 = predictions_and_metrics_for_test_data(test_data_loc, 'block_0306')

(910, 7, 32) (910, 7, 32)
(203840,) (203840,)


In [70]:
blk_0306_mae, blk_0306_mse, blk_0306_rmse, blk_0306_r, blk_0306_r2

(0.15772247, 0.10659945, 0.3264957, 0.8570999949437654, 0.7346187831503386)

2. All test data 

In [71]:
# Okay, need to combine all the data first 

In [72]:
# re-sort the file listing in place before splitting it into input and target files
# (it was already sorted earlier in the notebook, so this is only a safeguard)
all_preprocessed_test_files.sort()

In [73]:
# keep only the input-feature files: those whose stem (text before the first ".") ends in 'input_features'
all_input_features = [
    name for name in all_preprocessed_test_files
    if name.split('.')[0].endswith('input_features')
]

In [74]:
# keep only the target files: those whose stem (text before the first ".") ends in 'target_features'
all_target_features = [
    name for name in all_preprocessed_test_files
    if name.split('.')[0].endswith('target_features')
]

In [75]:
len(all_input_features), len(all_target_features)

(13, 13)

In [76]:
%%time
# read every input-feature file into memory, one array per file
catch_all_input_feature_matrices = [
    np.load(os.path.join(test_data_loc, name)) for name in all_input_features
]
    

CPU times: user 19 ms, sys: 206 ms, total: 225 ms
Wall time: 14.2 ms


In [77]:
len(catch_all_input_feature_matrices)

13

In [78]:
catch_all_input_feature_matrices[0].shape

(910, 13, 32)

In [79]:
# stack all these vertically along the 0th axis
test_input_features = np.vstack(catch_all_input_feature_matrices)

In [80]:
test_input_features.shape

(11830, 13, 32)

In [81]:
# check every block against its slice of the stacked array (1.0 means all values match);
# the slice bounds come from each block's own length instead of a hard-coded 910 rows per block
row_start = 0
for i, block_features in enumerate(catch_all_input_feature_matrices):
    row_stop = row_start + block_features.shape[0]
    print("features:", i)
    print(np.mean(block_features == test_input_features[row_start:row_stop, :, :]))
    row_start = row_stop

features: 0
1.0
features: 1
1.0
features: 2
1.0
features: 3
1.0
features: 4
1.0
features: 5
1.0
features: 6
1.0
features: 7
1.0
features: 8
1.0
features: 9
1.0
features: 10
1.0
features: 11
1.0
features: 12
1.0


In [82]:
# Also stack targets

In [83]:
%%time
# read every target file into memory, one array per file
catch_all_target_matrices = [
    np.load(os.path.join(test_data_loc, name)) for name in all_target_features
]

CPU times: user 932 µs, sys: 8.84 ms, total: 9.78 ms
Wall time: 9.4 ms


In [84]:
len(catch_all_target_matrices)

13

In [85]:
catch_all_target_matrices[0].shape

(910, 7, 32)

In [86]:
# stack these
test_targets = np.vstack(catch_all_target_matrices)

In [87]:
test_targets.shape

(11830, 7, 32)

In [88]:
# check every block against its slice of the stacked array (1.0 means all values match);
# the slice bounds come from each block's own length instead of a hard-coded 910 rows per block
row_start = 0
for i, block_targets in enumerate(catch_all_target_matrices):
    row_stop = row_start + block_targets.shape[0]
    print("targets:", i)
    print(np.mean(block_targets == test_targets[row_start:row_stop, :, :]))
    row_start = row_stop

targets: 0
1.0
targets: 1
1.0
targets: 2
1.0
targets: 3
1.0
targets: 4
1.0
targets: 5
1.0
targets: 6
1.0
targets: 7
1.0
targets: 8
1.0
targets: 9
1.0
targets: 10
1.0
targets: 11
1.0
targets: 12
1.0


In [89]:
# Okay, now let's get the model predictions on all test data

In [90]:
# Do this in a function

In [91]:
def metrics_for_all_test_data(all_input_features, all_targets, model=None):
    """Predict on the given inputs and score the predictions against the targets.

    Parameters
    ----------
    all_input_features : array-like
        Inputs handed to ``model.predict`` unchanged.
    all_targets : array-like
        True values; converted with ``np.asarray`` and flattened before scoring.
    model : optional
        Object exposing ``predict(features)``. When omitted, the notebook-level
        ``seq_2_seq_generic_model`` is used.

    Returns
    -------
    tuple
        ``(mae, mse, rmse, pearson_r, r2)``, all computed on the flattened
        target and prediction arrays.
    """
    if model is None:
        # fall back to the model loaded at the top of the notebook
        model = seq_2_seq_generic_model

    # get predictions from the model
    test_preds = model.predict(all_input_features)
    print(test_preds.shape)

    # the sklearn / scipy metrics below are fed 1D vectors, so flatten both arrays;
    # np.asarray lets plain nested lists through as well as ndarrays
    test_flat_targets = np.asarray(all_targets).flatten()
    test_flat_preds = np.asarray(test_preds).flatten()
    print(test_flat_targets.shape, test_flat_preds.shape)

    # compute the metrics
    mae = mean_absolute_error(test_flat_targets, test_flat_preds)
    mse = mean_squared_error(test_flat_targets, test_flat_preds)
    rmse = np.sqrt(mse)
    pearson_corr = pearsonr(test_flat_targets, test_flat_preds)
    r2_value = r2_score(test_flat_targets, test_flat_preds)

    # pearsonr returns (statistic, pvalue); only the statistic is reported
    return mae, mse, rmse, pearson_corr[0], r2_value

In [92]:
all_test_mae, all_test_mse, all_test_rmse, all_test_r, all_test_r2 = metrics_for_all_test_data(test_input_features, test_targets)

(11830, 7, 32)
(2649920,) (2649920,)


In [93]:
all_test_mae, all_test_mse, all_test_rmse, all_test_r, all_test_r2

(0.1532633, 0.10018315, 0.31651723, 0.8673123076335503, 0.7517599163553234)