In [13]:
import glob
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from keras_tuner import Hyperband
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.model_selection import train_test_split

In [17]:
def load_all_data(data_files_pattern='data*.npy', label_files_pattern='lab*.npy'):
    data_files = sorted(glob.glob(data_files_pattern))
    label_files = sorted(glob.glob(label_files_pattern))
    X_list, y_list = [], []
    for d_file, l_file in zip(data_files, label_files):
        X_list.append(np.load(d_file))
        y_list.append(np.load(l_file))
    X = np.concatenate(X_list, axis=0)
    y = np.concatenate(y_list, axis=0)
    print(f"Loaded data shapes: X - {X.shape}, y - {y.shape}")
    return X, y

def preprocess_data(X, y):
    if len(X.shape) == 3:  
        X = np.expand_dims(X, axis=-1)
    X = X.astype('float32') / 255.0 
    y = y.astype('float32')  
    return X, y

def self_attention_block(x, num_heads=4, key_dim=32):
    attn_output = layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)(x, x)
    attn_output = layers.Add()([x, attn_output]) 
    attn_output = layers.LayerNormalization()(attn_output)
    return attn_output

def build_model(hp):
    input_shape = (40, 168, 1)  
    inputs = layers.Input(shape=input_shape)

    # First convolutional block
    x = layers.Conv2D(hp.Int("filters_1", 32, 128, step=32), kernel_size=hp.Choice("kernel_1", [3, 5]), activation='relu', padding='same')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2))(x)

    # Second convolutional block
    x = layers.Conv2D(hp.Int("filters_2", 32, 128, step=32), kernel_size=hp.Choice("kernel_2", [3, 5]), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2))(x)

    # Third convolutional block
    x = layers.Conv2D(hp.Int("filters_3", 64, 256, step=64), kernel_size=hp.Choice("kernel_3", [3, 5]), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2))(x)

    # Fourth convolutional block
    x = layers.Conv2D(hp.Int("filters_4", 64, 256, step=64), kernel_size=hp.Choice("kernel_4", [3, 5]), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2))(x)

    # Fifth convolutional block
    x = layers.Conv2D(hp.Int("filters_5", 128, 512, step=64), kernel_size=hp.Choice("kernel_5", [3, 5]), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.GlobalAveragePooling2D()(x)

    # Self-attention block
    x = layers.Reshape((1, x.shape[-1]))(x)
    x = self_attention_block(x, num_heads=hp.Int("num_heads", 2, 8, step=2), key_dim=hp.Int("key_dim", 16, 64, step=16))

    # Fully connected layers
    x = layers.Flatten()(x)
    x = layers.Dense(hp.Int("dense_units", 64, 256, step=64), activation='relu')(x)
    x = layers.Dropout(hp.Float("dropout", 0.2, 0.5, step=0.1))(x)
    outputs = layers.Dense(1)(x)

    model = models.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=Adam(learning_rate=hp.Float("learning_rate", 1e-4, 1e-2, sampling="log")), loss=MeanSquaredError(), metrics=["mae"])
    model.summary()
    return model

def load_and_preprocess_data(data_files_pattern='data*.npy', label_files_pattern='lab*.npy'):
    X, y = load_all_data(data_files_pattern, label_files_pattern)
    X, y = preprocess_data(X, y)
    _, X_val, _, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    return X_val, y_val

def load_and_preprocess_data2(data_files_pattern='data*.npy', label_files_pattern='lab*.npy'):
    X, y = load_all_data(data_files_pattern, label_files_pattern)
    X, y = preprocess_data(X, y)
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)
    return X_train, y_train

def evaluate_best_model2(model_path, data_files_pattern='data*.npy', label_files_pattern='lab*.npy'):
    model = load_model(model_path)
    print(f"Loaded model from {model_path}")
    X_train, y_train = load_and_preprocess_data2(data_files_pattern, label_files_pattern)
    y_pred = model.predict(X_train)
    c = 0
    d = 0
    e = 0
    f = 0
    g = 0
    for i in range(min(len(y_train), len(y_train))):  
        # print(f"Predicted: {y_pred[i][0]:.4f}, Actual: {y_train[i]:.4f}")
        if abs(y_pred[i][0] - y_train[i]) <= 0.5:
            c += 1
        if abs(y_pred[i][0] - y_train[i]) <= 0.7:
            d += 1
        if abs(y_pred[i][0] - y_train[i]) <= 0.8:
            e += 1
        if abs(y_pred[i][0] - y_train[i]) <= 0.9:
            f += 1
        if abs(y_pred[i][0] - y_train[i]) <= 1:
            g += 1
    print(c/len(y_train))
    print(d/len(y_train))
    print(e/len(y_train))
    print(f/len(y_train))
    print(g/len(y_train))
    # print(len(y_train))

def evaluate_best_model(model_path, data_files_pattern='data*.npy', label_files_pattern='lab*.npy'):
    model = load_model(model_path)
    print(f"Loaded model from {model_path}")
    X_val, y_val = load_and_preprocess_data(data_files_pattern, label_files_pattern)
    y_pred = model.predict(X_val)
    c = 0
    d = 0
    e = 0
    f = 0
    g = 0
    for i in range(min(len(y_val), len(y_val))):  
        # print(f"Predicted: {y_pred[i][0]:.4f}, Actual: {y_val[i]:.4f}")
        if abs(y_pred[i][0] - y_val[i]) <= 0.5:
            c += 1
        if abs(y_pred[i][0] - y_val[i]) <= 0.8:
            e += 1
        if abs(y_pred[i][0] - y_val[i]) <= 0.9:
            f += 1
    print("Accuracy  percentage with 0.5 cuttoff", c/len(y_val))
    print("Accuracy  percentage with 0.8 cuttoff", e/len(y_val))
    print("Accuracy  percentage with 0.9 cuttoff", f/len(y_val))

    print(d/len(y_val))
    print(e/len(y_val))
    print(f/len(y_val))
    print(g/len(y_val))

In [18]:
evaluate_best_model('best_cnn_with_attention_model.keras')
evaluate_best_model2('best_cnn_with_attention_model.keras')

Loaded model from best_cnn_with_attention_model.keras
Loaded data shapes: X - (30000, 40, 168), y - (30000,)
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 76ms/step
Accuracy  percentage with 0.5 cuttoff 0.493
Accuracy  percentage with 0.8 cuttoff 0.6996666666666667
Accuracy  percentage with 0.9 cuttoff 0.7513333333333333
0.0
0.6996666666666667
0.7513333333333333
0.0
Loaded model from best_cnn_with_attention_model.keras
Loaded data shapes: X - (30000, 40, 168), y - (30000,)
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 78ms/step
0.5431666666666667
0.69725
0.7582083333333334
0.8026666666666666
0.8414166666666667


In [19]:
evaluate_best_model('best_cnn_with_attention_model_val.keras')
evaluate_best_model2('best_cnn_with_attention_model_val.keras')

Loaded model from best_cnn_with_attention_model_val.keras
Loaded data shapes: X - (30000, 40, 168), y - (30000,)
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 83ms/step
Accuracy  percentage with 0.5 cuttoff 0.565
Accuracy  percentage with 0.8 cuttoff 0.7768333333333334
Accuracy  percentage with 0.9 cuttoff 0.8246666666666667
0.0
0.7768333333333334
0.8246666666666667
0.0
Loaded model from best_cnn_with_attention_model_val.keras
Loaded data shapes: X - (30000, 40, 168), y - (30000,)
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 76ms/step
0.5257916666666667
0.6747916666666667
0.732
0.7810416666666666
0.8195416666666666
