# Setting up the GPU

In [None]:
import tensorflow as tf

# Ask TensorFlow to allocate GPU memory on demand for every visible GPU.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    for device in gpus:
        tf.config.experimental.set_memory_growth(device, True)
    if gpus:
        print("GPUs are available and memory growth is set")
except RuntimeError as err:
    # The error is printed rather than re-raised so the notebook keeps running.
    print(err)

# Loading the ECG data for all subjects

In [None]:
import os
import pandas as pd

# One named Series per subject whose file is present
data_frames = []

# Subject files are named S2 ... S17; a missing file is reported and skipped.
for subject_number in range(2, 18):
    file_path = f'/kaggle/input/wesad-dataset/S{subject_number}_respiban.txt'

    if not os.path.exists(file_path):
        print(f'File not found for subject {subject_number}')
        continue

    # Tab-separated text: the first three lines are skipped, no header row is used.
    df = pd.read_csv(file_path, delimiter='\t', skiprows=3, header=None)

    # Keep only the column at position 2 (presumably the ECG channel - confirm
    # against the dataset readme).  `iloc[:, 2]` returns a Series, which has no
    # `.columns`; assigning to that attribute silently does nothing useful, so
    # the Series is labelled with `rename` instead.
    df_subset = df.iloc[:, 2].rename(f'Subject_{subject_number}')
    print(df_subset.shape)

    data_frames.append(df_subset)

# Side-by-side concatenation: one column per subject, shorter recordings are
# padded with NaN by the index alignment.
data_1 = pd.concat(data_frames, axis=1)

# Print the shape of the combined data
print(data_1.shape)

# The raw ECG signal values need to be converted to SI units.

In [None]:
import numpy as np
# Constants of the conversion formula: ADC resolution (2**16 levels) and supply voltage
chan_bit = 2 ** 16
vcc = 3
# Apply (x / chan_bit - 0.5) * vcc to the whole frame at once.  The arithmetic is
# the same as the former per-element lambda, NaN cells stay NaN, and it avoids the
# deprecated DataFrame.applymap.
data_wesad = (data_1 / chan_bit - 0.5) * vcc
# Print the updated dataset
print(data_wesad)

# Sample plotting of the ECG signal

In [None]:
import matplotlib.pyplot as plt

# Plot the beginning of one column of the converted signal.
n_points = 550     # number of leading samples shown
column_index = 4   # positional column of data_wesad (0-based)
subject_data = data_wesad.iloc[:n_points, column_index]

# Sample counter used as the x axis
time_axis = range(n_points)

# Plot the data; the title is built from the values actually plotted so it
# cannot drift out of sync with the slice above.
plt.plot(time_axis, subject_data)
plt.xlabel('Time')
plt.ylabel('Voltage(mV)')
plt.title(f'Plot of First {n_points} Data Points - Column {column_index}')
plt.show()

# Loading the start and end of every session (stress/baseline) for each subject. The file WESAD_mins.xlsx was made by the author (based on the WESAD documentation) to simplify the process.

In [None]:
import pandas as pd

# Workbook holding the session start/end times
file_path = '/kaggle/input/wesad-dataset/WESAD_mins.xlsx'

# Load the sheet, then drop its first column by position
df = pd.read_excel(file_path)
df_sub = df.iloc[:, slice(1, None)]

# Show the remaining columns
print(df_sub)

# Setting up the data points according to the baseline and stress time

In [None]:
# Number of interior cut points placed inside each session
num_intervals_base = 19
num_intervals_tsst = 9

base_begin, base_finish = df_sub['Base_start'], df_sub['Base_end']
tsst_begin, tsst_finish = df_sub['TSST_Start'], df_sub['Tsst_end']

# Width of one slice: session length divided into (cut points + 1) equal parts
base_interval = (base_finish - base_begin) / (num_intervals_base + 1)
tsst_interval = (tsst_finish - tsst_begin) / (num_intervals_tsst + 1)

# Assemble the boundary columns in order: start, interior cuts, end - first for
# the baseline session, then for the TSST session.
boundary_columns = {'base_start': base_begin}
for step in range(1, num_intervals_base + 1):
    boundary_columns[f'base_s{step}'] = base_begin + step * base_interval
boundary_columns['base_end'] = base_finish
boundary_columns['tsst_start'] = tsst_begin
for step in range(1, num_intervals_tsst + 1):
    boundary_columns[f'tsst_s{step}'] = tsst_begin + step * tsst_interval
boundary_columns['tsst_end'] = tsst_finish
new_df = pd.DataFrame(boundary_columns)

# Scale by 700 and by 60 and transpose, so each column of df_lebel holds the
# boundaries of one row of df_sub.
df_lebel = (new_df * 700 * 60).T
print(df_lebel)

# Observing the absolute lowest session segment data points in the dataset.

In [None]:
# Absolute gap between each boundary and the one in the row above it
boundary_gaps = df_lebel.diff(axis=0).abs()

# Smallest gap in every column
min_diffs = boundary_gaps.min()

# Smallest gap overall, rounded down to a whole number
smallest_gap = min_diffs.min()
absolute_lowest = np.floor(smallest_gap).astype(int)

print(absolute_lowest)

# Slicing every subject's data into segments of the same shape to avoid mismatches in the number of data points. Also setting up the labels.

In [None]:
import pandas as pd
import numpy as np

samples = []
lebels = []

# Label pattern appended once per subject: 20 zeros followed by 10 ones
lebel = [0] * 20 + [1] * 10

for subject_number in range(15):
    # Signal column and boundary column of the current subject (both positional)
    signal = data_wesad.iloc[:, subject_number]
    boundaries = df_lebel.iloc[:, subject_number].values.astype(int).flatten()

    # Every boundary except the last one opens a window; position 20 is skipped.
    for position, start in enumerate(boundaries[:-1]):
        if position == 20:
            continue
        end = start + absolute_lowest
        # Fixed-length cut starting one sample before `start`, re-indexed from 0
        samples.append(pd.Series(signal.iloc[start - 1:end].values))

    lebels = lebels + lebel

# One row per window after the transpose
concatenated = pd.concat(samples, axis=1).T
lebel_all = pd.DataFrame(lebels)

print(concatenated.shape)
print(lebel_all.shape)

# Downsampling the dataset from 700 Hz to 256 Hz to reduce the computational cost and improve performance.

In [None]:
import numpy as np
from scipy.signal import resample
# One column per window, one row per original sample
original_dataset = concatenated.T

# Sizes are read from the data instead of being hard-coded, so the cell keeps
# working if the number of windows or the window length changes.
n_input_samples, n_signals = original_dataset.shape
# Whole seconds covered at 700 Hz, re-expressed as a sample count at 256 Hz
n_output_samples = 256 * (n_input_samples // 700)

downsampled_dataset = np.zeros((n_signals, n_output_samples))
print(original_dataset.iloc[:, 0].shape)
for i in range(n_signals):
    # Positional column access: does not depend on how the columns are labelled
    original_signal = original_dataset.iloc[:, i].to_numpy()
    downsampled_dataset[i, :] = resample(original_signal, n_output_samples)

# Back to "one column per window"
wesad_x = downsampled_dataset.T
print(wesad_x.shape)  # (n_output_samples, n_signals)

# Observing the downsampled dataset.

In [None]:
import matplotlib.pyplot as plt

# Wrap the downsampled array in a DataFrame (later cells index it with .iloc)
wesad_x = pd.DataFrame(wesad_x)

# Plot the beginning of one column of the downsampled data.
n_points = 2000    # number of leading samples shown
column_index = 4   # positional column of wesad_x (0-based)
subject_data = wesad_x.iloc[:n_points, column_index]

# Sample counter used as the x axis
time_axis = range(n_points)

# Plot the data; the title is built from the values actually plotted so it
# cannot drift out of sync with the slice above.
plt.plot(time_axis, subject_data)
plt.xlabel('Time')
plt.ylabel('Voltage(mV)')
plt.title(f'Plot of First {n_points} Data Points - Column {column_index}')
plt.show()

# Transposing the dataset.

In [None]:
# Features: transpose of the downsampled frame.  Labels: the frame built during slicing.
X = wesad_x.transpose()
y = lebel_all
print(y.shape)

# Slicing the dataset into six segments of about 10 s each (2475 samples at 256 Hz); some neighbouring segments share a single boundary sample.

In [None]:
import pandas as pd
import numpy as np

# Cut every row of X into six column windows of equal width and stack the
# windows on top of each other (window 0 of all rows, then window 1, ...).
segment_width = 2475
# Start columns of the six windows.  The pairs (0, 2474), (4949, 7423) and
# (9898, 12372) each overlap by exactly one column.
segment_starts = (0, 2474, 4949, 7423, 9898, 12372)

segment_arrays = [
    X.iloc[:, begin:begin + segment_width].to_numpy()
    for begin in segment_starts
]
result = np.vstack(segment_arrays)

# Shape is (6 * number of rows in X, 2475)
print(result.shape)


# Also setting the label for 10 sec. segments.

In [None]:
import numpy as np
# The label block is repeated six times along the rows and once along the columns.
row_repeats = 6
repeated_y = np.tile(y, reps=(row_repeats, 1))

# Show the resulting shape
print(repeated_y.shape)

# Simple preprocessing with standard scaler, splitting the dataset into test and train subset. Observing the majority and minority class in the train dataset. 

In [None]:
features = result
labels = repeated_y
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Stratified 70/30 split.  A fixed random_state makes the split - and therefore
# every metric and figure computed later - reproducible across kernel restarts.
# NOTE(review): rows are shuffled individually, so segments cut from the same
# recording can land in both the train and the test subset - confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.3, shuffle=True, stratify=labels, random_state=42
)

# Standardise each column using statistics of the training rows only
scaling = StandardScaler()
X_train = scaling.fit_transform(X_train)
X_test = scaling.transform(X_test)

# Flatten the (n, 1) label arrays so they can be used as boolean row masks
y_train = y_train.ravel()
y_test = y_test.ravel()

# Rows labelled 0 and rows labelled 1 of the training subset
X_majority = X_train[y_train == 0]
X_minority = X_train[y_train == 1]

print(X_minority.shape)
print(X_majority.shape)


# Minority data augmentation using the shifting method.

In [None]:
# Shift-based augmentation of the minority class: rows are taken in consecutive
# pairs, each pair is joined end to end, and two shifted windows are cut from the
# joined signal.  Every pair therefore yields four rows: the two originals and
# the two shifted windows.
n_minority, segment_len = X_minority.shape

# Start offsets of the shifted windows inside the joined pair.  The window
# length follows the data (segment_len) instead of a hard-coded 2304, so the
# augmented rows always have the same width as the rows of X_majority.
shift_offsets = (768, 1792)
if any(offset > segment_len for offset in shift_offsets):
    raise ValueError(
        f'shift offsets {shift_offsets} do not fit a segment of {segment_len} samples'
    )

augmented_rows = []
for first_idx in range(0, n_minority - 1, 2):
    x1 = X_minority[first_idx, :]
    x2 = X_minority[first_idx + 1, :]
    x_combined = np.concatenate((x1, x2))

    augmented_rows.append(x1)
    augmented_rows.append(x2)
    for offset in shift_offsets:
        augmented_rows.append(x_combined[offset:offset + segment_len])

# With an odd number of minority rows the last one has no partner; keep it
# unchanged rather than leaving all-zero rows labelled as positives.
if n_minority % 2 == 1:
    augmented_rows.append(X_minority[-1, :])

if augmented_rows:
    X_minority_augmented = np.vstack(augmented_rows)
else:
    X_minority_augmented = np.empty((0, segment_len))
print(X_minority_augmented.shape)

# Recombine with the untouched majority rows and rebuild the labels
X_train_augmented = np.vstack((X_majority, X_minority_augmented))
y_train_augmented = np.hstack((np.zeros(X_majority.shape[0]), np.ones(X_minority_augmented.shape[0])))

# Shuffle rows and labels with the same permutation
idx = np.random.permutation(X_train_augmented.shape[0])
X_train_augmented = X_train_augmented[idx]
y_train_augmented = pd.DataFrame(y_train_augmented[idx])
print(X_train_augmented.shape)

# The augmented set replaces the training set used by the following cells
X_train = X_train_augmented
y_train = y_train_augmented

# Proposed model architecture.

In [None]:
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, LSTM, Bidirectional, Dense, Dropout, BatchNormalization, GlobalMaxPooling1D, Attention, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2

# One channel per time step.  The number of time steps is read from the training
# matrix so the model input cannot drift out of sync with the segmentation step
# (2475 with the slicing used in this notebook).
input_layer = Input(shape=(X_train.shape[1], 1))

# CNN feature extractor used by each branch
def shared_cnn(input_layer, filters1, kernel_size1, filters2, kernel_size2, filters3, kernel_size3):
    """Stack three Conv1D -> MaxPooling1D -> BatchNormalization stages.

    New layers are instantiated on every call, so separate calls do not share
    weights with each other.

    Args:
        input_layer: tensor the first convolution is applied to.
        filters1, filters2, filters3: number of filters of stage 1, 2 and 3.
        kernel_size1, kernel_size2, kernel_size3: kernel size of stage 1, 2 and 3.

    Returns:
        The output tensor of the third batch-normalisation layer.
    """
    stage_settings = (
        (filters1, kernel_size1),
        (filters2, kernel_size2),
        (filters3, kernel_size3),
    )

    features = input_layer
    for n_filters, kernel_width in stage_settings:
        features = Conv1D(filters=n_filters, kernel_size=kernel_width, activation='relu')(features)
        features = MaxPooling1D(pool_size=2)(features)  # pool size 2 in every stage
        features = BatchNormalization()(features)

    return features

# BiLSTM layer followed by attention heads and global max pooling
def shared_bilstm_with_attention(shared_cnn_output, units, return_sequences, num_heads):
    """Run a BiLSTM over the CNN features, apply self-attention and pool over time.

    Args:
        shared_cnn_output: feature tensor produced by the CNN stack.
        units: number of LSTM units per direction.
        return_sequences: forwarded to the LSTM.  The attention and pooling
            layers below operate on a sequence, so callers pass True.
        num_heads: number of Attention layers applied to the BiLSTM output.

    Returns:
        The globally max-pooled attention output.
    """
    bilstm_output = Bidirectional(LSTM(units=units, return_sequences=return_sequences))(shared_cnn_output)

    # Self-attention: the BiLSTM sequence is used as both query and value.
    # NOTE(review): with default arguments the heads look like identical copies
    # of each other (same inputs, no per-head projection) - confirm whether a
    # MultiHeadAttention layer was intended.
    attention_heads = []
    for _ in range(num_heads):
        attention_head = Attention()([bilstm_output, bilstm_output])
        attention_heads.append(attention_head)

    # Concatenate the outputs of attention heads
    multi_attention = Concatenate(axis=-1)(attention_heads)

    # Pool the attention output.  Pooling `bilstm_output` here instead would
    # leave every attention layer outside the model graph, i.e. the attention
    # would be computed but never used.
    global_pool = GlobalMaxPooling1D()(multi_attention)
    return global_pool

# Two parallel CNN -> BiLSTM branches on the same input; they differ in filter
# counts, kernel sizes, LSTM width and number of attention heads.
shared_cnn_output1 = shared_cnn(input_layer, filters1=64, kernel_size1=10, filters2=128, kernel_size2=5, filters3=256, kernel_size3=3)
shared_bilstm_output1 = shared_bilstm_with_attention(shared_cnn_output1, units=64, return_sequences=True, num_heads=4)

shared_cnn_output2 = shared_cnn(input_layer, filters1=32, kernel_size1=14, filters2=64, kernel_size2=8, filters3=128, kernel_size3=5)
shared_bilstm_output2 = shared_bilstm_with_attention(shared_cnn_output2, units=32, return_sequences=True, num_heads=2)

# Concatenate the outputs of both paths
concatenated_output = Concatenate(axis=-1)([shared_bilstm_output1, shared_bilstm_output2])

# Dense layers.  The head must consume the merged features: feeding it
# `shared_bilstm_output2` alone leaves the whole first branch outside the model
# graph, so it is never trained and its layers cannot be looked up on `model`.
dense1 = Dense(units=256, activation='relu', kernel_regularizer=l2(0.01))(concatenated_output)
batch_norm4 = BatchNormalization()(dense1)
drop1 = Dropout(0.4)(batch_norm4)

dense2 = Dense(units=128, activation='relu', kernel_regularizer=l2(0.01))(drop1)
batch_norm5 = BatchNormalization()(dense2)
drop2 = Dropout(0.4)(batch_norm5)

# Output layer: a single sigmoid unit for the binary label
output = Dense(units=1, activation='sigmoid')(drop2)

# Model
model = Model(inputs=input_layer, outputs=output)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# Training the model.

In [None]:
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop once the validation loss has not improved for 30 epochs
early_stop = EarlyStopping(monitor='val_loss', mode='min', patience=30)
# Keep on disk only the weights with the highest validation accuracy so far
checkpoint = ModelCheckpoint(
    'best_model.h5',
    monitor='val_accuracy',
    mode='max',
    verbose=1,
    save_best_only=True,
)

# Fit on the training set; the test subset doubles as the validation data here
history = model.fit(
    X_train,
    y_train,
    epochs=200,
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[early_stop, checkpoint],
)

# Calculating the confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix
from keras.models import load_model
# Replace the in-memory model with the checkpoint written during training
model = load_model('best_model.h5')

# Sigmoid outputs for the test subset
y_pred = model.predict(X_test)

# Scores strictly above the threshold count as class 1, everything else as class 0
decision_threshold = 0.5
above_threshold = y_pred > decision_threshold
y_pred_classes = above_threshold.astype(int)

# Confusion matrix of true labels against thresholded predictions
cm = confusion_matrix(y_test, y_pred_classes)
print(cm)

# Plotting the Loss vs Epoch curve

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score

# Training and validation loss per epoch, drawn on the same axes
loss_curves = (
    ('loss', 'Train Loss'),
    ('val_loss', 'Validation Loss'),
)
for history_key, curve_label in loss_curves:
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Calculating the AUC score

In [None]:
# Model scores for the test subset
y_pred = model.predict(X_test)

# Area under the ROC curve from the true labels and the raw scores
auc_score = roc_auc_score(y_test, y_pred)

print('AUC Score:', auc_score)

# Plotting the ROC curve.

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, roc_auc_score

# Model scores for the test subset
y_pred = model.predict(X_test)

# ROC operating points and the matching area under the curve
fpr, tpr, thresholds = roc_curve(y_test, y_pred)
auc_score = roc_auc_score(y_test, y_pred)

# ROC curve together with the diagonal used as the chance-level reference
roc_label = 'ROC Curve (AUC = {:.2f})'.format(auc_score)
plt.plot(fpr, tpr, label=roc_label)
plt.plot([0, 1], [0, 1], linestyle='--', color='r', label='Random')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend()
plt.show()

# Feature maps for 2 examples, shown separately.

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model

# Uses the trained `model` and the test data `X_test` / `y_test`.

# Layers whose feature maps are plotted
conv_layers = ['conv1d_3']  # Replace with actual layer names

# One probe model per layer, each with a single output tensor.  This keeps the
# shape returned by predict() the same no matter how many layers are listed;
# a single multi-output probe returns a list as soon as there are two layers.
activation_models = {
    layer_name: Model(inputs=model.input, outputs=model.get_layer(layer_name).output)
    for layer_name in conv_layers
}

# Row indices of X_test to visualise
examples_to_visualize = [12, 16]

for i in examples_to_visualize:
    # Slice (not index) so the batch dimension is kept
    input_tensor = tf.convert_to_tensor(X_test[i:i+1], dtype=tf.float32)
    true_class = y_test[i]  # Get the true class label for the example

    # Predicted class: the single sigmoid score is pulled out as a scalar before
    # thresholding (int() on a multi-dimensional array is deprecated in NumPy).
    prediction = model.predict(input_tensor)
    predicted_class = int(float(prediction.reshape(-1)[0]) > 0.5)

    # Define title color based on the true class
    title_color = 'purple' if true_class == 0 else 'orange'

    for layer_name, probe in activation_models.items():
        # Drop the batch dimension; assumes the layer output is
        # (batch, steps, filters) as for a Conv1D layer - TODO confirm for other layers
        activation = probe.predict(input_tensor)[0]

        num_filters = activation.shape[-1]
        num_cols = 8  # Number of columns for subplot
        num_rows = (num_filters + num_cols - 1) // num_cols  # Calculate number of rows needed

        plt.figure(figsize=(15, num_rows * 2))
        for j in range(num_filters):
            plt.subplot(num_rows, num_cols, j + 1)
            plt.plot(activation[:, j])  # response of filter j over the time steps
            plt.title(f'Filter {j}')
            plt.axis('off')
        plt.suptitle(
            f'Example {i+1} - True Class: {true_class} - Predicted Class: {predicted_class} - Feature maps for layer: {layer_name}', 
            fontsize=16, color=title_color
        )
        plt.tight_layout(rect=[0, 0.03, 1, 0.95])  # Adjust layout to make room for the title
        plt.show()


# Feature maps of 2 examples, shown side by side.

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model

# Uses the trained `model` and the test data `X_test`.

# Layers whose feature maps are plotted
conv_layers = ['conv1d_3']  # Replace with actual layer names

# One colour per visualised example (same order as examples_to_visualize)
colors = ['blue', 'red']
# Row indices of X_test to compare
examples_to_visualize = [12, 16]

# Ensure the selected examples exist in the dataset
if max(examples_to_visualize) >= len(X_test):
    raise ValueError("Example index out of range")

for layer_name in conv_layers:
    # Probe model for this layer only.  A single shared multi-output probe would
    # return the same data for every entry of conv_layers.
    probe = Model(inputs=model.input, outputs=model.get_layer(layer_name).output)

    # Activations of both examples, batch dimension removed; assumes the layer
    # output is (batch, steps, filters) - TODO confirm for non-Conv1D layers
    activations = []
    for i in examples_to_visualize:
        input_tensor = tf.convert_to_tensor(X_test[i:i+1], dtype=tf.float32)
        activations.append(probe.predict(input_tensor)[0])

    num_filters = activations[0].shape[-1]
    num_cols = 16  # Number of columns for subplot
    # Each filter takes one cell per example, hence the multiplication
    num_rows = (num_filters + num_cols - 1) // num_cols * len(examples_to_visualize)

    plt.figure(figsize=(24, num_rows * 2))
    for j in range(num_filters):
        for k, activation in enumerate(activations):
            # The cells of one filter are adjacent: example 0 first, then example 1
            plt.subplot(num_rows, num_cols, j * len(examples_to_visualize) + k + 1)
            plt.plot(activation[:, j], color=colors[k])
            if k == 0:
                plt.title(f'                     Filter {j}',fontsize = 16)
            plt.axis('off')
    plt.suptitle(
        f'Feature Maps for Layer: {layer_name} - True Positive (Ex. {examples_to_visualize[0]+1}) Vs True Negative (Ex. {examples_to_visualize[1]+1})', 
        fontsize=24
    )
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])  # Adjust layout to make room for the title
    # File name follows the layer so several layers do not overwrite each other
    # (still 'Feature_mapping_conv1d_3_wesad.png' for the layer listed above).
    plt.savefig(f'Feature_mapping_{layer_name}_wesad.png')
    plt.show()


# Defining the function of UMAP

In [None]:
pip install umap-learn

In [None]:
from tensorflow.keras.models import load_model
import numpy as np
import umap
import matplotlib.pyplot as plt
import umap.umap_ as umap
from tensorflow.keras.models import load_model

# Print the name of every layer of `model`; the following cells look layers up by these names.
for keras_layer in model.layers:
    print(keras_layer.name)

# UMAP embedding representation for raw ECG signals.

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
import umap

# Layer whose output is embedded
layer_name = 'input_layer'  # Replace with your actual layer name

# Probe model: same input as `model`, output taken at `layer_name`
activation_model = Model(inputs=model.input, outputs=model.get_layer(layer_name).output)

# Layer output for every test row, flattened to (samples, features)
feature_maps = activation_model.predict(X_test)
flattened_feature_maps = feature_maps.reshape(
    feature_maps.shape[0], np.prod(feature_maps.shape[1:])
)

# 2-D UMAP projection with a fixed seed
umap_embedding = umap.UMAP(n_components=2, random_state=42).fit_transform(flattened_feature_maps)

# Scatter the embedding, coloured by the label stored in y_test (1 first, then 0)
plt.figure(figsize=(5, 4))
for class_value, class_color, class_label in ((1, 'red', 'Positives'), (0, 'blue', 'Negatives')):
    class_rows = np.where(y_test == class_value)[0]
    plt.scatter(umap_embedding[class_rows, 0], umap_embedding[class_rows, 1],
                color=class_color, label=class_label, s=10)
plt.title('UMAP Rep. of Test Data')
plt.xlabel('UMAP 1')
plt.ylabel('UMAP 2')
plt.legend()

# Write the figure to disk before showing it
plt.savefig('umap_feature_maps1.png')  # You can change the filename and format as needed

plt.show()

# UMAP Embedding representation of batch_normalization_2 (3rd cnn of block 1) layer embedding.

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
import umap

# Layer whose output is embedded
# NOTE(review): get_layer only finds layers that belong to `model`; check that this
# name appears in the layer list printed earlier.
layer_name = 'batch_normalization_2'  # Replace with your actual layer name

# Probe model: same input as `model`, output taken at `layer_name`
activation_model = Model(inputs=model.input, outputs=model.get_layer(layer_name).output)

# Layer output for every test row, flattened to (samples, features)
feature_maps = activation_model.predict(X_test)
flattened_feature_maps = feature_maps.reshape(
    feature_maps.shape[0], np.prod(feature_maps.shape[1:])
)

# 2-D UMAP projection with a fixed seed
umap_embedding = umap.UMAP(n_components=2, random_state=42).fit_transform(flattened_feature_maps)

# Scatter the embedding, coloured by the label stored in y_test (1 first, then 0)
plt.figure(figsize=(5, 4))
for class_value, class_color, class_label in ((1, 'red', 'Positives'), (0, 'blue', 'Negatives')):
    class_rows = np.where(y_test == class_value)[0]
    plt.scatter(umap_embedding[class_rows, 0], umap_embedding[class_rows, 1],
                color=class_color, label=class_label, s=10)
plt.title('UMAP Features Rep. of CNN Layers')
plt.xlabel('UMAP 1')
plt.ylabel('UMAP 2')
plt.legend()

# Write the figure to disk before showing it
plt.savefig('umap_feature_cnn1.png')  # You can change the filename and format as needed

plt.show()

# UMAP Embedding representation of batch_normalization_5 (3rd cnn of block 2) layer embedding

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
import umap

# Layer whose output is embedded
layer_name = 'batch_normalization_5'  # Replace with your actual layer name

# Probe model: same input as `model`, output taken at `layer_name`
activation_model = Model(inputs=model.input, outputs=model.get_layer(layer_name).output)

# Layer output for every test row, flattened to (samples, features)
feature_maps = activation_model.predict(X_test)
flattened_feature_maps = feature_maps.reshape(
    feature_maps.shape[0], np.prod(feature_maps.shape[1:])
)

# 2-D UMAP projection with a fixed seed
umap_embedding = umap.UMAP(n_components=2, random_state=42).fit_transform(flattened_feature_maps)

# Scatter the embedding, coloured by the label stored in y_test (1 first, then 0)
plt.figure(figsize=(5, 4))
for class_value, class_color, class_label in ((1, 'red', 'Positives'), (0, 'blue', 'Negatives')):
    class_rows = np.where(y_test == class_value)[0]
    plt.scatter(umap_embedding[class_rows, 0], umap_embedding[class_rows, 1],
                color=class_color, label=class_label, s=10)
plt.title('UMAP Features Rep. of CNN Layers')
plt.xlabel('UMAP 1')
plt.ylabel('UMAP 2')
plt.legend()

# Write the figure to disk before showing it
plt.savefig('umap_feature_cnn2.png')  # You can change the filename and format as needed

plt.show()

# UMAP Embedding representation of bilstm (bilstm of block 1) layer embedding

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
import umap

# Layer whose output is embedded
# NOTE(review): get_layer only finds layers that belong to `model`; check that this
# name appears in the layer list printed earlier.
layer_name = 'bidirectional'  # Replace with your actual layer name

# Probe model: same input as `model`, output taken at `layer_name`
activation_model = Model(inputs=model.input, outputs=model.get_layer(layer_name).output)

# Layer output for every test row, flattened to (samples, features)
feature_maps = activation_model.predict(X_test)
flattened_feature_maps = feature_maps.reshape(
    feature_maps.shape[0], np.prod(feature_maps.shape[1:])
)

# 2-D UMAP projection with a fixed seed
umap_embedding = umap.UMAP(n_components=2, random_state=42).fit_transform(flattened_feature_maps)

# Scatter the embedding, coloured by the label stored in y_test (1 first, then 0)
plt.figure(figsize=(5, 4))
for class_value, class_color, class_label in ((1, 'red', 'Positives'), (0, 'blue', 'Negatives')):
    class_rows = np.where(y_test == class_value)[0]
    plt.scatter(umap_embedding[class_rows, 0], umap_embedding[class_rows, 1],
                color=class_color, label=class_label, s=10)
plt.title('UMAP Features Rep. of BiLSTM Layer')
plt.xlabel('UMAP 1')
plt.ylabel('UMAP 2')
plt.legend()

# Write the figure to disk before showing it
plt.savefig('umap_feature_bilstm1.png')  # You can change the filename and format as needed

plt.show()

# UMAP Embedding representation of bilstm(bilstm of block 2) layer embedding

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
import umap

# Layer whose output is embedded
layer_name = 'bidirectional_1'  # Replace with your actual layer name

# Probe model: same input as `model`, output taken at `layer_name`
activation_model = Model(inputs=model.input, outputs=model.get_layer(layer_name).output)

# Layer output for every test row, flattened to (samples, features)
feature_maps = activation_model.predict(X_test)
flattened_feature_maps = feature_maps.reshape(
    feature_maps.shape[0], np.prod(feature_maps.shape[1:])
)

# 2-D UMAP projection with a fixed seed
umap_embedding = umap.UMAP(n_components=2, random_state=42).fit_transform(flattened_feature_maps)

# Scatter the embedding, coloured by the label stored in y_test (1 first, then 0)
plt.figure(figsize=(5, 4))
for class_value, class_color, class_label in ((1, 'red', 'Positives'), (0, 'blue', 'Negatives')):
    class_rows = np.where(y_test == class_value)[0]
    plt.scatter(umap_embedding[class_rows, 0], umap_embedding[class_rows, 1],
                color=class_color, label=class_label, s=10)
plt.title('UMAP Features Rep. of BiLSTM Layer')
plt.xlabel('UMAP 1')
plt.ylabel('UMAP 2')
plt.legend()

# Write the figure to disk before showing it
plt.savefig('umap_feature_bilstm2.png')  # You can change the filename and format as needed

plt.show()

# UMAP Embedding representation of attention mechanism (block 1) layer embedding

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
import umap

# Layer whose output is embedded
# NOTE(review): get_layer only finds layers that belong to `model`; check that this
# name appears in the layer list printed earlier.
layer_name = 'concatenate'  # Replace with your actual layer name

# Probe model: same input as `model`, output taken at `layer_name`
activation_model = Model(inputs=model.input, outputs=model.get_layer(layer_name).output)

# Layer output for every test row, flattened to (samples, features)
feature_maps = activation_model.predict(X_test)
flattened_feature_maps = feature_maps.reshape(
    feature_maps.shape[0], np.prod(feature_maps.shape[1:])
)

# 2-D UMAP projection with a fixed seed
umap_embedding = umap.UMAP(n_components=2, random_state=42).fit_transform(flattened_feature_maps)

# Scatter the embedding, coloured by the label stored in y_test (1 first, then 0)
plt.figure(figsize=(5, 4))
for class_value, class_color, class_label in ((1, 'red', 'Positives'), (0, 'blue', 'Negatives')):
    class_rows = np.where(y_test == class_value)[0]
    plt.scatter(umap_embedding[class_rows, 0], umap_embedding[class_rows, 1],
                color=class_color, label=class_label, s=10)
plt.title('UMAP Features Rep. of MHA Layer')
plt.xlabel('UMAP 1')
plt.ylabel('UMAP 2')
plt.legend()

# Write the figure to disk before showing it
plt.savefig('umap_feature_attention1.png')  # You can change the filename and format as needed

plt.show()

# UMAP Embedding representation of attention mechanism (block 2) layer embedding

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
import umap

# Layer whose output is embedded
# NOTE(review): get_layer only finds layers that belong to `model`; check that this
# name appears in the layer list printed earlier.
layer_name = 'concatenate_1'  # Replace with your actual layer name

# Probe model: same input as `model`, output taken at `layer_name`
activation_model = Model(inputs=model.input, outputs=model.get_layer(layer_name).output)

# Layer output for every test row, flattened to (samples, features)
feature_maps = activation_model.predict(X_test)
flattened_feature_maps = feature_maps.reshape(
    feature_maps.shape[0], np.prod(feature_maps.shape[1:])
)

# 2-D UMAP projection with a fixed seed
umap_embedding = umap.UMAP(n_components=2, random_state=42).fit_transform(flattened_feature_maps)

# Scatter the embedding, coloured by the label stored in y_test (1 first, then 0)
plt.figure(figsize=(5, 4))
for class_value, class_color, class_label in ((1, 'red', 'Positives'), (0, 'blue', 'Negatives')):
    class_rows = np.where(y_test == class_value)[0]
    plt.scatter(umap_embedding[class_rows, 0], umap_embedding[class_rows, 1],
                color=class_color, label=class_label, s=10)
plt.title('UMAP Features Rep. of MHA Layer')
plt.xlabel('UMAP 1')
plt.ylabel('UMAP 2')
plt.legend()

# Write the figure to disk before showing it
plt.savefig('umap_feature_attention2.png')  # You can change the filename and format as needed

plt.show()

# UMAP Embedding representation of dropout_1 (output block dense layer 2) layer embedding

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
import umap

# Layer whose output is embedded
layer_name = 'dropout_1'  # Replace with your actual layer name

# Probe model: same input as `model`, output taken at `layer_name`
activation_model = Model(inputs=model.input, outputs=model.get_layer(layer_name).output)

# Layer output for every test row, flattened to (samples, features)
feature_maps = activation_model.predict(X_test)
flattened_feature_maps = feature_maps.reshape(
    feature_maps.shape[0], np.prod(feature_maps.shape[1:])
)

# 2-D UMAP projection with a fixed seed
umap_embedding = umap.UMAP(n_components=2, random_state=42).fit_transform(flattened_feature_maps)

# Scatter the embedding, coloured by the label stored in y_test (1 first, then 0)
plt.figure(figsize=(5, 4))
for class_value, class_color, class_label in ((1, 'red', 'Positives'), (0, 'blue', 'Negatives')):
    class_rows = np.where(y_test == class_value)[0]
    plt.scatter(umap_embedding[class_rows, 0], umap_embedding[class_rows, 1],
                color=class_color, label=class_label, s=10)
plt.title('UMAP Features Rep. of Dense Layer 2')
plt.xlabel('UMAP 1')
plt.ylabel('UMAP 2')
plt.legend()

# Write the figure to disk before showing it
plt.savefig('umap_feature_dense2.png')  # You can change the filename and format as needed

plt.show()