In [None]:
import os
import pandas as pd

# Collect one named Series per subject: column position 2 of each
# S<N>_respiban.txt file (presumably the ECG channel -- confirm against the
# file header).
data_frames = []

for subject_number in range(2, 18):
    file_path = f'/kaggle/input/wesad-dataset/S{subject_number}_respiban.txt'

    if not os.path.exists(file_path):
        # A missing file is reported and skipped rather than treated as fatal.
        print(f'File not found for subject {subject_number}')
        continue

    # The first three lines are skipped; the remaining rows are tab-separated
    # and carry no header row.
    df = pd.read_csv(file_path, delimiter='\t', skiprows=3, header=None)

    # `iloc[:, 2]` returns a Series, so it has to be labelled with `rename`.
    # Assigning to `.columns` on a Series only creates a stray attribute and
    # leaves every subject's column named `2` after concatenation.
    df_subset = df.iloc[:, 2].rename(f'Subject_{subject_number}')
    data_frames.append(df_subset)

# Place the per-subject Series side by side; shorter recordings are NaN-padded.
data_1 = pd.concat(data_frames, axis=1)

# Print the shape of the combined data
print(data_1.shape)


In [None]:
import numpy as np
# Constants of the conversion ((x / chan_bit - 0.5) * vcc)
chan_bit = 2 ** 16
vcc = 3
# Vectorised form of the conversion. NaN entries propagate through the
# arithmetic unchanged, so the explicit NaN branch and the slow per-element
# `applymap` lambda (deprecated in recent pandas) are not needed.
data_wesad = (data_1 / chan_bit - 0.5) * vcc
# Print the updated dataset
print(data_wesad)


In [None]:
import matplotlib.pyplot as plt

# Plot the leading samples of one column of the converted recordings.
n_points = 550
column_position = 4  # zero-based position of the column in data_wesad

subject_data = data_wesad.iloc[:n_points, column_position]

# Size the x axis from the slice itself so x and y always have equal length,
# even when the frame holds fewer than n_points rows.
time_axis = range(len(subject_data))

# The title is derived from what is actually plotted; the previous hard-coded
# text ("First 1000 ... Subject 1") matched neither the slice nor the column.
plt.plot(time_axis, subject_data)
plt.xlabel('Time')
plt.ylabel('Voltage(mV)')
plt.title(f'Plot of First {len(subject_data)} Data Points - Column {column_position}')
plt.show()

In [None]:
import pandas as pd

# Location of the WESAD_mins spreadsheet
file_path = '/kaggle/input/wesad-dataset/WESAD_mins.xlsx'

# Load the workbook, then keep every column except the first one
df = pd.read_excel(file_path)
df_sub = df.iloc[:, slice(1, None)]
# Show the trimmed table
print(df_sub)

In [None]:
num_intervals_base = 119
num_intervals_tsst = 59

# Spacing between consecutive window starts inside each phase
base_interval = (df_sub['Base_end'] - df_sub['Base_start']) / (num_intervals_base + 1)
tsst_interval = (df_sub['Tsst_end'] - df_sub['TSST_Start']) / (num_intervals_tsst + 1)

# Gather every column first and build the frame in one go: inserting ~180
# columns one at a time fragments the DataFrame and triggers pandas'
# PerformanceWarning. Dict insertion order keeps the original column order:
# base_start, base_s1.., base_end, tsst_start, tsst_s1.., tsst_end.
boundary_columns = {'base_start': df_sub['Base_start']}
for i in range(1, num_intervals_base + 1):
    boundary_columns[f'base_s{i}'] = df_sub['Base_start'] + i * base_interval
boundary_columns['base_end'] = df_sub['Base_end']
boundary_columns['tsst_start'] = df_sub['TSST_Start']
for i in range(1, num_intervals_tsst + 1):
    boundary_columns[f'tsst_s{i}'] = df_sub['TSST_Start'] + i * tsst_interval
boundary_columns['tsst_end'] = df_sub['Tsst_end']
new_df = pd.DataFrame(boundary_columns)

# Scale by 700 * 60 (presumably minutes -> sample index at 700 Hz; confirm)
# and transpose so each column holds the boundaries of one spreadsheet row.
df_lebel = (new_df * 700 * 60).T
# Print the shape of the boundary table
print(df_lebel.shape)


In [None]:
# Absolute gap between consecutive rows, then the smallest gap in each column
row_gaps = df_lebel.diff(axis=0).abs()
min_diffs = row_gaps.min()

# Overall smallest gap, rounded down to an integer
absolute_lowest = np.floor(min_diffs.min()).astype(int)

# Show the result
print(absolute_lowest)

In [None]:
import pandas as pd
import numpy as np

samples = []
lebels = []

# Row layout of each df_lebel column:
#   0                          base_start
#   1 .. num_intervals_base    base_s1 .. base_s{num_intervals_base}
#   num_intervals_base + 1     base_end
#   following rows             tsst_start, tsst_s1 .., tsst_end
# A window that *starts* at base_end is not part of the baseline phase, so
# that row is the one to skip. The previous hard-coded `i != 20` no longer
# matched num_intervals_base: it dropped a valid baseline window and kept the
# base_end window with a baseline label.
base_end_row = num_intervals_base + 1

# Iterate over the subjects (column positions in data_wesad / df_lebel)
for subject_number in range(15):
    df1 = data_wesad.iloc[:, subject_number]
    df2 = df_lebel.iloc[:, subject_number]
    indices = df2.values.astype(int).flatten()

    # The final row (tsst_end) never starts a window, hence `len - 1`.
    for i in range(len(indices) - 1):
        if i == base_end_row:
            continue
        start = indices[i]
        end = start + absolute_lowest
        # Slice [start-1, end) from the recording, i.e. absolute_lowest + 1 samples
        samples.append(pd.Series(df1.iloc[start - 1:end].values))
        # Rows before base_end are baseline (0); rows after it are TSST (1).
        # Deriving the label from the row keeps samples and labels aligned
        # without a hand-typed list of zeros and ones.
        lebels.append(0 if i < base_end_row else 1)

# One row per extracted window
concatenated = pd.concat(samples, axis=1).T
lebel_all = pd.DataFrame(lebels)

# Print the shape of the concatenated dataset
print(concatenated.shape)
print(lebel_all.shape)

# Downsampling

In [None]:
import numpy as np
from scipy.signal import resample
original_dataset = concatenated.T  # one column per extracted window

# Output length per window: 256 * (6768 // 700) = 2304 samples.
# NOTE(review): 6768 // 700 truncates to 9, so this is only approximately a
# 256 Hz rate if the windows are ~6768 samples at 700 Hz -- confirm.
target_len = 256 * (6768 // 700)

# Size the output from the data instead of a hard-coded 2700 rows: a different
# number of windows would otherwise leave all-zero rows or raise an IndexError.
n_windows = original_dataset.shape[1]
downsampled_dataset = np.zeros((n_windows, target_len))
print(original_dataset[0].shape)
for i in range(n_windows):
    # Fourier-domain resampling of window i to target_len points
    downsampled_dataset[i, :] = resample(original_dataset[i], target_len)
wesad_x = downsampled_dataset.T
print(wesad_x.shape)  # (target_len, n_windows)

In [None]:
import numpy as np
# Scratch check: show the shape of a zero-filled (450, 40608) array
print(np.zeros(shape=(450, 40608)).shape)

In [None]:
import matplotlib.pyplot as plt

# wesad_x is (samples per window, windows), so one column is one downsampled
# window rather than a whole subject.
wesad_x = pd.DataFrame(wesad_x)
n_points = 2000
window_position = 4  # zero-based column position in wesad_x

subject_data = wesad_x.iloc[:n_points, window_position]

# Size the x axis from the slice itself so x and y always have equal length.
time_axis = range(len(subject_data))

# The title is derived from what is actually plotted; the previous hard-coded
# text ("First 1000 ... Subject 1") matched neither the slice nor the column.
plt.plot(time_axis, subject_data)
plt.xlabel('Time')
plt.ylabel('Voltage(mV)')
plt.title(f'Plot of First {len(subject_data)} Data Points - Window {window_position}')
plt.show()

In [None]:
# Transpose so that each row is one window
X = wesad_x.transpose()
# Targets: the stacked per-window labels
y = lebel_all

In [None]:
# Aliases used by the training and evaluation cells
features, labels = X, y

# Transformer architecture

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import LayerNormalization, MultiHeadAttention, Add, Dense, Input, Conv1D, MaxPooling1D, BatchNormalization, Dropout, GlobalMaxPooling1D, Concatenate, Layer
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
import numpy as np
from tensorflow.keras.layers import Bidirectional, LSTM
from keras.layers import Conv1D, MaxPooling1D, BatchNormalization, Add, Concatenate, ZeroPadding1D
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, BatchNormalization, SeparableConv1D, Bidirectional, LSTM, Dense, Add, Concatenate, GlobalAveragePooling1D, Reshape, Attention
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import GlobalAveragePooling1D, Dense, Multiply
from tensorflow.keras.layers import SeparableConv1D, AveragePooling1D, BatchNormalization, Concatenate

def se_block(input_layer, reduction_ratio=4):
    """Re-weight the channels of a 1D feature map with a learned gate.

    The input is averaged over its time axis, passed through a two-layer
    bottleneck (ReLU, then sigmoid), and the resulting per-channel weights
    are multiplied back onto the input.

    Args:
        input_layer: Keras tensor whose last dimension is the channel count.
        reduction_ratio: Divisor applied to the channel count to size the
            bottleneck layer.

    Returns:
        Keras tensor with the same shape as ``input_layer``.
    """
    filters = input_layer.shape[-1]
    # Keep at least one unit: filters // reduction_ratio is 0 whenever the
    # feature map has fewer channels than the ratio, which Dense rejects.
    bottleneck_units = max(1, filters // reduction_ratio)
    se = GlobalAveragePooling1D()(input_layer)
    se = Dense(bottleneck_units, activation='relu')(se)
    se = Dense(filters, activation='sigmoid')(se)
    return Multiply()([input_layer, se])

class PositionalEncoding(Layer):
    """Add a fixed sinusoidal positional encoding to a sequence tensor.

    Args:
        position: Number of positions in the encoding table; inputs are
            expected to have at most this many time steps.
        d_model: Width of the encoding table (size of the input's last axis).
        **kwargs: Standard Keras ``Layer`` arguments (``name``, ``dtype``,
            ...). Accepting them lets the layer be rebuilt from a saved config.
    """

    def __init__(self, position, d_model, **kwargs):
        super(PositionalEncoding, self).__init__(**kwargs)
        self.position = position
        self.d_model = d_model

    def get_angles(self, pos, i, d_model):
        """Return ``pos / 10000 ** (2 * (i // 2) / d_model)`` via broadcasting."""
        angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
        return pos * angle_rates

    def call(self, inputs):
        """Scale ``inputs`` by sqrt(d_model) and add the first ``seq_length`` encoding rows."""
        seq_length = tf.shape(inputs)[1]
        angles = self.get_angles(np.arange(self.position)[:, np.newaxis],
                                 np.arange(self.d_model)[np.newaxis, :],
                                 self.d_model)

        # Apply sine to even indices in the array; 2i
        angles[:, 0::2] = np.sin(angles[:, 0::2])

        # Apply cosine to odd indices in the array; 2i+1
        angles[:, 1::2] = np.cos(angles[:, 1::2])

        # Leading axis of size 1 so the table broadcasts over the batch
        pos_encoding = angles[np.newaxis, ...]
        pos_encoding = tf.cast(pos_encoding, dtype=tf.float32)

        # Scale the input (optional, closer to original implementation)
        inputs *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))

        return inputs + pos_encoding[:, :seq_length, :]

    def get_config(self):
        """Return the constructor arguments so a saved model can rebuild the layer."""
        config = super().get_config()
        config.update({
            "position": self.position,
            "d_model": self.d_model,
        })
        return config



def shared_cnn(input_layer, filters1, kernel_size1, filters2, kernel_size2, filters3, kernel_size3):
    """Three-stage Conv1D feature extractor.

    Each stage is Conv1D (ReLU) -> AveragePooling1D(2) -> BatchNormalization.
    The first two stages apply ``se_block`` right after the convolution; the
    third stage does not.

    Args:
        input_layer: Keras tensor fed to the first convolution.
        filters1, kernel_size1: Filter count and kernel width of stage 1.
        filters2, kernel_size2: Filter count and kernel width of stage 2.
        filters3, kernel_size3: Filter count and kernel width of stage 3.

    Returns:
        Keras tensor produced by the last BatchNormalization layer.
    """
    # (filter count, kernel width, apply channel gating?)
    stage_specs = (
        (filters1, kernel_size1, True),
        (filters2, kernel_size2, True),
        (filters3, kernel_size3, False),
    )

    feature_map = input_layer
    for n_filters, kernel_width, gate_channels in stage_specs:
        feature_map = Conv1D(filters=n_filters, kernel_size=kernel_width, activation='relu')(feature_map)
        if gate_channels:
            feature_map = se_block(feature_map)
        feature_map = AveragePooling1D(pool_size=2)(feature_map)
        feature_map = BatchNormalization()(feature_map)

    return feature_map


def hybrid_depthwise_cnn(input_layer, filters1, kernel_size1, filters2, kernel_size2, filters3, kernel_size3):
    """Three-stage SeparableConv1D feature extractor.

    Each stage is SeparableConv1D (ReLU) -> AveragePooling1D(2) ->
    BatchNormalization. The first two stages apply ``se_block`` right after
    the convolution; the third stage does not.

    Args:
        input_layer: Keras tensor fed to the first convolution.
        filters1, kernel_size1: Filter count and kernel width of stage 1.
        filters2, kernel_size2: Filter count and kernel width of stage 2.
        filters3, kernel_size3: Filter count and kernel width of stage 3.

    Returns:
        Keras tensor produced by the last BatchNormalization layer.
    """
    # (filter count, kernel width, apply channel gating?)
    stage_specs = (
        (filters1, kernel_size1, True),
        (filters2, kernel_size2, True),
        (filters3, kernel_size3, False),
    )

    feature_map = input_layer
    for n_filters, kernel_width, gate_channels in stage_specs:
        feature_map = SeparableConv1D(filters=n_filters, kernel_size=kernel_width, activation='relu')(feature_map)
        if gate_channels:
            feature_map = se_block(feature_map)
        feature_map = AveragePooling1D(pool_size=2)(feature_map)
        feature_map = BatchNormalization()(feature_map)

    return feature_map


def transformer_encoder(input_layer, num_heads, units):
    """One encoder block: self-attention and a feed-forward net, each with a residual.

    Both sub-layers normalise their input first and add their output back to
    the un-normalised tensor; a final LayerNormalization is applied to the
    result. The residual additions require the last dimension of
    ``input_layer`` to equal ``units``.

    Args:
        input_layer: Keras sequence tensor.
        num_heads: Number of attention heads.
        units: ``key_dim`` of the attention layer and output width of the
            feed-forward net (its hidden width is ``4 * units``).

    Returns:
        Keras tensor with the block's output.
    """
    # --- self-attention sub-layer ---
    attn_in = LayerNormalization()(input_layer)
    attn_out = MultiHeadAttention(num_heads=num_heads, key_dim=units)(attn_in, attn_in)
    attn_residual = Add()([input_layer, attn_out])

    # --- position-wise feed-forward sub-layer ---
    ffn_in = LayerNormalization()(attn_residual)
    expanded = Dense(4 * units, activation='relu')(ffn_in)
    projected = Dense(units)(expanded)
    ffn_residual = Add()([attn_residual, projected])

    return LayerNormalization()(ffn_residual)



In [None]:
# Number of KFold splits used by the training, evaluation and history-plot cells
k_num = 15

In [None]:
# Aliases used by the training and evaluation cells
features, labels = X, y

In [None]:
# Show the shapes of the model inputs and targets, one per line
print(features.shape, labels.shape, sep='\n')

# Training with Data Augmentation

In [None]:
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import MultiHeadAttention, Flatten


def _augment_minority(X_minority, offsets=(768, 1792)):
    """Double the minority class by cutting shifted windows from consecutive pairs.

    Rows are taken two at a time (0 & 1, 2 & 3, ...). Each pair is joined end
    to end, and one extra window of the original length is cut at every entry
    of ``offsets``. Per pair the output holds: row a, row b, then the shifted
    windows, in that order.

    The previous inline version pre-allocated ``2 * n`` zero rows; with an odd
    ``n`` the last real row was dropped and two all-zero rows were left in
    place (and labelled as minority). Here an unpaired last row is kept as-is
    and no placeholder rows are produced.

    Args:
        X_minority: 2D array, one window per row.
        offsets: Start positions of the extra windows inside the joined pair;
            each must be at most the window length.

    Returns:
        2D array of augmented windows with the same number of columns.
    """
    n_rows, window_len = X_minority.shape
    n_pairs = n_rows // 2
    first = X_minority[0:2 * n_pairs:2]
    second = X_minority[1:2 * n_pairs:2]
    joined = np.concatenate((first, second), axis=1)

    pieces = [first, second]
    for offset in offsets:
        pieces.append(joined[:, offset:offset + window_len])

    # (pairs, pieces, length) -> pair-major rows: a, b, shift1, shift2, a, b, ...
    augmented = np.stack(pieces, axis=1).reshape(len(pieces) * n_pairs, window_len)
    if n_rows % 2:
        augmented = np.vstack((augmented, X_minority[-1:]))
    return augmented


def _build_model(window_len, num_encoder_layers=4):
    """Build and compile the CNN + BiLSTM + transformer binary classifier.

    NOTE(review): the previous inline code constructed eight encoder blocks
    but fed only the fourth one's output into the residual connection, so
    blocks 5-8 never became part of the model. Four blocks are built here,
    which yields the same network; raise ``num_encoder_layers`` if eight were
    intended.

    Args:
        window_len: Number of time steps per input window.
        num_encoder_layers: Number of stacked ``transformer_encoder`` blocks.

    Returns:
        A compiled Keras ``Model``.
    """
    input_layer = Input(shape=(window_len, 1))

    cnn_kwargs = dict(filters1=32, kernel_size1=14, filters2=64, kernel_size2=8,
                      filters3=128, kernel_size3=5)
    # Two parallel CNN branches over the same input
    shared_cnn_output = shared_cnn(input_layer, **cnn_kwargs)
    hybrid_cnn_output = hybrid_depthwise_cnn(input_layer, **cnn_kwargs)

    # Element-wise sum of the two branches, followed by channel gating
    combined_output = Add()([shared_cnn_output, hybrid_cnn_output])
    combined_output = se_block(combined_output)

    # BiLSTM with 64 units per direction -> 128 features, matching d_model
    bilstm_output = Bidirectional(LSTM(units=64, return_sequences=True))(combined_output)
    encoded = PositionalEncoding(position=2560, d_model=128)(bilstm_output)

    # First encoder block uses 4 heads, the remaining ones 2 heads
    encoded = transformer_encoder(encoded, num_heads=4, units=128)
    for _ in range(num_encoder_layers - 1):
        encoded = transformer_encoder(encoded, num_heads=2, units=128)

    # Residual connection around the encoder stack, then average over time
    residual_output = Add()([encoded, bilstm_output])
    hidden = GlobalAveragePooling1D()(residual_output)

    # Dense head: two regularised Dense -> BatchNorm -> Dropout stages
    for width in (256, 128):
        hidden = Dense(units=width, activation='relu', kernel_regularizer=l2(0.01))(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Dropout(0.4)(hidden)

    # Output layer for binary classification
    output = Dense(units=1, activation='sigmoid')(hidden)

    model = Model(inputs=input_layer, outputs=output)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


# KFold with k_num contiguous (unshuffled) splits
kf = KFold(n_splits=(k_num), shuffle=False, random_state=None)

# History and best validation accuracy of each fold
all_fold_histories = []
all_fold_accuracies = []

fold = 1
for train_index, test_index in kf.split(features):

    tf.keras.backend.clear_session()
    print(f"\nTraining on Fold 2nd {fold}...")
    print(test_index)

    # Split the data (rows are windows)
    X_train, X_test = features.iloc[train_index], features.iloc[test_index]
    print(X_train.shape)
    print(X_test.shape)

    # Standard scaling, fitted on the training rows only
    scaling = StandardScaler()
    X_train = scaling.fit_transform(X_train)
    X_test = scaling.transform(X_test)

    # 1D label arrays so they can be used as boolean masks
    y_train = labels.iloc[train_index].to_numpy().ravel()
    y_test = labels.iloc[test_index].to_numpy().ravel()

    # Data augmentation of the positive class
    X_majority = X_train[y_train == 0]
    X_minority = X_train[y_train == 1]
    print(X_minority.shape)
    print(X_majority.shape)

    X_minority_augmented = _augment_minority(X_minority)
    print(X_minority_augmented.shape)

    X_train_augmented = np.vstack((X_majority, X_minority_augmented))
    y_train_augmented = np.hstack((np.zeros(X_majority.shape[0]),
                                   np.ones(X_minority_augmented.shape[0])))

    # Shuffle the augmented training set
    idx = np.random.permutation(X_train_augmented.shape[0])
    X_train = X_train_augmented[idx]
    y_train = pd.DataFrame(y_train_augmented[idx])
    print(X_train.shape)

    # Input length follows the data instead of a hard-coded 2304
    model = _build_model(window_len=X_train.shape[1])

    es = EarlyStopping(monitor='val_loss', mode='min', patience=80)

    # Check shapes
    print(f"Fold {fold} - X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")

    # Unique checkpoint file per fold (loaded again by the evaluation cell)
    checkpoint_filename = f'best_model_fold_3nd{fold}.keras'
    mc = ModelCheckpoint(checkpoint_filename, monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)

    # NOTE(review): the held-out fold doubles as validation data for early
    # stopping and checkpoint selection, so the reported accuracy is an
    # optimistic estimate.
    try:
        history = model.fit(X_train, y_train, epochs=100, batch_size=64,
                            validation_data=(X_test, y_test), callbacks= [es, mc])

        # Save the history and validation accuracy for this fold
        all_fold_histories.append(history.history)
        all_fold_accuracies.append(max(history.history['val_accuracy']))

        print(f"Fold {fold} - Validation accuracy: {all_fold_accuracies[-1]}")
    except Exception as e:
        # Best effort: report the failure and continue with the next fold
        print(f"Error during training Fold {fold}: {e}")

    tf.keras.backend.clear_session()
    fold += 1

# Print or analyze the results
print("\nCross-validation accuracies for each fold:", all_fold_accuracies)
print("Average cross-validation accuracy:", np.mean(all_fold_accuracies))


# Prediction/evaluation

In [None]:
from keras import layers
import keras
import numpy as np
import tensorflow as tf

@keras.saving.register_keras_serializable(package="Custom", name="PositionalEncoding")
class PositionalEncoding(layers.Layer):
    """Serializable layer that adds a fixed sinusoidal positional encoding.

    Args:
        position: Number of positions in the encoding table.
        d_model: Width of the encoding table (size of the input's last axis).
        **kwargs: Forwarded to the base Keras ``Layer``.
    """

    def __init__(self, position, d_model, **kwargs):
        super().__init__(**kwargs)
        self.position = position
        self.d_model = d_model

    def get_angles(self, pos, i, d_model):
        """Return ``pos / 10000 ** (2 * (i // 2) / d_model)`` via broadcasting."""
        exponent = (2 * (i // 2)) / np.float32(d_model)
        angle_rates = 1 / np.power(10000, exponent)
        return pos * angle_rates

    def call(self, inputs):
        """Scale ``inputs`` by sqrt(d_model) and add the first ``seq_length`` encoding rows."""
        seq_length = tf.shape(inputs)[1]

        # Column vector of positions against a row vector of channel indices
        position_ids = np.arange(self.position)[:, np.newaxis]
        channel_ids = np.arange(self.d_model)[np.newaxis, :]
        table = self.get_angles(position_ids, channel_ids, self.d_model)

        # Even channels carry the sine, odd channels the cosine
        table[:, 0::2] = np.sin(table[:, 0::2])
        table[:, 1::2] = np.cos(table[:, 1::2])

        # Leading axis of size 1 so the table broadcasts over the batch
        pos_encoding = tf.cast(table[np.newaxis, ...], dtype=tf.float32)

        # Scale the input before adding the encoding
        scale = tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        scaled_inputs = inputs * scale

        return scaled_inputs + pos_encoding[:, :seq_length, :]

    def get_config(self):
        """Return the constructor arguments on top of the base layer config."""
        config = super().get_config()
        config["position"] = self.position
        config["d_model"] = self.d_model
        return config


In [None]:
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import load_model
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import cohen_kappa_score

kappa_scores = []
# Same k_num contiguous (unshuffled) splits as used for training
kf = KFold(n_splits=k_num, shuffle=False, random_state=None)

fold = 1
for train_index, test_index in kf.split(features):
    print(f"\nResult on Fold {fold}...")

    # Split the data (rows are windows)
    X_train, X_test = features.iloc[train_index], features.iloc[test_index]
    print(X_train.shape)
    print(X_test.shape)

    # Re-fit the scaler on the training rows so the held-out rows are
    # transformed exactly as they were during training
    scaling = StandardScaler()
    X_train = scaling.fit_transform(X_train)
    X_test = scaling.transform(X_test)

    # 1D label arrays
    y_train = labels.iloc[train_index].to_numpy().ravel()
    y_test = labels.iloc[test_index].to_numpy().ravel()

    # Load the best checkpoint written for this fold by the training cell
    model_fold = load_model(f'best_model_fold_3nd{fold}.keras', custom_objects={"PositionalEncoding": PositionalEncoding})

    # Predicted probabilities for the held-out rows
    y_pred_probs = model_fold.predict(X_test)

    # Threshold at 0.5 to obtain binary class labels
    y_pred_classes = (y_pred_probs > 0.5).astype(int).flatten()

    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred_classes)
    print("Confusion Matrix for Fold {}:\n".format(fold), cm)

    # Precision, recall and f1-score per class
    report = classification_report(y_test, y_pred_classes, target_names=['Class 0', 'Class 1'])
    print("\nClassification Report for Fold {}:\n".format(fold), report)

    # ROC curve and area under it
    fpr, tpr, thresholds = roc_curve(y_test, y_pred_probs.ravel())
    roc_auc = auc(fpr, tpr)

    # `fold` is already 1-based; the earlier `fold + 1` labelled every plot
    # and AUC line with the wrong fold number.
    plt.figure(figsize=(10, 6))
    plt.plot(fpr, tpr, color='blue', lw=2, label=f'ROC Curve (area = {roc_auc:.2f})')
    plt.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--')  # Diagonal line
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'Receiver Operating Characteristic for Fold {fold}')
    plt.legend(loc='lower right')
    plt.grid()
    plt.show()

    print(f'Fold {fold} - AUC: {roc_auc:.2f}')

    # Cohen's kappa for this fold
    kappa = cohen_kappa_score(y_test, y_pred_classes)
    kappa_scores.append(kappa)
    print(f"Kappa for this fold: {kappa}")
    fold += 1

# Accuracies recorded by the training cell
print("\nCross-validation accuracies for each fold:", all_fold_accuracies)
print("Average cross-validation accuracy:", np.mean(all_fold_accuracies))

# Average Kappa across all folds
print("Average Kappa across folds:", np.mean(kappa_scores))


In [None]:
# How many folds finished training and recorded a history
n_recorded_folds = len(all_fold_histories)
print("Number of folds in all_fold_histories:", n_recorded_folds)

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score

# Plot accuracy and loss curves for every fold that recorded a history.
# Iterating over the list itself (instead of range(1, k_num + 1)) avoids an
# IndexError when a fold failed during training and stored nothing.
# NOTE: the fold number shown is the position in all_fold_histories, which
# equals the real fold number only if no earlier fold was skipped.
for fold, history in enumerate(all_fold_histories, start=1):
    fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 6))

    # Accuracy panel
    ax_acc.plot(history['accuracy'], label='Training Accuracy')
    ax_acc.plot(history['val_accuracy'], label='Validation Accuracy')
    ax_acc.set_title(f'Fold {fold} - Accuracy')
    ax_acc.set_xlabel('Epochs')
    ax_acc.set_ylabel('Accuracy')
    ax_acc.legend()

    # Loss panel
    ax_loss.plot(history['loss'], label='Training Loss')
    ax_loss.plot(history['val_loss'], label='Validation Loss')
    ax_loss.set_title(f'Fold {fold} - Loss')
    ax_loss.set_xlabel('Epochs')
    ax_loss.set_ylabel('Loss')
    ax_loss.legend()

    fig.suptitle(f'Fold {fold} Performance')
    plt.show()
