In [3]:
import pandas as pd
combined_data= pd.read_csv('combined_data.csv')

# NO RFE

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from tensorflow.keras.regularizers import l2
from tensorflow.keras.constraints import max_norm


# Preprocessing
X = combined_data.drop(columns=['Segment', 'Subject NO.', 'Gender'])
y_binary = combined_data['Segment'].apply(lambda x: 0 if x == 'EO' else 1)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=0.3, random_state=42)

# Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Reshape the data for LSTM input [samples, timesteps, features]
X_train_lstm = X_train_scaled.reshape((X_train_scaled.shape[0], 1, X_train_scaled.shape[1]))
X_test_lstm = X_test_scaled.reshape((X_test_scaled.shape[0], 1, X_test_scaled.shape[1]))

# Build an improved Hybrid LSTM + NN Model
model = Sequential()

# Bidirectional LSTM Layer with max_norm constraint to prevent exploding gradients
model.add(Bidirectional(LSTM(128, return_sequences=True, kernel_constraint=max_norm(3)), input_shape=(X_train_lstm.shape[1], X_train_lstm.shape[2])))
model.add(Dropout(0.5))  # Increased Dropout rate to reduce overfitting

# Second LSTM Layer for added depth
model.add(LSTM(64, return_sequences=False, kernel_constraint=max_norm(3)))
model.add(Dropout(0.4))

# Fully Connected (Dense) Layers with L2 regularization and Dropout
model.add(Dense(128, activation='relu', kernel_regularizer=l2(0.001)))
model.add(Dropout(0.4))
model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.001)))
model.add(Dropout(0.4))

# Output Layer (sigmoid for binary classification)
model.add(Dense(1, activation='sigmoid'))

# Compile the Model with Adam optimizer and a smaller learning rate
optimizer = Adam(learning_rate=0.0003, clipvalue=1.0)  # Gradient clipping added
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Learning rate scheduler and early stopping
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.00001)
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model for more epochs with smaller batch size and callbacks
model.fit(X_train_lstm, y_train, epochs=150, batch_size=16, verbose=1, validation_data=(X_test_lstm, y_test), 
          callbacks=[reduce_lr, early_stopping])

# Evaluate the model
binary_predictions = model.predict(X_test_lstm)
binary_predictions = (binary_predictions > 0.5).astype(int)

# Calculate accuracy and F1 score
binary_acc = accuracy_score(y_test, binary_predictions)
binary_f1 = f1_score(y_test, binary_predictions)

print(f"Improved Binary Classification Accuracy: {binary_acc}")
print(f"Improved Binary Classification F1 Score: {binary_f1}")


Epoch 1/150


  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 110ms/step - accuracy: 0.3688 - loss: 0.8667 - val_accuracy: 0.6667 - val_loss: 0.8589 - learning_rate: 3.0000e-04
Epoch 2/150
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.4272 - loss: 0.8611 - val_accuracy: 0.6667 - val_loss: 0.8549 - learning_rate: 3.0000e-04
Epoch 3/150
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5977 - loss: 0.8576 - val_accuracy: 0.6667 - val_loss: 0.8508 - learning_rate: 3.0000e-04
Epoch 4/150
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.6556 - loss: 0.8507 - val_accuracy: 0.6667 - val_loss: 0.8467 - learning_rate: 3.0000e-04
Epoch 5/150
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.5876 - loss: 0.8473 - val_accuracy: 0.6667 - val_loss: 0.8424 - learning_rate: 3.0000e-04
Epoch 6/150
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m

In [18]:
combined_data

Unnamed: 0,Subject NO.,Gender,Mean HR (BPM),AVNN (ms),SDNN (ms),NN50 (beats),pNN50 (%),RMSSD (ms),LF (ms2),LF Norm (n.u.),...,Alpha_(P 3 - P 4),Beta1_(Fp 1 - Fp 2),Beta1_(F 3 - F 4),Beta1_(T 3 - T 4),Beta1_(P 3 - P 4),Beta2_(Fp 1 - Fp 2),Beta2_(F 3 - F 4),Beta2_(T 3 - T 4),Beta2_(P 3 - P 4),Segment
0,1,Female,85.8474,698.9147,45.8957,46,10.7477,29.6913,412.1663,46.8523,...,-0.056984,-0.011507,-0.012932,-0.146292,-0.092129,0.043186,-0.027985,0.149890,-0.223297,EO
1,2,Female,88.3727,678.9429,23.8804,0,0.0000,11.6837,314.3801,87.1339,...,-0.122560,0.161338,0.057577,-0.289226,-0.217620,0.383177,0.005490,0.032459,-0.190202,EO
2,3,Female,79.4924,754.7887,50.0888,71,17.4877,37.8050,612.5444,45.8684,...,-0.046685,-0.019545,0.027443,-0.189588,-0.051164,-0.099272,0.204298,-0.104940,-0.129108,EO
3,4,Female,78.8327,761.1057,41.4575,27,6.4593,27.0164,446.1722,64.1144,...,0.123047,-0.082896,0.019006,0.084082,0.130780,-0.106925,-0.011477,0.081186,0.023819,EO
4,5,Female,63.3055,947.7851,40.0863,54,16.0714,35.2921,367.0269,49.2067,...,0.017300,-0.016011,-0.124211,-0.350270,-0.000317,0.022838,-0.058616,-0.242430,0.117196,EO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,36,Male,84.3914,710.9727,20.4221,0,0.0000,14.9740,254.1884,78.5750,...,0.156168,-0.003896,0.084955,0.133143,0.153955,0.011932,0.008967,-0.207500,0.055425,AC2
116,37,Male,94.1723,637.1301,20.0847,2,0.3854,14.0631,288.2414,70.0306,...,-0.064413,-0.049778,-0.059893,-0.438174,-0.144314,-0.037145,-0.016475,-0.399745,-0.129649,AC2
117,38,Female,92.8401,646.2725,24.3686,3,0.6024,16.4590,304.7617,62.9154,...,0.177345,0.114239,0.180824,0.150073,0.099037,0.148904,0.119568,0.079116,0.105819,AC2
118,39,Male,64.9858,923.2791,75.4837,221,66.1677,91.7459,1916.9640,37.0442,...,0.230870,0.205814,0.108824,0.273889,0.152374,0.344771,0.225022,0.275262,0.102648,AC2


# all genders

## eo vs ac1,ac2

In [26]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import RFE
from tensorflow.keras.regularizers import l2
from tensorflow.keras.constraints import max_norm


X = combined_data.drop(columns=['Segment', 'Subject NO.', 'Gender'])
y_binary = combined_data['Segment'].apply(lambda x: 0 if x == 'EO' else 1)

# Feature Scaling for RFE
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply Recursive Feature Elimination (RFE)
logistic_model = LogisticRegression(solver='lbfgs', max_iter=500)
rfe = RFE(logistic_model, n_features_to_select=15)  
rfe = rfe.fit(X_scaled, y_binary)

# Select the features chosen by RFE
X_rfe_selected = X_scaled[:, rfe.support_]


X_train_rfe, X_test_rfe, y_train, y_test = train_test_split(X_rfe_selected, y_binary, test_size=0.3, random_state=42)

# Reshape the selected data for LSTM input [samples, timesteps, features]
X_train_lstm_rfe = X_train_rfe.reshape((X_train_rfe.shape[0], 1, X_train_rfe.shape[1]))
X_test_lstm_rfe = X_test_rfe.reshape((X_test_rfe.shape[0], 1, X_test_rfe.shape[1]))

# Build an improved Hybrid LSTM + NN Model using the selected features
model = Sequential()

# Bidirectional LSTM Layer with max_norm constraint to prevent exploding gradients
model.add(Bidirectional(LSTM(128, return_sequences=True, kernel_constraint=max_norm(3)), input_shape=(X_train_lstm_rfe.shape[1], X_train_lstm_rfe.shape[2])))
model.add(Dropout(0.5))  # Increased Dropout rate to reduce overfitting

# Second LSTM Layer for added depth
model.add(LSTM(64, return_sequences=False, kernel_constraint=max_norm(3)))
model.add(Dropout(0.4))

# Fully Connected (Dense) Layers with L2 regularization and Dropout
model.add(Dense(128, activation='relu', kernel_regularizer=l2(0.001)))
model.add(Dropout(0.4))
model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.001)))
model.add(Dropout(0.4))

# Output Layer (sigmoid for binary classification)
model.add(Dense(1, activation='sigmoid'))

# Compile the Model with Adam optimizer and a smaller learning rate
optimizer = Adam(learning_rate=0.0003, clipvalue=1.0)  # Gradient clipping added
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Learning rate scheduler and early stopping
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.00001)
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model for more epochs with smaller batch size and callbacks
model.fit(X_train_lstm_rfe, y_train, epochs=150, batch_size=16, verbose=1, validation_data=(X_test_lstm_rfe, y_test), 
          callbacks=[reduce_lr, early_stopping])

binary_predictions = model.predict(X_test_lstm_rfe)
binary_predictions = (binary_predictions > 0.5).astype(int)

binary_acc = accuracy_score(y_test, binary_predictions)
binary_f1 = f1_score(y_test, binary_predictions)

print(f"Improved Binary Classification Accuracy with RFE: {binary_acc}")
print(f"Improved Binary Classification F1 Score with RFE: {binary_f1}")


Epoch 1/150


  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 110ms/step - accuracy: 0.3733 - loss: 0.8655 - val_accuracy: 0.6667 - val_loss: 0.8587 - learning_rate: 3.0000e-04
Epoch 2/150
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6178 - loss: 0.8556 - val_accuracy: 0.6389 - val_loss: 0.8537 - learning_rate: 3.0000e-04
Epoch 3/150
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6894 - loss: 0.8525 - val_accuracy: 0.6389 - val_loss: 0.8490 - learning_rate: 3.0000e-04
Epoch 4/150
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7000 - loss: 0.8454 - val_accuracy: 0.6667 - val_loss: 0.8445 - learning_rate: 3.0000e-04
Epoch 5/150
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6564 - loss: 0.8410 - val_accuracy: 0.6667 - val_loss: 0.8397 - learning_rate: 3.0000e-04
Epoch 6/150
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1

In [27]:
selected_features = X.columns[rfe.support_]
print("Selected Features by RFE:")
print(selected_features)

Selected Features by RFE:
Index(['Mean HR (BPM)', 'pNN50 (%)', 'HF Norm (n.u.)', 'Fp1_Theta (4-8 Hz)',
       'Fp2_Theta (4-8 Hz)', 'F3_Alpha (8-12 Hz)', 'F4_Alpha (8-12 Hz)',
       'P4_Alpha (8-12 Hz)', 'F3_Beta 2 (20-30 Hz)', 'Fp1_Gamma (30-60 Hz)',
       'P4_Gamma (30-60 Hz)', 'Fp1_Gamma 2 (60-100 Hz)', 'Beta1_(Fp 1 - Fp 2)',
       'Beta1_(T 3 - T 4)', 'Beta1_(P 3 - P 4)'],
      dtype='object')


## The selected features from RFE are distributed across the files as follows:

# ECG Data: 3 features
# EEG Data: 9 features
# Ratio of Alpha/Beta Power Data: 3 features
# This shows that most of the important features selected by RFE come from the EEG data, with a few from ECG and ratio data.

## eo vs ac1
## eo vs ac2

In [32]:
def train_model_all_genders(segment_1, segment_2):
    # Filter out data for the two segments (EO vs AC1 or EO vs AC2)
    filtered_data = combined_data[combined_data['Segment'].isin([segment_1, segment_2])]
    
    # Create binary labels for classification (0 for EO, 1 for AC1 or AC2)
    y_binary = filtered_data['Segment'].apply(lambda x: 0 if x == segment_1 else 1)
    
    X = filtered_data.drop(columns=['Segment', 'Subject NO.', 'Gender'])
    
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    # Apply Recursive Feature Elimination (RFE)
    logistic_model = LogisticRegression(solver='lbfgs', max_iter=500)
    rfe = RFE(logistic_model, n_features_to_select=15)  # Adjust the number of features
    rfe = rfe.fit(X_scaled, y_binary)
    
    # Select the features chosen by RFE
    X_rfe_selected = X_scaled[:, rfe.support_]
    
    X_train_rfe, X_test_rfe, y_train, y_test = train_test_split(X_rfe_selected, y_binary, test_size=0.3, random_state=42)
    
    # Reshape the selected data for LSTM input [samples, timesteps, features]
    X_train_lstm_rfe = X_train_rfe.reshape((X_train_rfe.shape[0], 1, X_train_rfe.shape[1]))
    X_test_lstm_rfe = X_test_rfe.reshape((X_test_rfe.shape[0], 1, X_test_rfe.shape[1]))
    
    # Build and compile the model (similar to the previous example)
    model = Sequential()
    model.add(Bidirectional(LSTM(128, return_sequences=True, kernel_constraint=max_norm(3)), input_shape=(X_train_lstm_rfe.shape[1], X_train_lstm_rfe.shape[2])))
    model.add(Dropout(0.5))
    model.add(LSTM(64, return_sequences=False, kernel_constraint=max_norm(3)))
    model.add(Dropout(0.4))
    model.add(Dense(128, activation='relu', kernel_regularizer=l2(0.001)))
    model.add(Dropout(0.4))
    model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.001)))
    model.add(Dropout(0.4))
    model.add(Dense(1, activation='sigmoid'))
    
    optimizer = Adam(learning_rate=0.0003, clipvalue=1.0)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    
    # Learning rate scheduler and early stopping
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.00001)
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    
    model.fit(X_train_lstm_rfe, y_train, epochs=150, batch_size=16, verbose=1, validation_data=(X_test_lstm_rfe, y_test),
                callbacks=[reduce_lr, early_stopping])
    
    binary_predictions = model.predict(X_test_lstm_rfe)
    binary_predictions = (binary_predictions > 0.5).astype(int)
    
    binary_acc = accuracy_score(y_test, binary_predictions)
    binary_f1 = f1_score(y_test, binary_predictions)
    
    print(f"All Genders - {segment_1} vs {segment_2} Accuracy: {binary_acc}, F1 Score: {binary_f1}")

train_model_all_genders('EO', 'AC1')

train_model_all_genders('EO', 'AC2')


Epoch 1/150


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 201ms/step - accuracy: 0.4702 - loss: 0.8622 - val_accuracy: 0.5000 - val_loss: 0.8630 - learning_rate: 3.0000e-04
Epoch 2/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.3997 - loss: 0.8622 - val_accuracy: 0.5417 - val_loss: 0.8606 - learning_rate: 3.0000e-04
Epoch 3/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.3890 - loss: 0.8618 - val_accuracy: 0.5417 - val_loss: 0.8582 - learning_rate: 3.0000e-04
Epoch 4/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.5134 - loss: 0.8585 - val_accuracy: 0.6250 - val_loss: 0.8557 - learning_rate: 3.0000e-04
Epoch 5/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.5289 - loss: 0.8567 - val_accuracy: 0.6250 - val_loss: 0.8535 - learning_rate: 3.0000e-04
Epoch 6/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[

  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 169ms/step - accuracy: 0.5452 - loss: 0.8639 - val_accuracy: 0.5000 - val_loss: 0.8626 - learning_rate: 3.0000e-04
Epoch 2/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.3911 - loss: 0.8635 - val_accuracy: 0.5000 - val_loss: 0.8600 - learning_rate: 3.0000e-04
Epoch 3/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.5682 - loss: 0.8579 - val_accuracy: 0.5417 - val_loss: 0.8576 - learning_rate: 3.0000e-04
Epoch 4/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.4946 - loss: 0.8576 - val_accuracy: 0.5417 - val_loss: 0.8551 - learning_rate: 3.0000e-04
Epoch 5/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.5577 - loss: 0.8538 - val_accuracy: 0.5417 - val_loss: 0.8527 - learning_rate: 3.0000e-04
Epoch 6/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[

# For Males and Females

# EO VS AC1,AC2

In [28]:
def build_and_train_rfe_model_by_gender(gender_data, gender):
    print(f"\nTraining model with RFE for {gender}...\n")
    
    X = gender_data.drop(columns=['Segment', 'Subject NO.', 'Gender'])
    y_binary = gender_data['Segment'].apply(lambda x: 0 if x == 'EO' else 1)

    # Feature Scaling for RFE
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Apply Recursive Feature Elimination (RFE)
    logistic_model = LogisticRegression(solver='lbfgs', max_iter=500)
    rfe = RFE(logistic_model, n_features_to_select=15)  # Adjust the number of features
    rfe = rfe.fit(X_scaled, y_binary)

    # Select the features chosen by RFE
    X_rfe_selected = X_scaled[:, rfe.support_]

    X_train_rfe, X_test_rfe, y_train, y_test = train_test_split(X_rfe_selected, y_binary, test_size=0.3, random_state=42)

    # Reshape the selected data for LSTM input [samples, timesteps, features]
    X_train_lstm_rfe = X_train_rfe.reshape((X_train_rfe.shape[0], 1, X_train_rfe.shape[1]))
    X_test_lstm_rfe = X_test_rfe.reshape((X_test_rfe.shape[0], 1, X_test_rfe.shape[1]))

    # Build an improved Hybrid LSTM + NN Model using the selected features
    model = Sequential()

    # Bidirectional LSTM Layer with max_norm constraint to prevent exploding gradients
    model.add(Bidirectional(LSTM(128, return_sequences=True, kernel_constraint=max_norm(3)), input_shape=(X_train_lstm_rfe.shape[1], X_train_lstm_rfe.shape[2])))
    model.add(Dropout(0.5))  # Increased Dropout rate to reduce overfitting

    # Second LSTM Layer for added depth
    model.add(LSTM(64, return_sequences=False, kernel_constraint=max_norm(3)))
    model.add(Dropout(0.4))

    # Fully Connected (Dense) Layers with L2 regularization and Dropout
    model.add(Dense(128, activation='relu', kernel_regularizer=l2(0.001)))
    model.add(Dropout(0.4))
    model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.001)))
    model.add(Dropout(0.4))

    # Output Layer (sigmoid for binary classification)
    model.add(Dense(1, activation='sigmoid'))

    # Compile the Model with Adam optimizer and a smaller learning rate
    optimizer = Adam(learning_rate=0.0003, clipvalue=1.0)  # Gradient clipping added
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    # Learning rate scheduler and early stopping
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.00001)
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    model.fit(X_train_lstm_rfe, y_train, epochs=150, batch_size=16, verbose=1, validation_data=(X_test_lstm_rfe, y_test), 
                callbacks=[reduce_lr, early_stopping])

    binary_predictions = model.predict(X_test_lstm_rfe)
    binary_predictions = (binary_predictions > 0.5).astype(int)

    binary_acc = accuracy_score(y_test, binary_predictions)
    binary_f1 = f1_score(y_test, binary_predictions)

    print(f"{gender} Model - Accuracy with RFE: {binary_acc}")
    print(f"{gender} Model - F1 Score with RFE: {binary_f1}")
    
    return binary_acc, binary_f1

male_data = combined_data[combined_data['Gender'] == 'Male']
female_data = combined_data[combined_data['Gender'] == 'Female']

male_acc_rfe, male_f1_rfe = build_and_train_rfe_model_by_gender(male_data, 'Male')
female_acc_rfe, female_f1_rfe = build_and_train_rfe_model_by_gender(female_data, 'Female')

# Print final results
print("\nFinal Results with RFE:")
print(f"Male Model - Accuracy: {male_acc_rfe}, F1 Score: {male_f1_rfe}")
print(f"Female Model - Accuracy: {female_acc_rfe}, F1 Score: {female_f1_rfe}")


Training model with RFE for Male...

Epoch 1/150


  super().__init__(**kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 259ms/step - accuracy: 0.6436 - loss: 0.8592 - val_accuracy: 0.5000 - val_loss: 0.8598 - learning_rate: 3.0000e-04
Epoch 2/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.7183 - loss: 0.8573 - val_accuracy: 0.5000 - val_loss: 0.8582 - learning_rate: 3.0000e-04
Epoch 3/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.7183 - loss: 0.8538 - val_accuracy: 0.5000 - val_loss: 0.8565 - learning_rate: 3.0000e-04
Epoch 4/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.7724 - loss: 0.8515 - val_accuracy: 0.5000 - val_loss: 0.8549 - learning_rate: 3.0000e-04
Epoch 5/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.7674 - loss: 0.8462 - val_accuracy: 0.5000 - val_loss: 0.8533 - learning_rate: 3.0000e-04
Epoch 6/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[

  super().__init__(**kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 279ms/step - accuracy: 0.3494 - loss: 0.8666 - val_accuracy: 0.5789 - val_loss: 0.8603 - learning_rate: 3.0000e-04
Epoch 2/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.7195 - loss: 0.8570 - val_accuracy: 0.5789 - val_loss: 0.8579 - learning_rate: 3.0000e-04
Epoch 3/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.6307 - loss: 0.8545 - val_accuracy: 0.5789 - val_loss: 0.8556 - learning_rate: 3.0000e-04
Epoch 4/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.6925 - loss: 0.8549 - val_accuracy: 0.5789 - val_loss: 0.8533 - learning_rate: 3.0000e-04
Epoch 5/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.7152 - loss: 0.8485 - val_accuracy: 0.5789 - val_loss: 0.8511 - learning_rate: 3.0000e-04
Epoch 6/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[

# EO VS AC1
# EO VS AC2

In [33]:
def train_model_by_gender(segment_1, segment_2, gender):
    # Filter out data for the two segments and for a specific gender
    filtered_data = combined_data[(combined_data['Segment'].isin([segment_1, segment_2])) & (combined_data['Gender'] == gender)]
    
    # Create binary labels for classification (0 for EO, 1 for AC1 or AC2)
    y_binary = filtered_data['Segment'].apply(lambda x: 0 if x == segment_1 else 1)
    
    X = filtered_data.drop(columns=['Segment', 'Subject NO.', 'Gender'])
    
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    # Apply Recursive Feature Elimination (RFE)
    logistic_model = LogisticRegression(solver='lbfgs', max_iter=500)
    rfe = RFE(logistic_model, n_features_to_select=15)  # Adjust the number of features
    rfe = rfe.fit(X_scaled, y_binary)
    
    # Select the features chosen by RFE
    X_rfe_selected = X_scaled[:, rfe.support_]
    
    # Split the data into training and testing sets using the selected features
    X_train_rfe, X_test_rfe, y_train, y_test = train_test_split(X_rfe_selected, y_binary, test_size=0.3, random_state=42)
    
    # Reshape the selected data for LSTM input [samples, timesteps, features]
    X_train_lstm_rfe = X_train_rfe.reshape((X_train_rfe.shape[0], 1, X_train_rfe.shape[1]))
    X_test_lstm_rfe = X_test_rfe.reshape((X_test_rfe.shape[0], 1, X_test_rfe.shape[1]))
    
    # Build and compile the model (similar to the previous example)
    model = Sequential()
    model.add(Bidirectional(LSTM(128, return_sequences=True, kernel_constraint=max_norm(3)), input_shape=(X_train_lstm_rfe.shape[1], X_train_lstm_rfe.shape[2])))
    model.add(Dropout(0.5))
    model.add(LSTM(64, return_sequences=False, kernel_constraint=max_norm(3)))
    model.add(Dropout(0.4))
    model.add(Dense(128, activation='relu', kernel_regularizer=l2(0.001)))
    model.add(Dropout(0.4))
    model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.001)))
    model.add(Dropout(0.4))
    model.add(Dense(1, activation='sigmoid'))
    
    optimizer = Adam(learning_rate=0.0003, clipvalue=1.0)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    
    # Learning rate scheduler and early stopping
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.00001)
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    
    model.fit(X_train_lstm_rfe, y_train, epochs=150, batch_size=16, verbose=1, validation_data=(X_test_lstm_rfe, y_test),
                callbacks=[reduce_lr, early_stopping])
    
    binary_predictions = model.predict(X_test_lstm_rfe)
    binary_predictions = (binary_predictions > 0.5).astype(int)
    
    binary_acc = accuracy_score(y_test, binary_predictions)
    binary_f1 = f1_score(y_test, binary_predictions)
    
    print(f"{gender.capitalize()} - {segment_1} vs {segment_2} Accuracy: {binary_acc}, F1 Score: {binary_f1}")

train_model_by_gender('EO', 'AC1', 'Male')

train_model_by_gender('EO', 'AC2', 'Male')

train_model_by_gender('EO', 'AC1', 'Female')

train_model_by_gender('EO', 'AC2', 'Female')

Epoch 1/150


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 535ms/step - accuracy: 0.5721 - loss: 0.8640 - val_accuracy: 0.4167 - val_loss: 0.8612 - learning_rate: 3.0000e-04
Epoch 2/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.5721 - loss: 0.8589 - val_accuracy: 0.5000 - val_loss: 0.8602 - learning_rate: 3.0000e-04
Epoch 3/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.4744 - loss: 0.8607 - val_accuracy: 0.5000 - val_loss: 0.8592 - learning_rate: 3.0000e-04
Epoch 4/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - accuracy: 0.5000 - loss: 0.8595 - val_accuracy: 0.5000 - val_loss: 0.8582 - learning_rate: 3.0000e-04
Epoch 5/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.6907 - loss: 0.8539 - val_accuracy: 0.5000 - val_loss: 0.8572 - learning_rate: 3.0000e-04
Epoch 6/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[

  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 515ms/step - accuracy: 0.2163 - loss: 0.8705 - val_accuracy: 0.5833 - val_loss: 0.8630 - learning_rate: 3.0000e-04
Epoch 2/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.4792 - loss: 0.8642 - val_accuracy: 0.5833 - val_loss: 0.8617 - learning_rate: 3.0000e-04
Epoch 3/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.4535 - loss: 0.8616 - val_accuracy: 0.5000 - val_loss: 0.8603 - learning_rate: 3.0000e-04
Epoch 4/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.6394 - loss: 0.8601 - val_accuracy: 0.5000 - val_loss: 0.8590 - learning_rate: 3.0000e-04
Epoch 5/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.4119 - loss: 0.8615 - val_accuracy: 0.5833 - val_loss: 0.8577 - learning_rate: 3.0000e-04
Epoch 6/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[

  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 519ms/step - accuracy: 0.2672 - loss: 0.8688 - val_accuracy: 0.5385 - val_loss: 0.8616 - learning_rate: 3.0000e-04
Epoch 2/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - accuracy: 0.4260 - loss: 0.8635 - val_accuracy: 0.5385 - val_loss: 0.8603 - learning_rate: 3.0000e-04
Epoch 3/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.4009 - loss: 0.8600 - val_accuracy: 0.5385 - val_loss: 0.8590 - learning_rate: 3.0000e-04
Epoch 4/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.5136 - loss: 0.8563 - val_accuracy: 0.5385 - val_loss: 0.8577 - learning_rate: 3.0000e-04
Epoch 5/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.5761 - loss: 0.8540 - val_accuracy: 0.5385 - val_loss: 0.8564 - learning_rate: 3.0000e-04
Epoch 6/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[

  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 505ms/step - accuracy: 0.4864 - loss: 0.8630 - val_accuracy: 0.4615 - val_loss: 0.8627 - learning_rate: 3.0000e-04
Epoch 2/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.4907 - loss: 0.8617 - val_accuracy: 0.4615 - val_loss: 0.8611 - learning_rate: 3.0000e-04
Epoch 3/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.5115 - loss: 0.8632 - val_accuracy: 0.4615 - val_loss: 0.8595 - learning_rate: 3.0000e-04
Epoch 4/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.5136 - loss: 0.8585 - val_accuracy: 0.4615 - val_loss: 0.8580 - learning_rate: 3.0000e-04
Epoch 5/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.6200 - loss: 0.8567 - val_accuracy: 0.4615 - val_loss: 0.8565 - learning_rate: 3.0000e-04
Epoch 6/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[