In [None]:
from random import randrange, uniform
from sklearn.neighbors import NearestNeighbors
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, recall_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [None]:
df=pd.read_csv("/content/drive/MyDrive/WESAD_pkl/ECG/merged_file_binary_1stress_0nostress.csv")
df['Label'].value_counts()

Unnamed: 0_level_0,count
Label,Unnamed: 1_level_1
0,896
1,251


In [None]:
pip install imbalanced-learn



**SMOTE data generation**


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from imblearn.combine import SMOTEENN
from collections import Counter
import warnings

warnings.filterwarnings("ignore", category=FutureWarning)

INPUT_FILE = "/content/drive/MyDrive/WESAD_pkl/ECG/merged_file_binary_1stress_0nostress.csv"

ORIGINAL_TRAIN_OUTPUT = "wesad_80_original.csv"
TEST_SET_OUTPUT = "wesad_20_orignal.csv"
SMOTE_TRAIN_OUTPUT = "smote_5k.csv"

TARGET_COUNT_PER_CLASS = 2500


df = pd.read_csv(INPUT_FILE)

X = df.drop('Label', axis=1)
y = df['Label']


X_original_train, X_test, y_original_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

print("--- Data Splitting Complete ---")
print(f"Test Set Size (20%): {len(X_test)}")
print(f"SMOTE-ENN Train Pool Size (80%): {len(X_original_train)}")
print(f"Original Label Distribution in Train Pool: {Counter(y_original_train)}")
print(f"Original Label Distribution in Test Set: {Counter(y_test)}")

df_test = pd.concat([X_test, y_test], axis=1)
df_test.to_csv(TEST_SET_OUTPUT, index=False)
print(f"\nSaved untouched 20% Test Set to '{TEST_SET_OUTPUT}'")


df_original_train = pd.concat([X_original_train, y_original_train], axis=1)
df_original_train.to_csv(ORIGINAL_TRAIN_OUTPUT, index=False)
print(f"Saved original 80% Train Pool to '{ORIGINAL_TRAIN_OUTPUT}'")



print("\n--- Applying SMOTEENN (Oversampling + Cleaning) to Training Data ---")


sampling_strategy = {0: TARGET_COUNT_PER_CLASS, 1: TARGET_COUNT_PER_CLASS}


smote_enn = SMOTEENN(sampling_strategy=sampling_strategy, random_state=42)


X_resampled, y_resampled = smote_enn.fit_resample(X_original_train, y_original_train)

print(f"SMOTEENN Resampled Training Set Size: {len(X_resampled)}")
print(f"SMOTEENN Resampled Label Distribution: {Counter(y_resampled)}")

X_resampled_df = pd.DataFrame(X_resampled, columns=X.columns)
y_resampled_df = pd.Series(y_resampled, name='Label')

df_smote_train = pd.concat([X_resampled_df, y_resampled_df], axis=1)
df_smote_train.to_csv(SMOTE_TRAIN_OUTPUT, index=False)

print(f"\nSaved SMOTE-ENN Augmented Training Set to '{SMOTE_TRAIN_OUTPUT}'")


--- Data Splitting Complete ---
Test Set Size (20%): 288
SMOTE-ENN Train Pool Size (80%): 1148
Original Label Distribution in Train Pool: Counter({0: 894, 1: 254})
Original Label Distribution in Test Set: Counter({0: 224, 1: 64})

Saved untouched 20% Test Set to 'wesad_20_orignal.csv'
Saved original 80% Train Pool to 'wesad_80_original.csv'

--- Applying SMOTEENN (Oversampling + Cleaning) to Training Data ---
SMOTEENN Resampled Training Set Size: 4820
SMOTEENN Resampled Label Distribution: Counter({1: 2467, 0: 2353})

Saved SMOTE-ENN Augmented Training Set to 'smote_5k.csv'


**SMOTE neural network**

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping


FILE_NAME = "smote_5k.csv"
BATCH_SIZE = 64
EPOCHS = 30
INPUT_FEATURES = 19
L2_REGULARIZATION = 0.001


df = pd.read_csv(FILE_NAME)
print(f"Loaded balanced data of size: {len(df)}")


X = df.drop('Label', axis=1).values
y = df['Label'].values


scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Reshape from (N, 19) to (N, 1, 19) for 1D CNN input
X_reshaped = X_scaled.reshape(X_scaled.shape[0], 1, X_scaled.shape[1])


y_categorical = to_categorical(y, num_classes=2)


X_train, X_val, y_train, y_val = train_test_split(
    X_reshaped, y_categorical, test_size=0.2, random_state=42, stratify=y
)

print(f"Training shape: {X_train.shape}, Validation shape: {X_val.shape}")


model = Sequential([

    Conv1D(filters=32, kernel_size=1, activation='relu',
           input_shape=(X_train.shape[1], X_train.shape[2]),
           kernel_regularizer=l2(L2_REGULARIZATION)),
    Dropout(0.5),


    Conv1D(filters=64, kernel_size=1, activation='relu',
           kernel_regularizer=l2(L2_REGULARIZATION)),
    Dropout(0.5),


    Flatten(),


    Dense(100, activation='relu', kernel_regularizer=l2(L2_REGULARIZATION)),
    Dropout(0.5),


    Dense(2, activation='softmax')
])


model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)



print("\n--- Model Summary ---")
model.summary()


print("\n--- Training Model ---")
history = model.fit(
    X_train, y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(X_val, y_val),

    verbose=1
)


loss, accuracy = model.evaluate(X_val, y_val, verbose=0)
print(f"\n--- SMOTE-ENN Validation Results (Internal) ---")
print(f"Validation Loss: {loss:.4f}")
print(f"Validation Accuracy: {accuracy:.4f}")

model.save('wesad_1d_cnn_model_regularized.h5')
print("\nTrained model saved as 'wesad_1d_cnn_model_regularized.h5'")


Loaded balanced data of size: 4820
Training shape: (3856, 1, 19), Validation shape: (964, 1, 19)

--- Model Summary ---


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



--- Training Model ---
Epoch 1/30
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - accuracy: 0.6024 - loss: 0.8053 - val_accuracy: 0.8205 - val_loss: 0.5439
Epoch 2/30
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7874 - loss: 0.5905 - val_accuracy: 0.8299 - val_loss: 0.4567
Epoch 3/30
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8157 - loss: 0.5352 - val_accuracy: 0.8620 - val_loss: 0.4173
Epoch 4/30
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8469 - loss: 0.4859 - val_accuracy: 0.8641 - val_loss: 0.4032
Epoch 5/30
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8530 - loss: 0.4644 - val_accuracy: 0.8817 - val_loss: 0.3883
Epoch 6/30
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8613 - loss: 0.4320 - val_accuracy: 0.8849 - val_loss: 0.3703
Epoch 7/30
[1m




--- SMOTE-ENN Validation Results (Internal) ---
Validation Loss: 0.2683
Validation Accuracy: 0.9025

Trained model saved as 'wesad_1d_cnn_model_regularized.h5'


**testing neural network on original dataset**

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import f1_score, classification_report


TEST_FILE = "wesad_20_orignal.csv"

MODEL_FILE = 'wesad_1d_cnn_model_regularized.h5'
INPUT_FEATURES = 19


df_test = pd.read_csv(TEST_FILE)

X_test = df_test.drop('Label', axis=1).values
y_test = df_test['Label'].values

scaler = StandardScaler()
X_test_scaled = scaler.fit_transform(X_test)


X_test_reshaped = X_test_scaled.reshape(X_test_scaled.shape[0], 1, X_test_scaled.shape[1])

y_test_categorical = to_categorical(y_test, num_classes=2)

print(f"Final Test Data Loaded: {X_test_reshaped.shape}")


model = load_model(MODEL_FILE)


print("\n--- Evaluating Model on UNTOUCHED 20% Test Set ---")
loss, accuracy = model.evaluate(X_test_reshaped, y_test_categorical, verbose=1)

print(f"\nFINAL UNTOUCHED TEST RESULTS (REGULARIZED MODEL):")
print(f"   Test Loss: {loss:.4f}")
print(f"   Test Accuracy: {accuracy:.4f}")


y_pred_probs = model.predict(X_test_reshaped)

y_pred_classes = np.argmax(y_pred_probs, axis=1)

y_true_classes = np.argmax(y_test_categorical, axis=1)

#classification report
print("\nCLASSIFICATION REPORT (Stress vs. No-Stress):")
print(classification_report(y_true_classes, y_pred_classes, target_names=['No-Stress (0)', 'Stress (1)']))

#weighted F1 score
weighted_f1 = f1_score(y_true_classes, y_pred_classes, average='weighted')
print(f"   Weighted F1 Score: {weighted_f1:.4f}")




Final Test Data Loaded: (288, 1, 19)

--- Evaluating Model on UNTOUCHED 20% Test Set ---
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7441 - loss: 0.6999  





FINAL UNTOUCHED TEST RESULTS (REGULARIZED MODEL):
   Test Loss: 0.7364
   Test Accuracy: 0.7257
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 

CLASSIFICATION REPORT (Stress vs. No-Stress):
               precision    recall  f1-score   support

No-Stress (0)       0.97      0.67      0.79       224
   Stress (1)       0.44      0.94      0.60        64

     accuracy                           0.73       288
    macro avg       0.71      0.80      0.70       288
 weighted avg       0.86      0.73      0.75       288

   Weighted F1 Score: 0.7488


**Neural Network on Original Dataset**

In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report, f1_score

#Configuration
FILE_NAME = "/content/drive/MyDrive/WESAD_pkl/ECG/merged_file_binary_1stress_0nostress.csv"
TEST_SET_OUTPUT = "wesad_baseline_test_set_20_percent.csv"
MODEL_FILE_NAME = 'wesad_1d_cnn_model_baseline.h5'
BATCH_SIZE = 64
EPOCHS = 30
INPUT_FEATURES = 19


df = pd.read_csv(FILE_NAME)
print(f"Loaded original data of size: {len(df)}")


X = df.drop('Label', axis=1).values
y = df['Label'].values


X_train_original, X_test_original, y_train_original, y_test_original = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

print("\n--- Splitting Complete ---")
print(f"Training Set Size (80%): {len(X_train_original)}")
print(f"Test Set Size (20%): {len(X_test_original)}")


df_test = pd.DataFrame(X_test_original, columns=df.drop('Label', axis=1).columns)
df_test['Label'] = y_test_original
df_test.to_csv(TEST_SET_OUTPUT, index=False)
print(f"Saved untouched 20% Test Set to '{TEST_SET_OUTPUT}'")



scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train_original)

X_test_scaled = scaler.transform(X_test_original)



X_train_reshaped = X_train_scaled.reshape(X_train_scaled.shape[0], 1, X_train_scaled.shape[1])
X_test_reshaped = X_test_scaled.reshape(X_test_scaled.shape[0], 1, X_test_scaled.shape[1])


y_train_categorical = to_categorical(y_train_original, num_classes=2)
y_test_categorical = to_categorical(y_test_original, num_classes=2)

print(f"\nFinal Training shape: {X_train_reshaped.shape}, Final Test shape: {X_test_reshaped.shape}")


model = Sequential([
    Conv1D(filters=32, kernel_size=1, activation='relu',
           input_shape=(X_train_reshaped.shape[1], X_train_reshaped.shape[2])),
    Dropout(0.3),

    Conv1D(filters=64, kernel_size=1, activation='relu'),
    Dropout(0.3),

    Flatten(),

    Dense(100, activation='relu'),
    Dropout(0.5),


    Dense(2, activation='softmax')
])


model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

print("\n--- Model Summary ---")
model.summary()


print("\n--- Training Model ---")
history = model.fit(
    X_train_reshaped, y_train_categorical,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,

    validation_data=(X_test_reshaped, y_test_categorical),
    verbose=1
)

print(f"\n--- Final Model Evaluation (Baseline) ---")


loss, accuracy = model.evaluate(X_test_reshaped, y_test_categorical, verbose=0)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")


y_pred_probs = model.predict(X_test_reshaped)
y_pred_classes = np.argmax(y_pred_probs, axis=1)
y_true_classes = np.argmax(y_test_categorical, axis=1)

print("\nCLASSIFICATION REPORT (Baseline - Unbalanced Data):")
print(classification_report(y_true_classes, y_pred_classes, target_names=['No-Stress (0)', 'Stress (1)']))



model.save(MODEL_FILE_NAME)
print(f"\nTrained baseline model saved as '{MODEL_FILE_NAME}'")

Loaded original data of size: 1436

--- Splitting Complete ---
Training Set Size (80%): 1148
Test Set Size (20%): 288
Saved untouched 20% Test Set to 'wesad_baseline_test_set_20_percent.csv'

Final Training shape: (1148, 1, 19), Final Test shape: (288, 1, 19)

--- Model Summary ---


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



--- Training Model ---
Epoch 1/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - accuracy: 0.4371 - loss: 0.8872 - val_accuracy: 0.7778 - val_loss: 0.5351
Epoch 2/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7778 - loss: 0.5217 - val_accuracy: 0.7778 - val_loss: 0.4533
Epoch 3/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7998 - loss: 0.4220 - val_accuracy: 0.8542 - val_loss: 0.3918
Epoch 4/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8125 - loss: 0.4018 - val_accuracy: 0.8403 - val_loss: 0.3464
Epoch 5/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8369 - loss: 0.3748 - val_accuracy: 0.8576 - val_loss: 0.3184
Epoch 6/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8577 - loss: 0.3176 - val_accuracy: 0.8576 - val_loss: 0.3036
Epoch 7/30
[1m




CLASSIFICATION REPORT (Baseline - Unbalanced Data):
               precision    recall  f1-score   support

No-Stress (0)       0.92      0.96      0.94       224
   Stress (1)       0.83      0.70      0.76        64

     accuracy                           0.90       288
    macro avg       0.88      0.83      0.85       288
 weighted avg       0.90      0.90      0.90       288


Trained baseline model saved as 'wesad_1d_cnn_model_baseline.h5'
