<a href="https://colab.research.google.com/github/DMadhumita2904/Multimodal-Deep-Learning-System-for-Early-Alzheimer-s-Detection/blob/main/AlzheimersDetection_Speech.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

# LOAD YOUR UPLOADED DATASET
df = pd.read_csv("/content/addetector_dataset (1).csv")

print("Dataset shape:", df.shape)
print(df.head())
print(df.columns)

Dataset shape: (1010, 66)
   duration_sec  chunk_count    mfcc_1    mfcc_2    mfcc_3    mfcc_4  \
0     10.043262         35.0  0.170832  1.260207  0.293262 -1.183015   
1     22.114289         19.0  0.454972  0.455061  0.760773  1.664440   
2      7.196703         13.0  0.217214 -0.346633 -1.924812 -0.078618   
3      8.470619         27.0  0.955154  2.110354 -1.168544  1.280734   
4      5.067772         23.0  0.079000  1.079544  1.425937  1.308224   

     mfcc_5    mfcc_6    mfcc_7    mfcc_8  ...  linguistic_feat_42  \
0 -0.521902  0.564814  0.710332  0.855109  ...           -0.071778   
1  1.561906 -0.594606 -2.097895 -0.678529  ...           -0.328909   
2  3.254647 -1.244458 -1.814873  1.936389  ...            0.196236   
3  0.381839  0.163061 -0.007729 -1.384647  ...           -0.346869   
4 -0.528279  1.014105  0.211641  1.725908  ...           -0.010732   

   linguistic_feat_43  linguistic_feat_44  linguistic_feat_45  \
0           -0.404672            0.450733            0.

In [3]:
# ===== TARGET COLUMN =====
target_col = "label"   # change if needed

# Convert labels to numeric
df[target_col] = df[target_col].astype(str)
df[target_col] = df[target_col].replace({
    "Control": 0,
    "Healthy": 0,
    "ProbableAD": 1,
    "PossibleAD": 1,
    "Dementia": 1
})

# Drop non-numeric columns
X = df.select_dtypes(include=["number"]).drop(columns=[target_col], errors="ignore")
y = df[target_col]

print("Features:", X.shape)
print("Classes:", y.value_counts())

Features: (1010, 65)
Classes: label
0    560
1    450
Name: count, dtype: int64


In [36]:
df[target_col] = pd.to_numeric(df[target_col], errors="coerce")
df = df.dropna(subset=[target_col])
df[target_col] = df[target_col].astype(int)

In [37]:
non_features = [
    "File","Language","Corpus","Code","Group",
    "Sex","Race","Role","Custom_field"
]

df = df.drop(columns=non_features, errors="ignore")

In [38]:
from sklearn.impute import SimpleImputer

X = df.select_dtypes(include=["number"]).drop(columns=[target_col])
y = df[target_col]

imputer = SimpleImputer(strategy="median")
X = imputer.fit_transform(X)



In [39]:
print(y.value_counts())

label
0    560
1    450
Name: count, dtype: int64


In [17]:
from imblearn.over_sampling import SMOTE
X, y = SMOTE(random_state=42).fit_resample(X, y)

In [18]:
print(y.value_counts())

label
1    560
0    560
Name: count, dtype: int64


In [19]:
from sklearn.feature_selection import SelectKBest, f_classif

selector = SelectKBest(score_func=f_classif, k=30)
X = selector.fit_transform(X, y)

In [20]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [21]:
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)

In [22]:
from sklearn.svm import SVC

svm = SVC(kernel="rbf", probability=True)
svm.fit(X_train, y_train)

In [23]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=200, random_state=42)
rf.fit(X_train, y_train)

In [24]:
from sklearn.metrics import accuracy_score, classification_report

models = {
    "Logistic Regression": lr,
    "SVM": svm,
    "Random Forest": rf
}

for name, model in models.items():
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)

    print("\n==============================")
    print(name)
    print("Accuracy:", round(acc, 4))
    print(classification_report(y_test, y_pred))


Logistic Regression
Accuracy: 0.5804
              precision    recall  f1-score   support

           0       0.59      0.51      0.55       112
           1       0.57      0.65      0.61       112

    accuracy                           0.58       224
   macro avg       0.58      0.58      0.58       224
weighted avg       0.58      0.58      0.58       224


SVM
Accuracy: 0.6027
              precision    recall  f1-score   support

           0       0.60      0.62      0.61       112
           1       0.61      0.58      0.59       112

    accuracy                           0.60       224
   macro avg       0.60      0.60      0.60       224
weighted avg       0.60      0.60      0.60       224


Random Forest
Accuracy: 0.5938
              precision    recall  f1-score   support

           0       0.59      0.61      0.60       112
           1       0.60      0.58      0.59       112

    accuracy                           0.59       224
   macro avg       0.59      0.59   

In [25]:
from sklearn.ensemble import GradientBoostingClassifier

gb = GradientBoostingClassifier(
    n_estimators=400,
    learning_rate=0.03,
    max_depth=3,
    subsample=0.9,
    random_state=42
)

gb.fit(X_train, y_train)

y_pred = gb.predict(X_test)
print("Gradient Boosting Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Gradient Boosting Accuracy: 0.5357142857142857
              precision    recall  f1-score   support

           0       0.54      0.46      0.50       112
           1       0.53      0.62      0.57       112

    accuracy                           0.54       224
   macro avg       0.54      0.54      0.53       224
weighted avg       0.54      0.54      0.53       224



In [26]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    "n_estimators": [200, 400, 600],
    "learning_rate": [0.01, 0.03, 0.05],
    "max_depth": [2, 3, 4]
}

grid = GridSearchCV(
    GradientBoostingClassifier(random_state=42),
    param_grid,
    cv=5,
    scoring="accuracy",
    n_jobs=-1
)

grid.fit(X_train, y_train)

best_model = grid.best_estimator_
y_pred = best_model.predict(X_test)

print("BEST MODEL:", grid.best_params_)
print("Tuned Accuracy:", accuracy_score(y_test, y_pred))

BEST MODEL: {'learning_rate': 0.03, 'max_depth': 2, 'n_estimators': 600}
Tuned Accuracy: 0.5446428571428571


In [27]:
from sklearn.ensemble import VotingClassifier

ensemble = VotingClassifier(
    estimators=[
        ("lr", lr),
        ("svm", svm),
        ("rf", rf),
        ("gb", best_model)
    ],
    voting="soft"
)

ensemble.fit(X_train, y_train)

y_pred = ensemble.predict(X_test)

print("Ensemble Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Ensemble Accuracy: 0.5625
              precision    recall  f1-score   support

           0       0.57      0.52      0.54       112
           1       0.56      0.61      0.58       112

    accuracy                           0.56       224
   macro avg       0.56      0.56      0.56       224
weighted avg       0.56      0.56      0.56       224



In [28]:
from sklearn.feature_selection import RFECV
from sklearn.ensemble import RandomForestClassifier

selector = RFECV(
    RandomForestClassifier(n_estimators=200),
    step=1,
    cv=5,
    scoring="accuracy"
)

X = selector.fit_transform(X, y)
print("Optimal features:", X.shape[1])

Optimal features: 30


In [29]:
print("Dataset shape:", df.shape)

Dataset shape: (1010, 67)


In [41]:
import numpy as np

noise = np.random.normal(0, 0.01, X.shape)
X_aug = np.vstack([X, X + noise])
y_aug = np.hstack([y, y])

print("Augmented size:", X_aug.shape)

Augmented size: (2020, 65)


In [42]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_aug, y_aug,
    test_size=0.2,
    random_state=42,
    stratify=y_aug
)

In [43]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [44]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

lr = LogisticRegression(max_iter=2000)
svm = SVC(kernel="rbf", probability=True)
rf = RandomForestClassifier(n_estimators=300, random_state=42)

lr.fit(X_train, y_train)
svm.fit(X_train, y_train)
rf.fit(X_train, y_train)

In [45]:
from sklearn.metrics import accuracy_score, classification_report

models = {
    "Logistic Regression": lr,
    "SVM": svm,
    "Random Forest": rf
}

for name, model in models.items():
    y_pred = model.predict(X_test)
    print("\n====================")
    print(name)
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred))


Logistic Regression
Accuracy: 0.5717821782178217
              precision    recall  f1-score   support

           0       0.60      0.68      0.64       224
           1       0.52      0.44      0.48       180

    accuracy                           0.57       404
   macro avg       0.56      0.56      0.56       404
weighted avg       0.57      0.57      0.57       404


SVM
Accuracy: 0.8316831683168316
              precision    recall  f1-score   support

           0       0.83      0.88      0.85       224
           1       0.84      0.77      0.80       180

    accuracy                           0.83       404
   macro avg       0.83      0.83      0.83       404
weighted avg       0.83      0.83      0.83       404


Random Forest
Accuracy: 0.8811881188118812
              precision    recall  f1-score   support

           0       0.87      0.92      0.90       224
           1       0.89      0.83      0.86       180

    accuracy                           0.88       404


**DEEP LEARNING**

In [46]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_aug, y_aug,
    test_size=0.2,
    random_state=42,
    stratify=y_aug
)

In [54]:
from sklearn.feature_selection import SelectKBest, f_classif
selector = SelectKBest(f_classif, k=40)
X_aug = selector.fit_transform(X_aug, y_aug)

In [55]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

weights = compute_class_weight("balanced", classes=np.unique(y_train), y=y_train)
class_weights = {0: weights[0], 1: weights[1]}

model.fit(X_train, y_train, validation_split=0.2,
          epochs=100, batch_size=32,
          class_weight=class_weights,
          callbacks=[early_stop])

Epoch 1/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.8496 - loss: 0.3559 - val_accuracy: 0.7407 - val_loss: 0.5909
Epoch 2/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8544 - loss: 0.3485 - val_accuracy: 0.7315 - val_loss: 0.5976
Epoch 3/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.8628 - loss: 0.3399 - val_accuracy: 0.7377 - val_loss: 0.6035
Epoch 4/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.8756 - loss: 0.2868 - val_accuracy: 0.7284 - val_loss: 0.6267
Epoch 5/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 35ms/step - accuracy: 0.8858 - loss: 0.2852 - val_accuracy: 0.7407 - val_loss: 0.6242
Epoch 6/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8932 - loss: 0.2910 - val_accuracy: 0.7562 - val_loss: 0.6124
Epoch 7/100
[1m41/41[0m [32m━

<keras.src.callbacks.history.History at 0x7bd574712cf0>

In [56]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [57]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization

model = Sequential([
    Dense(128, activation="relu", input_shape=(X_train.shape[1],)),
    BatchNormalization(),
    Dropout(0.3),

    Dense(64, activation="relu"),
    BatchNormalization(),
    Dropout(0.3),

    Dense(32, activation="relu"),
    Dropout(0.2),

    Dense(1, activation="sigmoid")
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [58]:
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True
)

history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)

Epoch 1/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.4810 - loss: 0.8919 - val_accuracy: 0.5617 - val_loss: 0.6837
Epoch 2/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5492 - loss: 0.7746 - val_accuracy: 0.5463 - val_loss: 0.6749
Epoch 3/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6285 - loss: 0.6927 - val_accuracy: 0.5895 - val_loss: 0.6659
Epoch 4/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6343 - loss: 0.6660 - val_accuracy: 0.6080 - val_loss: 0.6475
Epoch 5/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6324 - loss: 0.6324 - val_accuracy: 0.6204 - val_loss: 0.6424
Epoch 6/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6462 - loss: 0.6107 - val_accuracy: 0.6512 - val_loss: 0.6281
Epoch 7/100
[1m41/41[0m [32m━━

In [59]:
loss, accuracy = model.evaluate(X_test, y_test)
print("Deep Learning Accuracy:", accuracy)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7390 - loss: 0.5692 
Deep Learning Accuracy: 0.7425742745399475


**DEEP LEARNING 2**


In [60]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_aug, y_aug,
    test_size=0.2,
    random_state=42,
    stratify=y_aug
)

In [67]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

classes = np.unique(y_train)
weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
class_weights = {0: weights[0], 1: weights[1]}

In [61]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [62]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping

In [70]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.BatchNormalization(),

    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.BatchNormalization(),

    tf.keras.layers.Dense(64, activation='relu'),

    tf.keras.layers.Dense(1, activation='sigmoid')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [74]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.AUC()]
)

In [75]:
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)

history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=150,
    batch_size=16,
    callbacks=[early_stop],
    class_weight=class_weights,
    verbose=1
)

Epoch 1/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - accuracy: 0.9795 - auc: 0.9980 - loss: 0.1000 - val_accuracy: 0.8025 - val_auc: 0.8956 - val_loss: 0.4409
Epoch 2/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9962 - auc: 0.9998 - loss: 0.0581 - val_accuracy: 0.8056 - val_auc: 0.9026 - val_loss: 0.4649
Epoch 3/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9960 - auc: 0.9999 - loss: 0.0477 - val_accuracy: 0.8025 - val_auc: 0.9055 - val_loss: 0.4727
Epoch 4/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9910 - auc: 0.9994 - loss: 0.0481 - val_accuracy: 0.7994 - val_auc: 0.8947 - val_loss: 0.5233
Epoch 5/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9911 - auc: 0.9996 - loss: 0.0432 - val_accuracy: 0.7901 - val_auc: 0.8970 - val_loss: 0.5399
Epoch 6/150
[1m81/81[0m [32m━━━━

In [78]:
loss, acc, auc = model.evaluate(X_test, y_test, verbose=0)
print("Deep Learning Accuracy:", round(acc * 100, 2), "%")
print("Deep Learning AUC:", round(auc * 100, 2), "%")

Deep Learning Accuracy: 78.96 %
Deep Learning AUC: 88.14 %


In [79]:
import numpy as np
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int).ravel()

print("\n======================")
print("Deep Learning Model")
print("Accuracy:", round(accuracy_score(y_test, y_pred) * 100, 2), "%")

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step

Deep Learning Model
Accuracy: 78.96 %

Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.80      0.81       224
           1       0.76      0.78      0.77       180

    accuracy                           0.79       404
   macro avg       0.79      0.79      0.79       404
weighted avg       0.79      0.79      0.79       404

Confusion Matrix:
[[179  45]
 [ 40 140]]


**just testing**

In [80]:
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import RandomForestClassifier

selector_model = RandomForestClassifier(n_estimators=300)
selector_model.fit(X, y)

selector = SelectFromModel(selector_model, threshold="median")
X_selected = selector.fit_transform(X, y)

In [81]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

weights = compute_class_weight("balanced", classes=np.unique(y_train), y=y_train)
class_weights = {0: weights[0], 1: weights[1]}

model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    class_weight=class_weights
)

Epoch 1/100
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.9701 - auc: 0.9888 - loss: 0.1172 - val_accuracy: 0.8589 - val_auc: 0.9178 - val_loss: 0.3821
Epoch 2/100
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.9884 - auc: 0.9996 - loss: 0.0583 - val_accuracy: 0.8787 - val_auc: 0.9319 - val_loss: 0.3503
Epoch 3/100
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.9938 - auc: 0.9998 - loss: 0.0445 - val_accuracy: 0.8639 - val_auc: 0.9345 - val_loss: 0.3635
Epoch 4/100
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.9998 - auc: 1.0000 - loss: 0.0276 - val_accuracy: 0.8688 - val_auc: 0.9452 - val_loss: 0.3560
Epoch 5/100
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.9985 - auc: 1.0000 - loss: 0.0244 - val_accuracy: 0.8713 - val_auc: 0.9381 - val_loss: 0.3890
Epoch 6/100
[1m51/51[0m [32m━

<keras.src.callbacks.history.History at 0x7bd563dc6f30>

In [82]:
import tensorflow as tf
from tensorflow.keras import layers

model = tf.keras.Sequential([
    layers.Dense(256, activation="relu", input_shape=(X_train.shape[1],)),
    layers.BatchNormalization(),
    layers.Dropout(0.3),

    layers.Dense(128, activation="relu"),
    layers.BatchNormalization(),
    layers.Dropout(0.3),

    layers.Dense(64, activation="relu"),
    layers.BatchNormalization(),
    layers.Dropout(0.2),

    layers.Dense(1, activation="sigmoid")
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    batch_size=32,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(patience=15, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(patience=5)
    ]
)

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 37ms/step - accuracy: 0.5081 - loss: 0.9381 - val_accuracy: 0.5520 - val_loss: 0.6846 - learning_rate: 5.0000e-04
Epoch 2/100
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.5706 - loss: 0.7359 - val_accuracy: 0.5594 - val_loss: 0.6813 - learning_rate: 5.0000e-04
Epoch 3/100
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.6324 - loss: 0.6992 - val_accuracy: 0.5941 - val_loss: 0.6694 - learning_rate: 5.0000e-04
Epoch 4/100
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.6220 - loss: 0.6823 - val_accuracy: 0.6287 - val_loss: 0.6500 - learning_rate: 5.0000e-04
Epoch 5/100
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.6345 - loss: 0.6404 - val_accuracy: 0.6559 - val_loss: 0.6328 - learning_rate: 5.0000e-04
Epoch 6/100
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [85]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Predict probabilities
y_prob = model.predict(X_test).flatten()

# Convert to class labels (threshold = 0.5)
y_pred = (y_prob > 0.5).astype(int)

# Accuracy
acc = accuracy_score(y_test, y_pred)
print("Deep Learning Accuracy:", round(acc * 100, 2), "%")

# Detailed report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Confusion matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
Deep Learning Accuracy: 87.87 %

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.83      0.88       224
           1       0.82      0.93      0.87       180

    accuracy                           0.88       404
   macro avg       0.88      0.88      0.88       404
weighted avg       0.89      0.88      0.88       404


Confusion Matrix:
[[187  37]
 [ 12 168]]


In [84]:
# Deep predictions
dl_probs = model.predict(X_test).flatten()

# Retrain Random Forest with the current X_train (40 features)
rf_ensemble = RandomForestClassifier(n_estimators=300, random_state=42)
rf_ensemble.fit(X_train, y_train)

# Random Forest predictions
rf_probs = rf_ensemble.predict_proba(X_test)[:,1]

# Average ensemble
final_probs = (dl_probs + rf_probs) / 2
final_preds = (final_probs > 0.5).astype(int)

from sklearn.metrics import accuracy_score
print("Ensemble Accuracy:", accuracy_score(y_test, final_preds))

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Ensemble Accuracy: 0.8861386138613861


In [87]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

results = []

# Retrain classical ML models on the current 40-feature X_train
# This ensures consistency with the X_test derived from the 40-feature X_aug
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

lr_retrained = LogisticRegression(max_iter=2000)
lr_retrained.fit(X_train, y_train)

svm_retrained = SVC(kernel="rbf", probability=True)
svm_retrained.fit(X_train, y_train)

rf_retrained = RandomForestClassifier(n_estimators=300, random_state=42)
rf_retrained.fit(X_train, y_train)

ml_models = {
    "Logistic Regression": lr_retrained,
    "SVM": svm_retrained,
    "Random Forest": rf_retrained
}

for name, m in ml_models.items():
    y_pred = m.predict(X_test)

    results.append({
        "Model": name,
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1 Score": f1_score(y_test, y_pred)
    })

# ===== DEEP LEARNING MODEL =====
y_prob_dl = model.predict(X_test).flatten()
y_pred_dl = (y_prob_dl > 0.5).astype(int)

results.append({
    "Model": "Deep Learning",
    "Accuracy": accuracy_score(y_test, y_pred_dl),
    "Precision": precision_score(y_test, y_pred_dl),
    "Recall": recall_score(y_test, y_pred_dl),
    "F1 Score": f1_score(y_test, y_pred_dl)
})

# ===== ENSEMBLE (AVERAGE VOTING) =====
y_prob_lr = lr_retrained.predict_proba(X_test)[:,1]
y_prob_svm = svm_retrained.predict_proba(X_test)[:,1]
y_prob_rf = rf_retrained.predict_proba(X_test)[:,1]

ensemble_prob = (y_prob_lr + y_prob_svm + y_prob_rf + y_prob_dl) / 4
y_pred_ens = (ensemble_prob > 0.5).astype(int)

results.append({
    "Model": "Ensemble Voting",
    "Accuracy": accuracy_score(y_test, y_pred_ens),
    "Precision": precision_score(y_test, y_pred_ens),
    "Recall": recall_score(y_test, y_pred_ens),
    "F1 Score": f1_score(y_test, y_pred_ens)
})

# ===== SHOW TABLE =====
results_df = pd.DataFrame(results)
results_df = results_df.sort_values(by="Accuracy", ascending=False)

print("\n🏆 MODEL COMPARISON")
print(results_df.round(4))

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 

🏆 MODEL COMPARISON
                 Model  Accuracy  Precision  Recall  F1 Score
3        Deep Learning    0.8787     0.8195  0.9333    0.8727
2        Random Forest    0.8663     0.8621  0.8333    0.8475
4      Ensemble Voting    0.8589     0.8154  0.8833    0.8480
1                  SVM    0.7723     0.7651  0.7056    0.7341
0  Logistic Regression    0.5668     0.5166  0.4333    0.4713


**LSTM**

In [88]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# X and y already prepared from your augmented dataset
# X shape = (samples, features)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Convert to 3D for LSTM
X_lstm = X_scaled.reshape(X_scaled.shape[0], X_scaled.shape[1], 1)

X_train, X_test, y_train, y_test = train_test_split(
    X_lstm, y, test_size=0.2, random_state=42, stratify=y
)

In [89]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization

model = Sequential()

model.add(LSTM(128, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(BatchNormalization())
model.add(Dropout(0.3))

model.add(LSTM(64))
model.add(BatchNormalization())
model.add(Dropout(0.3))

model.add(Dense(32, activation="relu"))
model.add(Dense(1, activation="sigmoid"))

model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

model.summary()

  super().__init__(**kwargs)


In [90]:
history = model.fit(
    X_train,
    y_train,
    epochs=40,
    batch_size=32,
    validation_split=0.2,
    verbose=1
)

Epoch 1/40
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 136ms/step - accuracy: 0.5355 - loss: 0.7606 - val_accuracy: 0.5432 - val_loss: 0.6906
Epoch 2/40
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 193ms/step - accuracy: 0.5360 - loss: 0.7166 - val_accuracy: 0.5432 - val_loss: 0.6900
Epoch 3/40
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 213ms/step - accuracy: 0.5266 - loss: 0.7266 - val_accuracy: 0.5432 - val_loss: 0.6910
Epoch 4/40
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 163ms/step - accuracy: 0.5239 - loss: 0.7237 - val_accuracy: 0.5494 - val_loss: 0.6918
Epoch 5/40
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 109ms/step - accuracy: 0.5413 - loss: 0.7039 - val_accuracy: 0.5432 - val_loss: 0.6902
Epoch 6/40
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 116ms/step - accuracy: 0.5521 - loss: 0.6912 - val_accuracy: 0.5432 - val_loss: 0.6910
Epoch 7/40
[1m21/21[0m [

In [91]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)

print("LSTM Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 87ms/step
LSTM Accuracy: 0.4900990099009901

Classification Report:
               precision    recall  f1-score   support

           0       0.54      0.59      0.56       112
           1       0.42      0.37      0.39        90

    accuracy                           0.49       202
   macro avg       0.48      0.48      0.48       202
weighted avg       0.48      0.49      0.49       202


Confusion Matrix:
 [[66 46]
 [57 33]]
