In [None]:

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
from collections import Counter
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Dense, Dropout, BatchNormalization, Conv1D, MaxPooling1D, Flatten,
                                     LayerNormalization, MultiHeadAttention, GlobalAveragePooling1D, Add)
from tensorflow.keras.optimizers import Adam
import tensorflow as tf


# Load the raw dementia dataset (the path points at a mounted Google Drive).
df = pd.read_csv("/content/drive/MyDrive/thesis/dataset_dementia (1) (1).csv")

# Keep only rows that have a label. The explicit .copy() makes `df` an
# independent frame, so the column assignments below never write into a
# view of the unfiltered data.
df = df[df['dementia'].notnull()].copy()

# Mean-impute the numeric columns. The result is assigned back to the column
# instead of calling `df[col].fillna(..., inplace=True)`: the in-place form
# operates on an intermediate object, triggers a pandas FutureWarning, and
# stops modifying `df` altogether under pandas 3.0 copy-on-write.
for score_col in ['EF', 'PS', 'Global']:
    df[score_col] = df[score_col].fillna(df[score_col].mean())

# Impute 'smoking' with its most frequent value (first mode if there are ties).
df['smoking'] = df['smoking'].fillna(df['smoking'].mode()[0])

# One-hot encode each categorical column separately, dropping the first level
# of each. The same encoder object is refit per column, so after this
# statement it is fitted on 'hypercholesterolemia' only.
encoder = OneHotEncoder(drop='first', sparse_output=False)
encoded_gender, encoded_htn, encoded_chol = [
    encoder.fit_transform(df[[source_col]])
    for source_col in ('gender', 'hypertension', 'hypercholesterolemia')
]

# Integer-code the smoking categories in place.
le = LabelEncoder()
df['smoking'] = le.fit_transform(df['smoking'])

# The raw categorical columns are replaced by their encoded counterparts.
df = df.drop(columns=['gender', 'hypertension', 'hypercholesterolemia'])


# One output name per source column: this only lines up when every encoded
# array has exactly one column (i.e. each source column has two categories).
encoded_df = pd.DataFrame(
    np.hstack((encoded_gender, encoded_htn, encoded_chol)),
    columns=['gender_1', 'htn_1', 'chol_1'],
)
# Reset the index so the positional index of `encoded_df` aligns row-for-row.
df = pd.concat([df.reset_index(drop=True), encoded_df], axis=1)


# Target vector and feature matrix.
y = df['dementia']
X = df.drop(columns=['dementia'])


def add_gaussian_noise(X, noise_level=0.01, numeric_cols=None, random_state=None):
    """Return a copy of ``X`` with zero-mean Gaussian noise added to some columns.

    Parameters
    ----------
    X : pandas.DataFrame
        Input features. The frame itself is not modified.
    noise_level : float, default 0.01
        Standard deviation of the noise added to each selected column.
    numeric_cols : sequence of str, optional
        Columns to perturb. Defaults to ``['EF', 'PS', 'Global']``.
    random_state : int, optional
        Seed for a dedicated NumPy generator. When omitted, NumPy's global
        random state is used, so ``np.random.seed`` still controls the result.

    Returns
    -------
    pandas.DataFrame
        A copy of ``X`` in which every selected column has an independent
        noise vector added; all other columns are unchanged.

    Raises
    ------
    KeyError
        If a selected column is not present in ``X``.
    """
    if numeric_cols is None:
        numeric_cols = ['EF', 'PS', 'Global']

    # Both the legacy module and a Generator expose .normal(loc, scale, size),
    # so either can be used interchangeably below.
    rng = np.random if random_state is None else np.random.default_rng(random_state)

    noisy_X = X.copy()
    n_rows = X.shape[0]
    for col in numeric_cols:
        noisy_X[col] = noisy_X[col] + rng.normal(0, noise_level, size=n_rows)
    return noisy_X

X = add_gaussian_noise(X)


# Split, normalize and balance with SMOTE (training data only)

# Hold out the test set BEFORE scaling and oversampling. Fitting the scaler or
# SMOTE on the full dataset leaks test information into training, and puts
# synthetic samples interpolated from training rows into the test set, which
# inflates the reported accuracy. Stratifying keeps the minority-class ratio
# the same in both parts.
X_train_raw, X_test_raw, y_train_raw, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Scaling statistics come from the training rows only; the test rows are
# transformed with those same statistics. `X_scaled` now holds the scaled
# training features.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

print("Before SMOTE:", Counter(y_train_raw))

# Oversample the minority class in the training set only, so the test set
# keeps the real class distribution.
sm = SMOTE(random_state=42)
X_resampled, y_resampled = sm.fit_resample(X_scaled, y_train_raw)

print("After SMOTE:", Counter(y_resampled))

# The resampled set is expected to list the original rows first and the
# synthetic rows last. Keras `validation_split` takes the final fraction of
# the arrays without shuffling, so shuffle here to keep the validation slice
# representative.
shuffle_idx = np.random.default_rng(42).permutation(len(X_resampled))
X_train = np.asarray(X_resampled)[shuffle_idx]
y_train = np.asarray(y_resampled)[shuffle_idx]

# Add a trailing channel axis: (n_samples, n_features) -> (n_samples, n_features, 1).
X_train_cnn = np.expand_dims(X_train, axis=2)
X_test_cnn = np.expand_dims(X_test, axis=2)


# CNN + Transformer Model

def transformer_block(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Build one Transformer encoder block on top of ``inputs``.

    The block has two residual sub-layers, each preceded by layer
    normalization: multi-head self-attention, then a two-layer feed-forward
    network that projects back to the input's last-axis size.

    Parameters
    ----------
    inputs : Keras tensor
        Tensor the block is applied to; its last dimension is preserved.
    head_size : int
        ``key_dim`` passed to ``MultiHeadAttention``.
    num_heads : int
        Number of attention heads.
    ff_dim : int
        Width of the hidden feed-forward layer.
    dropout : float, default 0
        Dropout rate used inside attention and after each sub-layer's
        first transformation.

    Returns
    -------
    Keras tensor
        Output of the second residual connection.
    """
    channels = inputs.shape[-1]

    # Self-attention sub-layer with residual connection.
    attn_in = LayerNormalization(epsilon=1e-6)(inputs)
    attn_out = MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(attn_in, attn_in)
    attn_out = Dropout(dropout)(attn_out)
    skip = Add()([attn_out, inputs])

    # Position-wise feed-forward sub-layer with residual connection.
    ff = LayerNormalization(epsilon=1e-6)(skip)
    ff = Dense(ff_dim, activation="relu")(ff)
    ff = Dropout(dropout)(ff)
    ff = Dense(channels)(ff)
    return Add()([ff, skip])

# Model input: one sequence per sample whose length is the number of feature
# columns, with a single channel (the axis added by np.expand_dims above).
input_layer = Input(shape=(X_train_cnn.shape[1], 1))

# Convolutional front end: 64 filters of width 3 slide along the feature axis;
# padding='same' keeps the sequence length unchanged before pooling.
x = Conv1D(filters=64, kernel_size=3, padding='same', activation='relu')(input_layer)
# Max-pool adjacent positions in windows of 2.
x = MaxPooling1D(pool_size=2)(x)
# Self-attention over the pooled sequence; the block keeps 64 channels because
# its final Dense projects back to the input's last dimension.
x = transformer_block(x, head_size=64, num_heads=2, ff_dim=128, dropout=0.1)
# Collapse the sequence axis by averaging, leaving one vector per sample.
x = GlobalAveragePooling1D()(x)
# Fully connected classification head with dropout between layers.
x = Dense(128, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.2)(x)
# Single sigmoid unit: output is a value in (0, 1) for the binary target.
output_layer = Dense(1, activation='sigmoid')(x)

model = Model(inputs=input_layer, outputs=output_layer)
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# as the reported metric.
model.compile(optimizer=Adam(learning_rate=0.0005), loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# Train Model

# Fit for 50 epochs in mini-batches of 32, holding out 20% of the training
# arrays for validation; `history` keeps the per-epoch metrics.
history = model.fit(
    X_train_cnn,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=50,
    verbose=1,
)

# Loss and accuracy on the held-out test arrays.
test_loss, test_acc = model.evaluate(X_test_cnn, y_test, verbose=0)
print(f"\nCNN + Transformer Test Accuracy is: {test_acc * 100:.2f}%")

# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 predictions.
probabilities = model.predict(X_test_cnn)
preds = (probabilities > 0.5).astype(int)
report = classification_report(y_test, preds)
print("\nClassification Report:\n", report)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['EF'].fillna(df['EF'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['PS'].fillna(df['PS'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values

Before SMOTE: Counter({0.0: 1726, 1.0: 82})
After SMOTE: Counter({0.0: 1726, 1.0: 1726})


Epoch 1/50
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.6525 - loss: 0.6347 - val_accuracy: 0.7703 - val_loss: 0.4997
Epoch 2/50
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.7705 - loss: 0.5063 - val_accuracy: 0.7866 - val_loss: 0.4558
Epoch 3/50
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.7831 - loss: 0.4812 - val_accuracy: 0.8083 - val_loss: 0.4343
Epoch 4/50
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.7983 - loss: 0.4603 - val_accuracy: 0.8300 - val_loss: 0.3998
Epoch 5/50
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.8240 - loss: 0.4105 - val_accuracy: 0.7993 - val_loss: 0.4214
Epoch 6/50
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.8320 - loss: 0.3898 - val_accuracy: 0.8228 - val_loss: 0.3791
Epoch 7/50
[1m69/69[0m [32m━━━━