#### Considering LSTM Model for diabetes prediction

In [1]:
import pandas as pd

# Read the processed diabetes table (one-hot-encoded columns, per the file name)
data_path = '../data/processed/cleaned_diabetes_one_hot_encoding.csv'
df = pd.read_csv(filepath_or_buffer=data_path)

# Plain-text preview of the first five rows
print(df.head(n=5))

    age  hypertension  heart_disease    bmi  HbA1c_level  blood_glucose_level  \
0  80.0             0              1  25.19          6.6                  140   
1  54.0             0              0  27.32          6.6                   80   
2  28.0             0              0  27.32          5.7                  158   
3  36.0             0              0  23.45          5.0                  155   
4  76.0             1              1  20.14          4.8                  155   

   diabetes  gender_Female  gender_Male  gender_Other  \
0         0              1            0             0   
1         0              1            0             0   
2         0              0            1             0   
3         0              1            0             0   
4         0              0            1             0   

   smoking_history_No Info  smoking_history_current  smoking_history_former  \
0                        0                        0                       0   
1           

In [2]:
# Rescale every predictor column with MinMaxScaler (default settings)
from sklearn.preprocessing import MinMaxScaler

# Every column except the 'diabetes' target is treated as a feature
features = df.columns.drop('diabetes').tolist()

# NOTE(review): the scaler is fitted on the full frame, i.e. before the
# train/test split performed later in this notebook, so the test rows
# contribute to the min/max used for scaling. Consider fitting on the
# training rows only.
scaler = MinMaxScaler()
scaler.fit(df[features])

# Overwrite the feature columns of `df` in place with their scaled values
df[features] = scaler.transform(df[features])

In [3]:
# Build the LSTM inputs from the frame
# Target vector: the 'diabetes' column as a NumPy array
Y = df['diabetes'].to_numpy()

# Feature matrix: every remaining column, one row per sample
X = df.drop(columns='diabetes').to_numpy()

# Insert a length-1 time axis so X is [samples, timesteps, features]
X = X[:, None, :]

In [4]:
# Hold out 20% of the samples as the test set; the seed is fixed so the
# partition is the same on every run
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Define and compile the network: one LSTM layer feeding a sigmoid output unit
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

model = Sequential([
    # 50 LSTM units; the input shape is (timesteps, features) taken from X_train
    LSTM(units=50, input_shape=X_train.shape[1:]),
    # Single sigmoid unit for the binary 'diabetes' target
    Dense(units=1, activation='sigmoid'),
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

2024-03-14 17:54:46.326324: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /N/soft/sles15/python/gnu/3.10.5/lib:/opt/cray/pe/gcc/11.2.0/snos/lib64:/opt/cray/pe/papi/6.0.0.17/lib64:/opt/cray/libfabric/1.11.0.4.111/lib64
2024-03-14 17:54:46.326398: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2024-03-14 17:55:22.023282: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2024-03-14 17:55:22.023366: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (x1000c3s0b0n1): /proc/driver/nvidia/version does not exist
2024-03-14 17:55:22.024681: I tensorflow/core/platform/cpu_feature_guard.cc:193] Thi

In [7]:
# Train the LSTM with precision/recall tracking, then score it on the test set.
# All imports used by this cell are gathered here. classification_report is
# not called in this cell but stays imported so the name remains available.
from sklearn.metrics import classification_report, f1_score, roc_auc_score
from tensorflow.keras.metrics import Precision, Recall

# Re-compile `model` so Keras reports precision and recall next to accuracy
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy', Precision(), Recall()],
)

# Fit on the training split; 20% of it is held out by Keras for validation
history = model.fit(
    X_train, Y_train,
    epochs=100,
    batch_size=64,
    validation_split=0.2,
    verbose=2,
)

# evaluate() yields the loss followed by the metrics in compile() order
loss, accuracy, precision, recall = model.evaluate(X_test, Y_test, verbose=2)
print(f'Test Accuracy: {accuracy*100:.2f}%')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')

# F1 and ROC-AUC are computed with scikit-learn from the test-set predictions.
# The model has a single sigmoid output, so column 0 holds the predicted
# probability of the positive class.
y_probs = model.predict(X_test)[:, 0]

# Hard class labels at a 0.5 probability threshold (needed for F1)
y_pred = (y_probs > 0.5).astype('int32')

# F1 uses the thresholded labels
f1 = f1_score(Y_test, y_pred)
print(f'F1 Score: {f1:.2f}')

# ROC AUC uses the raw probabilities, not the thresholded labels
roc_auc = roc_auc_score(Y_test, y_probs)
print(f'ROC AUC Score: {roc_auc:.2f}')

Epoch 1/100
893/893 - 5s - loss: 0.0898 - accuracy: 0.9707 - precision: 0.9452 - recall: 0.6430 - val_loss: 0.0890 - val_accuracy: 0.9725 - val_precision: 0.9444 - val_recall: 0.6651 - 5s/epoch - 6ms/step
Epoch 2/100
893/893 - 3s - loss: 0.0898 - accuracy: 0.9711 - precision: 0.9487 - recall: 0.6451 - val_loss: 0.0895 - val_accuracy: 0.9714 - val_precision: 0.9558 - val_recall: 0.6393 - 3s/epoch - 3ms/step
Epoch 3/100
893/893 - 3s - loss: 0.0897 - accuracy: 0.9705 - precision: 0.9450 - recall: 0.6407 - val_loss: 0.0890 - val_accuracy: 0.9723 - val_precision: 0.9528 - val_recall: 0.6546 - 3s/epoch - 3ms/step
Epoch 4/100
893/893 - 3s - loss: 0.0898 - accuracy: 0.9705 - precision: 0.9443 - recall: 0.6399 - val_loss: 0.0891 - val_accuracy: 0.9725 - val_precision: 0.9556 - val_recall: 0.6565 - 3s/epoch - 3ms/step
Epoch 5/100
893/893 - 3s - loss: 0.0895 - accuracy: 0.9706 - precision: 0.9464 - recall: 0.6407 - val_loss: 0.0888 - val_accuracy: 0.9721 - val_precision: 0.9590 - val_recall: 0.64

Epoch 41/100
893/893 - 3s - loss: 0.0879 - accuracy: 0.9710 - precision: 0.9480 - recall: 0.6451 - val_loss: 0.0874 - val_accuracy: 0.9728 - val_precision: 0.9647 - val_recall: 0.6527 - 3s/epoch - 3ms/step
Epoch 42/100
893/893 - 3s - loss: 0.0878 - accuracy: 0.9712 - precision: 0.9501 - recall: 0.6463 - val_loss: 0.0890 - val_accuracy: 0.9720 - val_precision: 0.9208 - val_recall: 0.6765 - 3s/epoch - 3ms/step
Epoch 43/100
893/893 - 3s - loss: 0.0879 - accuracy: 0.9715 - precision: 0.9550 - recall: 0.6461 - val_loss: 0.0876 - val_accuracy: 0.9705 - val_precision: 0.9066 - val_recall: 0.6670 - 3s/epoch - 3ms/step
Epoch 44/100
893/893 - 3s - loss: 0.0879 - accuracy: 0.9714 - precision: 0.9537 - recall: 0.6463 - val_loss: 0.0875 - val_accuracy: 0.9735 - val_precision: 0.9813 - val_recall: 0.6517 - 3s/epoch - 3ms/step
Epoch 45/100
893/893 - 3s - loss: 0.0878 - accuracy: 0.9714 - precision: 0.9531 - recall: 0.6468 - val_loss: 0.0880 - val_accuracy: 0.9714 - val_precision: 0.9198 - val_recall:

Epoch 81/100
893/893 - 3s - loss: 0.0862 - accuracy: 0.9712 - precision: 0.9507 - recall: 0.6458 - val_loss: 0.0864 - val_accuracy: 0.9733 - val_precision: 0.9869 - val_recall: 0.6450 - 3s/epoch - 3ms/step
Epoch 82/100
893/893 - 3s - loss: 0.0862 - accuracy: 0.9717 - precision: 0.9578 - recall: 0.6477 - val_loss: 0.0878 - val_accuracy: 0.9721 - val_precision: 0.9209 - val_recall: 0.6775 - 3s/epoch - 3ms/step
Epoch 83/100
893/893 - 3s - loss: 0.0861 - accuracy: 0.9714 - precision: 0.9550 - recall: 0.6451 - val_loss: 0.0861 - val_accuracy: 0.9734 - val_precision: 0.9704 - val_recall: 0.6574 - 3s/epoch - 3ms/step
Epoch 84/100
893/893 - 3s - loss: 0.0861 - accuracy: 0.9715 - precision: 0.9538 - recall: 0.6477 - val_loss: 0.0862 - val_accuracy: 0.9735 - val_precision: 0.9652 - val_recall: 0.6622 - 3s/epoch - 3ms/step
Epoch 85/100
893/893 - 3s - loss: 0.0860 - accuracy: 0.9718 - precision: 0.9537 - recall: 0.6515 - val_loss: 0.0857 - val_accuracy: 0.9735 - val_precision: 0.9799 - val_recall: