In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
from imblearn.over_sampling import SMOTE
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam
import shap
import matplotlib.pyplot as plt

# Data Loading
data = pd.read_csv('heart2.csv')

# Data Preprocessing
# Encode categorical variables
label_encoders = {}
for col in data.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Handle missing values in numeric columns
numeric_cols = data.select_dtypes(include=[np.number]).columns
data[numeric_cols] = data[numeric_cols].fillna(data[numeric_cols].median())

# Feature and Target Split
X = data.drop('HeartDisease', axis=1)
y = data['HeartDisease']

# Feature Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Handle class imbalance with SMOTE
smote = SMOTE()
X_res, y_res = smote.fit_resample(X_scaled, y)

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.2, stratify=y_res, random_state=42)

# Neural Network Model
model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))
model.add(Dense(64, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.3))
model.add(Dense(32, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.3))
model.add(Dense(16, activation='relu'))
model.add(Dense(2, activation='softmax'))

# Compile Model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Model Training
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=150, batch_size=32, shuffle=True, verbose=2)

# Evaluation
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

print(classification_report(y_test, y_pred_classes))
print("ROC AUC Score:", roc_auc_score(y_test, y_pred_classes))




Epoch 1/150
26/26 - 2s - 60ms/step - accuracy: 0.5603 - loss: 0.8965 - val_accuracy: 0.5490 - val_loss: 0.6758
Epoch 2/150
26/26 - 0s - 3ms/step - accuracy: 0.6059 - loss: 0.7930 - val_accuracy: 0.6324 - val_loss: 0.6435
Epoch 3/150
26/26 - 0s - 3ms/step - accuracy: 0.5985 - loss: 0.7591 - val_accuracy: 0.6716 - val_loss: 0.6181
Epoch 4/150
26/26 - 0s - 3ms/step - accuracy: 0.6441 - loss: 0.7534 - val_accuracy: 0.7010 - val_loss: 0.5994
Epoch 5/150
26/26 - 0s - 3ms/step - accuracy: 0.6441 - loss: 0.7319 - val_accuracy: 0.6961 - val_loss: 0.5821
Epoch 6/150
26/26 - 0s - 3ms/step - accuracy: 0.6724 - loss: 0.6830 - val_accuracy: 0.7010 - val_loss: 0.5634
Epoch 7/150
26/26 - 0s - 3ms/step - accuracy: 0.7266 - loss: 0.6283 - val_accuracy: 0.7206 - val_loss: 0.5475
Epoch 8/150
26/26 - 0s - 3ms/step - accuracy: 0.7254 - loss: 0.6113 - val_accuracy: 0.7304 - val_loss: 0.5334
Epoch 9/150
26/26 - 0s - 3ms/step - accuracy: 0.7106 - loss: 0.6035 - val_accuracy: 0.7402 - val_loss: 0.5200
Epoch 10/



Instance 1 Explanation:


DimensionError: Length of features is not equal to the length of shap_values!

In [14]:
# XAI using SHAP
for i in range(10):
    print(f"Instance {i+1} Explanation:")
    shap.force_plot(
        explainer.expected_value[0], 
        shap_values[0][i], 
        features=X_test[i],  # Directly index the NumPy array
        feature_names=X.columns,  # Ensure X.columns is defined
        matplotlib=True
    )
# Save Model
model.save('optimized_heart_model.keras')

Instance 1 Explanation:


DimensionError: Length of features is not equal to the length of shap_values!