In [None]:
!pip uninstall numpy
!pip install numpy --upgrade

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import shap
import warnings
# NOTE(review): with no category/module filter this suppresses every warning
# raised after this point, including deprecation notices from the libraries
# imported above. Consider narrowing it once the notebook is stable.
warnings.filterwarnings("ignore")

In [None]:
# Load the dataset CSV into a DataFrame and preview the first rows.
# The location can be overridden with the HEART_DATA_PATH environment variable
# so the notebook runs on machines other than the author's; when the variable
# is unset, the original absolute path is used unchanged.
import os

DATA_PATH = os.environ.get(
    "HEART_DATA_PATH",
    r"C:\Users\jayan\Downloads\Heart disease predictor\Data\Heart_disease_cleveland_new.csv",
)
data_df = pd.read_csv(DATA_PATH)
data_df.head()

In [None]:
# Print a structural summary of the frame: column names, dtypes, non-null counts.
data_df.info()

In [None]:
# Show descriptive statistics (count, mean, std, min, quartiles, max) per column.
data_df.describe()

In [None]:
# Count missing entries in each column.
data_df.isna().sum()

In [None]:
# Bar chart of the number of rows for each value of the 'target' column.
fig, ax = plt.subplots()
sns.countplot(x='target', data=data_df, ax=ax)
ax.set_title("Heart Disease Distribution")
plt.show()

In [None]:
# Separate the label column ('target') from the remaining feature columns.
y = data_df['target']
X = data_df.drop(columns=['target'])

In [None]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both partitions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# Learn the per-feature mean and standard deviation from the training rows only,
# then apply that same transformation to both partitions so no test-set
# statistics leak into the fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Keep the feature column labels of X; they are passed to the SHAP summary plot
# later because the scaled matrices are handed over without column names
# (StandardScaler returns plain arrays by default).
feature_names = X.columns

In [None]:
# Train a gradient-boosted tree classifier on the standardized training data.
# `use_label_encoder` is intentionally not passed: XGBoost deprecated that
# argument and later removed it, so supplying it only produces an
# "unused parameter" warning on current releases.
# `eval_metric='logloss'` sets the evaluation metric; `random_state` fixes the seed.
model = XGBClassifier(eval_metric='logloss', random_state=42)
model.fit(X_train_scaled, y_train)

In [None]:
import joblib

# The model was fitted on StandardScaler-transformed features, so it is only
# usable together with the exact fitted scaler. Persist the scaler next to the
# model so inference code can reproduce the preprocessing.
joblib.dump(scaler, r'C:\Users\jayan\Downloads\Heart disease predictor\Models\xgb_heart_scaler.pkl')

# Persist the trained classifier (kept last so the cell output is unchanged).
joblib.dump(model, r'C:\Users\jayan\Downloads\Heart disease predictor\Models\xgb_heart_model.pkl')

In [None]:
# Hard class labels: used for the classification report and confusion matrix.
y_pred = model.predict(X_test_scaled)

# ROC AUC measures how well the model *ranks* positives above negatives, so it
# must be computed from continuous scores. Feeding it thresholded 0/1 labels
# collapses the ROC curve to a single operating point and misreports the AUC.
# Column 1 of predict_proba is the probability of the positive class.
y_proba = model.predict_proba(X_test_scaled)[:, 1]

print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("AUC Score:", roc_auc_score(y_test, y_proba))

In [None]:
# Initialize SHAP, using the scaled training data as the background dataset.
explainer = shap.Explainer(model, X_train_scaled)

# Wrap the scaled test matrix in a DataFrame carrying the original column
# labels. SHAP copies DataFrame column names into the resulting Explanation;
# with a bare NumPy array the bar and waterfall plots below would label every
# feature generically ("Feature 0", "Feature 1", ...).
X_test_named = pd.DataFrame(X_test_scaled, columns=feature_names)
shap_values = explainer(X_test_named)

# Summary plot: distribution of SHAP values per feature across the test set.
shap.summary_plot(shap_values, X_test_scaled, feature_names=feature_names)

# Feature importance bar plot (mean absolute SHAP value per feature).
shap.plots.bar(shap_values)

# Individual prediction explanation for the first test row.
shap.plots.waterfall(shap_values[0])