# GRADIENT BOOSTING MODEL ANALYSIS

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import warnings

In [2]:
heart_df = pd.read_csv('heart_failure_clinical_records_dataset.csv')

In [3]:
X = heart_df.drop(columns='DEATH_EVENT', axis=1)
Y = heart_df['DEATH_EVENT']

In [51]:
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [53]:
X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y, test_size=0.2, stratify=Y, random_state=2)

In [55]:
gb_model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=0)
gb_model.fit(X_train, Y_train)

In [57]:
Y_pred_gb = gb_model.predict(X_test)

In [59]:
accuracy = accuracy_score(Y_test, Y_pred_gb)
print(f"Gradient Boosting Test Accuracy: {accuracy * 100:.2f}%")

Gradient Boosting Test Accuracy: 85.00%


In [61]:
conf_matrix = confusion_matrix(Y_test, Y_pred_gb)
print("\nConfusion Matrix:\n", conf_matrix)


Confusion Matrix:
 [[36  5]
 [ 4 15]]


In [63]:
class_report = classification_report(Y_test, Y_pred_gb)
print("\nClassification Report:\n", class_report)


Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.88      0.89        41
           1       0.75      0.79      0.77        19

    accuracy                           0.85        60
   macro avg       0.82      0.83      0.83        60
weighted avg       0.85      0.85      0.85        60



In [65]:
warnings.filterwarnings("ignore", message="X does not have valid feature names")
data_1 = np.array((49, 1, 80, 0, 30, 1, 427000, 1, 138, 0, 0, 12))  
data_reshaped = data_1.reshape(1, -1)  
data_standard_gb = scaler.transform(data_reshaped)  


actual_label = heart_df[(heart_df.values[:, :-1] == data_1).all(axis=1)]
print("Actual label from dataset:", actual_label['DEATH_EVENT'].values if not actual_label.empty else "No matching record found")

prediction_gb = gb_model.predict(data_standard_gb)

if prediction_gb[0] == 1:
    print('Gradient Boosting Prediction for Data: Heart Disease')
else:
    print('Gradient Boosting Prediction for Data: No Heart Disease')

Actual label from dataset: [0]
Gradient Boosting Prediction for Data: No Heart Disease


In [67]:
probabilities_gb = gb_model.predict_proba(data_standard_gb)
print(f"Predicted probabilities: {probabilities_gb}")

prediction_confidence = probabilities_gb[0][1]  
print(f"Confidence of heart disease prediction: {prediction_confidence:.2f}")

Predicted probabilities: [[0.90137945 0.09862055]]
Confidence of heart disease prediction: 0.10


# --------------------------------------------------------------------------------------

In [77]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from imblearn.over_sampling import SMOTE  
from collections import Counter

In [134]:
heart_df = pd.read_csv('modified_heart_failure_data_1000.csv')

In [136]:
X = heart_df.drop(columns='DEATH_EVENT', axis=1)
Y = heart_df['DEATH_EVENT'].astype(int)

In [138]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [140]:
print("Original target distribution:", Counter(Y))

Original target distribution: Counter({0: 646, 1: 354})


In [142]:
smote = SMOTE(random_state=2)
X_resampled, Y_resampled = smote.fit_resample(X_scaled, Y)
print("Resampled target distribution:", Counter(Y_resampled))

Resampled target distribution: Counter({1: 646, 0: 646})


In [144]:
X_train, X_test, Y_train, Y_test = train_test_split(X_resampled, Y_resampled, test_size=0.2, stratify=Y_resampled, random_state=2)

In [146]:
gb_model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=0)

In [148]:
gb_model.fit(X_train, Y_train)

In [150]:
Y_pred_gb = gb_model.predict(X_test)

In [152]:
accuracy = accuracy_score(Y_test, Y_pred_gb)
print(f"Gradient Boosting Test Accuracy: {accuracy * 100:.2f}%")

Gradient Boosting Test Accuracy: 98.07%


In [154]:
conf_matrix = confusion_matrix(Y_test, Y_pred_gb)
print("\nConfusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", classification_report(Y_test, Y_pred_gb))


Confusion Matrix:
 [[127   3]
 [  2 127]]

Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.98      0.98       130
           1       0.98      0.98      0.98       129

    accuracy                           0.98       259
   macro avg       0.98      0.98      0.98       259
weighted avg       0.98      0.98      0.98       259



In [156]:
warnings.filterwarnings("ignore", message="X does not have valid feature names")
data_1 = np.array((49, 1, 80, 0, 30, 1, 427000, 1, 138, 0, 0, 12))  
data_reshaped = data_1.reshape(1, -1)  
data_standard_gb = scaler.transform(data_reshaped)  


actual_label = heart_df[(heart_df.values[:, :-1] == data_1).all(axis=1)]
print("Actual label from dataset:", actual_label['DEATH_EVENT'].values if not actual_label.empty else "No matching record found")

prediction_gb = gb_model.predict(data_standard_gb)

if prediction_gb[0] == 1:
    print('Gradient Boosting Prediction for Data: Heart Disease')
else:
    print('Gradient Boosting Prediction for Data: No Heart Disease')

Actual label from dataset: [0 0 0 0]
Gradient Boosting Prediction for Data: No Heart Disease


# --------------------------------------------------------------------------------------

In [158]:
df = pd.read_csv('modified_heart_failure_data_1000.csv')
lower_percentile = 0.05
upper_percentile = 0.95
for column in df.columns:
    if pd.api.types.is_numeric_dtype(df[column]):
        # Calculate lower and upper caps
        lower_cap = df[column].quantile(lower_percentile)
        upper_cap = df[column].quantile(upper_percentile)
        df[column] = df[column].clip(lower=lower_cap, upper=upper_cap)

print(df.head())

    age  anaemia  creatinine_phosphokinase  diabetes  ejection_fraction  \
0  75.0        0                       582         0                 20   
1  55.0        0                      2262         0                 38   
2  65.0        0                       146         0                 20   
3  50.0        1                       111         0                 20   
4  65.0        1                       160         1                 20   

   high_blood_pressure  platelets  serum_creatinine  serum_sodium  sex  \
0                    1     265000                 1           130    1   
1                    0     263358                 1           136    1   
2                    0     162000                 1           130    1   
3                    0     210000                 1           137    1   
4                    0     327000                 2           130    0   

   smoking  time  DEATH_EVENT  
0        0    11            1  
1        0    11            1  
2       

  df[column] = df[column].clip(lower=lower_cap, upper=upper_cap)
  df[column] = df[column].clip(lower=lower_cap, upper=upper_cap)


In [160]:
X = df.drop(columns='DEATH_EVENT', axis=1)
Y = df['DEATH_EVENT'].astype(int)

In [162]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [164]:
print("Original target distribution:", Counter(Y))

Original target distribution: Counter({0: 646, 1: 354})


In [166]:
smote = SMOTE(random_state=2)
X_resampled, Y_resampled = smote.fit_resample(X_scaled, Y)

In [168]:
print("Resampled target distribution:", Counter(Y_resampled))

Resampled target distribution: Counter({1: 646, 0: 646})


In [170]:
X_train, X_test, Y_train, Y_test = train_test_split(X_resampled, Y_resampled, test_size=0.2, stratify=Y_resampled, random_state=2)

In [172]:
gb_model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=0)

In [174]:
gb_model.fit(X_train, Y_train)

In [176]:
Y_pred_gb = gb_model.predict(X_test)

In [178]:
accuracy = accuracy_score(Y_test, Y_pred_gb)
print(f"Gradient Boosting Test Accuracy: {accuracy * 100:.2f}%")

Gradient Boosting Test Accuracy: 98.84%


In [180]:
conf_matrix = confusion_matrix(Y_test, Y_pred_gb)
print("\nConfusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", classification_report(Y_test, Y_pred_gb))



Confusion Matrix:
 [[127   3]
 [  0 129]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.98      0.99       130
           1       0.98      1.00      0.99       129

    accuracy                           0.99       259
   macro avg       0.99      0.99      0.99       259
weighted avg       0.99      0.99      0.99       259



In [182]:
warnings.filterwarnings("ignore", message="X does not have valid feature names")
data_1 = np.array((49, 1, 80, 0, 30, 1, 427000, 1, 138, 0, 0, 12))  
data_reshaped = data_1.reshape(1, -1)  
data_standard_gb = scaler.transform(data_reshaped)  


actual_label = heart_df[(heart_df.values[:, :-1] == data_1).all(axis=1)]
print("Actual label from dataset:", actual_label['DEATH_EVENT'].values if not actual_label.empty else "No matching record found")

prediction_gb = gb_model.predict(data_standard_gb)

if prediction_gb[0] == 1:
    print('Gradient Boosting Prediction for Data: Heart Disease')
else:
    print('Gradient Boosting Prediction for Data: No Heart Disease')

Actual label from dataset: [0 0 0 0]
Gradient Boosting Prediction for Data: No Heart Disease


In [9]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)

In [11]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [13]:
gb_model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=0)
gb_model.fit(X_train, Y_train)

In [15]:
Y_pred_gb = gb_model.predict(X_test)

In [17]:
accuracy = accuracy_score(Y_test, Y_pred_gb)
print(f"Gradient Boosting Test Accuracy: {accuracy * 100:.2f}%")

Gradient Boosting Test Accuracy: 85.00%
