In [29]:
# Import necessary libraries (the dataset is loaded in the next cell)
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_predict
from sklearn.linear_model import LogisticRegression

In [2]:
# Read the dataset from the CSV file that sits next to the notebook
DATA_PATH = "heart.csv"
df = pd.read_csv(DATA_PATH)

In [3]:
# Preview the first five rows (five is head()'s default row count)
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


In [4]:
# Define the features and target variable.
# The target is read first so a missing 'target' column fails loudly here.
y = df['target']
# The preview of `df` lists an "Unnamed: 0" column ahead of the real features.
# If the CSV really carries it, it is presumably a saved row index (TODO
# confirm against the file): not a predictor, and it would leak row order into
# every model. errors='ignore' makes the drop a no-op when the column is absent.
X = df.drop(columns=['target', 'Unnamed: 0'], errors='ignore')

In [5]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

**Bagging**

Random Forest

In [6]:
from sklearn.ensemble import RandomForestClassifier

In [7]:
# Build a random forest of 100 trees with a fixed seed
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)

In [8]:
# Train the random forest on the training split
rf_classifier.fit(X=X_train, y=y_train)

In [9]:
# Label the held-out test rows with the trained random forest
y_pred = rf_classifier.predict(X=X_test)

In [10]:
# Report the test-set metrics for the random forest predictions in `y_pred`
for label, scorer in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1-Score:", f1_score),
):
    print(label, scorer(y_test, y_pred))

Accuracy: 0.9853658536585366
Precision: 1.0
Recall: 0.970873786407767
F1-Score: 0.9852216748768473


**Boosting**

AdaBoost Classifier

In [11]:
# Define the base classifier: a decision tree capped at depth 1 (a single
# split), which the AdaBoost ensemble below uses as its base learner
base_classifier = DecisionTreeClassifier(max_depth=1)

In [12]:
# Define the AdaBoost classifier on top of the base classifier.
# The base learner is passed positionally on purpose: scikit-learn renamed the
# keyword from `base_estimator` to `estimator` in 1.2 and removed the old name
# in 1.4, while the first positional parameter is the base learner on both
# sides of the rename.
# random_state is fixed, like the other models in this notebook, so that
# reruns reproduce the same ensemble.
adaboost_classifier = AdaBoostClassifier(base_classifier, n_estimators=100, random_state=42)

In [13]:
# Train the AdaBoost ensemble on the training split
adaboost_classifier.fit(X=X_train, y=y_train)



In [14]:
# Label the held-out test rows with the trained AdaBoost ensemble
y_pred = adaboost_classifier.predict(X=X_test)

In [15]:
# Score the AdaBoost predictions in `y_pred` against the test labels
for label, scorer in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1-Score:", f1_score),
):
    print(label, scorer(y_test, y_pred))

Accuracy: 0.8926829268292683
Precision: 0.9175257731958762
Recall: 0.8640776699029126
F1-Score: 0.89


Gradient Boosting Classifier

In [16]:
from sklearn.ensemble import GradientBoostingClassifier

In [17]:
# Build the gradient boosting model: 100 depth-3 trees, learning rate 0.1, fixed seed
gb_classifier = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
)

In [18]:
# Train the gradient boosting model on the training split
gb_classifier.fit(X=X_train, y=y_train)

In [19]:
# Label the held-out test rows with the trained gradient boosting model
y_pred = gb_classifier.predict(X=X_test)

In [20]:
# Report the test-set metrics for the gradient boosting predictions in `y_pred`
for label, scorer in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1-Score:", f1_score),
):
    print(label, scorer(y_test, y_pred))

Accuracy: 0.9317073170731708
Precision: 0.9158878504672897
Recall: 0.9514563106796117
F1-Score: 0.9333333333333335


XGBoost Classifier

In [21]:
import xgboost as xgb

In [22]:
# Build the XGBoost model with the binary logistic objective and a fixed seed
xgb_classifier = xgb.XGBClassifier(
    objective="binary:logistic",
    random_state=42,
)

In [23]:
# Train the XGBoost model on the training split
xgb_classifier.fit(X=X_train, y=y_train)

In [24]:
# Label the held-out test rows with the trained XGBoost model
y_pred = xgb_classifier.predict(X=X_test)

In [25]:
# Report the test-set metrics for the XGBoost predictions in `y_pred`
for label, scorer in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1-Score:", f1_score),
):
    print(label, scorer(y_test, y_pred))

Accuracy: 0.9853658536585366
Precision: 1.0
Recall: 0.970873786407767
F1-Score: 0.9852216748768473


**Stacking**

In [43]:
# Create fresh, unfitted estimators for the stack and pair each with a short name
stack_rf = RandomForestClassifier(n_estimators=100, random_state=42)
stack_gb = GradientBoostingClassifier(n_estimators=100, random_state=42)
base_models = [('rf', stack_rf), ('gb', stack_gb)]

In [44]:
# Define the meta model: a logistic regression with default settings that is
# later fitted on the stacked predictions of the base models
meta_model = LogisticRegression()

In [45]:
# Fit every base model on the training split
for model in dict(base_models).values():
    model.fit(X_train, y_train)

In [46]:
# Generate the predictions of the base models on the test set
base_preds = []
for name, model in base_models:
    preds = cross_val_predict(model, X_test, y_test, cv=5, method='predict_proba')[:, 1]
    base_preds.append(preds)

In [47]:
# Stack the predictions of the base models into a meta feature matrix
meta_features = np.column_stack(base_preds)

In [48]:
# Train the meta model on the meta feature matrix
meta_model.fit(meta_features, y_test)

In [49]:
# Generate the predictions of the stacked model on the test set
stacked_preds = meta_model.predict(meta_features)


In [50]:
# Score the stacked predictions against the test labels: precision, recall, F1
precision, recall, f1 = (
    scorer(y_test, stacked_preds)
    for scorer in (precision_score, recall_score, f1_score)
)


In [51]:
# Print the stacked model's accuracy followed by the metrics computed above
for label, value in (
    ("Stacked Model Accuracy:", accuracy_score(y_test, stacked_preds)),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
):
    print(label, value)

Stacked Model Accuracy: 0.8390243902439024
Precision: 0.8301886792452831
Recall: 0.8543689320388349
F1 Score: 0.8421052631578948
