In [44]:
import pandas as pd
import numpy as np

In [46]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

In [47]:
from sklearn.model_selection import train_test_split

In [48]:
import statistics as st

In [49]:
# Read the heart.csv dataset into a DataFrame.
# The path is absolute; adjust it if the file lives somewhere else.
HEART_CSV_PATH = '/content/heart.csv'
df = pd.read_csv(HEART_CSV_PATH)

In [51]:
# Splitting the dataset: every column except 'target' is a feature,
# and the 'target' column is the label.
# `columns=` already names the axis, so no separate `axis` argument is needed.
x = df.drop(columns='target')
y = df['target']

In [52]:
# Hold out 20% of the rows as the test set; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

## **Create Function**

---



First, define a function that makes predictions over n folds of the training data and on the test set. For a given model, it returns the test-set predictions and the out-of-fold training-set predictions.

In [68]:
from sklearn.model_selection import StratifiedKFold

In [112]:
def Stacking(model, train, y, test, n_fold, random_state=1):
  """Build stacking meta-features for one base model with stratified K-fold.

  The training rows are split into ``n_fold`` shuffled, stratified folds. For
  every fold the model is refit on the other folds and then used to predict
  (a) the held-out fold, which fills that fold's slots in the out-of-fold
  training predictions, and (b) the full test set, whose predictions are
  summed and finally divided by ``n_fold``.

  Because the test predictions are averaged over folds they may be fractional,
  while the training predictions are the raw outputs of ``model.predict``.

  Parameters
  ----------
  model : estimator
      Object exposing ``fit(X=..., y=...)`` and ``predict(X)``. It is refit in
      place on every fold, so after the call it holds the last fold's fit.
  train : pandas.DataFrame
      Training features; rows are selected positionally via ``.iloc``.
  y : pandas.Series
      Training labels aligned row-for-row with ``train``.
  test : array-like with a ``.shape`` attribute
      Test features, passed unchanged to ``model.predict``.
  n_fold : int
      Number of folds.
  random_state : int, default 1
      Seed for the fold shuffling. The default reproduces the previously
      hard-coded value.

  Returns
  -------
  tuple of numpy.ndarray
      ``(test_pred, train_pred)`` -- note the order -- as column vectors of
      shape ``(n_test, 1)`` and ``(n_train, 1)``.
  """
  folds = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=random_state)
  test_pred = np.zeros((test.shape[0],))
  train_pred = np.zeros((train.shape[0],))

  for fit_idx, holdout_idx in folds.split(train, y.values):
    # Fit only on the rows outside the held-out fold.
    model.fit(X=train.iloc[fit_idx], y=y.iloc[fit_idx])

    # Each training row is predicted exactly once, by a model that never saw it.
    train_pred[holdout_idx] = model.predict(train.iloc[holdout_idx])
    # Accumulate this fold's test predictions; averaged after the loop.
    test_pred += model.predict(test)

  test_pred /= n_fold
  return test_pred.reshape(-1, 1), train_pred.reshape(-1, 1)


## **Now Create Two Base Models**

---



**Decision Tree**

In [113]:
# First base learner: a decision tree classifier with a fixed random_state.
model1 = DecisionTreeClassifier(random_state=1)

In [114]:
# Run the 10-fold stacking routine for the decision tree.
# The routine returns the test predictions first, then the training predictions.
test_pred1, train_pred1 = Stacking(
    model=model1,
    train=X_train,
    y=y_train,
    test=X_test,
    n_fold=10,
)

In [115]:
# Wrap the decision tree's single-column training predictions in a DataFrame.
train_pred1 = pd.DataFrame(train_pred1)

In [116]:
# Wrap the decision tree's single-column test predictions in a DataFrame.
test_pred1 = pd.DataFrame(test_pred1)

In [117]:
# Show the decision tree's out-of-fold predictions for the training rows.
print(train_pred1)

       0
0    1.0
1    1.0
2    1.0
3    1.0
4    1.0
..   ...
237  0.0
238  0.0
239  1.0
240  1.0
241  0.0

[242 rows x 1 columns]


In [118]:
# Show the decision tree's test predictions; they are averaged over the folds,
# so fractional values are expected.
print(test_pred1)

      0
0   0.0
1   0.8
2   0.3
3   0.0
4   0.2
..  ...
56  1.0
57  0.8
58  0.7
59  1.0
60  1.0

[61 rows x 1 columns]


**K - Nearest Neighbors**

In [119]:
# Second base learner: a k-nearest-neighbours classifier with default settings.
model2 = KNeighborsClassifier()

In [120]:
# Run the same 10-fold stacking routine for the k-nearest-neighbours model.
# The routine returns the test predictions first, then the training predictions.
test_pred2, train_pred2 = Stacking(
    model=model2,
    train=X_train,
    y=y_train,
    test=X_test,
    n_fold=10,
)

In [121]:
# Wrap the k-NN model's single-column training predictions in a DataFrame.
train_pred2 = pd.DataFrame(train_pred2)

In [122]:
# Wrap the k-NN model's single-column test predictions in a DataFrame.
test_pred2 = pd.DataFrame(test_pred2)

In [123]:
# Show the k-NN model's out-of-fold predictions for the training rows.
print(train_pred2)

       0
0    1.0
1    1.0
2    0.0
3    0.0
4    1.0
..   ...
237  1.0
238  1.0
239  1.0
240  1.0
241  1.0

[242 rows x 1 columns]


In [124]:
# Show the k-NN model's test predictions; they are averaged over the folds,
# so fractional values are expected.
print(test_pred2)

      0
0   0.1
1   0.0
2   1.0
3   0.0
4   1.0
..  ...
56  0.0
57  1.0
58  0.2
59  1.0
60  0.0

[61 rows x 1 columns]


**Meta-Model: Logistic Regression**

In [125]:
# Place the two base models' training predictions side by side (one column per
# model) to form the meta-model's training features.
# NOTE(review): this rebinds `df`, which earlier held the raw DataFrame read from
# the CSV; re-running the feature/label split cell after this one would fail.
df = np.concatenate([train_pred1, train_pred2], axis=1)

In [126]:
# Place the two base models' test predictions side by side, in the same column
# order as the training features built from train_pred1 and train_pred2.
df_test = np.concatenate([test_pred1, test_pred2], axis=1)

In [127]:
# Meta-model: logistic regression trained on the base models' predictions.
model = LogisticRegression(random_state=1)

In [128]:
# Fit the meta-model on the stacked training predictions against the training labels.
model.fit(df, y_train)

In [129]:
# Predict test labels from the stacked (fold-averaged) base-model test predictions.
y_pred = model.predict(df_test)

In [130]:
# Calculate accuracy
from sklearn.metrics import accuracy_score

In [131]:
# Accuracy of the stacked ensemble on the held-out test labels.
score = accuracy_score(y_test, y_pred)

In [132]:
# Report the test-set accuracy of the stacked ensemble.
print(f"Accuracy: {score}")

Accuracy: 0.7868852459016393
