In [42]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


In [62]:
# Load dataset

df = pd.read_csv('realheart.csv')

X = df.drop(columns = 'target')
y = df['target']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the base models
base_models = [
    ('rf', RandomForestClassifier(n_estimators=50, random_state=42, max_depth = 3, min_samples_leaf = 5, min_samples_split = 10
                                  ,bootstrap = True)),
    ('gb', GradientBoostingClassifier(n_estimators=50, random_state=42, learning_rate = 0.1)),
    ('svc', SVC(probability=True, random_state=42)),
#     ('knn', KNeighborsClassifier(n_neighbors = 9, leaf_size = 10))
]

# Define the meta-learner
meta_model = DecisionTreeClassifier(max_depth = 3)

# Create the Stacking Classifier
stacking_clf = StackingClassifier(estimators=base_models, final_estimator=meta_model, cv=5, n_jobs=-1)

# Train the Stacking Classifier
stacking_clf.fit(X_train, y_train)

# Predict on the test set
y_pred = stacking_clf.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of Stacking Classifier: {accuracy:.2f}")




Accuracy of Stacking Classifier: 0.92


In [63]:
y_train_pred = stacking_clf.predict(X_train)

In [64]:
print("Training Accuracy is ", accuracy_score(y_train_pred, y_train))

Training Accuracy is  0.8305785123966942


In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import StackingClassifier, RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

df = pd.read_csv('realheart.csv')
X = df.drop(columns = 'target')
y = df['target']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define the base models
base_models = [
    ('rf', RandomForestClassifier(random_state=42)),
    ('gb', GradientBoostingClassifier(random_state=42)),
    ('svc', SVC(probability=True, random_state=42))
]

# Define the meta-learner as a DecisionTreeClassifier
meta_model = DecisionTreeClassifier(random_state=42)

# Create the Stacking Classifier
stacking_clf = StackingClassifier(estimators=base_models, final_estimator=meta_model, cv=5, n_jobs=-1)

# Define the hyperparameter grid for all models including the meta-model
param_grid = {
    'rf__n_estimators': [50, 100, 200],
    'rf__max_depth': [3, 5, 7],
    'gb__n_estimators': [50, 100, 200],
    'gb__learning_rate': [0.01, 0.1, 0.2],
    'gb__max_depth': [3, 5, 7],
    'svc__C': [0.1, 1, 10],
    'svc__kernel': ['linear', 'rbf'],
    'final_estimator__max_depth': [3, 5, 7],
    'final_estimator__min_samples_split': [2, 5, 10]
}

# Create a GridSearchCV object
grid_search = GridSearchCV(estimator=stacking_clf, param_grid=param_grid, cv=5, n_jobs=-1, verbose=1)

# Fit the GridSearchCV object to the training data
grid_search.fit(X_train, y_train)

# Print the best parameters found by GridSearchCV
print(f"Best Parameters: {grid_search.best_params_}")

# Use the best estimator to make predictions on the test set
best_stacking_clf = grid_search.best_estimator_
y_pred = best_stacking_clf.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of Best Stacking Classifier: {accuracy:.2f}")


Fitting 5 folds for each of 13122 candidates, totalling 65610 fits
