In [1]:
import pandas as pd
import numpy as np

from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import train_test_split, GridSearchCV

In [2]:
# Read realheart.csv (path is relative to the current working directory).
df = pd.read_csv(filepath_or_buffer='realheart.csv')

In [3]:
# Separate the label column ('target') from the predictor columns.
y = df['target']
X = df.drop(columns=['target'])

In [4]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Shallow, heavily constrained random forest used as the bagging base learner.
# bootstrap=False: each tree is built from the whole sample the forest receives.
base_classifier = RandomForestClassifier(
    max_depth=3,
    max_leaf_nodes=9,
    min_samples_split=10,
    min_samples_leaf=2,
    bootstrap=False,
)

In [6]:
# Bag 50 copies of the base forest, resampling both rows and feature columns
# with replacement.
# random_state pins that resampling (and the per-estimator seeds the bagger
# derives from it) so the accuracies printed below can be reproduced on re-run.
clf = BaggingClassifier(
    estimator=base_classifier,
    n_estimators=50,
    bootstrap=True,
    bootstrap_features=True,
    random_state=42,
)

In [7]:
# Train the bagged ensemble on the training split.
clf.fit(X=X_train, y=y_train)

In [8]:
# Predicted labels for the held-out test split.
y_pred = clf.predict(X=X_test)

In [9]:
# Predicted labels for the training split (used to compare against test accuracy).
y_train_pred = clf.predict(X=X_train)

In [10]:
# Test accuracy. accuracy_score's signature is (y_true, y_pred), so the true
# labels go first.
print(accuracy_score(y_test, y_pred))

0.8852459016393442


In [11]:
# Training accuracy. accuracy_score's signature is (y_true, y_pred), so the
# true labels go first.
print(accuracy_score(y_train, y_train_pred))

0.859504132231405


In [17]:
# Exhaustive hyperparameter search over a bagged random forest.
#
# Grid keys prefixed with "estimator__" are forwarded to the wrapped
# RandomForestClassifier; the unprefixed keys configure the BaggingClassifier.
param_grid = {
    'bootstrap': [True, False],
    'bootstrap_features': [True, False],
    'estimator__n_estimators': [10, 50, 100],
    'estimator__max_depth': [3, 5, 7],
    'estimator__min_samples_split': [5, 10],
    'estimator__min_samples_leaf': [1, 2, 4],
    'estimator__max_leaf_nodes': [4, 6, 9],
}

# Default forest wrapped in a seeded bagger; the grid above overrides its settings.
base_rf = RandomForestClassifier()
bagging_rf = BaggingClassifier(estimator=base_rf, random_state=42)

# 5-fold cross-validated search, run on all available cores.
grid_search = GridSearchCV(
    estimator=bagging_rf,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_search.fit(X_train, y_train)

print(f"Best Parameters: {grid_search.best_params_}")

# Score the best configuration found by the search on the held-out test split.
best_bagging_rf = grid_search.best_estimator_
y_pred = best_bagging_rf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of Best Bagging Classifier: {accuracy:.2f}")

Fitting 5 folds for each of 648 candidates, totalling 3240 fits
Best Parameters: {'bootstrap': True, 'bootstrap_features': False, 'estimator__max_depth': 3, 'estimator__max_leaf_nodes': 6, 'estimator__min_samples_leaf': 2, 'estimator__min_samples_split': 5, 'estimator__n_estimators': 50}
Accuracy of Best Bagging Classifier: 0.90


# Bagging With SVM

In [12]:
from sklearn.svm import SVC

In [13]:
# Degree-3 polynomial-kernel SVM with C=1 and a fixed gamma of 0.01.
# NOTE(review): the features are passed in as-is; if they are on very different
# scales this kernel can be slow to fit - confirm whether scaling is wanted.
svc = SVC(
    kernel='poly',
    degree=3,
    gamma=0.01,
    C=1,
)

In [14]:
# Bag 50 SVCs, each trained on a bootstrap sample of the rows (all features
# kept), fitting in parallel on every available core.
# random_state pins the bootstrap draws so the accuracy printed below can be
# reproduced on re-run.
clf = BaggingClassifier(
    estimator=svc,
    n_estimators=50,
    bootstrap=True,
    bootstrap_features=False,
    n_jobs=-1,
    verbose=1,
    random_state=42,
)

In [15]:
# Train the bagged SVM ensemble on the training split.
clf.fit(X=X_train, y=y_train)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed: 42.6min remaining: 42.6min
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed: 51.5min finished


In [16]:
# Predicted labels from the bagged SVM for the held-out test split.
y_pred = clf.predict(X=X_test)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    0.2s finished


In [17]:
# Test accuracy of the bagged SVM. accuracy_score's signature is
# (y_true, y_pred), so the true labels go first.
print('Accuracy Score', accuracy_score(y_test, y_pred))

Accuracy Score 0.8032786885245902


In [None]:
# Step 1: Import necessary libraries
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Step 2: Split the data (80/20, seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Set up the SVM model
# SVMs are not scale invariant, and the bagged poly-kernel SVC fit recorded
# earlier in this notebook took ~51 minutes on the raw features. Standardising
# the features in a pipeline keeps this 80-fit search tractable. Because the
# scaler lives inside the estimator, it is re-fit on each bagging sample within
# each CV fold, so validation rows never influence the scaling.
svm = SVC()
scaled_svm = make_pipeline(StandardScaler(), svm)

# Step 4: Define the parameter grid for GridSearchCV
# (gamma and degree only affect the 'poly' kernel here; 'linear' ignores them.)
param_grid = {
    'C': [1, 3],
    'kernel': ['poly', 'linear'],
    'gamma': ['scale', 'auto'],
    'degree': [3, 5]
}

# Step 5: Create the Bagging Classifier with the scaled SVM as the base estimator
bagging_clf = BaggingClassifier(estimator=scaled_svm, n_estimators=10, random_state=42)

# Step 6: Set up GridSearchCV
# make_pipeline names the SVC step "svc", so from the bagger its parameters are
# addressed as estimator__svc__<name>.
search_space = {
    f'estimator__svc__{name}': values for name, values in param_grid.items()
}
grid_search = GridSearchCV(
    estimator=bagging_clf,
    param_grid=search_space,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    verbose=1,
)

# Step 7: Fit GridSearchCV to the training data
grid_search.fit(X_train, y_train)

# Step 8: Get the best parameters and the best model
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

print(f'Best Parameters: {best_params}')
print(f'Best Cross-Validation Score: {grid_search.best_score_:.4f}')



Fitting 5 folds for each of 16 candidates, totalling 80 fits


In [None]:
# Step 9: Evaluate the best model on the test data (and on the training data,
# to see how far apart the two scores are)
y_pred = best_model.predict(X_test)
y_train_pred = best_model.predict(X_train)

# accuracy_score's signature is (y_true, y_pred); use that order for both splits.
test_accuracy = accuracy_score(y_test, y_pred)
train_accuracy = accuracy_score(y_train, y_train_pred)

print(f'Test Accuracy: {test_accuracy:.4f}')
print('Accuracy on Training is:', train_accuracy)