# In [None]:
# Q1-Q6: Bagging in Machine Learning with different base learners

# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris

# Classification data: the Iris dataset returned by load_iris().
data = load_iris()
X, y = data.data, data.target

# Q4: Bagging for classification and regression
# Hold out 30% of the samples as a test set; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Initialize the base learners that each bagging ensemble will wrap.
decision_tree = DecisionTreeClassifier(random_state=42)
logistic_regression = LogisticRegression(max_iter=1000, random_state=42)
svm = SVC(random_state=42)

# Q2 & Q3: Bagging with different base learners (Decision Tree, Logistic Regression, SVM)
# Each ensemble trains 50 copies of its base learner and is fitted on the
# training split, then used to predict the held-out test split.
#
# The base learner is passed positionally on purpose: its keyword was
# `base_estimator` up to scikit-learn 1.1, was renamed to `estimator` in 1.2,
# and the old name was removed in 1.4. The first positional slot is the base
# learner under either name, so this form runs on old and new releases alike.

# 1. Decision Tree Bagging
bagging_tree = BaggingClassifier(decision_tree, n_estimators=50, random_state=42)
bagging_tree.fit(X_train, y_train)
y_pred_tree = bagging_tree.predict(X_test)

# 2. Logistic Regression Bagging
bagging_logreg = BaggingClassifier(logistic_regression, n_estimators=50, random_state=42)
bagging_logreg.fit(X_train, y_train)
y_pred_logreg = bagging_logreg.predict(X_test)

# 3. SVM Bagging
bagging_svm = BaggingClassifier(svm, n_estimators=50, random_state=42)
bagging_svm.fit(X_train, y_train)
y_pred_svm = bagging_svm.predict(X_test)

# Q5: Score every ensemble on the held-out test split (plain accuracy).
accuracy_tree, accuracy_logreg, accuracy_svm = (
    accuracy_score(y_test, predictions)
    for predictions in (y_pred_tree, y_pred_logreg, y_pred_svm)
)

# Report each accuracy as a percentage with two decimals.
for template, accuracy in (
    ("Accuracy of Decision Tree Bagging: {:.2f}%", accuracy_tree),
    ("Accuracy of Logistic Regression Bagging: {:.2f}%", accuracy_logreg),
    ("Accuracy of SVM Bagging: {:.2f}%", accuracy_svm),
):
    print(template.format(accuracy * 100))

# Q6: 10-fold cross-validation over the full dataset (X, y) for a comparison
# that does not depend on a single train/test split.
cv_tree, cv_logreg, cv_svm = (
    cross_val_score(ensemble, X, y, cv=10)
    for ensemble in (bagging_tree, bagging_logreg, bagging_svm)
)

# Report the mean fold accuracy of each ensemble as a percentage.
for template, fold_scores in (
    ("\n10-fold Cross-Validation Accuracy of Decision Tree Bagging: {:.2f}%", cv_tree),
    ("10-fold Cross-Validation Accuracy of Logistic Regression Bagging: {:.2f}%", cv_logreg),
    ("10-fold Cross-Validation Accuracy of SVM Bagging: {:.2f}%", cv_svm),
):
    print(template.format(np.mean(fold_scores) * 100))

# Q4: Bagging works for both classification and regression.
# For regression, BaggingRegressor is used the same way as BaggingClassifier.
# The Diabetes dataset is the regression example here: `load_boston` was
# removed in scikit-learn 1.2, so importing it fails on current releases,
# whereas `load_diabetes` ships with the library and needs no download.
# NOTE: the reported metrics therefore differ from the old Boston figures.

from sklearn.ensemble import BaggingRegressor
from sklearn.datasets import load_diabetes
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.tree import DecisionTreeRegressor

# Load regression dataset
diabetes = load_diabetes()
X_reg, y_reg = diabetes.data, diabetes.target

# Split data for regression task (30% held out, fixed seed for reproducibility)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, test_size=0.3, random_state=42
)

# Create a decision tree regressor to serve as the base learner
regressor = DecisionTreeRegressor(random_state=42)

# Bagging Regressor
# The base learner is passed positionally because its keyword changed from
# `base_estimator` to `estimator` in scikit-learn 1.2 (old name removed in
# 1.4); the positional form works with either name.
bagging_regressor = BaggingRegressor(regressor, n_estimators=50, random_state=42)
bagging_regressor.fit(X_train_reg, y_train_reg)
y_pred_reg = bagging_regressor.predict(X_test_reg)

# Evaluate performance on the test split using mean squared error and R-squared
mse = mean_squared_error(y_test_reg, y_pred_reg)
r2 = r2_score(y_test_reg, y_pred_reg)

print("\nBagging Regressor Performance:")
print("Mean Squared Error: {:.2f}".format(mse))
print("R-squared: {:.2f}".format(r2))
