Q21. Train a Bagging Classifier using Decision Trees on a sample dataset and print model accuracy

In [None]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Synthetic binary-classification data, split 80/20 with a fixed seed so the
# reported accuracy is reproducible.
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# The base learner is passed positionally on purpose: scikit-learn 1.2 renamed the
# `base_estimator` keyword to `estimator` and 1.4 removed the old name, while the
# first positional argument is the base learner in every release.
bag_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=50, random_state=42)
bag_clf.fit(X_train, y_train)

y_pred = bag_clf.predict(X_test)
print(f'Bagging Classifier Accuracy: {accuracy_score(y_test, y_pred):.4f}')


Q22. Train a Bagging Regressor using Decision Trees and evaluate using Mean Squared Error (MSE)

In [None]:
from sklearn.ensemble import BaggingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Synthetic regression data, split 80/20 with a fixed seed.
X, y = make_regression(n_samples=1000, n_features=20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# The base learner is passed positionally: the `base_estimator` keyword was renamed
# to `estimator` in scikit-learn 1.2 and removed in 1.4, but the first positional
# slot is the base learner in every release.
bag_reg = BaggingRegressor(DecisionTreeRegressor(), n_estimators=50, random_state=42)
bag_reg.fit(X_train, y_train)

y_pred = bag_reg.predict(X_test)

print(f'Bagging Regressor MSE: {mean_squared_error(y_test, y_pred):.4f}')


Q23. Train a Random Forest Classifier on the Breast Cancer dataset and print feature importance scores

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_breast_cancer
import pandas as pd

# Load the Breast Cancer dataset and hold out 20% of the rows for testing.
data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.2,
    random_state=42,
)

# Fit a 50-tree forest with a fixed seed.
rfc = RandomForestClassifier(n_estimators=50, random_state=42).fit(X_train, y_train)

# Attach the dataset's feature names to the importances, then rank them from
# largest to smallest before showing the leading entries.
importance_by_feature = pd.Series(rfc.feature_importances_, index=data.feature_names)
feature_importances = importance_by_feature.sort_values(ascending=False)
print('Feature Importances:')
print(feature_importances.head())


Q24. Train a Random Forest Regressor and compare its performance with a single Decision Tree

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Shared synthetic regression problem for both models (80/20 split, fixed seed).
X, y = make_regression(n_samples=1000, n_features=20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the 50-tree forest and the single-tree baseline on the same training rows.
rf_reg = RandomForestRegressor(n_estimators=50, random_state=42).fit(X_train, y_train)
dt_reg = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

# Score both models on the same held-out rows.
y_pred_rf = rf_reg.predict(X_test)
y_pred_dt = dt_reg.predict(X_test)
rf_mse = mean_squared_error(y_test, y_pred_rf)
dt_mse = mean_squared_error(y_test, y_pred_dt)

print(f'Random Forest Regressor MSE: {rf_mse:.4f}')
print(f'Decision Tree Regressor MSE: {dt_mse:.4f}')


Q25. Compute the Out-of-Bag (OOB) Score for a Random Forest Classifier

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

# Synthetic binary-classification data, split 80/20 with a fixed seed.
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# oob_score=True asks the forest to score itself during fit; bootstrap sampling
# is requested explicitly alongside it.
forest_options = {
    'n_estimators': 50,
    'bootstrap': True,
    'oob_score': True,
    'random_state': 42,
}
rf_oob = RandomForestClassifier(**forest_options)
rf_oob.fit(X_train, y_train)

oob_accuracy = rf_oob.oob_score_
print(f'Random Forest OOB Score: {oob_accuracy:.4f}')

Q26. Train a Bagging Classifier using SVM as a base estimator and print accuracy

In [None]:

from sklearn.ensemble import BaggingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Bag 50 SVMs over the current train/test split. The base learner is passed
# positionally because the `base_estimator` keyword was renamed to `estimator`
# in scikit-learn 1.2 and removed in 1.4; the positional form works on both.
bag_svm = BaggingClassifier(SVC(), n_estimators=50, random_state=42)
bag_svm.fit(X_train, y_train)
y_pred_svm = bag_svm.predict(X_test)

print(f'Bagging Classifier with SVM Accuracy: {accuracy_score(y_test, y_pred_svm):.4f}')


Q27. Train a Random Forest Classifier with different numbers of trees and compare accuracy

In [None]:

from sklearn.ensemble import BaggingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Fit one Random Forest per ensemble size on the current split and report the
# held-out accuracy of each, so the effect of the number of trees is visible.
# (This cell previously repeated the bagged-SVM experiment from Q26.)
tree_counts = [10, 50, 100, 200]
for n_trees in tree_counts:
    forest = RandomForestClassifier(n_estimators=n_trees, random_state=42)
    forest.fit(X_train, y_train)
    forest_accuracy = accuracy_score(y_test, forest.predict(X_test))
    print(f'Random Forest with {n_trees} trees Accuracy: {forest_accuracy:.4f}')


Q28. Train a Bagging Classifier using Logistic Regression as a base estimator and print AUC score

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

# Bag 50 logistic-regression models. The base learner is passed positionally
# because the `base_estimator` keyword was renamed to `estimator` in
# scikit-learn 1.2 and removed in 1.4; the positional form works on both.
bag_lr = BaggingClassifier(LogisticRegression(), n_estimators=50, random_state=42)
bag_lr.fit(X_train, y_train)

# AUC is computed from scores rather than hard labels, so take the second
# probability column (index 1) returned by the ensemble.
y_pred_proba = bag_lr.predict_proba(X_test)[:, 1]

print(f'Bagging Classifier with Logistic Regression AUC Score: {roc_auc_score(y_test, y_pred_proba):.4f}')


In [None]:
# Q29. Train a Random Forest Regressor and analyze feature importance scores
from sklearn.ensemble import RandomForestRegressor
import pandas as pd

# Use a dedicated regression dataset. X_train / y_train at this point hold the
# binary classification split, which is not a meaningful regression target, and
# they must stay untouched for the classification cells that follow.
X_reg, y_reg = make_regression(n_samples=1000, n_features=20, random_state=42)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)

rf_reg = RandomForestRegressor(n_estimators=50, random_state=42)
rf_reg.fit(X_train_reg, y_train_reg)

# Label each importance with its column position so the ranking stays readable
# after sorting.
feature_labels = [f'feature_{i}' for i in range(X_train_reg.shape[1])]
feature_importances = pd.Series(rf_reg.feature_importances_, index=feature_labels).sort_values(ascending=False)
print('Random Forest Regressor Feature Importances:')
print(feature_importances.head())


In [None]:
#Q30. Train an ensemble model using both Bagging and Random Forest and compare accuracy
from sklearn.ensemble import VotingClassifier

# Combine the bagged SVM and the Random Forest. Soft voting averages the two
# members' class probabilities; with only two voters, hard voting would resolve
# every disagreement to the lowest class label instead of weighing confidence.
voting_clf = VotingClassifier(
    estimators=[('bagging', bag_svm), ('random_forest', rf_oob)],
    voting='soft',
)
voting_clf.fit(X_train, y_train)
y_pred_voting = voting_clf.predict(X_test)

# Report each fitted member next to the ensemble so the accuracies can be compared.
for member_name, member in voting_clf.named_estimators_.items():
    member_accuracy = accuracy_score(y_test, member.predict(X_test))
    print(f'{member_name} Accuracy: {member_accuracy:.4f}')
print(f'Ensemble Model Accuracy: {accuracy_score(y_test, y_pred_voting):.4f}')


In [None]:
# Train a Random Forest Classifier and tune hyperparameters using GridSearchCV
from sklearn.model_selection import GridSearchCV

# Candidate settings: every combination of tree count and depth cap is tried.
param_grid = {
    'n_estimators': [10, 50, 100],
    'max_depth': [5, 10, None],
}

# 5-fold cross-validated search scored by accuracy, using a seeded forest.
forest_template = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(
    estimator=forest_template,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_cv_score = grid_search.best_score_
print(f'Best Parameters: {best_params}')
print(f'Best Score: {best_cv_score:.4f}')


In [None]:
# Train a Bagging Regressor with different numbers of base estimators and compare performance
# Build the regression split this cell needs under its own names, so the
# classification variables X_train / y_train used by other cells stay intact.
X_reg, y_reg = make_regression(n_samples=1000, n_features=20, random_state=42)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)

n_estimators_list = [10, 50, 100]
for n in n_estimators_list:
    # Base learner passed positionally: the `base_estimator` keyword was renamed
    # to `estimator` in scikit-learn 1.2 and removed in 1.4.
    bag_reg = BaggingRegressor(DecisionTreeRegressor(), n_estimators=n, random_state=42)
    bag_reg.fit(X_train_reg, y_train_reg)
    y_pred_reg = bag_reg.predict(X_test_reg)
    print(f'Bagging Regressor with {n} base estimators MSE: {mean_squared_error(y_test_reg, y_pred_reg):.4f}')


In [None]:
# Train a Random Forest Classifier and analyze misclassified samples
# Fit a forest on the current split instead of reusing `rfc`: that model was
# trained on the Breast Cancer data, whose feature count does not match X_test.
rf_errors = RandomForestClassifier(n_estimators=50, random_state=42)
rf_errors.fit(X_train, y_train)

y_pred = rf_errors.predict(X_test)

# Boolean mask of test rows whose predicted label differs from the true label.
misclassified_mask = y_pred != y_test
misclassified = X_test[misclassified_mask]
print(f'Number of Misclassified Samples: {len(misclassified)}')


In [None]:
# Train a Bagging Classifier and compare its performance with a single Decision Tree Classifier
# Baseline: one decision tree on the current split.
dt_clf = DecisionTreeClassifier(random_state=42)
dt_clf.fit(X_train, y_train)
y_pred_dt = dt_clf.predict(X_test)

# Bagged decision trees on the same split, so the comparison is like-for-like.
# (The bagged-SVM predictions were previously reported under this label.)
# The base learner is passed positionally because the `base_estimator` keyword
# was renamed in scikit-learn 1.2 and removed in 1.4.
bag_dt_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=50, random_state=42)
bag_dt_clf.fit(X_train, y_train)
y_pred_bag_dt = bag_dt_clf.predict(X_test)

print(f'Decision Tree Classifier Accuracy: {accuracy_score(y_test, y_pred_dt):.4f}')
print(f'Bagging Classifier Accuracy: {accuracy_score(y_test, y_pred_bag_dt):.4f}')


In [None]:
# Train a Random Forest Classifier and visualize the confusion matrix
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Fit a forest on the current split instead of reusing `rfc`: that model was
# trained on the Breast Cancer data, whose feature count does not match X_test.
rf_cm = RandomForestClassifier(n_estimators=50, random_state=42)
rf_cm.fit(X_train, y_train)

y_pred = rf_cm.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

# Draw on an explicit Axes so the labels are attached to this figure only.
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()


In [None]:
# Train a Stacking Classifier using Decision Trees, SVM, and Logistic Regression, and compare accuracy
from sklearn.ensemble import StackingClassifier

# Base learners for the stack. The tree and the probability-calibrated SVM both
# use randomness, so they are seeded to make the reported accuracy reproducible.
estimators = [
    ('dt', DecisionTreeClassifier(random_state=42)),
    ('svm', SVC(probability=True, random_state=42)),
    ('lr', LogisticRegression()),
]
stack_clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())

stack_clf.fit(X_train, y_train)
y_pred_stack = stack_clf.predict(X_test)

# Report each fitted base learner next to the stack so the accuracies can be compared.
for base_name, base_model in stack_clf.named_estimators_.items():
    base_accuracy = accuracy_score(y_test, base_model.predict(X_test))
    print(f'{base_name} Accuracy: {base_accuracy:.4f}')
print(f'Stacking Classifier Accuracy: {accuracy_score(y_test, y_pred_stack):.4f}')


In [None]:
# Train a Random Forest Classifier and print the top 5 most important features
# Pair the forest's importances with the dataset's feature names, rank them in
# descending order, and show the five largest.
named_importances = pd.Series(data=rfc.feature_importances_, index=data.feature_names)
feature_importances = named_importances.sort_values(ascending=False)
print('Top 5 Important Features:')
print(feature_importances.iloc[:5])


In [None]:
# Train a Bagging Classifier and evaluate performance using Precision, Recall, and F1-score
from sklearn.metrics import precision_score, recall_score, f1_score

# Predict once with the bagging classifier, then score those predictions with
# each metric in turn.
y_pred_bag = bag_clf.predict(X_test)

metric_table = (
    ('Precision', precision_score),
    ('Recall', recall_score),
    ('F1 Score', f1_score),
)
for metric_label, metric_fn in metric_table:
    print(f'{metric_label}: {metric_fn(y_test, y_pred_bag):.4f}')


In [None]:
# Train a Random Forest Classifier and analyze the effect of max_depth on accuracy
# Refit the same seeded 50-tree forest once per depth setting and report the
# held-out accuracy for each. `rfc` ends up holding the last model fitted.
max_depth_list = [5, 10, None]
for depth in max_depth_list:
    rfc = RandomForestClassifier(
        n_estimators=50,
        max_depth=depth,
        random_state=42,
    ).fit(X_train, y_train)
    y_pred = rfc.predict(X_test)
    depth_accuracy = accuracy_score(y_test, y_pred)
    print(f'Random Forest Classifier with max_depth={depth} Accuracy: {depth_accuracy:.4f}')


In [None]:
# Train a Bagging Regressor using different base estimators (DecisionTree and KNeighbors) and compare performance
from sklearn.neighbors import KNeighborsRegressor

# Build the regression split this cell needs under its own names, so the
# classification variables X_train / y_train used by other cells stay intact.
X_reg, y_reg = make_regression(n_samples=1000, n_features=20, random_state=42)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)

for base_estimator in [DecisionTreeRegressor(), KNeighborsRegressor()]:
    # Base learner passed positionally: the `base_estimator` keyword was renamed
    # to `estimator` in scikit-learn 1.2 and removed in 1.4.
    bag_reg = BaggingRegressor(base_estimator, n_estimators=50, random_state=42)
    bag_reg.fit(X_train_reg, y_train_reg)
    y_pred_reg = bag_reg.predict(X_test_reg)
    print(f'Bagging Regressor with {type(base_estimator).__name__} MSE: {mean_squared_error(y_test_reg, y_pred_reg):.4f}')


In [None]:
# Train a Random Forest Classifier and evaluate its performance using ROC-AUC Score
from sklearn.metrics import roc_auc_score

# Score the current `rfc` model: take the second probability column (index 1)
# and compute ROC-AUC against the true test labels.
positive_class_column = 1
y_pred_proba = rfc.predict_proba(X_test)[:, positive_class_column]
rf_auc = roc_auc_score(y_test, y_pred_proba)
print(f'Random Forest Classifier ROC-AUC Score: {rf_auc:.4f}')


In [None]:
# Train a Random Forest Classifier and evaluate its performance using ROC-AUC Score
from sklearn.metrics import roc_auc_score

# NOTE(review): this cell repeats the ROC-AUC computation of the preceding cell
# on the same model and data; it is kept so the notebook's cell count is unchanged.
class_probabilities = rfc.predict_proba(X_test)
y_pred_proba = class_probabilities[:, 1]
print('Random Forest Classifier ROC-AUC Score: {:.4f}'.format(roc_auc_score(y_test, y_pred_proba)))


In [None]:
# Train a Bagging Classifier and evaluate its performance using cross-validation
from sklearn.model_selection import cross_val_score

# 5-fold cross-validated accuracy of the bagging classifier over the full X / y,
# summarised by the mean of the per-fold scores.
bag_scores = cross_val_score(estimator=bag_clf, X=X, y=y, scoring='accuracy', cv=5)
mean_cv_accuracy = bag_scores.mean()
print(f'Bagging Classifier Cross-Validation Accuracy: {mean_cv_accuracy:.4f}')


In [None]:
# Train a Random Forest Classifier and plot the Precision-Recall curve
from sklearn.metrics import precision_recall_curve

# Precision/recall pairs computed from the scores in y_pred_proba; the returned
# thresholds are not needed for the plot.
precision, recall, _ = precision_recall_curve(y_test, y_pred_proba)

# Recall on the x-axis, precision on the y-axis, one marker per point.
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(recall, precision, marker='.')
ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
ax.set_title('Precision-Recall Curve')
plt.show()


In [None]:
# Train a Stacking Classifier with Random Forest and Logistic Regression and compare accuracy
# Base learners for the stack. The forest is seeded so repeated runs of this
# cell report the same accuracy.
estimators = [
    ('rf', RandomForestClassifier(n_estimators=50, random_state=42)),
    ('lr', LogisticRegression()),
]
stack_clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())

stack_clf.fit(X_train, y_train)
y_pred_stack = stack_clf.predict(X_test)

# Report each fitted base learner next to the stack so the accuracies can be compared.
for base_name, base_model in stack_clf.named_estimators_.items():
    base_accuracy = accuracy_score(y_test, base_model.predict(X_test))
    print(f'{base_name} Accuracy: {base_accuracy:.4f}')
print(f'Stacking Classifier Accuracy: {accuracy_score(y_test, y_pred_stack):.4f}')


In [None]:
# Train a Bagging Regressor with different levels of bootstrap samples and compare performance
# Build the regression split this cell needs under its own names, so the
# classification variables X_train / y_train used by other cells stay intact.
X_reg, y_reg = make_regression(n_samples=1000, n_features=20, random_state=42)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)

# Each value is passed as max_samples, the fraction of the training set drawn
# for every base estimator.
bootstrap_samples = [0.5, 1.0]
for sample in bootstrap_samples:
    # Base learner passed positionally: the `base_estimator` keyword was renamed
    # to `estimator` in scikit-learn 1.2 and removed in 1.4.
    bag_reg = BaggingRegressor(DecisionTreeRegressor(), n_estimators=50, max_samples=sample, random_state=42)
    bag_reg.fit(X_train_reg, y_train_reg)
    y_pred_reg = bag_reg.predict(X_test_reg)
    print(f'Bagging Regressor with max_samples={sample} MSE: {mean_squared_error(y_test_reg, y_pred_reg):.4f}')
