# PRACTICAL ANSWERS FOR THE ENSEMBLE ASSIGNMENT

In [1]:
# Question 6: Write a Python program to:
    # Load the Breast Cancer dataset using sklearn.datasets.load_breast_cancer()
    # Train a Random Forest Classifier
    # Print the top 5 most important features based on feature importance scores.


from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import pandas as pd

# Load the breast-cancer dataset: feature matrix, labels and column names.
data = load_breast_cancer()
X = data.data
y = data.target
feature_names = data.feature_names

# Stratified 80/20 split; only the training portion is used in this cell.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Fit a 100-tree random forest with a fixed seed.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Pair every feature name with its importance score, then rank the rows
# from the highest score to the lowest.
importances = rf.feature_importances_
fi = pd.DataFrame({'feature': feature_names, 'importance': importances})
fi = fi.sort_values(by='importance', ascending=False)

# Show the five top-ranked rows, without the index column.
print(fi.iloc[:5].to_string(index=False))


             feature  importance
          worst area    0.140016
worst concave points    0.129530
        worst radius    0.097696
 mean concave points    0.090885
     worst perimeter    0.072226


In [3]:
#Question 7: Write a Python program to:
# Train a Bagging Classifier using Decision Trees on the Iris dataset
# Evaluate its accuracy and compare with a single Decision Tree

#  Bagging(Classifier) vs single DecisionTree on Iris
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load Iris and hold out 30% of the rows (stratified by class) for testing.
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# Baseline: one decision tree fitted on the training split.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
dt_predictions = dt.predict(X_test)
dt_acc = accuracy_score(y_test, dt_predictions)

# Ensemble: 50 bagged decision trees, scored on the same test split.
base_tree = DecisionTreeClassifier(random_state=42)
bag = BaggingClassifier(estimator=base_tree, n_estimators=50, random_state=42)
bag.fit(X_train, y_train)
bag_predictions = bag.predict(X_test)
bag_acc = accuracy_score(y_test, bag_predictions)

# Report both accuracies to four decimal places.
print(f"Decision Tree accuracy: {dt_acc:.4f}")
print(f"Bagging Classifier accuracy: {bag_acc:.4f}")


Decision Tree accuracy: 0.9333
Bagging Classifier accuracy: 0.9333


In [4]:
#Question 8: Write a Python program to:
# Train a Random Forest Classifier
# Tune hyperparameters max_depth and n_estimators using GridSearchCV
# Print the best parameters and final accuracy

# Random Forest + GridSearchCV
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score

# Load the breast-cancer dataset and make a stratified 80/20 split.
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Candidate values for the two hyperparameters being tuned (2 x 3 = 6 combos).
param_grid = {
    'n_estimators': [50, 100],
    'max_depth': [None, 5, 10],
}

# Search the grid with 3-fold cross-validation on the training split only,
# scoring each combination by accuracy.
rf = RandomForestClassifier(random_state=42)
grid = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=3,
    scoring='accuracy',
    n_jobs=1,
)
grid.fit(X_train, y_train)

# Report the winning combination, then score its estimator on the held-out test set.
print("Best parameters:", grid.best_params_)
best_rf = grid.best_estimator_
test_predictions = best_rf.predict(X_test)
print("Test accuracy:", accuracy_score(y_test, test_predictions))


Best parameters: {'max_depth': 5, 'n_estimators': 100}
Test accuracy: 0.956140350877193


In [6]:
#Question 9: Write a Python program to:
#Train a Bagging Regressor and a Random Forest Regressor on the California Housing dataset
# Compare their Mean Squared Errors (MSE)

#  BaggingRegressor vs RandomForestRegressor (California Housing or fallback)
from sklearn.datasets import fetch_california_housing, make_regression
from sklearn.ensemble import BaggingRegressor, RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load California Housing; when it cannot be fetched (e.g. no network and no
# cached copy) fall back to a synthetic regression problem so the cell still runs.
# Only OSError is caught: download/cache failures (URLError, HTTPError, timeouts)
# are OSError subclasses, while unrelated bugs are no longer silently masked.
try:
    cal = fetch_california_housing(as_frame=False)
    X, y = cal.data, cal.target
    dataset_used = "california_housing"
except OSError as fetch_error:
    # Surface the reason, so results on synthetic data are not mistaken for
    # results on the real dataset.
    print(f"Could not load California Housing ({fetch_error!r}); using synthetic data instead.")
    X, y = make_regression(n_samples=2000, n_features=8, noise=0.1, random_state=42)
    dataset_used = "synthetic_fallback"

# Unstratified 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# NOTE(review): the two ensembles differ in size (20 bagged trees vs 50 forest
# trees), so part of any MSE gap may come from the number of estimators rather
# than from the method itself. Left unchanged to keep the recorded results.
bag = BaggingRegressor(
    estimator=DecisionTreeRegressor(random_state=42),
    n_estimators=20,
    random_state=42,
)
rf = RandomForestRegressor(n_estimators=50, random_state=42, n_jobs=1)

bag.fit(X_train, y_train)
rf.fit(X_train, y_train)

# Mean squared error of each model on the held-out test split.
mse_bag = mean_squared_error(y_test, bag.predict(X_test))
mse_rf = mean_squared_error(y_test, rf.predict(X_test))

print("Dataset used:", dataset_used)
print("Bagging Regressor MSE:", mse_bag)
print("Random Forest Regressor MSE:", mse_rf)


Dataset used: california_housing
Bagging Regressor MSE: 0.26425036270576024
Random Forest Regressor MSE: 0.2572979293772426
