# Ensemble Methods

In [None]:
import matplotlib.pyplot as plt
plt.style.use("seaborn-v0_8-whitegrid") # Plot style: white-grid theme applied to every figure in this notebook

# scikit-learn utilities: the evaluation metric, the data splitter, and the
# reference estimators used as base learners in the cells below.
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Autoreload mode 2 re-imports all modules before each cell executes, so edits
# to the local source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

To check your implementations, make sure that your models can learn from the data (even overfit it) and achieve almost perfect classification on the simple task below.

Here is the [description](https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits) of the data that will be used to test the models.

In [None]:
# Default figure size (width, height) in inches for all plots in this notebook.
plt.rcParams['figure.figsize'] = (10.0, 8.0)

# Import the implementations under test by name instead of `import *`, so it is
# clear where each ensemble class used below comes from.
from src.ensemble_methods import Bagging, RandomForest, Stacking, WeightedVoting

from sklearn import datasets
# Keep only the first two digit classes (0 and 1) -> a simple binary task.
data = datasets.load_digits(n_class=2)

In [None]:
# Feature matrix and label vector of the loaded digits subset.
X, y = data.data, data.target

# Hold out half of the samples for testing; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=0,
)

## 1. Bagging

In [None]:
# Bagging ensemble of 20 base learners; note that the estimator *class*
# (not an instance) is handed to the constructor.
model = Bagging(base_estimator=DecisionTreeClassifier, nr_estimators=20)
model.fit(X_train, y_train)

# accuracy_score is documented as (y_true, y_pred): ground truth goes first.
print("Train accuracy: ", accuracy_score(y_train, model.predict(X_train)))
print("Test accuracy: ", accuracy_score(y_test, model.predict(X_test)))

## 2. Random Forest (RF)

In [None]:
# NOTE(review): the positional 10 is presumably the number of trees -- confirm
# against the RandomForest signature and pass it by keyword once verified.
model = RandomForest(10)
model.fit(X_train, y_train)

# accuracy_score is documented as (y_true, y_pred): ground truth goes first.
print("Train accuracy: ", accuracy_score(y_train, model.predict(X_train)))
print("Test accuracy: ", accuracy_score(y_test, model.predict(X_test)))

## 3. Weighted Voting

In [None]:
# Seed the scikit-learn base estimators (same seed as the train/test split) so
# the reported accuracies are reproducible after a kernel restart.
estimators = [
    RandomForestClassifier(random_state=0),
    DecisionTreeClassifier(random_state=0),
]
model = WeightedVoting(estimators)
model.fit(X_train, y_train)

# accuracy_score is documented as (y_true, y_pred): ground truth goes first.
print("Train accuracy: ", accuracy_score(y_train, model.predict(X_train)))
print("Test accuracy: ", accuracy_score(y_test, model.predict(X_test)))

## 4. Stacking

In [None]:
# Reuses the `estimators` list created in the Weighted Voting cell, so that
# cell must be executed first. The second argument is a Bagging ensemble of
# 20 decision trees.
# NOTE(review): presumably the second argument is the meta-learner and
# cv=True, k=5 requests 5-fold cross-validated meta-features -- confirm
# against the Stacking signature.
model = Stacking(
    estimators,
    Bagging(base_estimator=DecisionTreeClassifier, nr_estimators=20),
    meta_features='prob',
    cv=True,
    k=5,
)
model.fit(X_train, y_train)

# accuracy_score is documented as (y_true, y_pred): ground truth goes first.
print("Train accuracy: ", accuracy_score(y_train, model.predict(X_train)))
print("Test accuracy: ", accuracy_score(y_test, model.predict(X_test)))