# BAGGING

In [11]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold, train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier

In [5]:
iris = load_iris()

In [6]:
X = iris.data
y = iris.target

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)
[i.shape for i in [X_train, X_test, y_train, y_test]]

[(120, 4), (30, 4), (120,), (30,)]

In [12]:
kfold = KFold(n_splits=5, random_state=42)
cart = DecisionTreeClassifier()
model = BaggingClassifier(cart, 100, random_state=42)
results = cross_val_score(model, X_train, y_train, cv=kfold)

In [14]:
np.mean(results)

0.95

# BOOSTING

In [19]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score

In [21]:
cart = DecisionTreeClassifier()
model = AdaBoostClassifier(cart, 25, learning_rate=0.1)

In [22]:
model.fit(X_train, y_train)

AdaBoostClassifier(algorithm='SAMME.R',
          base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best'),
          learning_rate=0.1, n_estimators=25, random_state=None)

In [23]:
pred_label = model.predict(X_test)
accuracy_score(pred_label, y_test)

0.9333333333333333

# Stacking

In [24]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

In [26]:
clf1 = RandomForestClassifier(n_estimators=2)
clf2 = GaussianNB()
clf3 = KNeighborsClassifier(n_neighbors=2)
lr = LogisticRegression()

In [27]:
clf1.fit(X_train, y_train)
clf2.fit(X_train, y_train)
clf3.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=2, p=2,
           weights='uniform')

In [28]:
pred1 = clf1.predict(X_train)
pred2 = clf2.predict(X_train)
pred3 = clf3.predict(X_train)

In [31]:
acc1 = accuracy_score(pred1, y_train)
acc2 = accuracy_score(pred2, y_train)
acc3 = accuracy_score(pred3, y_train)

In [33]:
f = [pred1, pred2, pred3]
f = np.transpose(f)

In [35]:
f.shape

(120, 3)

In [36]:
lr.fit(f, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [37]:
pred4 = lr.predict(f)

In [38]:
acc4 = accuracy_score(pred4, y_train)

In [39]:
all_acc = [acc1, acc2, acc3, acc4]
print(all_acc)

[0.9666666666666667, 0.9666666666666667, 0.9833333333333333, 0.9916666666666667]
