In [None]:
from sklearn.datasets import fetch_olivetti_faces

# Fetch the Olivetti faces dataset directly as a (features, labels) pair.
X, y = fetch_olivetti_faces(return_X_y=True)

# Show the dimensions of the feature matrix and of the label vector.
(X.shape, y.shape)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out part of the data for evaluation. test_size=0.25 is scikit-learn's
# default when neither size is given; it is spelled out here for the reader.
# The fixed seed keeps the partition identical across runs.
X_tr, X_te, y_tr, y_te = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)
(X_tr.shape, X_te.shape, y_tr.shape, y_te.shape)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Baseline 1: k-nearest neighbours with scikit-learn's default settings.
modelo = KNeighborsClassifier()
knn_pr = modelo.fit(X_tr, y_tr).predict(X_te)

# Boolean mask of correct test predictions, shown with its mean (the accuracy).
knnhits = (knn_pr == y_te)
knnhits, knnhits.mean()

In [None]:
from sklearn.naive_bayes import GaussianNB

# Baseline 2: Gaussian naive Bayes with default settings.
modelo = GaussianNB()
gnb_pr = modelo.fit(X_tr, y_tr).predict(X_te)

# Boolean mask of correct test predictions, shown with its mean (the accuracy).
gnbhits = (gnb_pr == y_te)
gnbhits, gnbhits.mean()

In [None]:
from sklearn.linear_model import Perceptron

# Baseline 3: linear Perceptron with default settings.
modelo = Perceptron()
per_pr = modelo.fit(X_tr, y_tr).predict(X_te)

# Boolean mask of correct test predictions, shown with its mean (the accuracy).
perhits = (per_pr == y_te)
perhits, perhits.mean()

In [None]:
import numpy as np

# Stack the three hit masks (kNN, Gaussian NB, Perceptron) as rows, then
# display the transpose so that each row corresponds to one test sample.
hits = np.stack([knnhits, gnbhits, perhits], axis=0)
hits.transpose()

In [None]:
# Stack the three prediction vectors (kNN, Gaussian NB, Perceptron) as rows,
# then display the transpose so that each row holds the votes for one sample.
y_pr = np.stack([knn_pr, gnb_pr, per_pr], axis=0)
y_pr.transpose()

In [None]:
import numpy as np
from scipy import stats

# Manual hard voting: for every test sample, keep the label predicted most
# often by the three base classifiers.
#
# The vote matrix is rebuilt from the individual predictions so that this cell
# can be re-run safely. The previous version reduced `y_pr` in place, so a
# second execution operated on already-reduced data.
votes = np.stack((knn_pr, gnb_pr, per_pr))

# `stats.mode` returns shape (1, n_samples) on older SciPy releases and
# (n_samples,) on releases where `keepdims` defaults to False. Indexing the
# result with `[0][0]` therefore collapses to a single scalar on newer SciPy
# and the accuracy below would silently be computed against one label.
# Flattening the `.mode` field yields one label per sample on either version.
y_pr = np.ravel(stats.mode(votes, axis=0).mode)

# Boolean mask of correct voted predictions, shown with its mean (the accuracy).
vohits = y_pr == y_te
vohits, vohits.mean()

## Combinação de Classificadores

In [None]:
from sklearn.ensemble import VotingClassifier

# Same three base learners as the manual vote, combined by VotingClassifier.
modelo = VotingClassifier(
    estimators=[
        ('knn', KNeighborsClassifier()),
        ('naivebayes', GaussianNB()),
        ('perceptron', Perceptron()),
    ]
)
voh_pr = modelo.fit(X_tr, y_tr).predict(X_te)

# Boolean mask of correct test predictions, shown with its mean (the accuracy).
vohhits = (voh_pr == y_te)
vohhits, vohhits.mean()

In [None]:
from sklearn.ensemble import VotingClassifier

# Voting ensemble whose members differ only in the number of neighbours.
modelo = VotingClassifier(
    estimators=[
        ('knn', KNeighborsClassifier(n_neighbors=1)),
        ('knn2', KNeighborsClassifier(n_neighbors=5)),
        ('knn3', KNeighborsClassifier(n_neighbors=9)),
    ]
)
voh_pr = modelo.fit(X_tr, y_tr).predict(X_te)

# Boolean mask of correct test predictions, shown with its mean (the accuracy).
vohhits = (voh_pr == y_te)
vohhits, vohhits.mean()

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier

# Voting ensemble with a decision tree replacing the Perceptron.
#
# The tree is seeded explicitly: an unseeded DecisionTreeClassifier can pick
# different splits from run to run, and VotingClassifier takes no random_state
# of its own to pass down. random_state=42 matches the seed used by the other
# stochastic models in this notebook and makes the reported accuracy
# reproducible.
modelo = VotingClassifier([
    ('knn', KNeighborsClassifier()),
    ('naivebayes', GaussianNB()),
    ('arvore', DecisionTreeClassifier(random_state=42))
])

modelo.fit(X_tr, y_tr)
voh_pr = modelo.predict(X_te)

# Boolean mask of correct test predictions, shown with its mean (the accuracy).
vohhits = voh_pr == y_te
vohhits, vohhits.mean()

In [None]:
# A single seeded decision tree, as a reference point for the ensembles.
modelo = DecisionTreeClassifier(random_state=42)
dt_pr = modelo.fit(X_tr, y_tr).predict(X_te)

# Boolean mask of correct test predictions, shown with its mean (the accuracy).
dthits = (dt_pr == y_te)
dthits, dthits.mean()

## Diversificação por Reamostragem

In [None]:
from sklearn.ensemble import BaggingClassifier

# Bagging with the library's default base estimator and a fixed seed.
modelo = BaggingClassifier(random_state=42)
bag_pr = modelo.fit(X_tr, y_tr).predict(X_te)

# Boolean mask of correct test predictions, shown with its mean (the accuracy).
baghits = (bag_pr == y_te)
baghits, baghits.mean()

In [None]:
# Bagging again, this time passing the decision-tree base estimator explicitly.
modelo = BaggingClassifier(
    DecisionTreeClassifier(),
    random_state=42,
)
bag_pr = modelo.fit(X_tr, y_tr).predict(X_te)

# Boolean mask of correct test predictions, shown with its mean (the accuracy).
baghits = (bag_pr == y_te)
baghits, baghits.mean()

## Aumentando a Aleatoriedade para Aumentar a Diversidade

In [None]:
# Bagging over trees that use splitter='random' instead of the default splitter.
modelo = BaggingClassifier(
    DecisionTreeClassifier(splitter='random'),
    random_state=42,
)
bag_pr = modelo.fit(X_tr, y_tr).predict(X_te)

# Boolean mask of correct test predictions, shown with its mean (the accuracy).
baghits = (bag_pr == y_te)
baghits, baghits.mean()

In [None]:
# Same random-splitter bagging, with the ensemble size set to 100 trees.
modelo = BaggingClassifier(
    DecisionTreeClassifier(splitter='random'),
    n_estimators=100,
    random_state=42,
)
bag_pr = modelo.fit(X_tr, y_tr).predict(X_te)

# Boolean mask of correct test predictions, shown with its mean (the accuracy).
baghits = (bag_pr == y_te)
baghits, baghits.mean()

## Random Forest é um Bagging de Árvores de Decisão

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with default hyper-parameters and a fixed seed.
modelo = RandomForestClassifier(random_state=42)
rf_pr = modelo.fit(X_tr, y_tr).predict(X_te)

# Boolean mask of correct test predictions, shown with its mean (the accuracy).
rfhits = (rf_pr == y_te)
rfhits, rfhits.mean()

## Regularização do Bagging

In [None]:
# Random-splitter bagging of 100 trees with max_features=0.15, i.e. each base
# estimator is given a fraction of the features rather than all of them.
modelo = BaggingClassifier(
    DecisionTreeClassifier(splitter='random'),
    n_estimators=100,
    max_features=0.15,
    random_state=42,
)
bag_pr = modelo.fit(X_tr, y_tr).predict(X_te)

# Boolean mask of correct test predictions, shown with its mean (the accuracy).
baghits = (bag_pr == y_te)
baghits, baghits.mean()

## Florestas Extremamente Aleatórias

In [None]:
from sklearn.ensemble import ExtraTreesClassifier

# Extremely randomized trees with default hyper-parameters and a fixed seed.
modelo = ExtraTreesClassifier(random_state=42)
et_pr = modelo.fit(X_tr, y_tr).predict(X_te)

# Boolean mask of correct test predictions, shown with its mean (the accuracy).
ethits = (et_pr == y_te)
ethits, ethits.mean()

## Boosting

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost with the library's default base estimator and a fixed seed.
modelo = AdaBoostClassifier(random_state=42)
ab_pr = modelo.fit(X_tr, y_tr).predict(X_te)

# Boolean mask of correct test predictions, shown with its mean (the accuracy).
abhits = (ab_pr == y_te)
abhits, abhits.mean()

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost over random-splitter trees capped at depth 25, with the learning
# rate set to 0.15.
modelo = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=25, splitter='random'),
    learning_rate=0.15,
    random_state=42,
)
ab_pr = modelo.fit(X_tr, y_tr).predict(X_te)

# Boolean mask of correct test predictions, shown with its mean (the accuracy).
abhits = (ab_pr == y_te)
abhits, abhits.mean()

In [None]:
# Disabled cell: XGBoost experiment kept for reference only; nothing here runs.
# Original author's note: needs a GPU.

# from xgboost import XGBClassifier
# modelo = XGBClassifier(use_label_encoder=False, random_state=42)
# modelo.fit(X_tr, y_tr)
# xgb_pr = modelo.predict(X_te)
# xgbhits = xgb_pr == y_te
# xgbhits, sum(xgbhits)/len(xgbhits)

In [None]:
# Disabled cell: XGBoost variant with colsample_bynode=0.01 and
# learning_rate=0.15, kept for reference only; nothing here runs.
# from xgboost import XGBClassifier
# modelo = XGBClassifier(colsample_bynode=0.01, learning_rate=0.15, random_state=42)
# modelo.fit(X_tr, y_tr)
# xgb_pr = modelo.predict(X_te)
# xgbhits = xgb_pr == y_te
# xgbhits, sum(xgbhits)/len(xgbhits)

In [None]:
import warnings

# Install a blanket "ignore" filter: every warning category is silenced from
# this point on. NOTE(review): this also hides warnings that may matter
# (e.g. convergence or deprecation notices) - consider narrowing the filter.
warnings.simplefilter('ignore')

In [None]:
from sklearn.ensemble import StackingClassifier

# First-level member 1: the kNN / naive Bayes / Perceptron voting ensemble.
voting = VotingClassifier(
    estimators=[
        ('knn', KNeighborsClassifier()),
        ('naivebayes', GaussianNB()),
        ('perceptron', Perceptron()),
    ]
)

# Stack the voting ensemble with two seeded tree ensembles. cv=3 sets the
# number of folds used to build the meta-learner's training inputs, and
# passthrough=True also feeds the original features to the meta-learner.
modelo = StackingClassifier(
    estimators=[
        ('voting', voting),
        ('extratrees', ExtraTreesClassifier(random_state=42)),
        ('randonforest', RandomForestClassifier(random_state=42)),
    ],
    cv=3,
    passthrough=True,
)
sc_pr = modelo.fit(X_tr, y_tr).predict(X_te)

# Boolean mask of correct test predictions, shown with its mean (the accuracy).
schits = (sc_pr == y_te)
schits, schits.mean()


In [None]:
# Stack three random forests that differ only in their seed (42, 43, 44).
# cv=3 and passthrough=True are the same settings as the previous stacking cell.
modelo = StackingClassifier(
    estimators=[
        ('randonforest42', RandomForestClassifier(random_state=42)),
        ('randonforest43', RandomForestClassifier(random_state=43)),
        ('randonforest44', RandomForestClassifier(random_state=44)),
    ],
    cv=3,
    passthrough=True,
)
sc_pr = modelo.fit(X_tr, y_tr).predict(X_te)

# Boolean mask of correct test predictions, shown with its mean (the accuracy).
schits = (sc_pr == y_te)
schits, schits.mean()