In [3]:
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, ExtraTreesClassifier
# "ensemble" = several estimators combined, with their predictions aggregated (e.g. by majority vote)
from sklearn.neighbors import KNeighborsClassifier

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict

# Iris as plain arrays: feature matrix and integer class labels.
data_i, targeti = load_iris(return_X_y=True)

# Stratified, seeded split.  test_size=0.7 sends 70 % of the rows to the
# held-out part, so the models below are fitted on the remaining 30 %.
data_train, data_test, target_train, target_test = train_test_split(
    data_i,
    targeti,
    test_size=0.7,
    stratify=targeti,
    random_state=42,
)

# Bagging: every base estimator is trained on a random half of the rows and a
# random half of the feature columns.
# The base estimator is passed positionally because its keyword was renamed
# from `base_estimator` to `estimator` in newer scikit-learn releases; the
# first positional slot is the base estimator in both old and new versions.
bagging = BaggingClassifier(KNeighborsClassifier(),
                            max_samples=0.5,  # half rows
                            max_features=0.5,  # half columns
                            random_state=42)  # same seed as the train/test split

# RandomForest vs. Bagging:
#   * both expose n_estimators (Bagging falls back to its default when the
#     argument is omitted, as above),
#   * RandomForest always builds decision trees, so the base estimator
#     cannot be swapped out.
rndForest = RandomForestClassifier(n_estimators=10, random_state=42)

# max_samples only takes effect together with bootstrap=True.  ExtraTrees
# defaults to bootstrap=False, in which case max_samples is silently ignored
# by older scikit-learn versions and rejected with a ValueError by recent
# ones, so bootstrap is enabled explicitly to really get "half rows".
exTree = ExtraTreesClassifier(bootstrap=True,
                              max_samples=0.5,  # half rows
                              max_features=0.5,  # half columns
                              n_estimators=10,
                              random_state=42)

# All three ensembles share the same fit / evaluate API, so handle them in a loop.
fitted_ensembles = (bagging, rndForest, exTree)

for ensemble_clf in fitted_ensembles:
    ensemble_clf.fit(data_train, target_train)

# Mean accuracy of a 5-fold cross-validation run on the held-out split.
# NOTE(review): cross_val_score refits a fresh clone of the estimator for
# every fold, so these numbers come from models trained on folds of
# data_test, not from the fits on data_train performed just above.
for ensemble_clf in fitted_ensembles:
    print(cross_val_score(ensemble_clf, data_test, target_test, cv=5).mean())


0.980952380952381
0.9619047619047618
0.9619047619047618


In [4]:
# Out-of-fold class predictions (5-fold CV on the held-out split), one array
# per ensemble, printed in the order bagging -> random forest -> extra trees.
for ensemble_clf in (bagging, rndForest, exTree):
    print(cross_val_predict(ensemble_clf, data_test, target_test, cv=5))

[0 0 2 0 0 0 2 1 1 1 1 1 0 2 1 0 2 1 0 0 0 0 2 2 1 1 2 1 0 2 1 2 1 1 0 1 1
 0 1 2 2 0 2 1 0 1 0 0 0 1 0 0 2 1 2 0 2 1 0 2 2 0 2 2 2 2 1 2 0 2 1 2 0 0
 2 1 0 1 0 2 2 1 1 2 1 0 1 1 1 0 0 2 1 2 2 2 1 2 1 0 0 1 0 1 1]
[0 0 2 0 0 0 2 1 1 1 1 1 0 2 1 0 2 1 0 0 0 0 2 2 1 1 2 1 0 2 1 2 1 1 0 1 1
 0 1 2 2 0 2 1 0 1 0 0 0 1 0 0 2 1 2 0 2 1 0 2 2 0 2 2 2 2 1 2 0 2 1 2 0 0
 2 1 0 1 0 2 2 1 1 2 1 0 1 1 1 0 0 2 1 2 2 2 1 2 1 0 0 1 0 1 1]
[0 0 2 0 0 0 2 1 1 1 1 1 0 2 1 0 2 1 0 0 0 0 2 2 1 1 2 1 0 2 1 2 1 1 0 1 1
 0 2 2 2 0 2 1 0 1 0 0 0 1 0 0 2 1 2 0 2 1 0 2 2 0 2 2 2 2 1 2 0 2 2 2 0 0
 2 1 0 1 0 2 2 1 1 2 1 0 1 1 1 0 0 2 1 2 2 2 1 2 1 0 0 1 0 1 1]


In [5]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost with 100 boosting rounds, fitted on the training split
# (fit() returns the estimator itself, so the call can be chained).
ada_clf = AdaBoostClassifier(n_estimators=100).fit(data_train, target_train)

# Accuracy of the fitted model on the held-out split.
print(ada_clf.score(data_test, target_test))

# 5-fold cross-validation on the held-out split: mean fold accuracy first,
# then the out-of-fold predictions.
ada_cv_scores = cross_val_score(ada_clf, data_test, target_test, cv=5)
print(ada_cv_scores.mean())

ada_cv_predictions = cross_val_predict(ada_clf, data_test, target_test, cv=5)
print(ada_cv_predictions)

0.9619047619047619
0.9523809523809523
[0 0 2 0 0 0 2 1 1 1 1 1 0 2 1 0 2 1 0 0 0 0 2 2 1 1 2 1 0 2 1 2 1 1 0 2 1
 0 1 2 2 0 2 1 0 1 0 0 0 1 0 0 2 2 2 0 2 1 0 2 2 0 2 2 2 2 1 2 0 2 2 2 0 0
 2 1 0 1 0 2 2 1 1 2 1 0 1 1 1 0 0 2 1 2 2 2 1 2 1 0 0 1 0 1 1]


In [8]:
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor

# Gradient-boosted classifier with 100 boosting stages, fitted on the
# training split (fit() returns the estimator itself).
gbc = GradientBoostingClassifier(n_estimators=100).fit(data_train, target_train)

# Accuracy of the fitted model on the held-out split.
print(gbc.score(data_test, target_test))

# 5-fold cross-validation on the held-out split: mean fold accuracy first,
# then the out-of-fold class predictions.
gbc_cv_scores = cross_val_score(gbc, data_test, target_test, cv=5)
print(gbc_cv_scores.mean())

gbc_cv_predictions = cross_val_predict(gbc, data_test, target_test, cv=5)
print(gbc_cv_predictions)

0.9428571428571428
0.9523809523809523
[0 0 2 0 0 0 2 1 1 1 1 1 0 2 1 0 2 1 0 0 0 0 2 2 1 1 2 1 0 2 1 2 1 1 0 1 1
 0 2 2 2 0 2 1 0 1 0 0 0 1 0 0 2 1 2 0 2 1 0 2 2 0 2 2 2 2 1 2 0 2 1 2 0 0
 2 1 0 1 0 2 2 1 1 2 1 0 1 1 1 0 0 2 1 2 2 2 1 2 1 0 0 1 0 1 1]


In [9]:
# Gradient-boosted *regressor* applied to the integer class labels: the
# predictions are real-valued and score() reports R^2 instead of accuracy.
# Open question carried over from the original note: the model is built from
# decision trees, so could its results be unstable?
gbr = GradientBoostingRegressor(n_estimators=100).fit(data_train, target_train)

# R^2 of the fitted model on the held-out split.
print(gbr.score(data_test, target_test))

# 5-fold cross-validation on the held-out split: mean fold R^2 first, then
# the out-of-fold (continuous) predictions.
gbr_cv_scores = cross_val_score(gbr, data_test, target_test, cv=5)
print(gbr_cv_scores.mean())

gbr_cv_predictions = cross_val_predict(gbr, data_test, target_test, cv=5)
print(gbr_cv_predictions)

0.9243006688730622
0.9261015964741853
[ 8.00910343e-05  8.00910343e-05  2.01123916e+00  8.00910343e-05
 -1.40908063e-04 -1.40908063e-04  1.99859874e+00  1.10413450e+00
  8.55008340e-01  9.61433957e-01  9.93588881e-01  9.20617858e-01
  8.00910343e-05  2.00731793e+00  1.00071675e+00  8.00910343e-05
  1.94076711e+00  9.99746454e-01  8.00910343e-05  1.76350185e-03
 -1.40908063e-04  1.74116782e-04  1.94711294e+00  2.02473955e+00
  1.01945980e+00  1.01320665e+00  1.81856782e+00  1.00148364e+00
  1.74116782e-04  2.01760515e+00  1.56023379e+00  2.00266958e+00
  1.00093930e+00  9.85129702e-01  1.22057546e-03  1.56085226e+00
  9.97287133e-01  1.74116782e-04  1.94036875e+00  1.99789599e+00
  1.99940535e+00  1.74116782e-04  1.99737990e+00  9.98702974e-01
  2.80764727e-04  1.00383542e+00  2.80764727e-04  2.80764727e-04
  2.80764727e-04  9.69247481e-01  2.80764727e-04  1.17313878e-03
  1.99924267e+00  1.01051826e+00  1.99784143e+00  2.80764727e-04
  2.00021800e+00  9.98992152e-01  1.49961270e-03  2.