In [None]:
from sklearn.datasets import load_iris
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
import pandas as pd


In [None]:
# Load Iris as pandas objects and build one frame that carries a readable label column.
iris = load_iris(as_frame=True)
train = iris['data']      # feature columns of the full dataset (not a train split)
target = iris['target']   # class codes, translated to names through `mapping` below
mapping = dict(enumerate(['Setosa', 'Versicolour', 'Virginica']))
target_name = target.map(mapping)
df = pd.concat([train, target_name], axis=1)
df.head()

In [None]:
# Turn the labelled frame back into a feature matrix and integer class codes.
feature_frame = df.drop(columns='target')
X = feature_frame.to_numpy()
inverse_map = dict(zip(mapping.values(), mapping.keys()))
y = df['target'].map(inverse_map).to_numpy().astype(int)
# Hold out 20% of the rows; the fixed seed keeps the split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.2, random_state=0xC0FFEE
)

In [None]:
# Peek at the first five training rows, then at their labels.
for preview in (X_train, y_train):
  print(preview[:5])

In [None]:
############
# AdaBoost #
############

# Train a bunch of weak learners - default is decision tree of depth 1, i.e. a stump.
# The seed matches the one used for the train/test split so that the per-stump
# accuracies and weights printed below are reproducible across kernel restarts.
clf = AdaBoostClassifier(n_estimators=100, random_state=0xC0FFEE)
clf.fit(X_train, y_train)

In [None]:
# Report test accuracy and boosting weight for the first few weak learners.
N_STUMPS_SHOWN = 5
for idx in range(N_STUMPS_SHOWN):
  stump = clf.estimators_[idx]
  stump_accuracy = stump.score(X_test, y_test)
  stump_weight = clf.estimator_weights_[idx]
  print(f'accuracy of stump {idx}: {stump_accuracy}, weight: {stump_weight}')

In [None]:
# Score of the whole boosted ensemble on the held-out test split,
# for comparison with the individual stump accuracies printed above.
print('overall score:', clf.score(X_test, y_test))

In [None]:
###########
# Bagging #
###########
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier

# Every k-NN base estimator is fit on a random half of the rows and a random half
# of the features. Seeding those draws makes the feature subsets and the accuracy
# reported in the next cell reproducible.
bagging = BaggingClassifier(KNeighborsClassifier(),
                            max_samples=0.5, max_features=0.5,
                            random_state=0xC0FFEE)
bagging.fit(X_train, y_train)

In [None]:
# estimators_features_ holds column indices into X, and X was built from df
# without 'target'. Look the names up among the feature columns only, so the
# mapping does not depend on where 'target' sits in df.
feature_names = df.columns.drop('target')
for i in range(5):
  features = bagging.estimators_features_[i]
  cols = list(feature_names[features])
  print(f'estimator {i} is using features {cols}')

print('\noverall accuracy:', bagging.score(X_test, y_test))

In [None]:
#################
# Random forest #
#################

from sklearn.ensemble import RandomForestClassifier

# Seeded like the train/test split so the per-tree and overall accuracies
# printed in the next cell do not change from run to run.
clf = RandomForestClassifier(n_estimators=100, random_state=0xC0FFEE)
clf.fit(X_train, y_train)

In [None]:
# Score the first few trees on their own, then the forest as a whole.
N_TREES_SHOWN = 5
for idx in range(N_TREES_SHOWN):
  tree = clf.estimators_[idx]
  tree_accuracy = tree.score(X_test, y_test)
  print(f'estimator {idx} has accuracy: {tree_accuracy}')

forest_accuracy = clf.score(X_test, y_test)
print('\nOverall accuracy:', forest_accuracy)

In [None]:
##########################
# Gradient tree boosting #
##########################
from sklearn.ensemble import GradientBoostingClassifier

# 100 boosting stages of depth-1 trees, with a fixed seed.
gb_settings = dict(n_estimators=100, max_depth=1, random_state=0xC0FFEE)
clf = GradientBoostingClassifier(**gb_settings)
clf.fit(X_train, y_train)

In [None]:
# Show what the first entry of estimators_ holds, then the test-set score.
first_stage = clf.estimators_[0]
print('estimators[0]', first_stage)
test_score = clf.score(X_test, y_test)
print('\nOverall_score', test_score)

In [None]:
import numpy as np

# Two independent choices that previously shared one constant:
SAMPLE_NUM = 5  # which row of the test set to inspect
STAGE_NUM = 5   # which boosting stage's trees to query (first axis of estimators_)

sample_X = X_test[SAMPLE_NUM].reshape(1,-1)  # single row reshaped to a 2-D (1, n_features) input
sample_y = y_test[SAMPLE_NUM]

# estimators_ is indexed as [stage, tree]; take the tree count from its shape
# instead of hard-coding the number of classes.
n_trees_per_stage = clf.estimators_.shape[1]
preds = []
for i in range(n_trees_per_stage):
  pred = clf.estimators_[STAGE_NUM, i].predict(sample_X)
  preds.append(pred)

# NOTE(review): these are the outputs of one stage's trees only, not the
# ensemble's final scores; use clf.predict(sample_X) for the model's prediction.
print('predictions:', preds)
print('argmax preds', np.argmax(preds))
print('true y:', sample_y)

In [None]:
###########
# XGBoost #
###########
import xgboost as xgb

In [None]:
# XGBoost classifier created with no arguments, i.e. library defaults.
clf = xgb.XGBClassifier()
clf  # bare expression so the notebook displays the estimator and its settings

In [None]:
# Fit the XGBoost classifier on the training split.
clf.fit(X_train, y_train)

In [None]:
# Evaluate the fitted classifier on the held-out test split.
clf.score(X_test, y_test)

In [None]:
# Native XGBoost API: settings go in a plain dict and data is wrapped in DMatrix objects.
param = dict(
    max_depth=3,
    eta=0.3,
    objective='multi:softprob',
    num_class=3,
)

dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

num_rounds = 10
clf = xgb.train(param, dtrain, num_boost_round=num_rounds)

In [None]:
# Take the highest-scoring class for each test row, then the fraction that match y_test.
class_scores = clf.predict(dtest)
y_pred = class_scores.argmax(axis=1)
(y_pred == y_test).mean()

In [None]:
import matplotlib.pyplot as plt

# Create the figure at its final size and pass the axes to XGBoost explicitly,
# rather than resizing whichever figure pyplot reports as "current" afterwards.
fig, ax = plt.subplots(1, 1, figsize=(12, 10))
xgb.plot_tree(clf, num_trees=1, ax=ax)  # num_trees=1 selects the tree with index 1
plt.show()

In [None]:
from xgboost import plot_importance

# The booster was trained on a bare NumPy matrix, so it names features 'f0', 'f1', ...
# plot_importance orders the bars by score and leaves out features that were never
# used in a split, so overwriting the tick labels with the column names in column
# order can attach the wrong name to a bar. Instead, translate each 'f<k>' key to
# its column name first and plot the resulting importance dict.
features = [name for name in list(df.columns) if name != 'target']
importance = {
    features[int(key[1:])]: score  # 'f2' -> features[2]
    for key, score in clf.get_score().items()
}

fig, ax = plt.subplots(1,1, figsize=(12,10))
plot_importance(importance, ax=ax)
plt.show()