### XGBoost Example : _Classify Iris dataset_
Fabrice Daian

In [1]:
# Python Imports needed for this scripts

import xgboost as xgb
import numpy as np
from sklearn import datasets
from sklearn.cross_validation import train_test_split
from sklearn.metrics import precision_score,recall_score,f1_score,accuracy_score,log_loss




In [2]:

# Load the data

iris = datasets.load_iris()
X = iris.data
y = iris.target

In [3]:
# Make the training set split (80% Train / 20% Validation)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42)

In [4]:
# Create XGBoost data structures

dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)


In [5]:
# Set XGBoost hyperparameters

num_classes = 3 # number of classes to classify
epochs = 20     # the number of training iterations

param = {
    'max_depth': 3,  # the maximum depth of each tree
    'eta': 0.3,  # the training step for each iteration
    'silent': 1,  # logging mode - quiet
    'objective': 'multi:softprob',  # error evaluation for multiclass training
    'num_class': num_classes}  # the number of classes that exist in this datset


In [6]:
# XGBoost training
bst = xgb.train(param, dtrain, epochs)

In [7]:
# Make the prediction on the validation dataset
# and get the best class probability for each class

preds_on_test = bst.predict(dtest)
best_preds_on_test = np.asarray([np.argmax(line) for line in preds_on_test])

preds_on_train = bst.predict(dtrain)
best_preds_on_train = np.asarray([np.argmax(line) for line in preds_on_train])



In [8]:
# Scoring Accuracy & loss

print(accuracy_score(y_test, best_preds_on_test))
print(log_loss(y_test, preds_on_test))


0.9333333333333333
0.3005735023257633


In [9]:
# Scoring Precision / Recall, F1 score

print(precision_score(y_test, best_preds_on_test, average='macro'))
print(recall_score(y_test, best_preds_on_test, average='macro'))
print(f1_score(y_test, best_preds_on_test, average='macro'))


0.9313063063063063
0.930949167791273
0.9307242465137202


In [10]:
#Save the model 
bst.dump_model('dump.raw.txt')