Objective:
- Learn to use `pickle` to serialize a trained XGBoost model to disk and load it back for prediction

In [1]:
import gc
import os
import pickle
import numpy as np
import pandas as pd
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# load the iris table; header=None means the first line is treated as data
df = pd.read_csv(
    filepath_or_buffer="../../data/iris.data",
    header=None,
)

In [3]:
# every column except the last holds the features; the last column is the label
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

In [4]:
# label encode string class values
# Encode from the raw label column of `df` rather than from `y` itself, so that
# re-running this cell does not re-fit the encoder on already-encoded integers
# (which would overwrite the original class names in `label_encoder.classes_`).
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df.iloc[:, -1].values)

In [5]:
# hold out 30% of the rows for testing; the split is stratified on the encoded
# labels and seeded so that it is reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=89,
    shuffle=True,
    stratify=y,
)

In [6]:
# fit an XGBoost classifier (constructor defaults) on the training split;
# the fit call is left as the last expression so the cell displays its result
clf = XGBClassifier()
clf.fit(X=X_train, y=y_train)

XGBClassifier(base_score=0.5, booster=None, colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints=None,
              learning_rate=0.300000012, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints=None,
              n_estimators=100, n_jobs=0, num_parallel_tree=1,
              objective='multi:softprob', random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=None, subsample=1,
              tree_method=None, validate_parameters=False, verbosity=None)

In [7]:
# create output directory
output_dir = "../../output"
model_dir = os.path.join(output_dir, "model")
# makedirs creates `output_dir` and `model_dir` in a single call, and
# exist_ok=True makes the cell safe to re-run without the racy
# "check exists, then mkdir" pattern.
os.makedirs(model_dir, exist_ok=True)

In [8]:
# save model
model_filepath = os.path.join(model_dir, "iris_xgb.pickle.dat")
# Use a context manager so the file handle is flushed and closed as soon as
# the dump finishes, instead of relying on garbage collection to close it.
with open(model_filepath, "wb") as model_file:
    pickle.dump(clf, model_file)

# drop the in-memory classifier and run a collection pass; `clf` stays unbound
# until a later cell assigns it again
del clf
gc.collect()

9

In [9]:
# reload trained model and predict
# NOTE: pickle.load can execute arbitrary code, so only unpickle files you
# trust (here, the file written by the save cell of this notebook).
# The context manager closes the file handle deterministically.
with open(model_filepath, "rb") as model_file:
    clf = pickle.load(model_file)

# score the reloaded model on the held-out test split
preds_test = clf.predict(X_test)
acc_test = accuracy_score(y_test, preds_test)
print("[INFO] The test accuracy of XGBoost is: %.2f%%" % (acc_test * 100))

[INFO] The test accuracy of XGBoost is: 95.56%
