**Use the code below to get scores when using cross-validation**

In [None]:
import time

import numpy as np
from matplotlib import pyplot as plt
from sklearn import *
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.model_selection import cross_val_score

**Pretty Confusion Matrix**

In [None]:
# Draw the confusion matrix `cfm` as a labelled heat map on an explicitly created Axes.
# `cfm` and `lep_dict` must already exist in the kernel (they are not defined in this cell).
# NOTE(review): assumes the key order of `lep_dict` matches the row/column order of `cfm` - confirm.
fig, ax = plt.subplots(figsize=(8, 8), edgecolor='#882233', facecolor='#AA99DD', dpi=70)
disp = metrics.ConfusionMatrixDisplay(cfm, display_labels=list(lep_dict.keys()))
# Pass the single Axes object: `ConfusionMatrixDisplay.plot` expects an Axes, not the `fig.axes` list.
disp.plot(include_values=True, cmap='Blues', xticks_rotation='vertical', values_format=None, ax=ax);

**Function to print results of a Classification Model**

In [None]:
from matplotlib import pyplot as plt
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score
def printSummary(clf, X, y, FeatureList):
    """
    Print evaluation metrics for a fitted classifier and plot its top feature importances.

    Prints the estimator's own score, the accuracy, the macro-averaged precision
    and the confusion matrix for the predictions on ``X``, then draws a horizontal
    bar chart of the 15 largest ``clf.feature_importances_`` values.

    :param clf: fitted classifier exposing ``predict_proba``, ``score`` and ``feature_importances_``
    :param X: feature matrix to evaluate on
    :param y: true labels for the rows of ``X``
    :param FeatureList: feature names, in the same order as the columns used to fit ``clf``
    :return: None
    """
    # get soft probability predictions
    pred_probabilities = np.asarray(clf.predict_proba(X))
    # Choose the class with the max probability as the prediction
    best_column = np.argmax(pred_probabilities, axis=1)
    # Map column indices back to real labels when the estimator publishes them,
    # so labels that are not 0..n-1 are still compared correctly against y.
    known_classes = getattr(clf, 'classes_', None)
    y_hat = best_column if known_classes is None else np.asarray(known_classes)[best_column]

    # score() takes (features, true labels), not (true labels, predictions)
    print('Score: %0.5f' % clf.score(X, y))
    # evaluate against the y passed in rather than a notebook-global y_test
    print("Accuracy: %.2f%%" % (accuracy_score(y, y_hat) * 100.0))
    print('Precision Score: %0.5f' % precision_score(y, y_hat, average='macro'))
    cfm = metrics.confusion_matrix(y, y_hat)
    print(cfm)

    importances = np.asarray(clf.feature_importances_)
    # ascending sort, so the last 15 entries are the most important features
    top_idx = importances.argsort()[-15:]
    # np.asarray lets a plain Python list of names be indexed by an index array
    feature_names = np.asarray(FeatureList)
    plt.barh(feature_names[top_idx], importances[top_idx])
    plt.xlabel("Xgboost Feature Importance")

**Function to write a boosted tree from an XGBoost classifier to a file**

In [None]:
def plot_tree(xgb_model, filename, rankdir='UT'):
    """
    Render one tree of a trained XGBoost model with Graphviz and save it to a file.

    The tree drawn is the one at ``xgb_model.best_iteration`` when that attribute
    is available; otherwise the first tree (index 0) is drawn.

    :param xgb_model: xgboost trained model
    :param filename: output file; its extension (e.g. ``.pdf``, ``.png``) selects the Graphviz format
    :param rankdir: direction of the tree, forwarded to ``xgb.to_graphviz``:
        default 'UT', accepts 'LR' for left-to-right tree
    :return: None
    :raises ValueError: if ``filename`` has no extension to derive the output format from
    """
    import os

    # Validate the target first so a bad filename fails before any rendering work.
    _, file_extension = os.path.splitext(filename)
    output_format = file_extension.strip('.').lower()
    if not output_format:
        raise ValueError(
            "filename %r has no extension; cannot infer the output format" % (filename,)
        )

    import xgboost as xgb

    # best_iteration may be unavailable (AttributeError) when the model was
    # trained without early stopping; fall back to the first tree.
    tree_index = getattr(xgb_model, 'best_iteration', None)
    if tree_index is None:
        tree_index = 0

    gvz = xgb.to_graphviz(xgb_model, num_trees=tree_index, rankdir=rankdir)
    data = gvz.pipe(format=output_format)
    with open(filename, 'wb') as f:
        f.write(data)
        
# Export a tree of the fitted classifier `clf` (defined elsewhere in the notebook) to a PDF.
plot_tree(clf, filename='xgb_trees.pdf', rankdir='UT')

In [None]:
# Grid search over tree depth and minimum child weight.
# Relies on `xgb4`, `dtrain`, `featureList` and `target` being defined earlier in the notebook.
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold

search_params = {
    'max_depth': range(2, 20, 2),
    'min_child_weight': range(1, 6, 2),
}
# Several metrics are collected; `refit='accuracy'` decides which one picks the best model.
scoring = ['accuracy', 'precision_macro', 'recall_macro']
gsearch = GridSearchCV(
    estimator=xgb4,
    param_grid=search_params,
    scoring=scoring,
    n_jobs=4,
    cv=5,
    refit='accuracy',
    verbose=2,
)

# perf_counter is monotonic, so the measured interval is immune to system clock changes.
start_time = time.perf_counter()
gsearch.fit(dtrain[featureList], dtrain[target], verbose=False)
elapsed = time.perf_counter() - start_time
# Split manually instead of using gmtime/strftime, which wraps the hour field after 24 hours.
hours, remainder = divmod(int(elapsed), 3600)
minutes, seconds = divmod(remainder, 60)
print('Grid Search Time %02dh:%02dm:%02ds' % (hours, minutes, seconds))

# Report the secondary metrics at the parameter combination chosen by accuracy.
i = gsearch.best_index_
best_precision = gsearch.cv_results_['mean_test_precision_macro'][i]
best_recall = gsearch.cv_results_['mean_test_recall_macro'][i]
print('Best score (accuracy): {}'.format(gsearch.best_score_))
print("Best Precision : %0.4f" % best_precision)
print("Best Recall    : %0.4f" % best_recall)
print('Number of Trees %d' % len(gsearch.best_estimator_.get_booster().get_dump()))
# Last expression: shown as the cell output.
gsearch.best_estimator_.get_xgb_params()