In [None]:
# Numerical / dataframe stack
import numpy as np
import pandas as pd

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns

# Modelling
import xgboost as xgb
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder, OneHotEncoder

# Metrics
# NOTE(review): plot_precision_recall_curve was deprecated in scikit-learn 1.0 and
# removed in 1.2 (replaced by PrecisionRecallDisplay); this import fails on newer
# releases -- confirm the pinned scikit-learn version before relying on it.
from sklearn.metrics import average_precision_score, precision_recall_curve, plot_precision_recall_curve, roc_curve, auc
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, accuracy_score

In [None]:
def model_evalution(booster, x, y, best_rounds, prob=False):
    """Score a fitted booster on ``(x, y)`` and return binary-classification metrics.

    Parameters
    ----------
    booster : object
        Model exposing ``predict(x, ...)``; presumably an ``xgboost.Booster``
        (confirm against the caller).
    x : object
        Data passed straight to ``booster.predict`` (presumably an
        ``xgboost.DMatrix`` -- confirm against the caller).
    y : array-like
        True labels. Assumed to be binary with ``1`` as the positive class,
        because predictions are encoded as 0/1 and ``f1_score`` is called with
        its default positive label.
    best_rounds : int
        Number of boosting rounds to use for prediction. A falsy value is
        passed on as ``0``.
    prob : float or False, default False
        Decision threshold applied to the predicted scores. When falsy, the
        threshold that maximises F1 along the precision-recall curve is used.

    Returns
    -------
    dict
        ``threshold`` (float), ``aucpr`` (area under the precision-recall
        curve), ``roc_auc``, ``accuracy``, ``f1`` (scalar, at ``threshold``)
        and ``confusion_matrix``.
    """
    rounds = int(best_rounds) if best_rounds else 0
    try:
        # Preferred keyword on current xgboost releases.
        y_pred_prob = booster.predict(x, iteration_range=(0, rounds))
    except TypeError:
        # Older releases only understand ``ntree_limit``.
        y_pred_prob = booster.predict(x, ntree_limit=rounds)
    y_pred_prob = np.asarray(y_pred_prob)

    # Threshold-free ranking metrics are identical for both modes.
    precision, recall, thresholds = precision_recall_curve(y, y_pred_prob)
    aucpr = auc(recall, precision)
    roc = roc_auc_score(y, y_pred_prob)

    if prob:
        # Threshold on the requested probability directly, so a value above
        # every predicted score simply yields all-negative predictions.
        threshold = prob
    else:
        denom = precision + recall
        # Points with precision + recall == 0 get F1 = 0 instead of NaN
        # (a NaN would otherwise be picked by argmax).
        f1_curve = np.divide(
            2 * precision * recall,
            denom,
            out=np.zeros_like(denom, dtype=float),
            where=denom > 0,
        )
        # precision/recall hold one more point than thresholds; the final
        # point has no threshold, so it is excluded from the search.
        threshold = thresholds[int(np.argmax(f1_curve[:-1]))]

    # Positive class is encoded as 1.
    y_pred = (y_pred_prob >= threshold).astype(int)

    return {
        'threshold': float(threshold),
        'aucpr': aucpr,
        'roc_auc': roc,
        'accuracy': accuracy_score(y, y_pred, normalize=True),
        'f1': f1_score(y, y_pred),
        'confusion_matrix': confusion_matrix(y, y_pred),
    }

In [None]:
def factor_visualization(booster, output, top_n=30, importance_type='total_gain'):
    """Export and plot the booster's feature-importance scores.

    Writes the full importance table to ``output + '.csv'`` and a horizontal
    bar chart of the highest-scoring features to ``output + '.png'``, then
    shows the chart.

    Parameters
    ----------
    booster : object
        Model exposing ``get_score(importance_type=...)`` that returns a
        mapping of feature name to score (presumably an ``xgboost.Booster``
        -- confirm against the caller).
    output : str
        Path prefix (without extension) for the CSV and PNG files.
    top_n : int, default 30
        Number of top-ranked features shown in the chart; the CSV always
        contains every feature.
    importance_type : str, default 'total_gain'
        Importance measure forwarded to ``booster.get_score``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Features``, ``Scores`` and ``Scores_norm`` (each score
        divided by the sum of all scores), sorted by ``Scores`` descending.
    """
    scores = booster.get_score(importance_type=importance_type)

    feature_importance = pd.DataFrame(
        {'Features': list(scores.keys()), 'Scores': list(scores.values())}
    ).sort_values(by='Scores', ascending=False)
    feature_importance['Scores_norm'] = feature_importance.Scores / feature_importance.Scores.sum()
    feature_importance.to_csv(output + '.csv', index=False)

    # The style has to be set before the axes are created; seaborn styles are
    # applied when axes are built, not to ones that already exist.
    sns.set_style('whitegrid')
    fig, ax = plt.subplots(figsize=(15, 10))
    sns.barplot(x='Scores_norm', y='Features', data=feature_importance.head(top_n), ax=ax)
    fig.savefig(output + '.png')
    plt.show()

    return feature_importance