In [None]:
import os
import re
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib
from tqdm import tqdm
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
from collections import OrderedDict
from time import time

from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import roc_curve, auc, average_precision_score

%matplotlib inline
%config InlineBackend.figure_format='retina'

from sklearn.model_selection import train_test_split

# Reproducibility: a single fixed seed applied to NumPy's legacy global RNG.
RNG_SEED = 42
np.random.seed(RNG_SEED)

# Working directory at the time this cell runs; the data paths in the
# loading cell are built relative to it.
PATH = os.getcwd()

In [None]:
# Locations of the pickled feature sets, resolved relative to PATH.
data_path_jarvis = os.path.join(PATH, '../data/featurize/jarvis.bin')
data_path_magpie = os.path.join(PATH, '../data/featurize/magpie.bin')
data_path_deltasoap_212 = os.path.join(PATH, '../data/featurize/df_deltasoap_212.pkl')

# Parallel lists: x_label[i] is the key under which data_path[i] is stored.
x_label = ['jarvis', 'magpie', 'deltasoap_212']
data_path = [data_path_jarvis, data_path_magpie, data_path_deltasoap_212]

# Unpickle every feature file into a dict keyed by its label.
# NOTE: pickle.load can execute arbitrary code, so only load files from a
# trusted source.
x_value_raw = {}
for label, path in zip(x_label, data_path):
    with open(path, 'rb') as f:
        loaded = pickle.load(f)
    x_value_raw[label] = loaded

In [None]:
# Element 0 of the loaded 'jarvis' and 'magpie' objects is joined column-wise
# (presumably these are the feature tables -- TODO confirm against the
# featurisation step).
jarvis_features = x_value_raw['jarvis'][0]
magpie_features = x_value_raw['magpie'][0]
X_merge = pd.concat([jarvis_features, magpie_features], axis=1)

# NOTE(review): neither `corr_reduction` nor `corr` is defined or imported in
# the cells visible here. Confirm they are provided elsewhere, otherwise this
# line raises NameError on a fresh kernel.
# Presumably filters out features correlated above 0.8 -- TODO confirm.
X_data = corr_reduction(corr, 0.8, X_merge.abs())

In [None]:
# Target values: element 1 of the loaded 'jarvis' object.
y = x_value_raw['jarvis'][1]

# Give the delta-SOAP table the same index as X_data so the column-wise
# concat lines the rows up by label.
# NOTE(review): this assumes both tables have the same number of rows in the
# same order -- confirm.
deltasoap_aligned = x_value_raw['deltasoap_212'].set_index(X_data.index)
X = pd.concat([X_data, deltasoap_aligned], axis=1)

In [None]:
# Binarise the target: values <= 3 become class 0, everything else class 1.
#
# The raw target is re-read from the loaded data instead of from `y` itself,
# so the cell is idempotent. Previously a second run compared the
# already-binarised Python list with 3 and raised TypeError.
y_raw = x_value_raw['jarvis'][1]
diff = y_raw <= 3
y = [0 if is_low else 1 for is_low in diff]

In [None]:
# Gradient-boosting classifier with fixed hyperparameters.
# random_state is passed explicitly: the notebook-level np.random.seed() call
# only seeds this process and may not reach the worker processes started by
# n_jobs=-1, so without it the fitted trees are not guaranteed reproducible.
gbc = GradientBoostingClassifier(
    learning_rate=0.05,
    max_depth=5,
    min_samples_leaf=0.1,    # float => fraction of the training samples
    min_samples_split=0.3,   # float => fraction of the training samples
    n_estimators=1500,
    subsample=1.0,
    random_state=RNG_SEED,
)

from sklearn.model_selection import cross_val_predict

# Out-of-fold class probabilities from 5-fold cross-validation.
# Column 0 is class 0 and column 1 is class 1.
y_score = cross_val_predict(gbc, X, y, cv=5, n_jobs=-1, verbose=1, method='predict_proba')

# Hard labels are taken from the same out-of-fold probabilities instead of
# running the whole 5-fold fit a second time with method='predict'.
# The class labels are 0/1, so the index of the larger probability is the
# label. This matches predict() on the same folds, barring exact 0.5 ties.
y_pred = np.argmax(y_score, axis=1)

In [None]:
# Precision-recall curve

# y_score[:, 1] is the out-of-fold probability of the positive class (label 1).
# The thresholds are bound to a named variable rather than `_`, which IPython
# uses for the last cell output.
precision, recall, _pr_thresholds = precision_recall_curve(y, y_score[:, 1])
aps = average_precision_score(y, y_score[:, 1])
print(f'average precision score is {aps:.2f}')

from sklearn.metrics import PrecisionRecallDisplay

# Bound to `pr_display` rather than `display` so that IPython's built-in
# display() function is not shadowed for the rest of the notebook.
pr_display = PrecisionRecallDisplay.from_predictions(y, y_score[:, 1], name="Gradient Boosting")
# Optional title (left disabled, as in the original):
# pr_display.ax_.set_title("Precision-Recall curve")

In [None]:
# Confusion matrix

import seaborn as sns

# Counts of actual (rows) vs. predicted (columns) classes, drawn as an
# annotated heatmap.
cm = confusion_matrix(y, y_pred)
ax = sns.heatmap(cm, annot=True, fmt='g', cmap='Blues')
ax.set(xlabel='\nPredicted Values', ylabel='Actual Values ')

## Tick labels - listed in the same order as the matrix rows/columns
## (class 0 first, then class 1).
ax.set_xticklabels(['False', 'True'])
ax.set_yticklabels(['False', 'True'])

## Render the confusion-matrix figure.
plt.show()

In [None]:
# ROC curve

# False-positive rate, true-positive rate and thresholds, computed from the
# out-of-fold probability of the positive class.
positive_scores = y_score[:, 1]
fper, tper, threshold = roc_curve(y, positive_scores)

# NOTE(review): `plotter` is not defined or imported in the cells visible
# here. Confirm it is provided elsewhere, otherwise this raises NameError on a
# fresh kernel.
plotter.plot_roc_curve(fper, tper)

In [None]:
# Summary table: per-class precision / recall / F1.
# Class 0 is reported as 'low energy' and class 1 as 'high energy'.
class_names = ['low energy', 'high energy']
report = classification_report(y, y_pred, target_names=class_names)
print(report)

In [None]:
# Precision and recall as a function of the decision threshold
# (used for threshold optimisation).

pr_curve = precision_recall_curve(y, y_score[:, 1])
precision, recall, threshold = pr_curve

# NOTE(review): `plot_precision_recall_vs_threshold` is not defined or imported
# in the cells visible here. Confirm it is provided elsewhere.
plot_precision_recall_vs_threshold(precision, recall, threshold)