In [None]:
# avoids running on GPU
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

# hep imports
# import mplhep as hep
# hep.style.use('ATLAS')
from utils import load_nnt

# standard libraries imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import uproot
import ROOT

from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler

# tensorflow imports
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split


# import shap to visualise feature importance
#import shap
import ipywidgets as widgets

# import utils.py
import utils
import pickle

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

In [None]:
# Three MC input files (the 16 / 17 / 18 suffixes follow the variable names).
file_path_mc_16 = "nominal_1.root"
file_path_mc_17 = "nominal_2.root"
file_path_mc_18 = "nominal_3.root"
# Read them through utils.get_data with region='sig', half='even', mc=True.
mc16, mc17, mc18 = utils.get_data(
    file_path_mc_16,
    file_path_mc_17,
    file_path_mc_18,
    region="sig",
    half="even",
    mc=True,
)

In [None]:
# Three data input files (the 16 / 17 / 18 suffixes follow the variable names).
file_path_data_16 = "data16_NN_100_bootstraps.root"
file_path_data_17 = "data17_NN_100_bootstraps.root"
file_path_data_18 = "data18_NN_100_bootstraps.root"
# Same loader call as for MC, but without the mc flag.
data16, data17, data18 = utils.get_data(
    file_path_data_16,
    file_path_data_17,
    file_path_data_18,
    region="sig",
    half="even",
)

In [None]:
# Build the two classifier samples with utils.get_data_mask:
#   signal     <- MC frames,   mask '4b'
#   background <- data frames, mask '2bRW'
signal_df = utils.get_data_mask(mc16, mc17, mc18, mask="4b")
bkg_df = utils.get_data_mask(data16, data17, data18, mask="2bRW")

## s6 (s5OHE + bkt)

In [None]:
# Network inputs for this configuration. Some candidate variables are left out
# because they did not agree well in the CR.
# Candidates currently switched off:
#   bkt_lead_jet_pt, bkt_third_lead_jet_pt, pT_h1, cos_theta_star, njets,
#   pt_hh, pT_2, pT_4, eta_i, dRjj_1, dRjj_2, m_min_dj, m_max_dj,
#   pairing_score_1, pairing_score_2,
#   {m,E,eta,phi}_h1, {m,E,pT,eta,phi}_h2,
#   {m,E,eta,phi}_{h1,h2}_{j1,j2}, year
features = [
    "m_hh",
    "X_hh",
    "dEta_hh",
    "X_wt_tag",
    "year_16",
    "year_17",
    "year_18",
    "bkt_0",
    "bkt_1",
]

# Stack signal and background rows into one frame with a fresh integer index,
# then pull out the inputs, the labels, the per-event weights and the row ids.
df_data = pd.concat((signal_df, bkg_df), axis=0, ignore_index=True)
X, y, weights = df_data[features], df_data["class"], df_data["sample_weight"]
idx = df_data.index

In [None]:
# Split into train 70%, validation 15% and test 15% of the dataset.
# The first call keeps 70% for training; the second halves the remaining 30%.
# A fixed random_state makes the partition reproducible across kernel restarts,
# so repeated runs on the same df_data are evaluated on the same test events.
SPLIT_SEED = 42
(
    X_train,
    X_test_validate,
    y_train,
    y_test_validate,
    weights_train,
    weights_test_validate,
    idx_train,
    idx_test_validate,
) = train_test_split(
    X,
    y,
    weights,
    list(idx),
    test_size=0.3,
    random_state=SPLIT_SEED,
)
(
    X_test,
    X_val,
    y_test,
    y_val,
    weights_test,
    weights_val,
    idx_test,
    idx_val,
) = train_test_split(
    X_test_validate,
    y_test_validate,
    weights_test_validate,
    idx_test_validate,
    test_size=0.5,
    random_state=SPLIT_SEED,
)

In [None]:
# Standardise the inputs: the scaler is fitted on the training split only and
# then applied unchanged to the validation and test splits.
scaler = StandardScaler()
X_train_sc = scaler.fit_transform(X_train)
X_val_sc, X_test_sc = scaler.transform(X_val), scaler.transform(X_test)
# Convert the integer labels to binary class matrices.
y_train_hot, y_val_hot, y_test_hot = (
    to_categorical(labels) for labels in (y_train, y_val, y_test)
)

# Summed training weights per class and their ratio (background / signal).
is_sig_train = y_train == 1
N_bkg_train = weights_train[y_train == 0].sum()
N_sig_train = weights_train[is_sig_train].sum()
R = N_bkg_train / N_sig_train
# Training weights: background keeps its per-event weight, every signal event
# gets the constant R.
# NOTE(review): the original signal weights are replaced by R, not multiplied
# by it -- confirm this is the intended balancing.
weights_train_R = np.array(weights_train)
weights_train_R[is_sig_train] = R

In [None]:
# Fully connected classifier: two hidden ReLU layers of 200 units, each followed
# by 20% dropout, and a 2-unit softmax output. Input width = number of features.
model = Sequential(
    [
        Dense(200, input_dim=len(features), activation="relu"),
        Dropout(0.2),
        Dense(200, activation="relu"),
        Dropout(0.2),
        Dense(2, activation="softmax"),
    ]
)
model.summary()

In [None]:
# Adam optimiser (learning rate 5e-4), categorical cross-entropy loss,
# accuracy as the tracked metric.
# note, could have used utils.F1_Score() in metric
opt = tf.keras.optimizers.Adam(learning_rate=5e-4)
model.compile(
    optimizer=opt,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train on the scaled training split with the re-weighted sample weights.
# Early stopping watches val_loss with a patience of 15 epochs.
# restore_best_weights=True rolls the model back to the epoch with the lowest
# val_loss; without it the model keeps the weights from the last epoch run,
# i.e. 15 epochs after the best one.
early_stop = EarlyStopping(
    monitor="val_loss",
    patience=15,
    verbose=True,
    restore_best_weights=True,
)
history = model.fit(
    X_train_sc,
    y_train_hot,
    sample_weight=weights_train_R,
    epochs=100,
    callbacks=[early_stop],
    batch_size=1000,
    # validation data (inputs, one-hot labels, per-event weights)
    validation_data=(X_val_sc, y_val_hot, weights_val),
)

In [None]:
# Training and validation accuracy per epoch, as recorded by model.fit.
for metric_name in ("accuracy", "val_accuracy"):
    plt.plot(history.history[metric_name], label=metric_name)
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(loc="lower right")

In [None]:
# Softmax outputs of the network for the scaled test split.
pred_test = model.predict(x=X_test_sc)

In [None]:
# Column 1 of the network output is used as the score of class 1, column 0 as
# the score of class 0.
pred_negative, pred_positive = pred_test[:, 0], pred_test[:, 1]
# Weighted AUC and ROC curve on the test split.
auc = roc_auc_score(y_test, pred_positive, sample_weight=weights_test)
fpr, tpr, _ = roc_curve(y_test, pred_positive, sample_weight=weights_test)
# Diagonal reference line.
x_fpr = np.linspace(0, 1, 50)
y_tpr = np.linspace(0, 1, 50)
# Draw the classifier curve and the diagonal.
plt.plot(fpr, tpr, ls="-", label="NN")
plt.plot(x_fpr, y_tpr, ls="--", label="random guess")
plt.legend()
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title(f"AUC: {auc:.3f}", loc="right")
plt.title("ROC curve", loc="left")
# NOTE(review): another section of this notebook also writes ROC.png, so the
# file is overwritten when both run.
plt.savefig("ROC.png", dpi=300)

In [None]:
# Hard class decision: round the class-1 output to 0 or 1.
pred_test_b = np.around(pred_test[:, 1])
# Unweighted number of test events per true class, stored as a (2, 1) column so
# that it divides a 2x2 matrix row by row.
n_zero = len(y_test[y_test == 0])
n_one = len(y_test[y_test == 1])
div_arr = np.array([[n_zero], [n_one]])

In [None]:
# Confusion matrix with each row (true class) divided by that class's event
# count. Sample weights are not applied here.
conf_mat = confusion_matrix(y_test, pred_test_b, labels=[0, 1]) / div_arr
heatmap_ax = sns.heatmap(conf_mat, annot=True)
heatmap_ax.set(title="Confusion Matrix", xlabel="Predicted", ylabel="Actual");

In [None]:
# Distribution of the class-1 network output for true class-1 and true class-0
# test events, both normalised to unit area.
# NOTE(review): both histograms show column 1 of the output, although the
# second legend entry says "probability = negative" -- confirm the wording.
class1_score = pred_test[:, 1]
sm_hh_prob = class1_score[y_test == 1]
data_prob = class1_score[y_test == 0]
hist_style = dict(histtype="step", density=True)
h1, be, _ = plt.hist(
    sm_hh_prob, bins=50, label="MC SM HH (probability = positive)", **hist_style
)
# Re-use the bin edges of the first histogram so both share the same binning.
h2, _, _ = plt.hist(
    data_prob, bins=be, label="data 2b (probability = negative)", **hist_style
)
plt.title("NN probability score", fontsize=18)
plt.xlabel("NN predicted probability", fontsize=18)
plt.ylabel("arb.units", fontsize=18)
plt.ylim(0, 2.2)
plt.legend()
# NOTE(review): every configuration in this notebook writes to the same
# Classifier_hist.png, so earlier plots are overwritten.
plt.savefig("Classifier_hist.png", dpi=300)

In [None]:
# Persist the trained network and the fitted scaler next to each other.
output_dir = "./classifier_models/"
model.save(output_dir + "s6_model")
# The context manager closes the pickle file as soon as the dump is done,
# instead of leaving the handle open until garbage collection.
with open(output_dir + "StandardScaler_s6.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

## s9 + dRjj_1 + dRjj_2

In [None]:
# Network inputs for this configuration. Some candidate variables are left out
# because they did not agree well in the CR.
# Candidates currently switched off:
#   bkt_lead_jet_pt, bkt_third_lead_jet_pt, pT_h1, cos_theta_star, njets,
#   pT_2, pT_4, eta_i, m_min_dj, m_max_dj, pairing_score_1, pairing_score_2,
#   {E,eta,phi}_h1, {E,pT,eta,phi}_h2,
#   {m,E,eta,phi}_{h1,h2}_{j1,j2}, year
features = [
    "m_hh",
    "X_hh",
    "dEta_hh",
    "X_wt_tag",
    "year_16",
    "year_17",
    "year_18",
    "bkt_0",
    "bkt_1",
    "pt_hh",
    "m_h1",
    "m_h2",
    "dRjj_1",
    "dRjj_2",
]

# Stack signal and background rows into one frame with a fresh integer index,
# then pull out the inputs, the labels, the per-event weights and the row ids.
df_data = pd.concat((signal_df, bkg_df), axis=0, ignore_index=True)
X, y, weights = df_data[features], df_data["class"], df_data["sample_weight"]
idx = df_data.index

In [None]:
# Split into train 70%, validation 15% and test 15% of the dataset.
# The first call keeps 70% for training; the second halves the remaining 30%.
# A fixed random_state makes the partition reproducible across kernel restarts,
# so repeated runs on the same df_data are evaluated on the same test events.
SPLIT_SEED = 42
(
    X_train,
    X_test_validate,
    y_train,
    y_test_validate,
    weights_train,
    weights_test_validate,
    idx_train,
    idx_test_validate,
) = train_test_split(
    X,
    y,
    weights,
    list(idx),
    test_size=0.3,
    random_state=SPLIT_SEED,
)
(
    X_test,
    X_val,
    y_test,
    y_val,
    weights_test,
    weights_val,
    idx_test,
    idx_val,
) = train_test_split(
    X_test_validate,
    y_test_validate,
    weights_test_validate,
    idx_test_validate,
    test_size=0.5,
    random_state=SPLIT_SEED,
)

In [None]:
# Standardise the inputs: the scaler is fitted on the training split only and
# then applied unchanged to the validation and test splits.
scaler = StandardScaler()
X_train_sc = scaler.fit_transform(X_train)
X_val_sc, X_test_sc = scaler.transform(X_val), scaler.transform(X_test)
# Convert the integer labels to binary class matrices.
y_train_hot, y_val_hot, y_test_hot = (
    to_categorical(labels) for labels in (y_train, y_val, y_test)
)

# Summed training weights per class and their ratio (background / signal).
is_sig_train = y_train == 1
N_bkg_train = weights_train[y_train == 0].sum()
N_sig_train = weights_train[is_sig_train].sum()
R = N_bkg_train / N_sig_train
# Training weights: background keeps its per-event weight, every signal event
# gets the constant R.
# NOTE(review): the original signal weights are replaced by R, not multiplied
# by it -- confirm this is the intended balancing.
weights_train_R = np.array(weights_train)
weights_train_R[is_sig_train] = R

In [None]:
# Fully connected classifier: two hidden ReLU layers of 200 units, each followed
# by 20% dropout, and a 2-unit softmax output. Input width = number of features.
model = Sequential(
    [
        Dense(200, input_dim=len(features), activation="relu"),
        Dropout(0.2),
        Dense(200, activation="relu"),
        Dropout(0.2),
        Dense(2, activation="softmax"),
    ]
)
model.summary()

In [None]:
# Adam optimiser (learning rate 5e-4), categorical cross-entropy loss,
# accuracy as the tracked metric.
# note, could have used utils.F1_Score() in metric
opt = tf.keras.optimizers.Adam(learning_rate=5e-4)
model.compile(
    optimizer=opt,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train on the scaled training split with the re-weighted sample weights.
# Early stopping watches val_loss with a patience of 15 epochs.
# restore_best_weights=True rolls the model back to the epoch with the lowest
# val_loss; without it the model keeps the weights from the last epoch run,
# i.e. 15 epochs after the best one.
early_stop = EarlyStopping(
    monitor="val_loss",
    patience=15,
    verbose=True,
    restore_best_weights=True,
)
history = model.fit(
    X_train_sc,
    y_train_hot,
    sample_weight=weights_train_R,
    epochs=100,
    callbacks=[early_stop],
    batch_size=1000,
    # validation data (inputs, one-hot labels, per-event weights)
    validation_data=(X_val_sc, y_val_hot, weights_val),
)

In [None]:
# Training and validation accuracy per epoch, as recorded by model.fit.
for metric_name in ("accuracy", "val_accuracy"):
    plt.plot(history.history[metric_name], label=metric_name)
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(loc="lower right")

In [None]:
# Softmax outputs of the network for the scaled test split.
pred_test = model.predict(x=X_test_sc)

In [None]:
# Column 1 of the network output is used as the score of class 1, column 0 as
# the score of class 0.
pred_negative, pred_positive = pred_test[:, 0], pred_test[:, 1]
# Weighted AUC and ROC curve on the test split.
auc = roc_auc_score(y_test, pred_positive, sample_weight=weights_test)
fpr, tpr, _ = roc_curve(y_test, pred_positive, sample_weight=weights_test)
# Diagonal reference line.
x_fpr = np.linspace(0, 1, 50)
y_tpr = np.linspace(0, 1, 50)
# Draw the classifier curve and the diagonal.
plt.plot(fpr, tpr, ls="-", label="NN")
plt.plot(x_fpr, y_tpr, ls="--", label="random guess")
plt.legend()
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title(f"AUC: {auc:.3f}", loc="right")
plt.title("ROC curve", loc="left")

In [None]:
# Hard class decision: round the class-1 output to 0 or 1.
pred_test_b = np.around(pred_test[:, 1])
# Unweighted number of test events per true class, stored as a (2, 1) column so
# that it divides the 2x2 matrix row by row.
n_zero = len(y_test[y_test == 0])
n_one = len(y_test[y_test == 1])
div_arr = np.array([[n_zero], [n_one]])
# Confusion matrix with each row (true class) divided by that class's event
# count. Sample weights are not applied here.
conf_mat = confusion_matrix(y_test, pred_test_b, labels=[0, 1]) / div_arr
heatmap_ax = sns.heatmap(conf_mat, annot=True)
heatmap_ax.set(title="Confusion Matrix", xlabel="Predicted", ylabel="Actual");

In [None]:
# Distribution of the class-1 network output for true class-1 and true class-0
# test events, both normalised to unit area.
# NOTE(review): both histograms show column 1 of the output, although the
# second legend entry says "probability = negative" -- confirm the wording.
class1_score = pred_test[:, 1]
sm_hh_prob = class1_score[y_test == 1]
data_prob = class1_score[y_test == 0]
hist_style = dict(histtype="step", density=True)
h1, be, _ = plt.hist(
    sm_hh_prob, bins=50, label="MC SM HH (probability = positive)", **hist_style
)
# Re-use the bin edges of the first histogram so both share the same binning.
h2, _, _ = plt.hist(
    data_prob, bins=be, label="data 2b (probability = negative)", **hist_style
)
plt.title("NN probability score", fontsize=18)
plt.xlabel("NN predicted probability", fontsize=18)
plt.ylabel("arb.units", fontsize=18)
plt.ylim(0, 2.2)
plt.legend()
# NOTE(review): every configuration in this notebook writes to the same
# Classifier_hist.png, so earlier plots are overwritten.
plt.savefig("Classifier_hist.png", dpi=300)

In [None]:
# Persist the trained network and the fitted scaler next to each other.
output_dir = "./classifier_models/"
model.save(output_dir + "s9+dRjj_12_model")
# The context manager closes the pickle file as soon as the dump is done,
# instead of leaving the handle open until garbage collection.
with open(output_dir + "StandardScaler_s9+dRjj_12.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

## s9 + dRjj_1 + dRjj_2 + pt_hh + ...

In [None]:
# Network inputs for this configuration.
# 'pt_hh' used to be listed twice; the duplicate is removed so that selecting
# these columns does not yield a repeated column and the network does not get
# the same input twice. The list now has 21 unique entries.
# Candidates currently switched off:
#   bkt_lead_jet_pt, bkt_third_lead_jet_pt, pT_h1, cos_theta_star,
#   pT_2, pT_4, eta_i, m_min_dj, m_max_dj, pairing_score_1, pairing_score_2,
#   pT_h2, {m,E,eta,phi}_{h1,h2}_{j1,j2}, year
features = [
    "m_hh",
    "X_hh",
    "dEta_hh",
    "X_wt_tag",
    "year_16",
    "year_17",
    "year_18",
    "bkt_0",
    "bkt_1",
    "pt_hh",
    "m_h1",
    "m_h2",
    "dRjj_1",
    "dRjj_2",
    "njets",
    "E_h1",
    "E_h2",
    "eta_h1",
    "eta_h2",
    "phi_h1",
    "phi_h2",
]

# Stack signal and background rows into one frame with a fresh integer index,
# then pull out the inputs, the labels, the per-event weights and the row ids.
df_data = pd.concat((signal_df, bkg_df), axis=0, ignore_index=True)
X, y, weights = df_data[features], df_data["class"], df_data["sample_weight"]
idx = df_data.index

In [None]:
# Split into train 70%, validation 15% and test 15% of the dataset.
# The first call keeps 70% for training; the second halves the remaining 30%.
# A fixed random_state makes the partition reproducible across kernel restarts,
# so repeated runs on the same df_data are evaluated on the same test events.
SPLIT_SEED = 42
(
    X_train,
    X_test_validate,
    y_train,
    y_test_validate,
    weights_train,
    weights_test_validate,
    idx_train,
    idx_test_validate,
) = train_test_split(
    X,
    y,
    weights,
    list(idx),
    test_size=0.3,
    random_state=SPLIT_SEED,
)
(
    X_test,
    X_val,
    y_test,
    y_val,
    weights_test,
    weights_val,
    idx_test,
    idx_val,
) = train_test_split(
    X_test_validate,
    y_test_validate,
    weights_test_validate,
    idx_test_validate,
    test_size=0.5,
    random_state=SPLIT_SEED,
)

In [None]:
# Standardise the inputs: the scaler is fitted on the training split only and
# then applied unchanged to the validation and test splits.
scaler = StandardScaler()
X_train_sc = scaler.fit_transform(X_train)
X_val_sc, X_test_sc = scaler.transform(X_val), scaler.transform(X_test)
# Convert the integer labels to binary class matrices.
y_train_hot, y_val_hot, y_test_hot = (
    to_categorical(labels) for labels in (y_train, y_val, y_test)
)

# Summed training weights per class and their ratio (background / signal).
is_sig_train = y_train == 1
N_bkg_train = weights_train[y_train == 0].sum()
N_sig_train = weights_train[is_sig_train].sum()
R = N_bkg_train / N_sig_train
# Training weights: background keeps its per-event weight, every signal event
# gets the constant R.
# NOTE(review): the original signal weights are replaced by R, not multiplied
# by it -- confirm this is the intended balancing.
weights_train_R = np.array(weights_train)
weights_train_R[is_sig_train] = R

In [None]:
# Fully connected classifier: two hidden ReLU layers of 200 units, each followed
# by 20% dropout, and a 2-unit softmax output. Input width = number of features.
model = Sequential(
    [
        Dense(200, input_dim=len(features), activation="relu"),
        Dropout(0.2),
        Dense(200, activation="relu"),
        Dropout(0.2),
        Dense(2, activation="softmax"),
    ]
)
model.summary()

In [None]:
# Adam optimiser (learning rate 5e-4), categorical cross-entropy loss,
# accuracy as the tracked metric.
# note, could have used utils.F1_Score() in metric
opt = tf.keras.optimizers.Adam(learning_rate=5e-4)
model.compile(
    optimizer=opt,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train on the scaled training split with the re-weighted sample weights.
# Early stopping watches val_loss with a patience of 15 epochs.
# restore_best_weights=True rolls the model back to the epoch with the lowest
# val_loss; without it the model keeps the weights from the last epoch run,
# i.e. 15 epochs after the best one.
early_stop = EarlyStopping(
    monitor="val_loss",
    patience=15,
    verbose=True,
    restore_best_weights=True,
)
history = model.fit(
    X_train_sc,
    y_train_hot,
    sample_weight=weights_train_R,
    epochs=100,
    callbacks=[early_stop],
    batch_size=1000,
    # validation data (inputs, one-hot labels, per-event weights)
    validation_data=(X_val_sc, y_val_hot, weights_val),
)

In [None]:
# Training and validation accuracy per epoch, as recorded by model.fit.
for metric_name in ("accuracy", "val_accuracy"):
    plt.plot(history.history[metric_name], label=metric_name)
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(loc="lower right")

In [None]:
# Softmax outputs of the network for the scaled test split.
pred_test = model.predict(x=X_test_sc)

In [None]:
# Column 1 of the network output is used as the score of class 1, column 0 as
# the score of class 0.
pred_negative, pred_positive = pred_test[:, 0], pred_test[:, 1]
# Weighted AUC and ROC curve on the test split.
auc = roc_auc_score(y_test, pred_positive, sample_weight=weights_test)
fpr, tpr, _ = roc_curve(y_test, pred_positive, sample_weight=weights_test)
# Diagonal reference line.
x_fpr = np.linspace(0, 1, 50)
y_tpr = np.linspace(0, 1, 50)
# Draw the classifier curve and the diagonal.
plt.plot(fpr, tpr, ls="-", label="NN")
plt.plot(x_fpr, y_tpr, ls="--", label="random guess")
plt.legend()
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title(f"AUC: {auc:.3f}", loc="right")
plt.title("ROC curve", loc="left")

In [None]:
# Hard class decision: round the class-1 output to 0 or 1.
pred_test_b = np.around(pred_test[:, 1])
# Unweighted number of test events per true class, stored as a (2, 1) column so
# that it divides the 2x2 matrix row by row.
n_zero = len(y_test[y_test == 0])
n_one = len(y_test[y_test == 1])
div_arr = np.array([[n_zero], [n_one]])
# Confusion matrix with each row (true class) divided by that class's event
# count. Sample weights are not applied here.
conf_mat = confusion_matrix(y_test, pred_test_b, labels=[0, 1]) / div_arr
heatmap_ax = sns.heatmap(conf_mat, annot=True)
heatmap_ax.set(title="Confusion Matrix", xlabel="Predicted", ylabel="Actual");

In [None]:
# Distribution of the class-1 network output for true class-1 and true class-0
# test events, both normalised to unit area.
# NOTE(review): both histograms show column 1 of the output, although the
# second legend entry says "probability = negative" -- confirm the wording.
class1_score = pred_test[:, 1]
sm_hh_prob = class1_score[y_test == 1]
data_prob = class1_score[y_test == 0]
hist_style = dict(histtype="step", density=True)
h1, be, _ = plt.hist(
    sm_hh_prob, bins=50, label="MC SM HH (probability = positive)", **hist_style
)
# Re-use the bin edges of the first histogram so both share the same binning.
h2, _, _ = plt.hist(
    data_prob, bins=be, label="data 2b (probability = negative)", **hist_style
)
plt.title("NN probability score", fontsize=18)
plt.xlabel("NN predicted probability", fontsize=18)
plt.ylabel("arb.units", fontsize=18)
plt.ylim(0, 2.2)
plt.legend()
# NOTE(review): every configuration in this notebook writes to the same
# Classifier_hist.png, so earlier plots are overwritten.
plt.savefig("Classifier_hist.png", dpi=300)

In [None]:
# Persist the trained network and the fitted scaler next to each other.
output_dir = "./classifier_models/"
model.save(output_dir + "s9+dRjj_1+dRjj_2+pt_hh_model")
# The context manager closes the pickle file as soon as the dump is done,
# instead of leaving the handle open until garbage collection.
with open(output_dir + "StandardScaler_s9+dRjj_1+dRjj_2+pt_hh.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

## s9 + dRjj_1 + pairing_score_1

In [None]:
# Network inputs for this configuration.
# Candidates currently switched off:
#   bkt_lead_jet_pt, bkt_third_lead_jet_pt, pT_h1, cos_theta_star, njets,
#   pT_2, pT_4, eta_i, dRjj_2, m_min_dj, m_max_dj, pairing_score_2,
#   {E,eta,phi}_h1, {E,pT,eta,phi}_h2,
#   {m,E,eta,phi}_{h1,h2}_{j1,j2}, year
features = [
    "m_hh",
    "X_hh",
    "dEta_hh",
    "X_wt_tag",
    "year_16",
    "year_17",
    "year_18",
    "bkt_0",
    "bkt_1",
    "pt_hh",
    "m_h1",
    "m_h2",
    "dRjj_1",
    "pairing_score_1",
]

In [None]:
# Stack signal and background rows into one frame with a fresh integer index,
# then pull out the inputs, the labels, the per-event weights and the row ids.
df_data = pd.concat((signal_df, bkg_df), axis=0, ignore_index=True)
X, y, weights = df_data[features], df_data["class"], df_data["sample_weight"]
idx = df_data.index

In [None]:
# Split into train 70%, validation 15% and test 15% of the dataset.
# The first call keeps 70% for training; the second halves the remaining 30%.
# A fixed random_state makes the partition reproducible across kernel restarts,
# so repeated runs on the same df_data are evaluated on the same test events.
SPLIT_SEED = 42
(
    X_train,
    X_test_validate,
    y_train,
    y_test_validate,
    weights_train,
    weights_test_validate,
    idx_train,
    idx_test_validate,
) = train_test_split(
    X,
    y,
    weights,
    list(idx),
    test_size=0.3,
    random_state=SPLIT_SEED,
)
(
    X_test,
    X_val,
    y_test,
    y_val,
    weights_test,
    weights_val,
    idx_test,
    idx_val,
) = train_test_split(
    X_test_validate,
    y_test_validate,
    weights_test_validate,
    idx_test_validate,
    test_size=0.5,
    random_state=SPLIT_SEED,
)

In [None]:
# Standardise the inputs: the scaler is fitted on the training split only and
# then applied unchanged to the validation and test splits.
scaler = StandardScaler()
X_train_sc = scaler.fit_transform(X_train)
X_val_sc, X_test_sc = scaler.transform(X_val), scaler.transform(X_test)
# Convert the integer labels to binary class matrices.
y_train_hot, y_val_hot, y_test_hot = (
    to_categorical(labels) for labels in (y_train, y_val, y_test)
)

# Summed training weights per class and their ratio (background / signal).
is_sig_train = y_train == 1
N_bkg_train = weights_train[y_train == 0].sum()
N_sig_train = weights_train[is_sig_train].sum()
R = N_bkg_train / N_sig_train
# Training weights: background keeps its per-event weight, every signal event
# gets the constant R.
# NOTE(review): the original signal weights are replaced by R, not multiplied
# by it -- confirm this is the intended balancing.
weights_train_R = np.array(weights_train)
weights_train_R[is_sig_train] = R

In [None]:
# Fully connected classifier: two hidden ReLU layers of 200 units, each followed
# by 20% dropout, and a 2-unit softmax output. Input width = number of features.
model = Sequential(
    [
        Dense(200, input_dim=len(features), activation="relu"),
        Dropout(0.2),
        Dense(200, activation="relu"),
        Dropout(0.2),
        Dense(2, activation="softmax"),
    ]
)
model.summary()

In [None]:
# Adam optimiser (learning rate 5e-4), categorical cross-entropy loss,
# accuracy as the tracked metric.
# note, could have used utils.F1_Score() in metric
opt = tf.keras.optimizers.Adam(learning_rate=5e-4)
model.compile(
    optimizer=opt,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train on the scaled training split with the re-weighted sample weights.
# Early stopping watches val_loss with a patience of 15 epochs.
# restore_best_weights=True rolls the model back to the epoch with the lowest
# val_loss; without it the model keeps the weights from the last epoch run,
# i.e. 15 epochs after the best one.
early_stop = EarlyStopping(
    monitor="val_loss",
    patience=15,
    verbose=True,
    restore_best_weights=True,
)
history = model.fit(
    X_train_sc,
    y_train_hot,
    sample_weight=weights_train_R,
    epochs=100,
    callbacks=[early_stop],
    batch_size=1000,
    # validation data (inputs, one-hot labels, per-event weights)
    validation_data=(X_val_sc, y_val_hot, weights_val),
)

In [None]:
# Training and validation accuracy per epoch, as recorded by model.fit.
for metric_name in ("accuracy", "val_accuracy"):
    plt.plot(history.history[metric_name], label=metric_name)
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(loc="lower right")

In [None]:
# Softmax outputs of the network for the scaled test split.
pred_test = model.predict(x=X_test_sc)

In [None]:
# Column 1 of the network output is used as the score of class 1, column 0 as
# the score of class 0.
pred_negative, pred_positive = pred_test[:, 0], pred_test[:, 1]
# Weighted AUC and ROC curve on the test split.
auc = roc_auc_score(y_test, pred_positive, sample_weight=weights_test)
fpr, tpr, _ = roc_curve(y_test, pred_positive, sample_weight=weights_test)
# Diagonal reference line.
x_fpr = np.linspace(0, 1, 50)
y_tpr = np.linspace(0, 1, 50)
# Draw the classifier curve and the diagonal.
plt.plot(fpr, tpr, ls="-", label="NN")
plt.plot(x_fpr, y_tpr, ls="--", label="random guess")
plt.legend()
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title(f"AUC: {auc:.3f}", loc="right")
plt.title("ROC curve", loc="left")
# NOTE(review): another section of this notebook also writes ROC.png, so the
# file is overwritten when both run.
plt.savefig("ROC.png", dpi=300)

In [None]:
# Hard class decision: round the class-1 output to 0 or 1.
pred_test_b = np.around(pred_test[:, 1])
# Unweighted number of test events per true class, stored as a (2, 1) column so
# that it divides the 2x2 matrix row by row.
n_zero = len(y_test[y_test == 0])
n_one = len(y_test[y_test == 1])
div_arr = np.array([[n_zero], [n_one]])
# Confusion matrix with each row (true class) divided by that class's event
# count. Sample weights are not applied here.
conf_mat = confusion_matrix(y_test, pred_test_b, labels=[0, 1]) / div_arr
heatmap_ax = sns.heatmap(conf_mat, annot=True)
heatmap_ax.set(title="Confusion Matrix", xlabel="Predicted", ylabel="Actual");

In [None]:
# Distribution of the class-1 network output for true class-1 and true class-0
# test events, both normalised to unit area.
# NOTE(review): both histograms show column 1 of the output, although the
# second legend entry says "probability = negative" -- confirm the wording.
class1_score = pred_test[:, 1]
sm_hh_prob = class1_score[y_test == 1]
data_prob = class1_score[y_test == 0]
hist_style = dict(histtype="step", density=True)
h1, be, _ = plt.hist(
    sm_hh_prob, bins=50, label="MC SM HH (probability = positive)", **hist_style
)
# Re-use the bin edges of the first histogram so both share the same binning.
h2, _, _ = plt.hist(
    data_prob, bins=be, label="data 2b (probability = negative)", **hist_style
)
plt.title("NN probability score", fontsize=18)
plt.xlabel("NN predicted probability", fontsize=18)
plt.ylabel("arb.units", fontsize=18)
plt.ylim(0, 2.2)
plt.legend()
# NOTE(review): every configuration in this notebook writes to the same
# Classifier_hist.png, so earlier plots are overwritten.
plt.savefig("Classifier_hist.png", dpi=300)

In [None]:
# Persist the trained network and the fitted scaler next to each other.
output_dir = "./classifier_models/"
model.save(output_dir + "s9+dRjj_1+pairing_score_1_model")
# The context manager closes the pickle file as soon as the dump is done,
# instead of leaving the handle open until garbage collection.
with open(output_dir + "StandardScaler_s9+dRjj_1+pairing_score_1.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)