# Parametric Neural Network

In [None]:
from ROOT import TH1F, TH2F, TCanvas, TF1, TPad, gROOT, TGaxis, gStyle, TList, TPaveStats, TLatex, TLegend, gPad, kTRUE, TGraphErrors, TGraph, TMultiGraph

import pandas as pd
from pandas.plotting import table 
import numpy as np
import math
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import concatenate
from tensorflow.keras.callbacks import History, EarlyStopping
from tensorflow.keras.metrics import Recall, Precision, BinaryAccuracy, AUC, FalseNegatives, TrueNegatives, FalsePositives, TruePositives
from tensorflow.keras.losses import sparse_categorical_crossentropy
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import plot_model
from tensorflow.keras.initializers import Constant
from tensorflow.keras import Model, Input

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score

from CMS_graphics import CMS_lumi
from CMS_graphics import tdrstyle

import seaborn as sns
from tensorflow.keras.optimizers import Adam, SGD

import matplotlib.pyplot as plt
import matplotlib as mpl

# Default size for every matplotlib figure created afterwards.
mpl.rcParams['figure.figsize'] = (12, 10)
# Colour list of the active matplotlib property cycle; the plotting helpers index into it.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

# Switch ROOT to batch mode and apply the TDR style shipped in CMS_graphics.
gROOT.SetBatch(kTRUE)
tdrstyle.setTDRStyle()

# Luminosity label stored on the CMS_lumi module (presumably read by CMS_lumi.CMS_lumi when it decorates a pad).
# NOTE(review): the value is 35.8 fb^-1 while the trailing comment says 2017 - confirm the data-taking year.
CMS_lumi.lumi_13TeV = "%0.1f fb^{-1}" % (35.8) # 2017

In [None]:
# Base directory for the CSV inputs read in the following cells (path keeps its trailing slash
# because file names are appended by plain string concatenation).
outputFolder = './data/'

In [None]:
# Read the signal and background tables from CSV files under `outputFolder`.
#signal_resampled_df = pd.read_csv(outputFolder + "signal_resample.csv")
signal_df = pd.read_csv(outputFolder + "signal.csv")
background_df = pd.read_csv(outputFolder + "background.csv")

Feature list for pNN with mass as single feature:

In [None]:
# 14 input columns (the mass "mA" plus 13 kinematic/multiplicity columns); this matches the
# 14-wide Input layer of make_model. The next cell assigns the same name `features`,
# so only one of the two lists should be in effect for a given run.
features = ["mA",
            "dimuon_deltar",
            "dimuon_deltaphi",
            "dimuon_deltaeta",
            "met_pt",
            "deltar_bjet1_dimuon",
            "deltapt_bjet1_dimuon",
            "deltaeta_bjet1_dimuon",
            "bjet_1_pt",
            "bjet_1_eta",
            "ljet_1_pt",
            "ljet_1_eta",
            "bjet_n",
            "ljet_n"]

Feature list for pNN with mass as linear combination:

In [None]:
# 13 input columns without "mA"; this matches the 13-wide 'features' input of
# make_model_functional, where the mass enters through its own 'mass' input.
# Assigning here replaces any `features` list defined by the previous cell.
features = ["dimuon_deltar",
            "dimuon_deltaphi",
            "dimuon_deltaeta",
            "met_pt",
            "deltar_bjet1_dimuon",
            "deltapt_bjet1_dimuon",
            "deltaeta_bjet1_dimuon",
            "bjet_1_pt",
            "bjet_1_eta",
            "ljet_1_pt",
            "ljet_1_eta",
            "bjet_n",
            "ljet_n"]

In [None]:
# Report how many signal and background rows were loaded and their share of the total.
sig = len(signal_df)
bkg = len(background_df)
total = sig + bkg
summary_template = 'Total samples:\n    Total: {}\n    Signal: {} ({:.2f}% of total)\n    Background: {} ({:.2f}% of total)\n'
print(summary_template.format(total, sig, 100 * sig / total, bkg, 100*bkg/total))

### Split samples into train/validation/test, then merge signal and background

In [None]:
# Use a utility from sklearn to split and shuffle our dataset.
# Signal and background are split separately: 20% of each is held out for testing,
# then 10% of what remains is held out for validation.
strain_df, stest_df = train_test_split(signal_df, test_size=0.2)
strain_df, sval_df = train_test_split(strain_df, test_size=0.1)
btrain_df, btest_df = train_test_split(background_df, test_size=0.2)
btrain_df, bval_df = train_test_split(btrain_df, test_size=0.1)


def _merge_and_shuffle(signal_part, background_part):
    """Stack two DataFrames, shuffle the rows with a fixed seed and renumber the index.

    pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
    """
    merged = pd.concat([signal_part, background_part], ignore_index=True)
    return merged.sample(frac=1, random_state=200).reset_index(drop=True)


def _feature_matrix(frame):
    """Return the columns of *frame* that are listed in `features` as a NumPy array.

    Index.intersection is the explicit spelling of the old set-style `columns & features`,
    so the same columns are selected as before.
    """
    return frame[frame.columns.intersection(features)].to_numpy()


def _column_vector(frame, column):
    """Return one column of *frame* as an (n, 1) NumPy array."""
    return np.array(frame[column]).reshape(-1, 1)


# Merge signal and background for every set.
# The first 1000 test rows of each class are set aside as a separate "plot" sample;
# the remaining test rows form the actual test set.
train_df = _merge_and_shuffle(strain_df, btrain_df)
val_df = _merge_and_shuffle(sval_df, bval_df)
test_df = _merge_and_shuffle(stest_df[1000:], btest_df[1000:])
plot_df = _merge_and_shuffle(stest_df[:1000], btest_df[:1000])

# Form np arrays of labels and features.
train_a_labels = _column_vector(train_df, 'type')
val_a_labels = _column_vector(val_df, 'type')
test_a_labels = _column_vector(test_df, 'type')
plot_a_labels = _column_vector(plot_df, 'type')

train_a_features = _feature_matrix(train_df)
val_a_features = _feature_matrix(val_df)
test_a_features = _feature_matrix(test_df)
plot_a_features = _feature_matrix(plot_df)

train_a_mass = _column_vector(train_df, 'mA')
val_a_mass = _column_vector(val_df, 'mA')
test_a_mass = _column_vector(test_df, 'mA')
plot_a_mass = _column_vector(plot_df, 'mA')


# Extract weights in np
w2 = train_df[['weight']].to_numpy()
w2 = w2.reshape(-1, 1)

w_test = test_df[['weight']].to_numpy()
w_test = w_test.reshape(-1,1)

# Normalise data with StandardScaler from sklearn.
# Both scalers are fitted on the training set only and then applied unchanged to the
# validation, test and plot sets.
scaler = StandardScaler()
scaler_mass = StandardScaler()
train_a_features = scaler.fit_transform(train_a_features)
val_a_features = scaler.transform(val_a_features)
test_a_features = scaler.transform(test_a_features)
plot_a_features = scaler.transform(plot_a_features)

train_a_mass = scaler_mass.fit_transform(train_a_mass)
val_a_mass = scaler_mass.transform(val_a_mass)
test_a_mass = scaler_mass.transform(test_a_mass)
plot_a_mass = scaler_mass.transform(plot_a_mass)

In [None]:
def _print_shapes(kind, train, val, test, plot):
    """Print the shape of the four subsets of one array family, followed by a blank separator."""
    subsets = (('Training', train), ('Validation', val), ('Test', test), ('Plot', plot))
    for subset, array in subsets:
        print('{} {} shape:'.format(subset, kind), array.shape)
    print('\n')


def _print_class_balance(title, n_sig, n_bkg, n_total):
    """Print absolute counts and percentages of signal and background for one subset."""
    print('{}:\n    Total: {}\n    Signal: {} ({:.2f}% of total)\n    Background: {} ({:.2f}% of total)\n'.format(
        title, n_total, n_sig, 100 * n_sig / n_total, n_bkg, 100*n_bkg/n_total))


# Array shapes handed to the network.
_print_shapes('labels', train_a_labels, val_a_labels, test_a_labels, plot_a_labels)
_print_shapes('features', train_a_features, val_a_features, test_a_features, plot_a_features)
_print_shapes('mass', train_a_mass, val_a_mass, test_a_mass, plot_a_mass)
print('Training weights:', w2.shape)

# np.bincount on the 'type' column yields (count of 0, count of 1) = (background, signal).
bkg_a, sig_a = np.bincount(train_df['type'])
total_a = sig_a + bkg_a
print('\n')
_print_class_balance('Training', sig_a, bkg_a, total_a)

bkg2, sig2 = np.bincount(val_df['type'])
total2 = sig2 + bkg2
_print_class_balance('Validation', sig2, bkg2, total2)

bkg3, sig3 = np.bincount(test_df['type'])
total3 = sig3 + bkg3
_print_class_balance('Test', sig3, bkg3, total3)

bkg4, sig4 = np.bincount(plot_df['type'])
total4 = sig4 + bkg4
_print_class_balance('Plot', sig4, bkg4, total4)

In [None]:
# Each class is weighted by the inverse of its training count, rescaled by total/2:
# this keeps the loss at a similar magnitude and leaves the sum of all example weights unchanged.
class_weight_a = {
    0: (1 / bkg_a)*(total_a)/2.0,
    1: (1 / sig_a)*(total_a)/2.0,
}
weight_for_0_a = class_weight_a[0]
weight_for_1_a = class_weight_a[1]

print('Weight for class 0: {:.2f}'.format(weight_for_0_a))
print('Weight for class 1: {:.2f}'.format(weight_for_1_a))

In [None]:
# Log of the signal/background ratio in the training set, as a one-element array;
# passed to the model builders as `output_bias` to initialise the bias of the sigmoid output layer.
initial_bias_a = np.log([sig_a/bkg_a])

In [None]:
# Stop training when val_loss has not improved by at least 1e-4 for 10 epochs and
# restore the weights of the best epoch.
callback = EarlyStopping(monitor='val_loss', patience=10, min_delta = 0.0001, mode = 'min', restore_best_weights=True, verbose=1)
# Metrics attached to every model. The AUC entry is deliberately last: later cells read it
# from the evaluate() result with index -1.
METRICS = [
      TruePositives(name='tp'),
      FalsePositives(name='fp'),
      TrueNegatives(name='tn'),
      FalseNegatives(name='fn'), 
      BinaryAccuracy(name='accuracy'),
      Precision(name='precision'),
      Recall(name='recall'),
      AUC(name='auc'),
]

In [None]:
def make_model(metrics=METRICS, output_bias=None, n_nodes=(300, 150, 100, 50),
               dropout=0.1, learning_rate=0.001, n_features=14):
    """Build and compile the sequential classifier that takes the mass as an ordinary feature.

    Parameters
    ----------
    metrics : list
        Keras metrics passed to ``compile``.
    output_bias : array-like or None
        If given, used as a constant initial value for the bias of the output layer.
    n_nodes : sequence of int
        Width of each hidden layer, in order.
    dropout : float
        Dropout rate applied after every hidden layer.
    learning_rate : float
        Learning rate of the Adam optimizer.
    n_features : int
        Width of the input layer (14 by default, as before).

    Returns
    -------
    A compiled Keras ``Sequential`` model with one sigmoid output and binary cross-entropy loss.
    """
    if output_bias is not None:
        output_bias = Constant(output_bias)

    model = Sequential()
    model.add(Input(shape=(n_features,)))
    for i, node in enumerate(n_nodes):
        # Only the first hidden layer overrides the default kernel initializer.
        extra = {'kernel_initializer': 'random_normal'} if i == 0 else {}
        model.add(Dense(node, activation='relu', **extra))
        model.add(Dropout(dropout))
    model.add(Dense(1, activation='sigmoid', bias_initializer=output_bias))

    # Set loss function and optimizer algorithm.
    # `learning_rate=` is the supported keyword; the `lr=` alias is deprecated.
    model.compile(loss='binary_crossentropy',
                  optimizer=Adam(learning_rate=learning_rate),
                  metrics=metrics)
    return model

In [None]:
def make_model_functional(metrics=METRICS, output_bias=None, n_nodes=(300, 150, 100, 50),
                          dropout=0.1, learning_rate=0.001, n_features=13):
    """Build and compile the two-input classifier where the mass is first expanded by a dense layer.

    The model has two named inputs, ``'mass'`` (width 1) and ``'features'`` (width
    ``n_features``). The mass goes through a 23-unit dense layer with no activation, is
    concatenated with the features, and the result feeds the hidden stack.

    Parameters
    ----------
    metrics : list
        Keras metrics passed to ``compile``.
    output_bias : array-like or None
        If given, used as a constant initial value for the bias of the output layer.
    n_nodes : sequence of int
        Width of each hidden layer, in order. May be empty, in which case the output
        layer is attached directly to the concatenated inputs.
    dropout : float
        Dropout rate applied after every hidden layer.
    learning_rate : float
        Learning rate of the Adam optimizer.
    n_features : int
        Width of the ``'features'`` input (13 by default, as before).

    Returns
    -------
    A compiled Keras ``Model`` with inputs ``[mass, features]`` and one sigmoid output.
    """
    if output_bias is not None:
        output_bias = Constant(output_bias)

    mass = Input(shape=(1,), name='mass')
    m = Dense(units=23, bias_initializer='glorot_uniform',
              kernel_initializer='glorot_normal')(mass)
    # Local name differs from the notebook-level `features` list to avoid shadowing it;
    # the layer name 'features' is the key used when feeding the model.
    feature_input = Input(shape=(n_features,), name='features')

    x = concatenate([m, feature_input])

    # Start from the concatenated tensor so `hidden` is defined even without hidden layers.
    hidden = x
    for i, node in enumerate(n_nodes):
        # Only the first hidden layer overrides the default kernel initializer.
        extra = {'kernel_initializer': 'random_normal'} if i == 0 else {}
        hidden = Dense(node, activation='relu', **extra)(hidden)
        hidden = Dropout(dropout)(hidden)
    output = Dense(1, activation='sigmoid', bias_initializer=output_bias, name='Output')(hidden)

    model = Model(inputs=[mass, feature_input], outputs=output)

    # Set loss function and optimizer algorithm.
    # `learning_rate=` is the supported keyword; the `lr=` alias is deprecated.
    model.compile(loss='binary_crossentropy',
                  optimizer=Adam(learning_rate=learning_rate),
                  metrics=metrics)
    return model

In [None]:
# Batch size shared by every fit/predict/evaluate call below, and the upper bound on
# training epochs (training passes the EarlyStopping callback defined above).
batch_size = 15000
epochs = 200

### Baseline Model

Using the mass as single feature

In [None]:
# Build the single-input model with the output bias initialised from the class ratio,
# and write a diagram of the architecture to model.png.
signal_weighted_model_a = make_model(output_bias=initial_bias_a)
#print(signal_weighted_model_a.summary())
plot_model(signal_weighted_model_a, to_file="model.png", show_shapes=True)

# Train with both per-class weights and per-event weights.
# The argument list previously closed after `class_weight`, leaving `sample_weight=w2)`
# as a stray line (a SyntaxError); both keywords are now inside the same call.
signal_weighted_a_history = signal_weighted_model_a.fit(train_a_features,
          train_a_labels,
          batch_size=batch_size,
          epochs=epochs,
          callbacks=[callback],
          validation_data=(val_a_features, val_a_labels),
          class_weight=class_weight_a,
          sample_weight=w2)

In [None]:
# Persist the trained single-input model; this is the file the "Compute predictions" cell loads.
signal_weighted_model_a.save('./saved_models/model.h5')

Using the mass preprocessed as a linear combination

In [None]:
# Build the two-input model. This reuses the names `signal_weighted_model_a` and
# `signal_weighted_a_history`, so it replaces the baseline model/history held in memory.
# The 'features' input is 13 wide, so `train_a_features` must come from the 13-column list.
signal_weighted_model_a = make_model_functional(output_bias=initial_bias_a)
#print(signal_weighted_model_a.summary())
plot_model(signal_weighted_model_a, to_file="model.png", show_shapes=True)

# Inputs are passed as a dict keyed by the Input layer names ('mass', 'features').
signal_weighted_a_history = signal_weighted_model_a.fit({"mass":train_a_mass, "features":train_a_features}, 
          train_a_labels, 
          batch_size=batch_size, 
          epochs=epochs,
          callbacks=[callback],
          validation_data=({"mass":val_a_mass, "features":val_a_features}, val_a_labels),
          class_weight=class_weight_a,
          sample_weight=w2) 

In [None]:
# Persist the trained two-input model under a separate file name (model2.h5).
signal_weighted_model_a.save('./saved_models/model2.h5')

### Plot metrics

In [None]:
# Same figure size and colour cycle as set in the first cell; repeated here so this
# section can be run on its own.
mpl.rcParams['figure.figsize'] = (12, 10)
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

def plot_metrics(history, filename='history_plot.png'):
    """Plot training and validation curves of loss, AUC, precision and recall in a 2x2 grid.

    Parameters
    ----------
    history :
        Object returned by ``model.fit``; its ``epoch`` list and ``history`` dict are read.
        The dict must contain each metric and its ``val_`` counterpart.
    filename : str
        Path of the image file the finished figure is written to.
    """
    metrics = ['loss', 'auc', 'precision', 'recall']
    for n, metric in enumerate(metrics):
        name = metric.replace("_"," ").capitalize()
        plt.subplot(2,2,n+1)
        # Train (solid) and validation (dashed) share one colour.
        plt.plot(history.epoch, history.history[metric], color=colors[0], label='Train')
        plt.plot(history.epoch, history.history['val_'+metric],
             color=colors[0], linestyle="--", label='Val')
        plt.xlabel('Epoch')
        plt.ylabel(name)
        if metric == 'loss':
            # Anchor the loss axis at zero, keep the automatic upper limit.
            plt.ylim([0, plt.ylim()[1]])
        elif metric == 'auc':
            plt.ylim([0.8,1])
        else:
            plt.ylim([0,1])

        plt.legend()

    # Write the figure once, after all four panels exist (previously it was rewritten
    # after every panel).
    plt.savefig(filename)

In [None]:
# Plot the curves of the most recent fit (whichever model cell ran last) and save them to history.png.
plot_metrics(signal_weighted_a_history, filename="history.png")

### Compute predictions

Load here the model (model.h5 for mass as single feature, model2.h5 for mass as linear combination).

In [None]:
# Reload a saved model from disk, replacing the in-memory one. As written this loads the
# single-input model; change the file name to model2.h5 to use the two-input model instead.
signal_weighted_model_a = load_model('./saved_models/model.h5')

Mass as single feature:

In [None]:
# Model outputs on the training and test sets for the single-input model
# (feature matrix only; the mass is one of its columns).
train_predictions_a_signal_weighted = signal_weighted_model_a.predict(train_a_features, batch_size=batch_size)
test_predictions_a_signal_weighted = signal_weighted_model_a.predict(test_a_features, batch_size=batch_size)

Mass as linear combination:

In [None]:
# Same outputs for the two-input model: inputs are passed as a dict keyed by the Input
# layer names. These assignments overwrite the variables set by the single-feature cell.
train_predictions_a_signal_weighted = signal_weighted_model_a.predict({"mass": train_a_mass, "features":train_a_features}, batch_size=batch_size)
test_predictions_a_signal_weighted = signal_weighted_model_a.predict({"mass": test_a_mass, "features":test_a_features}, batch_size=batch_size)

### Plot Output distribution

Function for output with 3 signal samples:

In [None]:
def output_distribution_3(canvas, y_pred_1, y_true_1, y_pred_2, y_true_2, y_pred_3, y_true_3):
    """Fill, normalise, style and draw signal/background histograms for three samples.

    For every threshold ``p`` in ``0.00, 0.01, ..., 0.99`` the signal histogram receives the
    number of label-1 events with output below ``p`` and the background histogram the
    number of label-0 events with output above ``p``. Each histogram is then scaled by
    ``1 / (sum of weights * bin width)`` and drawn on the current pad.

    Parameters
    ----------
    canvas :
        Returned unchanged as the last element of the result; drawing goes to the current pad.
    y_pred_1, y_pred_2, y_pred_3 : array-like
        Model outputs, one value per event (shape ``(n,)`` or ``(n, 1)``).
    y_true_1, y_true_2, y_true_3 : array-like
        Matching 0/1 labels (shape ``(n,)`` or ``(n, 1)``).

    Returns
    -------
    tuple
        ``(sig, bkg, sig2, bkg2, sig3, bkg3, canvas)``; the histograms are named with the
        tags 250, 500 and 800 in that order.
    """
    thresholds = np.arange(0.00, 1.00, 0.01)
    # (name tag, predictions, labels, line width) for each of the three samples.
    samples = (
        ('250', y_pred_1, y_true_1, 12),
        ('500', y_pred_2, y_true_2, 8),
        ('800', y_pred_3, y_true_3, 4),
    )

    def fill(hist_sig, hist_bkg, y_pred, y_true):
        # Flatten so that (n,) and (n, 1) inputs are handled alike, then count per
        # threshold with boolean masks instead of looping over events in Python.
        scores = np.asarray(y_pred).reshape(-1)
        truth = np.asarray(y_true).reshape(-1)
        is_signal = truth == 1
        is_background = truth == 0
        # NOTE(review): the index starts at 0, and in ROOT's bin numbering bin 0 is the
        # underflow bin, so the first threshold lands there and the last visible bin stays
        # empty. Kept exactly as in the original; confirm whether j + 1 was intended.
        for j, p in enumerate(thresholds):
            hist_sig.SetBinContent(j, int(np.count_nonzero(is_signal & (scores < p))))
            hist_bkg.SetBinContent(j, int(np.count_nonzero(is_background & (scores > p))))

    def normalise(hist):
        axis = hist.GetXaxis()
        bin_width = (axis.GetXmax() - axis.GetXmin())/hist.GetNbinsX()
        hist.Scale(1./hist.GetSumOfWeights()/bin_width)

    def style(hist_sig, hist_bkg, line_width):
        hist_sig.SetFillColorAlpha(4,0.1)
        hist_sig.SetLineColor(4)
        hist_sig.SetLineWidth(line_width)
        hist_sig.SetStats(0)
        hist_bkg.SetLineColor(2)
        hist_bkg.SetFillColor(2)
        hist_bkg.SetFillStyle(3352)
        hist_bkg.SetLineWidth(line_width)

    # Book all six histograms first, in the same order as before.
    pairs = []
    for tag, _, _, _ in samples:
        hist_sig = TH1F('signal_' + tag, 'pNN output ; pNN output; (1/N) dN/dX;', 100, 0, 1)
        hist_bkg = TH1F('background_' + tag, 'pNN Output; pNN output; (1/N) dN/dX;', 100, 0, 1)
        pairs.append((hist_sig, hist_bkg))

    for (hist_sig, hist_bkg), (_, y_pred, y_true, _) in zip(pairs, samples):
        fill(hist_sig, hist_bkg, y_pred, y_true)

    for hist_sig, hist_bkg in pairs:
        normalise(hist_sig)
        normalise(hist_bkg)

    for (hist_sig, hist_bkg), (_, _, _, line_width) in zip(pairs, samples):
        style(hist_sig, hist_bkg, line_width)

    (sig, bkg), (sig2, bkg2), (sig3, bkg3) = pairs

    # The first histogram drawn defines the frame; the others are overlaid on it.
    bkg3.Draw("HIST")
    sig.Draw("SAME HIST")
    sig2.Draw("SAME HIST")
    sig3.Draw("SAME HIST")
    bkg2.Draw("SAME HIST")
    bkg.Draw("SAME HIST")
    return sig, bkg, sig2, bkg2, sig3, bkg3, canvas

Function for output with 1 signal sample:

In [None]:
def output_distribution_1(canvas, y_pred_1, y_true_1):
    """Fill, normalise, style and draw one signal and one background histogram.

    For every threshold ``p`` in ``0.00, 0.02, ..., 0.98`` the signal histogram receives the
    number of label-1 events with output below ``p`` and the background histogram the
    number of label-0 events with output above ``p``. Both are then scaled by
    ``1 / (sum of weights * bin width)`` and drawn on the current pad.

    Parameters
    ----------
    canvas :
        Returned unchanged as the last element of the result; drawing goes to the current pad.
    y_pred_1 : array-like
        Model outputs, one value per event (shape ``(n,)`` or ``(n, 1)``).
    y_true_1 : array-like
        Matching 0/1 labels (shape ``(n,)`` or ``(n, 1)``).

    Returns
    -------
    tuple
        ``(sig, bkg, canvas)``.
    """
    sig = TH1F('signal_250', 'pNN output ; pNN output; (1/N) dN/dX ;', 50, 0, 1)
    bkg = TH1F('background_250', 'pNN Output; pNN output; (1/N) dN/dX', 50, 0, 1)

    # Flatten so that (n,) and (n, 1) inputs are handled alike, then count per threshold
    # with boolean masks instead of looping over events in Python.
    scores = np.asarray(y_pred_1).reshape(-1)
    truth = np.asarray(y_true_1).reshape(-1)
    is_signal = truth == 1
    is_background = truth == 0

    # NOTE(review): the index starts at 0, and in ROOT's bin numbering bin 0 is the
    # underflow bin, so the first threshold lands there and the last visible bin stays
    # empty. Kept exactly as in the original; confirm whether j + 1 was intended.
    for j, p in enumerate(np.arange(0.00, 1.00, 0.02)):
        sig.SetBinContent(j, int(np.count_nonzero(is_signal & (scores < p))))
        bkg.SetBinContent(j, int(np.count_nonzero(is_background & (scores > p))))

    for hist in (sig, bkg):
        axis = hist.GetXaxis()
        bin_width = (axis.GetXmax() - axis.GetXmin())/hist.GetNbinsX()
        hist.Scale(1./hist.GetSumOfWeights()/bin_width)

    sig.SetFillColorAlpha(4,0.1)
    sig.SetLineColor(4)
    sig.SetLineWidth(8)
    sig.SetStats(0)
    bkg.SetLineColor(2)
    bkg.SetFillColor(2)
    bkg.SetFillStyle(3352)
    bkg.SetLineWidth(8)
    # The signal histogram defines the frame; background is overlaid on it.
    sig.Draw("HIST")
    bkg.Draw("SAME HIST")
    return sig, bkg, canvas

Plot the output distribution for masses 250, 500 and 800

In [None]:
df_250 = pd.read_csv(outputFolder + "signal/MA250.csv")
df_500 = pd.read_csv(outputFolder + "signal/MA500.csv")
df_800 = pd.read_csv(outputFolder + "signal/MA800.csv")


def _mass_point_sample(signal_frame, n_background):
    """Mix one signal sample with the first *n_background* test-background rows and score it.

    Returns the shuffled DataFrame, its scaled feature matrix, the model output and the
    'type' labels. pd.concat / Index.intersection replace DataFrame.append and the
    set-style `columns & features`, which no longer exist in pandas 2.x.
    """
    frame = pd.concat([btest_df[0:n_background], signal_frame], ignore_index=True)
    frame = frame.sample(frac=1, random_state=200).reset_index(drop=True)
    inputs = frame[frame.columns.intersection(features)].to_numpy()
    inputs = scaler.transform(inputs)
    prediction = signal_weighted_model_a.predict(inputs, batch_size=batch_size)
    return frame, inputs, prediction, frame["type"].to_numpy()


# NOTE(review): the background row counts are hard-coded; presumably they match the sizes
# of the corresponding signal files - confirm, or derive them with len(df_XXX).
###
output_df_250, output_features_250, output_prediction_250, output_labels_250 = _mass_point_sample(df_250, 4137)
###
output_df_500, output_features_500, output_prediction_500, output_labels_500 = _mass_point_sample(df_500, 13630)
###
output_df_800, output_features_800, output_prediction_800, output_labels_800 = _mass_point_sample(df_800, 14128)

In [None]:
# Draw the three-mass-point overlay on a fresh canvas. The returned histograms are kept in
# notebook variables (this rebinds `sig` and `bkg`) and reused for the legend entries.
c1 = TCanvas('c1','output distribution', 4000, 2000)

sig, bkg, sig2, bkg2, sig3, bkg3, canvas = output_distribution_3(c1, output_prediction_250, output_labels_250, output_prediction_500, output_labels_500, output_prediction_800, output_labels_800)

# One line-style ("L") legend entry per histogram; fill style 0 and line width 0 on the box.
legend = TLegend(.20,.9,.40,.75)
legend.AddEntry(sig, "Signal 250","L")
legend.AddEntry(bkg, "Background 250","L")
legend.AddEntry(sig2, "Signal 500","L")
legend.AddEntry(bkg2, "Background 500","L")
legend.AddEntry(sig3, "Signal 800","L")
legend.AddEntry(bkg3, "Background 800","L")
legend.SetFillStyle(0)
legend.SetLineWidth(0)
legend.Draw()

# Add the CMS label/luminosity decoration, then render. Saving to file is left disabled.
CMS_lumi.CMS_lumi(c1,4,0)
c1.Draw()
#c1.SaveAs("output_800.png")

Plot the output distribution only for total signal vs total background

In [None]:
# Same plot for the whole test set: one signal and one background histogram built from the
# test predictions and labels. Rebinds `c1`, `sig`, `bkg` and `legend`.
c1 = TCanvas('c1','output distribution', 4000, 2000)

sig, bkg, c1 = output_distribution_1(c1, test_predictions_a_signal_weighted, test_a_labels)

# One line-style ("L") legend entry per histogram; fill style 0 and line width 0 on the box.
legend = TLegend(.20,.88,.40,.75)
legend.AddEntry(sig, "Signal","L")
legend.AddEntry(bkg, "Background","L")
legend.SetFillStyle(0)
legend.SetLineWidth(0)
legend.Draw()

# Add the CMS label/luminosity decoration, then render. Saving to file is left disabled.
CMS_lumi.CMS_lumi(c1,4,0)
c1.Draw()
#c1.SaveAs("output_800.png")

Plot of AUC vs MA (mass as single feature):

In [None]:
# AUC of the single-input model per mass hypothesis. For each mass the signal file is
# mixed with an equal number of test-background events whose dimuon mass lies within
# +-80 of that mass, and the weighted AUC is read from model.evaluate().
masses = [110,120,130,140,150,160,170,180,190,200,225,250,275,300,350,400,450,500,600,700,800,900,1000]
AUC_array = []
events = []
for mA in masses:
    df = pd.read_csv(outputFolder + "signal/MA" + str(mA) + ".csv")

    # Background restricted to the open window (mA - 80, mA + 80) in dimuon mass.
    in_window = (btest_df["dimuon_M"] < (mA + 80.)) & (btest_df["dimuon_M"] > (mA - 80.))
    btest_df_mA = btest_df[in_window]

    # Balance the classes by truncating the larger sample to the size of the smaller one.
    n_per_class = min(len(df), len(btest_df_mA))
    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
    output_df = pd.concat([btest_df_mA[0:n_per_class], df[0:n_per_class]], ignore_index=True)
    output_df = output_df.sample(frac=1, random_state=200).reset_index(drop=True)

    w_output = output_df["weight"].to_numpy().reshape(-1, 1)
    # Index.intersection is the explicit form of the old set-style `columns & features`.
    output_features = output_df[output_df.columns.intersection(features)].to_numpy()
    output_features = scaler.transform(output_features)
    output_labels = output_df["type"].to_numpy()

    weighted_results = signal_weighted_model_a.evaluate(output_features, output_labels,
                                  batch_size=batch_size, sample_weight=w_output, verbose=0)

    # AUC is the last entry of METRICS, hence the last value returned by evaluate().
    AUC_array.append(weighted_results[-1])

    print("For mass: " + str(mA))
    print("Signal is: " + str(n_per_class) + " and background is: " + str(n_per_class) + "\n")
    events.append(len(output_df))

# The curve shows AUC values, so label it accordingly (it was labelled 'events').
plt.plot(masses, AUC_array,'.-', label='AUC score')
plt.xlabel('mA', fontsize=15)
plt.ylabel('AUC', fontsize=15)
plt.tick_params(axis='x', labelsize=15)
plt.tick_params(axis='y', labelsize=15)
plt.legend()

plt.savefig('AUCvsMass.png')

Plot of AUC vs MA (mass as linear combination):

In [None]:
# AUC of the two-input model per mass hypothesis. For each mass the signal file is mixed
# with the first len(df) test-background events and the AUC is read from model.evaluate().
masses = [110,120,130,140,150,160,170,180,190,200,225,250,275,300,350,400,450,500,600,700,800,900,1000]
AUC_array = []
for mA in masses:
    df = pd.read_csv(outputFolder + "signal/MA" + str(mA) + ".csv")
    # NOTE(review): this mass-window selection is computed but never used below - the
    # background is taken from the full `btest_df`, unlike the single-feature cell.
    # Confirm which behaviour is intended.
    btest_df_upper = btest_df[btest_df["dimuon_M"] < (mA + 80.)]
    btest_df_mA = btest_df_upper[btest_df_upper["dimuon_M"] > (mA - 80.)]

    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
    output_df = pd.concat([btest_df[0:len(df)], df], ignore_index=True)
    output_df = output_df.sample(frac=1, random_state=200).reset_index(drop=True)

    # Index.intersection is the explicit form of the old set-style `columns & features`.
    output_features = output_df[output_df.columns.intersection(features)].to_numpy()
    output_features = scaler.transform(output_features)
    output_mass = np.array(output_df["mA"]).reshape(-1, 1)
    output_mass = scaler_mass.transform(output_mass)
    output_labels = np.array(output_df["type"]).reshape(-1,1)

    weighted_results = signal_weighted_model_a.evaluate({"mass": output_mass, "features":output_features}, output_labels,
                                  batch_size=batch_size, verbose=0)

    # AUC is the last entry of METRICS, hence the last value returned by evaluate().
    AUC_array.append(weighted_results[-1])

    print("For mass: " + str(mA))
    print("Signal is: " + str(len(df)) + " and background is: " + str(len(btest_df[0:len(df)])) + "\n")


plt.plot(masses, AUC_array,'.-', label='AUC score')
plt.xlabel('mA', fontsize=15)
plt.ylabel('AUC', fontsize=15)
plt.tick_params(axis='x', labelsize=15)
plt.tick_params(axis='y', labelsize=15)
plt.legend()

# Save before showing: once plt.show() has returned the figure may already be released,
# in which case savefig would write an empty image.
plt.savefig('AUCvsMass.png')
plt.show()

### Confusion Matrix (only for mass as single feature)

In [None]:
def plot_cm(labels, predictions, p=0.5):
    """Draw the confusion matrix at cut value *p* as a heat map and print its four entries.

    Parameters
    ----------
    labels : array-like
        True 0/1 labels.
    predictions : array-like
        Model outputs; an event is classified as signal when its output exceeds *p*.
    p : float
        Decision threshold applied to *predictions*.
    """
    predicted_signal = predictions > p
    matrix = confusion_matrix(labels, predicted_signal)

    plt.figure(figsize=(5,5))
    sns.heatmap(matrix, annot=True, fmt="d")
    plt.title('Confusion matrix @{:.2f}'.format(p))
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')

    # Rows are the actual class, columns the predicted class (0 = background, 1 = signal).
    background_row = matrix[0]
    signal_row = matrix[1]
    print('Background correctly identified (True Negatives): ', background_row[0])
    print('Background identified as signal (False Positives): ', background_row[1])
    print('Signal identified as background (False Negatives): ', signal_row[0])
    print('Signal correctly identified (True Positives): ', signal_row[1])
    print('Total signal: ', np.sum(signal_row))

In [None]:
# Evaluate the single-input model on the test set (no sample weights here) and print
# every metric next to its name.
weighted_results = signal_weighted_model_a.evaluate(test_a_features, test_a_labels,
                                  batch_size=batch_size, verbose=0)
for name, value in zip(signal_weighted_model_a.metrics_names, weighted_results):
    print(name, ': ', value)
print()

# Confusion matrix of the test predictions at a cut of 0.47 on the model output.
plot_cm(test_a_labels, test_predictions_a_signal_weighted, p=0.47)

### Plot ROC (only for mass as single feature)

In [None]:
def plot_roc(name, labels, predictions, **kwargs):
    """Add one ROC curve (background rejection versus signal efficiency, in percent) to the current axes.

    Parameters
    ----------
    name : str
        Legend prefix; the AUC value is appended to it.
    labels : array-like
        True 0/1 labels.
    predictions : array-like
        Model outputs.
    **kwargs :
        Forwarded to ``plt.plot`` (for example ``color`` or ``linestyle``).
    """
    false_positive_rate, true_positive_rate, _ = roc_curve(labels, predictions)
    auc_value = roc_auc_score(labels, predictions)
    curve_label = name + ": AUC={:.3f}".format(auc_value)

    # x: signal efficiency; y: background rejection = 1 - false-positive rate.
    signal_efficiency = 100*true_positive_rate
    background_rejection = 100*(1-false_positive_rate)
    plt.plot(signal_efficiency, background_rejection, label=curve_label, linewidth=2, **kwargs)

    plt.ylabel('Background rejection [%]', fontsize=20)
    plt.xlabel('Signal efficiency [%]', fontsize=20)
    plt.title('ROC curve', fontsize=20)
    for axis_name in ('x', 'y'):
        plt.tick_params(axis=axis_name, labelsize=17)
    plt.xlim([-0.5, 100.5])
    plt.ylim([-0.5, 100.5])
    plt.grid(True)
    plt.gca().set_aspect('equal')

In [None]:
# Overlay the train (solid) and test (dashed) ROC curves in the same colour.
plot_roc("Train pNN - Baseline", train_a_labels, train_predictions_a_signal_weighted, color=colors[2])
plot_roc("Test pNN - Baseline", test_a_labels, test_predictions_a_signal_weighted, color=colors[2], linestyle='--')

plt.legend(loc='lower right', fontsize=17)
plt.show()
# NOTE(review): if saving is re-enabled, the savefig call should come before plt.show().
#plt.savefig("roc.png")

### Plot significance curve (only for mass as single feature)

In [None]:
# Model outputs on the "plot" sample (the first 1000 signal and 1000 background test rows
# set aside during the split); these feed the significance curve below.
plot_predictions_a_signal_weighted = signal_weighted_model_a.predict(plot_a_features, batch_size=batch_size)

In [None]:
def efficiency_curve(canvas, y_pred, y_true):
    """Draw signal efficiency, background rejection and S/sqrt(S+B) versus the cut on the model output.

    For each cut ``p`` in ``0.00, 0.01, ..., 0.99`` events with output above ``p`` are
    selected. ``S`` and ``B`` are the selected events with label 1 and 0. The signal
    efficiency is ``S / total signal``, the background rejection ``1 - B / total background``
    and the significance ``S / sqrt(S + B)`` (set to 0 when nothing is selected).
    Efficiency and rejection are drawn in one pad, the significance in a transparent
    overlay pad with its own axis on the right.

    Parameters
    ----------
    canvas :
        ROOT canvas; it is made current again before the overlay pad is drawn and is
        returned as the first element of the result.
    y_pred : array-like
        Model outputs, one value per event (shape ``(n,)`` or ``(n, 1)``).
    y_true : array-like
        Matching labels; expected to contain only 0 and 1.

    Returns
    -------
    tuple
        ``(canvas, mg, legend, axis, graph, a)``: the canvas, the multigraph with the two
        efficiency curves, the legend, the right-hand axis, the significance graph and the
        text box.

    Raises
    ------
    ValueError
        If *y_true* contains no signal or no background event.
    """
    # ---- numbers -------------------------------------------------------------------
    # Flatten so that (n,) and (n, 1) inputs are handled alike.
    scores = np.asarray(y_pred).reshape(-1)
    truth = np.asarray(y_true).reshape(-1)
    is_signal = truth == 1
    is_background = truth == 0
    tot_sig = int(np.count_nonzero(is_signal))
    tot_bkg = int(np.count_nonzero(is_background))
    if tot_sig == 0 or tot_bkg == 0:
        raise ValueError("efficiency_curve needs at least one signal and one background event")

    x = np.arange(0.00, 1.00, 0.01)
    j = len(x)  # number of points in every graph
    eff_sig = np.empty(j)
    eff_bkg = np.empty(j)  # background rejection, i.e. 1 - false-positive rate
    significance = np.empty(j)
    for k, p in enumerate(x):
        selected = scores > p
        Ns = int(np.count_nonzero(selected & is_signal))
        Nb = int(np.count_nonzero(selected & is_background))
        # With no selected event S/sqrt(S+B) is 0/0; use 0 so the maximum search, the
        # axis range and the text formatting below keep working.
        significance[k] = Ns/np.sqrt(Ns+Nb) if (Ns + Nb) > 0 else 0.0
        eff_sig[k] = Ns/tot_sig
        eff_bkg[k] = 1-(Nb/tot_bkg)

    # argmax yields a single index even when several cuts share the maximum.
    best = int(np.argmax(significance))
    max_sig = float(significance[best])
    max_x = float(x[best])
    ymin = 0
    # Keep a non-degenerate axis range even if the significance is 0 everywhere.
    ymax = max(1, math.ceil(max_sig))
    dy = (ymax-ymin)/0.8
    xmin = 0
    xmax = 1

    # ---- drawing -------------------------------------------------------------------
    mg = TMultiGraph("Efficiency","Efficiency")
    legend = TLegend(.18,.42,.38,.28)

    pad1 = TPad("pad1","",0,0,1,1)
    pad2 = TPad("pad2","",0,0,1,1)

    pad2.SetFillStyle(4000) #will be transparent

    pad1.Draw()
    pad1.cd()

    graph1 = TGraph(j,np.array(x, dtype='d'),np.array(eff_sig, dtype='d'))
    graph1.SetLineColor(4)
    graph1.SetLineWidth(4)
    graph1.Draw()
    graph2 = TGraph(j,np.array(x, dtype='d'),np.array(eff_bkg, dtype='d'))
    graph2.SetLineColor(2)
    graph2.SetLineWidth(4)
    graph2.SetTitle("Cut efficiencies and optimal cut")
    # NOTE(review): the plotted values are fractions in [0, 1] while the title says "%".
    graph2.GetYaxis().SetTitle("Efficiency (%)")
    graph2.GetXaxis().SetLabelSize(0.04)
    graph2.GetYaxis().SetLabelSize(0.04)
    graph2.GetXaxis().SetTitle("pNN output")
    graph2.GetXaxis().SetTitleSize(0.04)
    graph2.GetYaxis().SetTitleSize(0.04)
    graph2.Draw()
    mg.Add(graph2)
    mg.Add(graph1)
    graph2.GetXaxis().SetRangeUser(0, 1)
    mg.Draw()
    pad1.Modified()
    pad1.SetGrid()
    canvas.cd()

    # Overlay pad: its user range is padded so that [xmin, xmax] x [ymin, ymax] lines up
    # with the frame of the first pad.
    pad2.Range(xmin-0.206,ymin-0.163*dy,xmax+0.065,ymax+0.05*dy)
    pad2.Draw()
    pad2.cd()

    graph = TGraph(j,np.array(x, dtype='d'),np.array(significance, dtype='d'))
    graph.SetLineColor(3)
    graph.SetLineWidth(8)
    graph.Draw("same")
    pad2.Update()

    CMS_lumi.CMS_lumi(pad2,4,0)

    legend.AddEntry(graph1, "Signal efficiency","L")
    legend.AddEntry(graph2, "Background rejection","L")
    legend.AddEntry(graph, "Significance S/#sqrt{S+B}","L")
    legend.SetFillStyle(0)
    legend.SetLineWidth(0)
    legend.Draw()

    a = TLatex(0.18,0.22,"#splitline{Using " + str(tot_sig) + " signal and " + str(tot_bkg) + " background}{Maximum of significance is at " + str("%.2f" % max_x) + " with a value of " + str("%.2f" % max_sig) + "}")
    a.SetTextSize(0.031)
    a.SetNDC(kTRUE)
    a.Draw()
    # Right-hand axis for the significance scale, in the colour of the significance curve.
    axis = TGaxis(xmax,ymin,xmax,ymax,ymin,ymax,515,"L+")
    axis.SetLabelColor(3)
    axis.SetLabelOffset(0.005)
    axis.SetTitle("Significance")
    axis.SetTitleOffset(0.6)
    axis.SetTextColor(3)
    axis.Draw()

    return canvas, mg, legend, axis, graph, a


In [None]:
# Build the efficiency/significance plot from the "plot" sample predictions. All returned
# ROOT objects are bound to notebook variables (this rebinds `legend`), then the canvas
# is rendered and written to efficiencies.png.
c2 = TCanvas('c2','significance', 4000, 2000)

c2, efficiencies, legend, axis, significance, latex = efficiency_curve(c2, plot_predictions_a_signal_weighted, plot_a_labels)

c2.Draw()
c2.SaveAs("efficiencies.png")