## Report Figures

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from classes import PNN
import pickle

In [None]:
def pre_selection(df):
    """Apply the pre-selection cuts and drop the columns only needed for them.

    Rows are kept when ``region`` is 1 or 2, ``regime`` is 1 and ``nTags``
    is 2. The selection/bookkeeping columns are then removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``region``, ``regime``, ``nTags``,
        ``MCChannelNumber``, ``dEtaBB`` and ``dPhiBB``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the selected rows; ``df`` is not modified.
    """
    in_region = (df["region"] == 1) | (df["region"] == 2)
    selected = in_region & (df["regime"] == 1) & (df["nTags"] == 2)
    # A single mask plus a non-inplace drop avoids mutating a derived slice,
    # which is what can trigger pandas' SettingWithCopyWarning.
    return df.loc[selected].drop(
        columns=["nTags", "MCChannelNumber", "region", "regime", "dEtaBB", "dPhiBB"]
    )

def plotfeature(feature, scaling, xlabel, binning, margin=None, xlim=None, xlog=False, save_as=None):
    """Histogram one feature for the background with three signal overlays.

    Reads the module-level ``background`` and ``signal`` DataFrames; both
    must provide the ``feature`` column and a ``weight`` column, and
    ``signal`` must also provide ``MA``.

    Parameters
    ----------
    feature : str
        Name of the column to histogram.
    scaling : float
        Divisor applied to the column before plotting (the calls in this
        file pass 1000 together with GeV axis labels).
    xlabel : str
        Label for the x axis.
    binning : sequence
        ``(start, stop, width)`` in scaled units; edges are
        ``np.arange(start, stop + width, width)``.
    margin : float, optional
        x margin forwarded to ``Axes.margins``. Left untouched when ``None``.
    xlim : tuple, optional
        Explicit x-axis limits. Left to autoscaling when ``None``.
    xlog : bool, optional
        Use a logarithmic x axis when true.
    save_as : str, optional
        Path the figure is written to; nothing is saved when empty/``None``.
    """
    fig, ax = plt.subplots()
    overlay_masses = [300, 900, 2000]
    edges = np.arange(binning[0], binning[1] + binning[2], binning[2])

    _, bins, _ = ax.hist(background[feature] / scaling, weights=background.weight,
                         label='background', color='grey', alpha=0.4, bins=edges)

    # Reuse the bin edges returned for the background so that the signal
    # histograms are binned identically.
    for mass in overlay_masses:
        sample = signal[signal['MA'] == mass]
        ax.hist(sample[feature] / scaling, weights=sample.weight,
                label=fr'$m_A$ = {mass} GeV', bins=bins, histtype='step')

    ax.set_xlabel(xlabel, fontsize=12)
    ax.set_yscale('log')
    if xlog:
        ax.set_xscale('log')
    ax.set_ylabel(f'Events / {binning[2]} GeV', fontsize=12)
    ax.set_ylim(top=1E+5)
    ax.minorticks_off()

    if margin is not None:
        ax.margins(x=margin)
    # Applied after the margins so that an explicit range always wins.
    if xlim is not None:
        ax.set_xlim(xlim)

    ax.legend()

    if save_as:
        fig.savefig(save_as)
    plt.show()


In [None]:
signal_mass = [300, 420, 440, 460, 500, 600, 700,
              800, 900, 1000, 1200, 1400, 1600, 2000]


def _read_signal_sample(mass):
    """Load the CSV of one signal mass point and tag its rows with that mass."""
    sample = pd.read_csv(f'..//Raw Data/{mass}.csv', index_col=0)
    sample['MA'] = mass
    return sample


# Concatenate once instead of growing the frame inside the loop, which
# re-copies all previously loaded rows on every pass and needs an empty seed.
# reversed(): the previous implementation prepended each new sample, so the
# rows ran from the highest mass to the lowest; that order is kept.
signal = pd.concat([_read_signal_sample(mass) for mass in reversed(signal_mass)])

background = pd.read_csv('..//Raw Data/background.csv', index_col=0)

signal = pre_selection(signal)
background = pre_selection(background)

In [None]:
# Row counts after pre-selection. The value is divided by 1e5, so the label
# states that unit instead of claiming a raw instance count.
print('signal:', len(signal)/1E+5, 'x 1e5 instances')
print('background:', len(background)/1E+5, 'x 1e5 instances')

### Feature plots

In [None]:
# pTB1 divided by 1000, bins of width 100 from 0 to 1600.
plotfeature(
    'pTB1',
    1000,
    r'$p_{T,B1}$ (GeV)',
    binning=(0, 1600, 100),
    margin=0,
    save_as='..//Report Plots/section3/pTB1.png',
)

In [None]:
# ptL1 divided by 1000, bins of width 200 from 0 to 3200.
plotfeature(
    'ptL1',
    1000,
    r'$p_{T,l1}$ (GeV)',
    binning=(0, 3200, 200),
    margin=0,
    save_as='..//Report Plots/section3/ptL1.png',
)


In [None]:
# mVHres divided by 1000, bins of width 200 from 0 to 3600.
# The figure is saved under the name mZh.png.
plotfeature(
    'mVHres',
    1000,
    r'$m_{Zh}$ (GeV)',
    binning=(0, 3600, 200),
    margin=0,
    save_as='..//Report Plots/section3/mZh.png',
)

In [None]:
# mLL divided by 1000, bins of width 5 from 35 to 135.
plotfeature(
    'mLL',
    1000,
    r'$m_{ll}$ (GeV)',
    binning=(35, 135, 5),
    margin=0,
    save_as='..//Report Plots/section3/mLL.png',
)


### Negative Background Weights: Case 3 check 

In [None]:
def _last_to_slot_10(values):
    """Return *values* with its final entry moved to position 10."""
    # Presumably the last result belongs between the 10th and 11th mass
    # points of the ascending mass list used for plotting - TODO confirm.
    return values[:10] + [values[-1]] + values[10:-1]


model = '..//output_temp/report_output/best_state_0.01.pth'
pnn = PNN()
pnn.load_data('..//output_temp/report_output/data_dict_0.01.pkl', testing=True)

# Same model evaluated three times, differing only in the two
# background-weight flags passed to pnn.test.
sig_all = _last_to_slot_10(
    pnn.test(model, 50, 2, remove_negatives=False, positive_bkg=False))
sig_noneg = _last_to_slot_10(
    pnn.test(model, 50, 2, remove_negatives=True, positive_bkg=False))
sig_pos = _last_to_slot_10(
    pnn.test(model, 50, 2, remove_negatives=False, positive_bkg=True))

In [None]:
# Largest relative deviation (in percent) of each variant from sig_all.
reference = np.array(sig_all)
diff1 = np.abs(np.array(sig_noneg) - reference)
diff2 = np.abs(np.array(sig_pos) - reference)

print((diff1 / reference * 100).max())
print((diff2 / reference * 100).max())

In [None]:
# BDT reference significances; the mass == 400 row is removed before plotting.
BDT = pd.read_excel('..//significancesBDT.xlsx')
BDT = BDT.drop(BDT[BDT.mass == 400].index).reset_index()

signal_mass = [300, 420, 440, 460, 500, 600, 700, 800, 900, 1000, 1200, 1400, 1600, 2000]

# (positional plot arguments after x, legend label), drawn in this order.
curves = [
    ((sig_all,), r'PNN with $b_i > 0$'),
    ((BDT.significance,), 'BDT'),
    ((sig_pos,), 'PNN pos'),
    ((sig_noneg, '--'), 'PNN without negative background weights'),
]
for plot_args, curve_label in curves:
    plt.plot(signal_mass, *plot_args, label=curve_label)

plt.xlabel(r'$m_A$', fontsize=12)
plt.ylabel('Significance', fontsize=12)

plt.legend(fontsize=11)

### Classifier outputs

In [None]:
# Evaluate the "_1" model on its test data. The returned value is not used
# in this cell.
model = '..//output_temp/report_output/best_state_1.pth'
pnn = PNN()
pnn.load_data('..//output_temp/report_output/data_dict_1.pkl', testing=True)
_ = pnn.test(model, 50, 2, remove_negatives=False, positive_bkg=False)

In [None]:
# Evaluate the "_0.01" model on its test data. The returned value is not used
# in this cell.
model = '..//output_temp/report_output/best_state_0.01.pth'
pnn = PNN()
pnn.load_data('..//output_temp/report_output/data_dict_0.01.pkl', testing=True)
_ = pnn.test(model, 50, 2, remove_negatives=False, positive_bkg=False)

### Signal scaling on validation set

In [None]:
# To get all the significances
pnn = PNN()
significance_curves = []
for file in [0.001, 0.01, 0.1, 1, 10.0]:
    pnn.load_data(f'..//output_temp/report_output/data_dict_{file}.pkl', validating=True)
    model = f'..//output_temp/report_output/best_state_{file}.pth'
    significances = pnn.validate(model, 50, 2)
    significance_curves.append(significances)

In [None]:
# Mass points for the validation curves (13 entries; there is no 1200 here).
signal_mass = [300, 420, 440, 460, 500, 600, 700, 800, 900, 1000, 1400, 1600, 2000]

# Must list the scaling factors in the same order in which
# significance_curves was filled. Spelling them out keeps the legend labels
# exact instead of deriving them by repeated float multiplication.
scaling_factors = [0.001, 0.01, 0.1, 1.0, 10.0]

# Every curve is compared against the unscaled (factor 1) curve.
baseline = np.array(significance_curves[scaling_factors.index(1.0)])

print('Average increase in significance:')
for factor, significance in zip(scaling_factors, significance_curves):
    print(factor, ':', (np.array(significance) - baseline).mean())
    plt.plot(signal_mass, significance, label=factor)
    plt.scatter(signal_mass, significance, s=8)
plt.xlabel(r'$m_{A}$ (GeV)', fontsize=12)
plt.ylabel('Significance', fontsize=12)
plt.legend(title='Signal scaling factor')
plt.savefig('..//Report Plots/section5/scaling_factors.pdf')
plt.show()

### PNN vs. BDT Significance

In [None]:
# BDT reference significances without the mass == 400 row. reset_index keeps
# the old index as a column, so the column positions of `df` are unchanged.
BDT = pd.read_excel('..//significancesBDT.xlsx')
BDT.drop(BDT[BDT.mass == 400].index, inplace=True)
BDT.reset_index(inplace=True)

# Deliberately not bound to the name `PNN`: that would shadow the PNN class
# imported at the top of the notebook and break every later `PNN()` call.
pnn_significances = pd.read_csv('..//significancesPNN.csv')

# Side-by-side table: PNN columns first, then the BDT columns.
df = pd.concat([pnn_significances, BDT], axis=1)
df

In [None]:
# Columns are addressed by position in the combined table:
# 1 is plotted as the PNN curve, 4 as the BDT curve.
masses = df.mass
labelled_curves = (
    (df.iloc[:, 1], 'PNN'),
    (df.iloc[:, 4], 'BDT'),
)
for curve, name in labelled_curves:
    # Line first, then markers; only the markers carry the legend label.
    plt.plot(masses, curve)
    plt.scatter(masses, curve, label=name)

plt.xlabel(r'$m_A$ (GeV)', fontsize=12)
plt.ylabel('Significance', fontsize=12)
plt.legend(fontsize=11)
plt.savefig('..//Report Plots/section5/PNN-BDT_significance.pdf')

### Plotting loss vs epoch

In [None]:
# Training/validation loss per epoch for entry '7' of the 02 history file.
with open('..//Results/02-history_dict.pkl', 'rb') as handle:
    history = pickle.load(handle)

plt.figure(figsize=(6.8, 4.8))
run = history['7']  # run[0] is plotted as training, run[1] as validation
epochs = np.arange(1, len(run[0]) + 1, 1)
plt.plot(epochs, run[0], label='Training')
plt.plot(epochs, run[1], label='Validation')
plt.xlabel('Epoch', fontsize=12)
plt.ylabel('Loss', fontsize=12)

# Hyper-parameter summary drawn inside the axes at data coordinates (50, 0.002).
annotation = (
    'hidden layers = 2 \n'
    'neurons per layer = 50 \n'
    r'$\eta = 0.7$' '\n'
    r'$\gamma = 0.99$' '\n'
    'activation = ReLU'
)
plt.text(50, 0.002, annotation, fontsize=11)
plt.legend(fontsize=11)
plt.savefig('..//Report Plots/section5/loss_curve-22.pdf')
plt.show()

In [None]:
# Training/validation loss per epoch for entry '76' of the 01 history file.
with open('..//Results/01-history_dict.pkl', 'rb') as handle:
    history = pickle.load(handle)

plt.figure(figsize=(6.8, 4.8))
run = history['76']  # run[0] is plotted as training, run[1] as validation
epochs = np.arange(1, len(run[0]) + 1, 1)
plt.plot(epochs, run[0], label='Training')
plt.plot(epochs, run[1], label='Validation')
plt.xlabel('Epoch', fontsize=12)
plt.ylabel('Loss', fontsize=12)

# Hyper-parameter summary drawn inside the axes at data coordinates (58, 0.006).
annotation = (
    'hidden layers = 3 \n'
    'neurons per layer = 200 \n'
    r'$\eta = 0.01$' '\n'
    r'$\gamma = 0.99$' '\n'
    'activation = ReLU'
)
plt.text(58, 0.006, annotation, fontsize=11)
plt.legend(fontsize=11)
# Directory name corrected from 'Report PLots' to 'Report Plots', matching
# every other save path; the misspelling breaks on case-sensitive filesystems.
plt.savefig('..//Report Plots/section5/loss_curve-1.pdf')
plt.show()

In [None]:
# Training/validation loss per epoch for entry '1' of the output_temp history file.
with open('..//output_temp/history_dict.pkl', 'rb') as handle:
    history = pickle.load(handle)

plt.figure(figsize=(7.2, 4.8))
run = history['1']  # run[0] is plotted as training, run[1] as validation
epochs = np.arange(1, len(run[0]) + 1, 1)
plt.plot(epochs, run[0], label='Training')
plt.plot(epochs, run[1], label='Validation')
plt.xlabel('Epoch', fontsize=12)
# plt.xticks(np.arange(0, 24, 4))
plt.ylabel('Loss', fontsize=12)

# Hyper-parameter summary drawn inside the axes at data coordinates (30, 0.00175).
annotation = (
    'hidden layers = 2 \n'
    'neurons per layer = 50 \n'
    r'$\eta = 1.4$' '\n'
    r'$\gamma = 0.999$' '\n'
    'activation = ReLU'
)
plt.text(30, 0.00175, annotation, fontsize=11)
plt.legend(fontsize=11)
plt.savefig('..//Report Plots/section5/loss_curve-3.pdf')
plt.show()