In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import json
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import mplhep as hep
from sklearn.metrics import roc_curve, auc
from tqdm.auto import tqdm
import copy
import train_evaluate_pnn as te
import csv 
import os
from sklearn.preprocessing import StandardScaler
import joblib
# Switch matplotlib over to the "CMS" style shipped with mplhep.
hep.style.use("CMS")

# Accumulates one [AUC, architecture] entry per trained network.
auclist = list()

# Pick the torch device string: the GPU when CUDA is reported as available, the CPU otherwise.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'


def separate_output_score(output_score,y):
    """Split classifier output scores into signal and background by true label.

    Parameters
    ----------
    output_score : torch.Tensor or array-like
        One score per event. Any shape is accepted; it is flattened to 1-D.
        Tensors are detached from the autograd graph and moved to the CPU first.
    y : array-like or pandas.Series
        True labels, one per event, in the same order as the flattened scores.
        Events labelled 1 are treated as signal and events labelled 0 as
        background; any other label is dropped from both outputs.

    Returns
    -------
    tuple of pandas.Series
        ``(signal_output_score, background_output_score)``. When ``y`` is a
        Series its index is carried over to the returned Series.
    """
    # Accept plain NumPy arrays / lists as well as tensors, so the helper can be
    # reused on scores that have already been converted.
    if isinstance(output_score, torch.Tensor):
        scores = output_score.detach().cpu().numpy()
    else:
        scores = np.asarray(output_score)

    temp_df = pd.DataFrame({'pred': scores.flatten(), 'true': y})

    # Single .loc[row_mask, column] lookups instead of chained indexing.
    signal_output_score = temp_df.loc[temp_df['true'] == 1, 'pred']
    background_output_score = temp_df.loc[temp_df['true'] == 0, 'pred']
    return signal_output_score,background_output_score



In [7]:

'''
    Set the suffix that is appended to the output directory name. By default, if directory_add_on = '', the folder in which the pNN
    and all graphs/data are stored will be 'models_27_features_[50, 50, 50]' when using 27 features and nodes [50,50,50].
    If directory_add_on = '_high_learning_rate', the folder will be 'models_27_features_[50, 50, 50]_high_learning_rate'.
    (Please always start your add-on message with an '_'.)
'''

# Suffix appended to the output directory name built by the training cell.
directory_add_on = '_test'

# Update these parameters to your liking.
# Learning rate handed to te.trainNetwork_no_weights; also shown in the summary figure title.
learningrate = 0.0001
# The training cell overlays the learning-rate curve on the loss panel only when this is exactly 'yes'.
plot_learning_rate='yes'
# Forwarded unchanged to te.trainNetwork_no_weights (accepted values are defined in train_evaluate_pnn).
scheduler_type='Custom'
# Forwarded as `epoch`; the recorded progress bar counts up to this value,
# so it presumably acts as the maximum number of epochs - TODO confirm.
epoch = 999999
# Forwarded as `patience`; presumably the early-stopping patience in epochs - TODO confirm.
patience = 100
# Forwarded as `batch_size`.
batch_size = 4096
# Forwarded as `model_type` (accepted values are defined in train_evaluate_pnn).
model_type ='char'
# Forwarded as `save_models`.
save_models = True
# Forwarded as `scheduler`; presumably switches the learning-rate scheduler on/off - TODO confirm.
scheduler = True
# Forwarded as `dynamic_batch`.
dynamic_batch = False
# Number of top-ranked features taken from each signal mass when building feature_list;
# also part of the output directory name.
num_of_features = 27
# One network is trained per entry; each entry is passed as `nodes`
# (presumably the number of nodes in each hidden layer - TODO confirm).
architectures= [[50,100,100,50]]

In [8]:
'''
    Build the list of input features for the pNN. Nothing to configure here, simply run this cell.
'''
signal_masses = ["260","270","280","290","300","320","350","400","450","500","550","600","650","700","750","800","900"]

# Step 1: for every signal mass, order the column names of its *_AUC_NN.csv table
# by the value found in the table's first row, largest value first.
sorted_features = {}
for mass_point in signal_masses:
    sample_name = f"GluGluToRadionToHHTo2G2Tau_M-{mass_point}"
    auc_table = pd.read_csv(f"Feature_list/{sample_name}_AUC_NN.csv", index_col = False, on_bad_lines='skip')
    first_row_value = {column: auc_table[column][0] for column in auc_table.columns}
    sorted_features[mass_point] = sorted(first_row_value, key=first_row_value.get, reverse=True)

# Step 2: walk through the first `num_of_features` names of every ranking and keep each
# name the first time it is seen, so the merged list has no duplicates.
feature_list = []
for ranked_names in sorted_features.values():
    for rank in range(num_of_features):
        candidate = ranked_names[rank]
        if candidate in feature_list:
            continue
        feature_list.append(candidate)

# Step 3: 'MX' is always added as the final entry.
feature_list = [*feature_list, 'MX']

print(f'>> List of features being used: {feature_list}')

>> List of features being used: ['reco_MX_mgg', 'dilep_leadpho_mass', 'ditau_pt', 'Diphoton_dPhi', 'ditau_dphi', 'Diphoton_pt_mgg', 'LeadPhoton_pt_mgg', 'MET_pt', 'LeadPhoton_lead_lepton_dR', 'ditau_dR', 'lead_lepton_pt', 'Diphoton_ditau_deta', 'Diphoton_lead_lepton_dR', 'LeadPhoton_ditau_dR', 'SubleadPhoton_lead_lepton_dR', 'jet_1_pt', 'Diphoton_sublead_lepton_dR', 'Diphoton_ditau_dphi', 'ditau_deta', 'ditau_met_dPhi', 'ditau_mass', 'Diphoton_sublead_lepton_deta', 'SubleadPhoton_pt_mgg', 'Diphoton_mass', 'Diphoton_lead_lepton_deta', 'diphoton_met_dPhi', 'lead_lepton_mass', 'MX']


In [9]:
'''
    Train one pNN per entry in `architectures`, then plot and save its diagnostics.
    Simply run this cell, you can terminate when satisfied / wait for epochs to finish / wait for early stopping.
'''
# NOTE(review): in the recorded run, stopping the training by hand ended this cell with a
# KeyboardInterrupt, so none of the plotting code below was reached - confirm whether
# te.trainNetwork_no_weights is expected to return normally when interrupted.

# Do not change this (This feature does not work yet).
single_mass = False

# Notebook-level copies of the latest test frame, scaler and best epoch.
x_test_global = None
scaler_global = None
best_epoch_global = None
# Mass points (GeV) for which a per-signal curve is drawn; converted to int to index
# the per-signal dictionaries returned by the training call.
allmasses=['260','270','280','290','300','320','350','400','450','500','550','600','650','700','750','800','900','1000']

# One [AUC, architecture] entry per trained network.
auclist=[]
for nodes in architectures:
    directory = f'models_{num_of_features}_features_{nodes}{directory_add_on}'

    signal_df, background_df, combine_df, add_to_test_df = te.read_dataframes()

    x_train,x_test,x_train_original,x_test_original = te.getTrainTestSplit(combine_df,add_to_test_df)

    x_test_global = x_test.copy()

    # Note: `directory` is overwritten by the value returned from the training call,
    # and that returned value is what the figures below are saved into.
    models,epoch_loss_train,epoch_loss_test,output_score,output_score_train, learning_rate_epochs,scaler,best_epoch,directory,signal_output_score_train_dict,signal_output_score_test_dict = te.trainNetwork_no_weights(x_train, x_test, feature_list, learningrate, epoch = epoch, save_models=save_models,
                        batch_size = batch_size, nodes = nodes, patience = patience, model_type=model_type,scheduler_type=scheduler_type,directory = directory,scheduler = scheduler,dynamic_batch = dynamic_batch,single_mass = single_mass)

    scaler_global = scaler
    best_epoch_global = best_epoch

    # ---- One train/test curve per signal mass -------------------------------------
    # NOTE(review): the axis label and title say "Loss" while the dictionaries are named
    # *_output_score_* - confirm which quantity te.trainNetwork_no_weights stores in them.
    for mass_eval in allmasses:

        signal_mass = int(mass_eval)

        mass_fig, mass_ax = plt.subplots()
        mass_ax.plot(signal_output_score_train_dict[signal_mass], label = 'Train')
        mass_ax.plot(signal_output_score_test_dict[signal_mass], label = 'Test')
        mass_ax.axvline(best_epoch, label = f'Best Performing Epoch - {best_epoch}')
        mass_ax.set_ylabel('Loss')
        mass_ax.set_xlabel('Epoch')
        mass_ax.set_title(f'Loss per Epoch for Signal {signal_mass} GeV')
        mass_ax.legend()
        mass_fig.savefig(f'{directory}/signal_{signal_mass}_loss_graph.png',format = 'png', dpi = 400)
        plt.show()
        # Release the figure explicitly so the loop does not accumulate open figures.
        plt.close(mass_fig)

    # ---- Overall performance on the test set --------------------------------------
    signal_output_score,background_output_score = separate_output_score(output_score,x_test['y'])

    fpr, tpr, thresholds = roc_curve(x_test['y'], output_score.cpu().detach().numpy())
    roc_auc = auc(fpr, tpr)
    print(f'Printing AUC Score:{roc_auc} for pNN of architecture {(nodes)}')

    # ---- Summary figure: loss curves | score distributions | ROC curve ------------
    fig, axs = plt.subplots(1, 3, figsize=(24, 10))

    axs[0].plot(epoch_loss_train, label = 'Train')
    axs[0].plot(epoch_loss_test, label = 'Test')
    axs[0].axvline(best_epoch, label = f'Best Performing Epoch - {best_epoch}')
    axs[0].set_ylabel('Loss')
    axs[0].set_xlabel('Epoch')
    axs[0].set_title('Loss per Epoch')

    # Collect the legend entries of the loss panel explicitly. When the learning-rate
    # curve is drawn on a twin axis its entry is merged in, and the legend is attached
    # to the twin (top-most) axes so it is not hidden behind the curve.
    loss_handles, loss_labels = axs[0].get_legend_handles_labels()
    legend_ax = axs[0]
    if plot_learning_rate == 'yes':
        ax1_twin = axs[0].twinx()
        ax1_twin.plot(learning_rate_epochs, label='Learning Rate', linestyle='--', color='red')
        ax1_twin.set_ylabel('Learning Rate')
        ax1_twin.tick_params(axis='y')
        twin_handles, twin_labels = ax1_twin.get_legend_handles_labels()
        loss_handles = loss_handles + twin_handles
        loss_labels = loss_labels + twin_labels
        legend_ax = ax1_twin
    legend_ax.legend(loss_handles, loss_labels, loc='upper right')

    axs[1].hist(signal_output_score, label = 'Signal',bins=80,histtype='step')
    axs[1].hist(background_output_score, label = 'Background',bins=80,histtype='step')
    axs[1].set_xlabel("Output Score")
    axs[1].set_title('Classification Distribution')
    axs[1].set_ylabel("Frequency")
    axs[1].legend()

    axs[2].plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.3f})')
    # Diagonal reference line (TPR == FPR).
    axs[2].plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    axs[2].set_xlim([0.0, 1.0])
    axs[2].set_ylim([0.0, 1.05])
    axs[2].set_xlabel('False Positive Rate')
    axs[2].set_ylabel('True Positive Rate')
    axs[2].set_title('Receiver Operating Characteristic',fontsize=18)
    axs[2].legend(loc="lower right")

    fig.tight_layout()
    fig.suptitle(f'pNN: architecture={nodes}, lr={learningrate}', fontsize=16)
    # Save through the figure object rather than the implicit "current figure".
    fig.savefig(f'{directory}/graph.png',format = 'png')
    plt.show()
    plt.close(fig)

    auclist.append([roc_auc,nodes])

# Summary table of AUC per architecture. It is only kept in memory; nothing is written to disk here.
featurescore_df = pd.DataFrame(auclist, columns=['score', 'nodes per hidden layer'])
         


>> Loading data ...
>> Splitting train/test data ...
>> Directory created successfully ...
>> Training...


  0%|          | 0/999999 [00:00<?, ?it/s]

Epoch 0


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 8.361380605492741e-05
Test Epoch Loss: 8.424474799539894e-05
Learning rate: 0.0001
Epoch 1


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 7.430412370013073e-05
Test Epoch Loss: 7.486843242077157e-05
Learning rate: 0.0001
Epoch 2


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 6.519590533571318e-05
Test Epoch Loss: 6.589547410840169e-05
Learning rate: 0.0001
Epoch 3


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 5.940645860391669e-05
Test Epoch Loss: 6.021554145263508e-05
Learning rate: 0.0001
Epoch 4


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 5.6677163229323924e-05
Test Epoch Loss: 5.7262896007159725e-05
Learning rate: 0.0001
Epoch 5


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 5.393407991505228e-05
Test Epoch Loss: 5.462321496452205e-05
Learning rate: 0.0001
Epoch 6


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 5.188928480492905e-05
Test Epoch Loss: 5.225995846558362e-05
Learning rate: 0.0001
Epoch 7


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 4.996317147742957e-05
Test Epoch Loss: 5.0419253966538236e-05
Learning rate: 0.0001
Epoch 8


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 4.83298790641129e-05
Test Epoch Loss: 4.865687878918834e-05
Learning rate: 0.0001
Epoch 9


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 4.712318332167342e-05
Test Epoch Loss: 4.7843248466961086e-05
Learning rate: 0.0001
Epoch 10


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 4.627661110134795e-05
Test Epoch Loss: 4.6606899559265e-05
Learning rate: 5.534565934794955e-05
Epoch 11


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 4.5863787818234414e-05
Test Epoch Loss: 4.6061515604378656e-05
Learning rate: 5.534565934794955e-05
Epoch 12


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 4.558968430501409e-05
Test Epoch Loss: 4.5803641114616767e-05
Learning rate: 5.534565934794955e-05
Epoch 13


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 4.522970266407356e-05
Test Epoch Loss: 4.542417809716426e-05
Learning rate: 5.534565934794955e-05
Epoch 14


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 4.496972542256117e-05
Test Epoch Loss: 4.540884037851356e-05
Learning rate: 5.534565934794955e-05
Epoch 15


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 4.472458749660291e-05
Test Epoch Loss: 4.485401586862281e-05
Learning rate: 5.534565934794955e-05
Epoch 16


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 4.4330830860417336e-05
Test Epoch Loss: 4.468471524887718e-05
Learning rate: 5.534565934794955e-05
Epoch 17


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 4.442229328560643e-05
Test Epoch Loss: 4.446454477147199e-05
Learning rate: 5.534565934794955e-05
Epoch 18


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 4.4230371713638306e-05
Test Epoch Loss: 4.437069219420664e-05
Learning rate: 5.534565934794955e-05
Epoch 19


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 4.413404531078413e-05
Test Epoch Loss: 4.408637687447481e-05
Learning rate: 5.534565934794955e-05
Epoch 20


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 4.3678712245309725e-05
Test Epoch Loss: 4.408671156852506e-05
Learning rate: 5.223863627179526e-05
Epoch 21


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 4.370329770608805e-05
Test Epoch Loss: 4.370748865767382e-05
Learning rate: 5.223863627179526e-05
Epoch 22


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 4.344992703408934e-05
Test Epoch Loss: 4.3647429265547544e-05
Learning rate: 5.223863627179526e-05
Epoch 23


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

  0%|          | 0/195 [00:00<?, ?it/s]

Train Epoch Loss: 4.330692536314018e-05
Test Epoch Loss: 4.3754465878009796e-05
Learning rate: 5.223863627179526e-05
Epoch 24


  0%|          | 0/388 [00:00<?, ?it/s]

>> Getting train/loss for entire pNN ...


  0%|          | 0/388 [00:00<?, ?it/s]

KeyboardInterrupt: 