# Read and save outputs

In [1]:
# python libraries
import getpass
import glob
import math
import os
import subprocess
import time

import h5py
import matplotlib.pyplot as plt
import numpy as np

# open access to cernbox: obtain a Kerberos ticket by handing the password to kinit on stdin.
# kinit is started directly (no shell), so the password is never interpreted by a shell
# and never becomes part of a command line.
kinit = subprocess.Popen(["kinit"], stdin=subprocess.PIPE, universal_newlines=True)
kinit.communicate(getpass.getpass() + "\n")
# exit status of kinit, displayed as the cell output
kinit.returncode

········


0

## Functions:

In [27]:

def collect_t(DIR_IN, DIR_OUT):
    '''
    Collect the final value of t=-2*loss of every toy stored in DIR_IN.

    For each toy the function reads the "<label>_t.txt" file, whose first line holds
    the final value of t, and associates a label (the file name without "_t.txt") to the toy.
    Toys whose file is empty or whose value is NaN are skipped.
    The array of the t values (tvalues) and the array of labels (files_id) are saved in
    "<name of the DIR_IN directory>_tvalues.h5" inside DIR_OUT.

    DIR_IN: directory where all the toys' outputs are saved
    DIR_OUT: directory where to save the .h5 output file

    The function returns the array of labels of the toys that were kept
    (an empty array if no toy was kept).
    '''
    tvalues = []
    files_id = []
    FILE_TITLE = ''
    # sorted(): glob returns the files in arbitrary order; sorting makes the toy order reproducible
    for fileIN in sorted(glob.glob(os.path.join(DIR_IN, '*_t.txt'))):
        # only the first line is needed; the context manager closes every toy file
        with open(fileIN) as f:
            first_line = f.readline()
        # the output file is named after the directory that contains the toys
        FILE_TITLE = os.path.basename(os.path.dirname(fileIN))
        if not first_line.strip():
            # nothing was written for this toy
            continue
        t = float(first_line)
        if np.isnan(t):
            continue
        tvalues.append(t)
        files_id.append(os.path.basename(fileIN).replace('_t.txt', ''))

    # build the arrays once instead of growing them with np.append at every iteration
    tvalues = np.array(tvalues, dtype=float)
    files_id = np.array(files_id)
    # h5py has no conversion for numpy unicode ('U') arrays (what Python 3 strings become),
    # so the labels are stored as fixed-width byte strings
    ids_to_store = files_id.astype('S') if files_id.dtype.kind == 'U' else files_id

    # save tvalues in a h5 file (closed even if a write fails)
    with h5py.File(os.path.join(DIR_OUT, FILE_TITLE + '_tvalues.h5'), 'w') as f:
        f.create_dataset('tvalues', data=tvalues, compression='gzip')
        f.create_dataset('files_id', data=ids_to_store, compression='gzip')

    return files_id

def collect_history(files_id, DIR_IN, patience):
    '''
    For each toy whose file ID is in the array files_id,
    the function collects the history of the loss and saves t=-2*loss at the check points.

    The history of a toy is read from the "loss" dataset of the file
    "<file ID>_history<patience>.h5" in DIR_IN. Toys without a history file, or whose
    file has no "loss" dataset, are skipped, so the output can have fewer rows than files_id.

    files_id: array of toy labels
    DIR_IN: directory where all the toys' outputs are saved
    patience: interval between two check points (epochs)

    The function returns a 2D-array with final shape (nr toys, nr check points),
    or an empty 1D-array if no history could be loaded.
    '''
    columns = []
    for file_id in files_id:
        history_file = os.path.join(DIR_IN, file_id + '_history' + str(patience) + '.h5')
        if not os.path.isfile(history_file):
            # skip explicitly: older h5py versions would otherwise create an empty file here
            print('toy ' + file_id + ' skipped: no history file.')
            continue
        # read-only access; the context manager closes the file of every toy
        with h5py.File(history_file, 'r') as f:
            loss = f.get("loss")
            if loss is None:
                continue
            # copy the data into memory before the file is closed
            loss = np.array(loss)
        print(str(len(columns)) + ': toy ' + file_id + ' loaded.')
        # one column per toy
        columns.append(-2 * np.expand_dims(loss, axis=1))

    if columns:
        # a single concatenation instead of re-allocating the array at every iteration
        tdistributions_check = np.concatenate(columns, axis=1).T
    else:
        tdistributions_check = np.array([])
    print('Final history array shape')
    print('(nr toys, nr check points)')
    print(tdistributions_check.shape)
    return tdistributions_check

def Save_to_h5(DIR_OUT, file_name, extension, patience, tvalues_check):
    '''
    The function saves the 2D-array of the loss histories in an .h5 file.

    One dataset is written per check point: its name is the epoch of the check point
    (patience, 2*patience, ...) and its content is the column of tvalues_check
    holding the values of all the toys at that check point.

    DIR_OUT: directory where to save the output file
    file_name: output file name
    extension: label to be appended to the file_name
    patience: interval between two check points (epochs)
    tvalues_check: 2D-array with shape (nr toys, nr check points)

    No return. If tvalues_check is an empty 1D-array (no toy loaded) nothing is written.
    '''
    if tvalues_check.ndim < 2 and tvalues_check.size == 0:
        # no history was collected: there is no check point axis to iterate on
        print('Nothing to save: the history array is empty.')
        return

    nr_check_points = tvalues_check.shape[1]
    # epoch reached at each check point
    epochs_check = [patience*(i+1) for i in range(nr_check_points)]

    log_file = os.path.join(DIR_OUT, file_name+extension+'.h5') #'_tvalues_check.h5'
    print(log_file)
    # the context manager closes the file even if a dataset cannot be written
    with h5py.File(log_file, "w") as f:
        for i, epoch_check in enumerate(epochs_check):
            f.create_dataset(str(epoch_check), data=tvalues_check[:, i], compression='gzip')
    print('Saved to file: ' +file_name+extension+'.h5')
    return

## Set parameters:

In [32]:
# directory receiving the collected .h5 files (created on first use)
output_path='../BINNED/Zsamples/validation_CHI2/Scanning_outputs/300GeV/'
if not os.path.exists(output_path):
    os.makedirs(output_path)

# toys trained on background only
DIR_INPUT_B = '/eos/user/g/ggrosso/BINNED/Zsamples/300GeV/Z_5D_corrected_std_patience5000_ref1000000_bkg100000_sig0_epochs300000_latent5_layers3_wclip1.9/'
# toys trained on signal+background
DIR_INPUT_SB = '/eos/user/g/ggrosso/BINNED/Zsamples/300GeV/Z_5D_std_corrected_patience5000_ref1000000_bkg100000_sig40_epochs300000_latent5_layers3_wclip1.9/'

# the patience is encoded in the directory name: keep the text between
# the first "patience" and the next "_" (kept as a string)
patience_B, patience_SB = (
    toys_dir.split("patience", 1)[1].split("_", 1)[0]
    for toys_dir in (DIR_INPUT_B, DIR_INPUT_SB)
)


## Read and Save - BKG only

In [33]:
# make sure the directory path is terminated by '/'
DIR_INPUT_B = DIR_INPUT_B if DIR_INPUT_B.endswith('/') else DIR_INPUT_B+'/'

# with the trailing '/', the last-but-one field of the path is the run directory name
title_B = DIR_INPUT_B.split('/')[-2]
print(title_B)

# final t values first (this also gives the toy labels), then the loss histories
files_id_B = collect_t(DIR_IN=DIR_INPUT_B, DIR_OUT=output_path)
print('Loaded')
tvalues_check_B = collect_history(files_id=files_id_B, DIR_IN=DIR_INPUT_B, patience=int(patience_B))

Z_5D_corrected_std_patience5000_ref1000000_bkg100000_sig0_epochs300000_latent5_layers3_wclip1.9
Loaded
0: toy Toy5D_patience5000_1000000ref_100000_0_0 loaded.
1: toy Toy5D_patience5000_1000000ref_100000_0_100 loaded.
2: toy Toy5D_patience5000_1000000ref_100000_0_101 loaded.
3: toy Toy5D_patience5000_1000000ref_100000_0_102 loaded.
4: toy Toy5D_patience5000_1000000ref_100000_0_103 loaded.
5: toy Toy5D_patience5000_1000000ref_100000_0_104 loaded.
6: toy Toy5D_patience5000_1000000ref_100000_0_105 loaded.
7: toy Toy5D_patience5000_1000000ref_100000_0_106 loaded.
8: toy Toy5D_patience5000_1000000ref_100000_0_107 loaded.
9: toy Toy5D_patience5000_1000000ref_100000_0_108 loaded.
10: toy Toy5D_patience5000_1000000ref_100000_0_109 loaded.
11: toy Toy5D_patience5000_1000000ref_100000_0_10 loaded.
12: toy Toy5D_patience5000_1000000ref_100000_0_110 loaded.
13: toy Toy5D_patience5000_1000000ref_100000_0_111 loaded.
14: toy Toy5D_patience5000_1000000ref_100000_0_112 loaded.
15: toy Toy5D_patience500

139: toy Toy5D_patience5000_1000000ref_100000_0_25 loaded.
140: toy Toy5D_patience5000_1000000ref_100000_0_27 loaded.
141: toy Toy5D_patience5000_1000000ref_100000_0_28 loaded.
142: toy Toy5D_patience5000_1000000ref_100000_0_29 loaded.
143: toy Toy5D_patience5000_1000000ref_100000_0_2 loaded.
144: toy Toy5D_patience5000_1000000ref_100000_0_30 loaded.
145: toy Toy5D_patience5000_1000000ref_100000_0_31 loaded.
146: toy Toy5D_patience5000_1000000ref_100000_0_32 loaded.
147: toy Toy5D_patience5000_1000000ref_100000_0_33 loaded.
148: toy Toy5D_patience5000_1000000ref_100000_0_34 loaded.
149: toy Toy5D_patience5000_1000000ref_100000_0_35 loaded.
150: toy Toy5D_patience5000_1000000ref_100000_0_36 loaded.
151: toy Toy5D_patience5000_1000000ref_100000_0_38 loaded.
152: toy Toy5D_patience5000_1000000ref_100000_0_39 loaded.
153: toy Toy5D_patience5000_1000000ref_100000_0_3 loaded.
154: toy Toy5D_patience5000_1000000ref_100000_0_40 loaded.
155: toy Toy5D_patience5000_1000000ref_100000_0_41 loaded.

In [34]:
# write the background-only histories, one dataset per check point
Save_to_h5(
    DIR_OUT=output_path,
    file_name=title_B,
    extension='_tvalues_check',
    patience=int(patience_B),
    tvalues_check=tvalues_check_B,
)

../BINNED/Zsamples/validation_CHI2/Scanning_outputs/300GeV/Z_5D_corrected_std_patience5000_ref1000000_bkg100000_sig0_epochs300000_latent5_layers3_wclip1.9_tvalues_check.h5
Saved to file: Z_5D_corrected_std_patience5000_ref1000000_bkg100000_sig0_epochs300000_latent5_layers3_wclip1.9_tvalues_check.h5


## Read and Save - SIG+BKG 

In [35]:
# make sure the directory path is terminated by '/'
DIR_INPUT_SB = DIR_INPUT_SB if DIR_INPUT_SB.endswith('/') else DIR_INPUT_SB+'/'

# with the trailing '/', the last-but-one field of the path is the run directory name
title_SB = DIR_INPUT_SB.split('/')[-2]
print(title_SB)

# final t values first (this also gives the toy labels), then the loss histories
files_id_SB = collect_t(DIR_IN=DIR_INPUT_SB, DIR_OUT=output_path)
print('Loaded')
tvalues_check_SB = collect_history(files_id=files_id_SB, DIR_IN=DIR_INPUT_SB, patience=int(patience_SB))

Z_5D_std_corrected_patience5000_ref1000000_bkg100000_sig40_epochs300000_latent5_layers3_wclip1.9
Loaded
0: toy Toy5D_patience5000_1000000ref_100000_40_100 loaded.
1: toy Toy5D_patience5000_1000000ref_100000_40_101 loaded.
2: toy Toy5D_patience5000_1000000ref_100000_40_102 loaded.
3: toy Toy5D_patience5000_1000000ref_100000_40_103 loaded.
4: toy Toy5D_patience5000_1000000ref_100000_40_104 loaded.
5: toy Toy5D_patience5000_1000000ref_100000_40_105 loaded.
6: toy Toy5D_patience5000_1000000ref_100000_40_106 loaded.
7: toy Toy5D_patience5000_1000000ref_100000_40_107 loaded.
8: toy Toy5D_patience5000_1000000ref_100000_40_108 loaded.
9: toy Toy5D_patience5000_1000000ref_100000_40_109 loaded.
10: toy Toy5D_patience5000_1000000ref_100000_40_10 loaded.
11: toy Toy5D_patience5000_1000000ref_100000_40_110 loaded.
12: toy Toy5D_patience5000_1000000ref_100000_40_111 loaded.
13: toy Toy5D_patience5000_1000000ref_100000_40_112 loaded.
14: toy Toy5D_patience5000_1000000ref_100000_40_113 loaded.
15: toy

144: toy Toy5D_patience5000_1000000ref_100000_40_24 loaded.
145: toy Toy5D_patience5000_1000000ref_100000_40_26 loaded.
146: toy Toy5D_patience5000_1000000ref_100000_40_27 loaded.
147: toy Toy5D_patience5000_1000000ref_100000_40_28 loaded.
148: toy Toy5D_patience5000_1000000ref_100000_40_29 loaded.
149: toy Toy5D_patience5000_1000000ref_100000_40_2 loaded.
150: toy Toy5D_patience5000_1000000ref_100000_40_30 loaded.
151: toy Toy5D_patience5000_1000000ref_100000_40_31 loaded.
152: toy Toy5D_patience5000_1000000ref_100000_40_32 loaded.
153: toy Toy5D_patience5000_1000000ref_100000_40_33 loaded.
154: toy Toy5D_patience5000_1000000ref_100000_40_34 loaded.
155: toy Toy5D_patience5000_1000000ref_100000_40_35 loaded.
156: toy Toy5D_patience5000_1000000ref_100000_40_36 loaded.
157: toy Toy5D_patience5000_1000000ref_100000_40_37 loaded.
158: toy Toy5D_patience5000_1000000ref_100000_40_38 loaded.
159: toy Toy5D_patience5000_1000000ref_100000_40_39 loaded.
160: toy Toy5D_patience5000_1000000ref_10

In [36]:
# write the signal+background histories, one dataset per check point
Save_to_h5(
    DIR_OUT=output_path,
    file_name=title_SB,
    extension='_tvalues_check',
    patience=int(patience_SB),
    tvalues_check=tvalues_check_SB,
)

../BINNED/Zsamples/validation_CHI2/Scanning_outputs/300GeV/Z_5D_std_corrected_patience5000_ref1000000_bkg100000_sig40_epochs300000_latent5_layers3_wclip1.9_tvalues_check.h5
Saved to file: Z_5D_std_corrected_patience5000_ref1000000_bkg100000_sig40_epochs300000_latent5_layers3_wclip1.9_tvalues_check.h5
