In [1]:
# process data and save to memory as variables, not storage
import sys
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import os
import numpy as np
from os import listdir
from os.path import isfile, join
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from PyPDF2 import PdfMerger
from tqdm import trange

2023-08-10 23:07:14.350879: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
wireplane='U'

In [3]:
# takes full raw data and extracts waveform of length nticks
def extract_wave(data, nticks=200):
    string = 'tck_'
    waveforms = []
    #Here I extract a column in each iteration and append to list
    for i in range(nticks):
        waveforms.append(data[string+str(i)].astype(np.int16))
    #convert to numpy ndarray
    waveforms = np.array(waveforms).astype(np.int16)
    #since raws and columns are inverted we need to transpose it
    return np.transpose(waveforms)

# takes full raw data and returns waveform of length nticks
# only keeps waves at a desired adc count 
def get_std_waveforms(data_noisy, data_clean, nticks=200, min_adc=5):
    #Extract and scale waveform data (passthrough rn)
    raw_waveforms_noisy = extract_wave(data_noisy, nticks)
    raw_waveforms_clean = extract_wave(data_clean, nticks)
    #print('before adc filter: ', raw_waveforms_noisy.shape, raw_waveforms_clean.shape)

    noisy_ = []
    clean_ = []

    for i, wave in enumerate(raw_waveforms_clean):
        if max(wave) >= min_adc:
            noisy_.append(raw_waveforms_noisy[i])
            clean_.append(wave)
    
    del raw_waveforms_noisy, raw_waveforms_clean

    noisy_ = np.array(noisy_)
    clean_ = np.array(clean_)

    #print('after adc filter: ', noisy_.shape, clean_.shape)
    #print(raw_waveforms) 
    #scaled_waveforms = waveform_scaler.fit_transform(raw_waveforms)
    return noisy_, clean_



In [4]:
import tarfile
import os

# nuCC
dir_noisy_nu_cc = '/home/vlian/Workspace/train_dune_lartpc_v2/new_aug_10/nu_cc/'+wireplane+'/noisy_signal/'  # Directory to extract files
dir_clean_nu_cc = '/home/vlian/Workspace/train_dune_lartpc_v2/new_aug_10/nu_cc/'+wireplane+'/clean_signal/'  # Directory to extract files

noisy_names_cc = os.listdir(dir_noisy_nu_cc)
noisy_names_cc = sorted(noisy_names_cc)


clean_names_cc = os.listdir(dir_clean_nu_cc)
clean_names_cc = sorted(clean_names_cc)

# nuES
dir_noisy_nu_es = '/home/vlian/Workspace/train_dune_lartpc_v2/new_aug_10/nu_es/'+wireplane+'/noisy_signal/'  # Directory to extract files
dir_clean_nu_es = '/home/vlian/Workspace/train_dune_lartpc_v2/new_aug_10/nu_es/'+wireplane+'/clean_signal/'  # Directory to extract files

noisy_names_es = os.listdir(dir_noisy_nu_es)
noisy_names_es = sorted(noisy_names_es)

clean_names_es = os.listdir(dir_clean_nu_es)
clean_names_es = sorted(clean_names_es)


In [5]:
print(len(noisy_names_cc), len(clean_names_cc))
print(len(noisy_names_es), len(clean_names_es))

52 52
188 188


In [6]:
print(noisy_names_cc[0], '---', clean_names_cc[0])
print(noisy_names_es[0], '---', clean_names_es[0])

snb-nucc-en0-U-signal-3363366-0-0.npy --- snb-nucc-en0-U-clnsig-3363366-0-0.npy
snb-nues-en0-U-signal-63647624-0-0.npy --- snb-nues-en0-U-clnsig-63647624-0-0.npy


### seperate by energy

In [7]:
def get_wave_by_ENRG(energy_idx, noisy_filenames, clean_filenames, interaction_type=True):
    if interaction_type:
        noisy_path = dir_noisy_nu_cc
        clean_path = dir_clean_nu_cc
    else:
        noisy_path = dir_noisy_nu_es
        clean_path = dir_clean_nu_es

    file_names_noisy = [file for file in noisy_filenames if 'en'+str(energy_idx) in file ]
    file_names_clean = [file for file in clean_filenames if 'en'+str(energy_idx) in file ]

    noisy_waveforms = np.empty((0, 200))
    clean_waveforms = np.empty((0, 200))

    for i, file_name in enumerate(file_names_noisy):
        noisy_file_path = os.path.join(noisy_path, file_name)
        clean_file_path = os.path.join(clean_path, file_names_clean[i])
        

        noisy = np.load(noisy_file_path)
        clean = np.load(clean_file_path)

        noisy_wf, clean_wf = get_std_waveforms(noisy, clean, nticks=200, min_adc=5)
        
        noisy_waveforms = np.concatenate((noisy_waveforms, noisy_wf))
        clean_waveforms = np.concatenate((clean_waveforms, clean_wf))

    return [noisy_waveforms, clean_waveforms]


In [8]:
all_waveforms_nu_CC = []
all_waveforms_nu_ES = []
for i in range(10):
    all_waveforms_nu_CC.append(get_wave_by_ENRG(i, noisy_names_cc, clean_names_cc))
    all_waveforms_nu_ES.append(get_wave_by_ENRG(i, noisy_names_es, clean_names_es, False))

In [9]:
for i, en in enumerate(all_waveforms_nu_CC):
    print('en',i,':', 'nu_CC',en[0].shape, en[1].shape,'---','nu_ES', all_waveforms_nu_ES[i][0].shape, all_waveforms_nu_ES[i][1].shape)

en 0 : nu_CC (4452, 200) (4452, 200) --- nu_ES (7035, 200) (7035, 200)
en 1 : nu_CC (4497, 200) (4497, 200) --- nu_ES (6468, 200) (6468, 200)
en 2 : nu_CC (4700, 200) (4700, 200) --- nu_ES (6402, 200) (6402, 200)
en 3 : nu_CC (4794, 200) (4794, 200) --- nu_ES (6251, 200) (6251, 200)
en 4 : nu_CC (4782, 200) (4782, 200) --- nu_ES (6271, 200) (6271, 200)
en 5 : nu_CC (5995, 200) (5995, 200) --- nu_ES (6398, 200) (6398, 200)
en 6 : nu_CC (6121, 200) (6121, 200) --- nu_ES (6774, 200) (6774, 200)
en 7 : nu_CC (5995, 200) (5995, 200) --- nu_ES (6092, 200) (6092, 200)
en 8 : nu_CC (6043, 200) (6043, 200) --- nu_ES (6919, 200) (6919, 200)
en 9 : nu_CC (6883, 200) (6883, 200) --- nu_ES (6584, 200) (6584, 200)


### Load Noise

In [10]:
noise_path = '/home/vlian/Workspace/train_dune_lartpc_v2/noise/'+wireplane+'/'
noise_filenames = sorted([f for f in listdir(noise_path) if (isfile(join(noise_path, f)) and wireplane in f)])
combined_noise = np.concatenate([np.load(noise_path+fname, mmap_mode='r') for fname in noise_filenames])

noise_waveforms = extract_wave(combined_noise)
roi_truth_noise = np.zeros(noise_waveforms.shape[0]) # for roi-finding
print(noise_waveforms.shape, roi_truth_noise.shape)

(100000, 200) (100000,)


### TEST

In [11]:
model_5_10_mean = np.load('../models_scales/mean_5_10' + wireplane + '_nu.npy')
model_5_10_std = np.load('../models_scales/scale_5_10' + wireplane + '_nu.npy')

model_5_15_mean = np.load('../models_scales/mean_5_15' + wireplane + '_nu.npy')
model_5_15_std = np.load('../models_scales/scale_5_15' + wireplane + '_nu.npy')

model_5_18_mean = np.load('../models_scales/mean_5_18' + wireplane + '_nu.npy')
model_5_18_std = np.load('../models_scales/scale_5_18' + wireplane + '_nu.npy')

model_60k_mean = np.load('../models_scales/mean_60k' + wireplane + '_nu.npy')
model_60k_std = np.load('../models_scales/scale_60k' + wireplane + '_nu.npy')

In [12]:
scalers = [[model_5_10_mean, model_5_10_std], [model_5_15_mean, model_5_15_std], 
           [model_5_18_mean, model_5_18_std], [model_60k_mean, model_60k_std]]

In [13]:
model_5_10 = load_model('../ROI_ar39_models/model_5_10' + wireplane + 'plane_nu_ROI.h5')

model_5_15 = load_model('../ROI_ar39_models/model_5_15' + wireplane + 'plane_nu_ROI.h5')

model_5_18 = load_model('../ROI_ar39_models/model_5_18' + wireplane + 'plane_nu_ROI.h5')

model_60k = load_model('../ROI_ar39_models/model_60k' + wireplane + 'plane_nu_ROI.h5')

2023-08-10 23:07:26.920468: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-08-10 23:07:26.957185: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-08-10 23:07:26.957566: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [14]:
models = [model_5_10, model_5_15, model_5_18, model_60k]

In [15]:
def eval_model(idx, cnn_min):
    model_idx = idx

    noise_scaled = (noise_waveforms-scalers[model_idx][0])/scalers[model_idx][1]
    infer = models[model_idx].predict(noise_scaled, verbose=0)
    
    return (len([i for i in infer if i > cnn_min])/len(infer))*100

In [16]:
for i in range(4):
    print(round(100-eval_model(i, 0.999), 5), round(100-eval_model(i, 0.94), 5))

2023-08-10 23:07:30.124514: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8600


100.0 99.514
99.995 99.456
99.997 99.609
99.997 99.735


In [17]:
def eval_model_en(en_group_waveforms, model_idx, cnn_min):

    waveforms_scaled = (en_group_waveforms-scalers[model_idx][0])/scalers[model_idx][1]
    infer = models[model_idx].predict(waveforms_scaled, verbose=0)
    
    return (len([i for i in infer if i > cnn_min])/len(infer))*100

In [18]:
len(all_waveforms_nu_ES)

10

In [19]:
for i, signals_at_en in enumerate(all_waveforms_nu_ES):
    print('en:', i)
    for j in range(4):
        #print('    model:', j)
        print(round(eval_model_en(signals_at_en[0], j, 0.999), 2), round(eval_model_en(signals_at_en[0], j, 0.94), 50))
    print('--------------')

en: 0
82.29 90.14925373134328
84.19 90.29140014214641
83.87 89.70859985785359
83.26 89.29637526652452
--------------
en: 1
83.32 90.35250463821892
84.79 90.46072974644403
84.54 89.84230055658628
84.21 89.45578231292517
--------------
en: 2
83.72 90.56544829740706
85.18 90.64354889097157
84.9 90.06560449859418
84.6 89.62824117463293
--------------
en: 3
84.75 90.89745640697488
85.99 90.97744360902256
85.76 90.43353063509838
85.54 90.08158694608862
--------------
en: 4
83.78 89.50725562111306
84.93 89.77834476160102
84.88 89.12454154042418
84.61 88.80561313985011
--------------
en: 5
83.79 89.82494529540482
85.45 90.02813379180994
85.12 89.55923726164427
84.96 89.152860268834
--------------
en: 6
83.33 89.5482728077945
84.66 89.7106583997638
84.5 89.16445231768527
84.19 88.6330085621494
--------------
en: 7
83.63 89.51083388049902
84.93 89.67498358502954
84.59 89.33026920551544
84.46 88.91989494418911
--------------
en: 8
84.17 90.02746061569592
85.55 90.14308426073131
85.34 89.767307414

In [20]:
#HERE

### Denoising Autoencoder

In [21]:
model_AE_check = load_model('../../../archive/AutoEncoder-Current/models/model_AE_2048_no_pooling'+wireplane+'plane_nu.h5')
mean_check = np.load('/home/vlian/Workspace/LArTPC-1DCNN-AutoEncoder/Neutrino-Trained-New-Dataset/archive/AutoEncoder-Current/models/saved_models/AE_mean_'+wireplane+'.npy')
std_check = np.load('/home/vlian/Workspace/LArTPC-1DCNN-AutoEncoder/Neutrino-Trained-New-Dataset/archive/AutoEncoder-Current/models/saved_models/AE_std_'+wireplane+'.npy')

In [22]:
print(mean_check, std_check)

-0.0857434375 10.171227500917754


In [23]:
nu_CC_energy = {0: '.028-5.50 MeV',
                1: '5.50-7.60 MeV',
                2: '7.60-10.0 MeV',
                3: '10.0-12.0 MeV',
                4: '12.0-15.0 MeV',
                5: '15.0-17.0 MeV',
                6: '17.0-20.0 MeV',
                7: '20.0-24.0 MeV',
                8: '24.0-29.0 MeV',
                9: '29.0-85.0 MeV'
                }

nu_ES_energy = {0: '0.005-0.010 GeV',
                1: '0.010-0.013 GeV',
                2: '0.013-0.016 GeV',
                3: '0.016-0.019 GeV',
                4: '0.019-0.021 GeV',
                5: '0.021-0.024 GeV',
                6: '0.024-0.027 GeV',
                7: '0.027-0.031 GeV',
                8: '0.031-0.036 GeV',
                9: '0.036-0.079 GeV',

}


In [24]:
def make_single_pdf(x, y, predicted, interaction,energy, energy_range, wave_idx, pg_num):

    fig, axs = plt.subplots(3,2, figsize=(20, 12), facecolor='w', edgecolor='k')
    fig.subplots_adjust(hspace = .375, wspace=.1)

    axes = axs.ravel()

    for i in range(6):
        index_ = i + wave_idx
        wave_idx = index_
        axes[i].set_title(interaction + ': ' + energy_range + ' --- (peak adc: ' + str(max(y[wave_idx])) + ')')
        axes[i].plot(x[wave_idx], color='black', alpha=0.3, label='input')
        axes[i].plot(y[wave_idx], color='blue', label='target')
        axes[i].plot(predicted[wave_idx], color='m', label='prediction')
        axes[i].legend(fontsize=12)
    


    plt.savefig('./plots/tmp_u/tmp' +str(pg_num) + '.pdf',
                dpi=300,
                bbox_inches='tight', pad_inches=0.75)
    plt.close()

    return wave_idx

# creates and merges pdf, removes all single page pdfs from tmp folder
def make_complete_pdf(x, y, predicted, interaction, energy, energy_range, num_pages):
    wave_idx_ = 0
    page_num = 0

    while page_num < num_pages:
        wave_idx_ = make_single_pdf(x, y, predicted, interaction, energy, energy_range, wave_idx_, page_num) + 1
        page_num += 1

    merger = PdfMerger()
    path = './plots/tmp_u/'
    pdf_files = [path+f for f in listdir(path) if (isfile(join(path, f)))]
    #print(pdf_files)
    for pdf_file in pdf_files:
        #Append PDF files
        merger.append(pdf_file)
    #merger.write('pdfs/plts_tmp/plts_' + wireplane + '_cnn_'+str(int(min_cnn*100)) + '-' + str(int(max_cnn*100)) + '_' + str(num_pages) +  'pages.pdf')
    merger.write('./plots/plt_U_plots_'+interaction+'_'+str(energy)+'.pdf')
    merger.close()

    for file in pdf_files:
        os.remove(file)

In [25]:
def roi_ae(data_set, energy, energy_range,interaction, roi_model, ae_model, roi_scalers, ae_scalers):
    waveform_noisy = data_set[energy][0]
    print('energy en' + str(energy), energy_range)
    noisy_wave_scaled_ROI = (waveform_noisy-roi_scalers[0])/roi_scalers[1]
    noisy_wave_scaled_AE = (waveform_noisy-ae_scalers[0])/ae_scalers[1]
    
    waveform_clean = data_set[energy][1]
    clean_wave_scaled = (waveform_clean-ae_scalers[0])/ae_scalers[1]
    counter = 0

    noisy = np.empty((0, 200))
    clean = np.empty((0, 200))
    predicted = np.empty((0, 200))

    for i in trange(len(waveform_noisy)):
        wave_roi = noisy_wave_scaled_ROI[i:i+1]
        if roi_model.predict(wave_roi, verbose=0) > 0.999:
            wave_AE = noisy_wave_scaled_AE[i:i+1]
            if max(waveform_clean[i:i+1][0]) < 1000:
                ae_pred = ae_model.predict(wave_AE, verbose=0)
                ae_pred = ae_pred.reshape(ae_pred.shape[0], ae_pred.shape[1])
                pred = ae_pred*ae_scalers[1] + ae_scalers[0]
                
                noisy_wf = waveform_noisy[i]
                noisy_wf = noisy_wf.reshape(1, 200)
                noisy = np.concatenate((noisy, noisy_wf))
                #print('debug: ', waveform_clean[i:i+1].shape, clean.shape)
                clean = np.concatenate((clean, waveform_clean[i:i+1]))
                #print('debug: ', pred.shape, predicted.shape)
                predicted = np.concatenate((predicted, pred))
    for j in range(10):
        noisy, clean, predicted = shuffle(noisy, clean, predicted)
    
    
    make_complete_pdf(noisy, clean, predicted, interaction, energy, energy_range, 2)
    print('------------------------')
    perentage_ = (len(noisy)/len(waveform_noisy))*100
    print(len(noisy), ':', len(waveform_noisy), '%-> ', round(perentage_, 3))
    
    

In [26]:
len(all_waveforms_nu_ES[0][0])

7035

In [27]:
for energy_ in range(10):
    roi_ae(all_waveforms_nu_ES, energy_, nu_ES_energy[energy_], 'nuES', model_60k, model_AE_check, [model_60k_mean, model_60k_std], [mean_check, std_check])

energy en0 0.005-0.010 GeV


100%|██████████| 7035/7035 [12:21<00:00,  9.49it/s]


------------------------
5857 : 7035 %->  83.255
energy en1 0.010-0.013 GeV


100%|██████████| 6468/6468 [10:05<00:00, 10.68it/s]


------------------------
5447 : 6468 %->  84.215
energy en2 0.013-0.016 GeV


100%|██████████| 6402/6402 [10:16<00:00, 10.39it/s]


------------------------
5416 : 6402 %->  84.599
energy en3 0.016-0.019 GeV


100%|██████████| 6251/6251 [10:17<00:00, 10.12it/s]


------------------------
5347 : 6251 %->  85.538
energy en4 0.019-0.021 GeV


100%|██████████| 6271/6271 [10:31<00:00,  9.92it/s]


------------------------
5306 : 6271 %->  84.612
energy en5 0.021-0.024 GeV


100%|██████████| 6398/6398 [11:02<00:00,  9.65it/s]


------------------------
5436 : 6398 %->  84.964
energy en6 0.024-0.027 GeV


100%|██████████| 6774/6774 [11:56<00:00,  9.46it/s]


------------------------
5703 : 6774 %->  84.19
energy en7 0.027-0.031 GeV


100%|██████████| 6092/6092 [10:52<00:00,  9.34it/s]


------------------------
5145 : 6092 %->  84.455
energy en8 0.031-0.036 GeV


100%|██████████| 6919/6919 [12:51<00:00,  8.97it/s]


------------------------
5883 : 6919 %->  85.027
energy en9 0.036-0.079 GeV


100%|██████████| 6584/6584 [12:32<00:00,  8.76it/s]


------------------------
5484 : 6584 %->  83.293
