In [1]:
import uproot as ur
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle

In [2]:
def baselinecorre_norm(wf, sample_range):
    '''Baseline-correct a waveform and rescale it to the [0, 1] interval.

    Parameters
    ----------
    wf : array-like (numpy array or pandas Series)
        The waveform samples.
    sample_range : int
        Number of leading samples (``wf[:sample_range]``) averaged to estimate
        the baseline.

    Returns
    -------
    Same container type as ``wf - scalar`` produces, holding the min-max
    normalized waveform.

    Notes
    -----
    If every sample of ``wf`` is identical, the denominator (max - min) is
    zero and the result is not finite.
    '''
    # Subtract the mean of the leading samples so the pre-pulse region sits near zero.
    shifted = wf - np.mean(wf[:sample_range])

    lowest = np.min(shifted)
    highest = np.max(shifted)

    # Min-max scaling: lowest sample maps to 0, highest to 1.
    return (shifted - lowest) / (highest - lowest)

def extract_normal_wf(rootfile, savefile = False):
    '''Load every histogram of a ROOT file as a normalized waveform.

    Each key of the ROOT file is read with ``.values()`` and treated as one
    waveform. The waveforms are baseline-corrected and rescaled with
    ``baselinecorre_norm`` using the first 200 samples as the baseline window.

    Parameters
    ----------
    rootfile : str
        Path of the ROOT file to read (opened with ``uproot``).
    savefile : bool, optional
        When True, the normalized waveforms are also written to
        ``"nopileupfile.pickle"`` in the current working directory. Each
        waveform is dumped as its own ``{"wf": ndarray}`` record, so the file
        must be read back with repeated ``pickle.load`` calls.

    Returns
    -------
    (list, numpy.ndarray)
        The list of normalized waveforms (one pandas Series per histogram) and
        the same data stacked into an array with one waveform per row.
    '''
    # The context manager guarantees the ROOT file is closed even when reading
    # one of the histograms raises.
    with ur.open(rootfile) as file:
        # Waveforms are stored column-wise: one column per histogram, one row
        # per sample. ignore_index=True relabels the columns 0..N-1 so they can
        # be addressed by position below. Histograms of different lengths are
        # padded with NaN by the concatenation.
        df_wf = pd.concat(
            [pd.DataFrame(file[key].values(), columns=[str(key)]) for key in file.keys()],
            axis=1,
            ignore_index=True,
        )

    # Drop the last sample of every waveform (original author's note: some
    # histograms carry NaN values at the end).
    df_wf = df_wf.iloc[:-1]

    # Baseline correction and normalization, one waveform (column) at a time.
    wf_list = [baselinecorre_norm(df_wf[wfid], 200) for wfid in range(df_wf.shape[1])]
    np_wf = np.asarray(wf_list)

    if savefile:
        file_name = "nopileupfile.pickle"
        with open(file_name, 'wb') as handle:
            for nwf in np_wf:
                pickle.dump({"wf": nwf}, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return wf_list, np_wf

In [3]:
# checkwf, checknp = extract_normal_wf("./isGood_2600_2630_wfs_sse_5k.root", True)
# checknp

In [4]:
# plt.plot(checknp[0])
# checkwf

In [5]:
def create_one_pileup_wf(rootfile, end_shift_param, wfs=None):
    '''Create a single pileup waveform from two randomly picked normal waveforms.

    A base waveform and its neighbour in the list are each scaled by an
    independent random factor in [0, 1). The neighbour is delayed by
    ``end_shift_param`` samples (zero-padded at the front, truncated to the
    base length) and added to the base.

    Parameters
    ----------
    rootfile : str
        Path of the ROOT file holding the normal waveforms. Only read when
        ``wfs`` is not supplied.
    end_shift_param : int
        Number of samples by which the second waveform is delayed.
    wfs : list of pandas.Series, optional
        Pre-loaded normalized waveforms (the first value returned by
        ``extract_normal_wf``). Supplying it avoids re-reading ``rootfile``.

    Returns
    -------
    pandas.Series
        The pileup waveform, same length as the base waveform.

    Raises
    ------
    ValueError
        If fewer than two waveforms are available.
    '''
    if wfs is None:
        wfs, _ = extract_normal_wf(rootfile)

    n_wfs = len(wfs)
    if n_wfs < 2:
        raise ValueError("need at least two normal waveforms to build a pileup")

    # Draw the base waveform from the full index range. np.random.randint
    # excludes the upper bound, so the last index is n_wfs - 1.
    choose = np.random.randint(0, n_wfs)
    # Pair with the next waveform; the last one pairs with its predecessor so
    # the index never runs past the end of the list.
    partner = choose + 1 if choose < n_wfs - 1 else choose - 1

    # Independent random amplitudes for the two pulses.
    wfBs = wfs[choose] * np.random.random()
    wfOt = wfs[partner] * np.random.random()

    wf_len = len(wfBs)

    # Delay the second pulse: prepend zeros, renumber the index from 0, and
    # cut back to the base length. (Series.append was removed in pandas 2.0,
    # pd.concat is the supported equivalent.)
    wfOt = pd.concat([pd.Series(np.zeros(end_shift_param)), wfOt], ignore_index=True)
    wfOt = wfOt.iloc[:wf_len]

    wf_pileup = wfBs + wfOt
    return wf_pileup
    

In [6]:
def create_pileup_wfs(rootfile, init_shift_param, end_shift_param, wf_num):
    '''Create ``wf_num`` pileup waveforms with a fixed shift and pickle them.

    Parameters
    ----------
    rootfile : str
        Path of the ROOT file holding the normal waveforms.
    init_shift_param : int
        Currently unused: every waveform is generated with the same shift,
        ``end_shift_param``. Kept so existing calls keep working.
    end_shift_param : int
        Shift (in samples) applied to the second pulse of every pileup
        waveform. Also embedded in the output file name.
    wf_num : int
        Number of pileup waveforms to generate.

    Returns
    -------
    numpy.ndarray
        Array with one pileup waveform per row.

    Side effects
    ------------
    Writes ``"pileupwith_<end_shift_param>_shift.pickle"`` in the current
    working directory. Each waveform is dumped as its own ``{"wf": ndarray}``
    record, so the file must be read back with repeated ``pickle.load`` calls.
    '''
    # NOTE: create_one_pileup_wf is given the file path, so the ROOT file is
    # processed again for each generated waveform.
    pileup_rows = [create_one_pileup_wf(rootfile, end_shift_param) for _ in range(wf_num)]

    # One Series per row. DataFrame.append was removed in pandas 2.0, so build
    # the frame directly from the list.
    pileup_wf = pd.DataFrame(pileup_rows).to_numpy()

    file_name = "pileupwith_"+str(end_shift_param)+"_shift.pickle"
    with open(file_name, 'wb') as handle:
        for nwf in pileup_wf:
            pickle.dump({"wf": nwf}, handle, protocol=pickle.HIGHEST_PROTOCOL)
    return pileup_wf
    
    

In [None]:
# Shift values (in samples) to generate pileup sets for.
# range(10, 15, 5) yields a single value, so s == [10].
s = list(range(10,15,5))
# One create_pileup_wfs call per shift value: init_shift_param is passed as 0
# and 5000 pileup waveforms are requested per call. Each call writes its own
# pickle file named after the shift value.
for i in range(len(s)):
    create_pileup_wfs("./isGood_2600_2630_wfs_sse_5k.root",0,s[i],5000)