# <h1> Example Jupyter notebook for import_potus.root </h1>
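This notebook reads the output of a simTestPulse job with uproot, plots the waveforms and reconstructed hits for a single event, and then averages them over all events.<br>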

<h1><font color="blue"> Necessary imports </font></h1>
Let's set up necessary imports...<br>

In [None]:
# Probably most people will simply want "matplotlib inline" but on a mac it is useful to do the other stuff
%matplotlib inline
#%pylab

import matplotlib
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
from scipy.optimize import curve_fit

# This allows reading of root files without having to actually use root. Nice!
import uproot
import pandas as pd
import numpy  as np

# Widgets are always good for you
#from ipywidgets import widgets,Layout

###############################################################################
# TODO:
# Set the path to our library of functions/objects/etc
import sys

# This must be an absolute path: a relative "home/..." entry would be resolved
# against the kernel's current working directory instead of the filesystem root.
sigProcPath = "/home/usher/LArTPC/ICARUS/workarea/simTestPulse"
# Guard the insert so that re-running this cell does not stack duplicate entries.
if sigProcPath not in sys.path:
    sys.path.insert(0,sigProcPath)
###############################################################################
# TODO:
# Set the path and name of the data file to be read
PATHNAME       = "/home/usher/LArTPC/ICARUS/workarea/simTestPulse"
RECOFILENAME   = PATHNAME + "/Supplemental--_20200422T211813-SimTestPulse.root"

In [None]:
# Open the input file and step down to the entry we want to look at, echoing
# the keys available at each level along the way.
print("Opening file: ",RECOFILENAME)
data_file = uproot.open(RECOFILENAME)

file_keys = data_file.keys()
print(file_keys,"\n------\n")

fakeana_folder = data_file["fakeana"]
folder_keys    = fakeana_folder.keys()
print(folder_keys)

# NOTE: despite the "gaushit" in these variable names, the entry selected here
# is "decon1droi" inside "fakeana".
gaushit_folder = fakeana_folder["decon1droi"]
entry_keys     = gaushit_folder.keys()
print(entry_keys)

# Convert that same entry to a pandas dataframe (requested with flatten=False)
gaushit_info = gaushit_folder.pandas.df(flatten=False)

#print("Plane:",gaushit_info["plane"],", ",gaushit_info["tick_offset"])

<h1><font color="blue">Set up constants + data loading</font></h1>

In [None]:
# Keys (inside the input file) of the waveform entries and of the hit entries
WF_LABELS  = ['fakeana/daq','fakeana/rawdigitfilter','fakeana/decon1droi'] #'recowire' ] #'decon1droi']
HIT_LABELS = ['fakeana/gaushit']

# Earlier root_numpy based loading, kept for reference:
#wf_df_array=[pd.DataFrame(root_numpy.root2array(FILENAME,x)) for x in WF_LABELS]
#hit_array=[pd.DataFrame(root_numpy.root2array(FILENAME,x)) for x in HIT_LABELS]
#main_df=pd.DataFrame(root_numpy.root2array(FILENAME,'triggersim/simTestPulse'))

def _read_frame(key):
    """Return the entry stored under `key` in data_file as a pandas dataframe (flatten=False)."""
    return data_file[key].pandas.df(flatten=False)

wf_df_array = list(map(_read_frame,WF_LABELS))
hit_array   = list(map(_read_frame,HIT_LABELS))
main_df     = _read_frame("triggersim/simTestPulse")

# Columns that together identify one event
EVENT_KEY = ['run','subrun','event']

def _num_unique_events(frame):
    """Return the number of distinct EVENT_KEY combinations found in `frame`."""
    return len(frame.groupby(EVENT_KEY))

print("  ")

# Summarise what was loaded: number of rows and number of distinct events
print('simTestPulse dataframe(',main_df.index.size,'entries) ...',_num_unique_events(main_df),'unique events')
for idx,(label,df) in enumerate(zip(WF_LABELS,wf_df_array)):
    print('Waveform dataframe with',df.index.size,'entries ...',_num_unique_events(df),'unique events by',label)
for idx,(label,hit) in enumerate(zip(HIT_LABELS,hit_array)):
    print('Hits dataframe with    ',hit.index.size,'entries ...',_num_unique_events(hit),'unique events by',label)



<h1><font color="blue">Plot waveforms and hits for 1 event</font></h1>

In [None]:
def gaus(x,peakAmp,peakTime,sigma,baseline):
    """Evaluate a Gaussian sitting on a constant baseline.

    Computes peakAmp * exp(-0.5 * (x - peakTime)**2 / sigma**2) + baseline.

    Parameters:
        x        -- point(s) at which to evaluate; scalar or numpy array
        peakAmp  -- height of the peak above the baseline
        peakTime -- position of the peak
        sigma    -- Gaussian width
        baseline -- constant added to every value

    Returns:
        The function value(s), with the same shape as x.
    """
    offsetSquared = (x - peakTime)**2
    exponent      = -0.5 * offsetSquared / (sigma**2)
    return peakAmp * np.exp(exponent) + baseline

# Select the first (run, subrun, event) listed in the simTestPulse dataframe
run,subrun,event = main_df.run.values[0], main_df.subrun.values[0], main_df.event.values[0]

print(" ")
print('Accessing (run,subrun,event) = (%s,%s,%s)' % (run,subrun,event))
print(" ")

event_query_key = str('run==%s and subrun==%s and event==%s' % (run,subrun,event))
sub_df = main_df.query(event_query_key)

# Value subtracted from every waveform sample before plotting, indexed as
# offsets[position in WF_LABELS][plane]
offsets = [[2048, 2048, 400], [2048, 2048, 400], [0, 0, 0]]

# fC/ADC = 0.011, num electrons / fC = 6242
electronsPerADC = 0.011 * 6242

for signal_id in range(len(sub_df.charge_time_v.values)):

    num_electrons = sub_df.e_v.values[signal_id]
    charge_time   = sub_df.charge_time_v.values[signal_id]
    signal_tick   = sub_df.tick_v.values[signal_id]
    signal_y      = sub_df.y_v.values[signal_id]
    signal_z      = sub_df.z_v.values[signal_id]

    print(" ")
    print('Signal @ tick',signal_tick,'position (y,z) = (%g,%g)' % (signal_y,signal_z))
    print('Number of electrons:', num_electrons)

    # One figure per plane, overlaying every waveform type and the hits
    for plane in [0,1,2]:
        print('Plane',plane)
        print(" ")

        wf_query_key = event_query_key + str(' and plane==%d and signal_id==%d' % (plane,signal_id))

        fig,ax = plt.subplots(figsize=(12,8),facecolor='w')

        # Draw each waveform type for this (event, plane, signal)
        for idx,label in enumerate(WF_LABELS):
            df = wf_df_array[idx].query(wf_query_key)
            wf_v = df.wf.values
            if len(wf_v) != 1:
                print('Something is wrong... waveform is not unique!')
                print('producer:',label)
                print('query key:',wf_query_key)
                raise ValueError('expected exactly 1 waveform for producer %s, found %d (query: %s)'
                                 % (label,len(wf_v),wf_query_key))

            # Shift onto the offset-corrected tick axis and remove the per-plane offset
            start_tick = df.start_tick.values[0] + df.tick_offset.values[0]
            dataVals   = wf_v[0] - offsets[idx][plane]

            ax.plot(np.arange(start_tick,start_tick+len(wf_v[0]),1),
                    dataVals,
                    marker='o', label=label)

        # Overlay the first hit matching the same query as a Gaussian
        for idx,label in enumerate(HIT_LABELS):
            hitvalues = hit_array[idx].query(wf_query_key)
            if hitvalues.empty:
                continue
            peakTime  = hitvalues.peakTime.values[0] - 0.5
            peakSigma = hitvalues.rms.values[0]
            peakAmp   = hitvalues.peakAmplitude.values[0]
            summedADC = hitvalues.summedADC.values[0]
            integral  = hitvalues.integral.values[0]
            baseline  = hitvalues.baseline.values[0]
            # integral in ADC units
            numElecs  = electronsPerADC * integral
            #numElecs  = 0.4 * 0.017 * 6242 * integral
            x_axis    = np.arange(peakTime-4*peakSigma,peakTime+4*peakSigma,1.)
            ax.plot(x_axis,gaus(x_axis,peakAmp,peakTime,peakSigma,0),color='purple',label=label)
            # The sigma field needs the explicit 'f' type: a bare "4.1" is read as one
            # significant digit in general format, not as one decimal place.
            print("Hit plane:{0:3d}, peak time:{1:6.1f}, peak amplitude:{2:5.1f}, sigma: {3:4.1f}".format(plane,peakTime,peakAmp,peakSigma))
            print("--> summedADC:{0:6.1f} (electrons:{1:8.1f}), integral:{2:6.1f} (electrons:{3:8.1f})".format(summedADC,electronsPerADC*summedADC,integral,numElecs))

        # Mark the tick recorded for the signal and zoom to +/-40 ticks around it
        ax.axvline(signal_tick,color='black',linestyle='--')
        ax.get_yaxis().get_major_formatter().set_useOffset(False)
        ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
        ax.tick_params(labelsize=20)
        ax.set_xlim(signal_tick-40,signal_tick+40)
        ax.grid()
        ax.set_xlabel('TPC Tick (offset corrected)',fontsize=20,fontweight='bold',fontname='Georgia')
        ax.set_ylabel('Amplitude (baseline subtracted)',fontsize=20,fontweight='bold',fontname='Georgia')
        leg=ax.legend(fontsize=16)
        leg_frame=leg.get_frame()
        leg_frame.set_facecolor('white')

        plt.show()
    

<h1><font color="blue">Averaging over N events</font></h1>

In [None]:
run,subrun,event = main_df.run.values[0], main_df.subrun.values[0], main_df.event.values[0]

print("")
print("Starting (run,subrun,event) =",run,subrun,event)
print("")

# Create "histogram" containers, indexed [producer index][plane].
# The "...2..." containers accumulate the squares, used later for the RMS.
aveWaveHistList   = [[None,None,None] for i in range(len(WF_LABELS))]
aveWave2HistList  = [[None,None,None] for i in range(len(WF_LABELS))]
aveWaveCountList  = [[0,0,0] for i in range(len(WF_LABELS))]

aveHitHistList    = [[None,None,None] for i in range(len(HIT_LABELS))]
aveHit2HistList   = [[None,None,None] for i in range(len(HIT_LABELS))]
aveIntegralList   = [[0.,0.,0.] for i in range(len(HIT_LABELS))]
aveIntegral2List  = [[0.,0.,0.] for i in range(len(HIT_LABELS))]
aveSummedADCList  = [[0.,0.,0.] for i in range(len(HIT_LABELS))]
aveSummedADC2List = [[0.,0.,0.] for i in range(len(HIT_LABELS))]
aveHitCountList   = [[0,0,0] for i in range(len(HIT_LABELS))]

histRanges        = []
numBins           = []

# For every waveform type find the tick range covering all of its waveforms;
# one bin per tick.
for idx,label in enumerate(WF_LABELS):
    startArray = wf_df_array[idx].start_tick.values  # array of start ticks
    rangeArray = [len(wf) for wf in wf_df_array[idx].wf.values]
    endArray   = np.add(startArray,rangeArray)
    histRanges.append([np.amin(startArray),np.amax(endArray)+1])
    numBins.append(histRanges[idx][1]-histRanges[idx][0])


def _accumulate(container,idx,plane,hist):
    """Add hist into container[idx][plane], seeding the slot on first use."""
    if container[idx][plane] is None:
        container[idx][plane] = hist
    else:
        container[idx][plane] += hist


# The hit shapes are evaluated on the tick axis of the waveform type at this
# position in WF_LABELS
hitAxisIdx = 2
x_axis     = np.arange(histRanges[hitAxisIdx][0],histRanges[hitAxisIdx][1],1.)

# Hits with a peak amplitude below this value are left out of the averages
minHitAmp  = 5.

# Visit every distinct (run, subrun, event) actually present in the dataframe,
# rather than assuming the events are numbered 1..N within a single run/subrun.
eventKeys = main_df[['run','subrun','event']].drop_duplicates()

for evtRun,evtSubrun,evtEvent in eventKeys.itertuples(index=False,name=None):
    event_query_key = str('run==%s and subrun==%s and event==%s' % (evtRun,evtSubrun,evtEvent))
    sub_df = main_df.query(event_query_key)

    # Loop over charge deposits for this event
    for signal_id in range(len(sub_df.charge_time_v.values)):

        # Now loop over planes and accumulate histograms
        for plane in [0,1,2]:
            wf_query_key = event_query_key + str(' and plane==%d and signal_id==%d' % (plane,signal_id))

            # Final loop is over the list of waveform types
            for idx,label in enumerate(WF_LABELS):
                df = wf_df_array[idx].query(wf_query_key)
                wf_v = df.wf.values
                if len(wf_v) != 1:
                    print('Something is wrong... waveform is not unique!')
                    print('producer:',label)
                    print('query key:',wf_query_key)
                    continue

                # NOTE(review): histRanges is built from start_tick alone, whereas
                # start_tick here also includes tick_offset. The padding below only
                # lines up when tick_offset is zero - confirm against the data.
                start_tick = df.start_tick.values[0] + df.tick_offset.values[0]
                dataRange  = np.arange(histRanges[idx][0],histRanges[idx][1],1)

                # Work in float64: np.histogram returns the dtype of its weights, so a
                # waveform stored in a narrow integer type could otherwise overflow
                # when squared or when summed over many events.
                dataVals   = np.asarray(wf_v[0],dtype=np.float64) - offsets[idx][plane]

                # Do we need to prepend values? (pad with the first sample)
                if start_tick > histRanges[idx][0]:
                    leadingBins = np.full(start_tick-histRanges[idx][0],dataVals[0])
                    dataVals    = np.concatenate((leadingBins,dataVals))

                # Do we need to append values? (pad with the last sample)
                if histRanges[idx][0] + len(dataVals) < histRanges[idx][1]:
                    trailingBins = np.full(histRanges[idx][1]-histRanges[idx][0]-len(dataVals),dataVals[-1])
                    dataVals     = np.concatenate((dataVals,trailingBins))

                dataVals2  = np.multiply(dataVals,dataVals)

                hist,_ = np.histogram(dataRange,weights=dataVals,bins=numBins[idx],range=histRanges[idx])
                _accumulate(aveWaveHistList,idx,plane,hist)

                hist2,_ = np.histogram(dataRange,weights=dataVals2,bins=numBins[idx],range=histRanges[idx])
                _accumulate(aveWave2HistList,idx,plane,hist2)

                aveWaveCountList[idx][plane] += 1

            # Accumulate the Gaussian shape and the charge of the first matching hit
            for idx,label in enumerate(HIT_LABELS):
                hitvalues = hit_array[idx].query(wf_query_key)
                if hitvalues.empty:
                    continue
                peakTime  = hitvalues.peakTime.values[0] - 0.5
                peakSigma = hitvalues.rms.values[0]
                peakAmp   = hitvalues.peakAmplitude.values[0]
                summedADC = hitvalues.summedADC.values[0]
                integral  = hitvalues.integral.values[0]
                if peakAmp < minHitAmp:
                    continue

                fitVals   = gaus(x_axis,peakAmp,peakTime,peakSigma,0)
                hist,_ = np.histogram(x_axis,weights=fitVals,bins=len(x_axis))
                _accumulate(aveHitHistList,idx,plane,hist)

                fitVals2 = np.multiply(fitVals,fitVals)

                hist2,_ = np.histogram(x_axis,weights=fitVals2,bins=len(x_axis))
                _accumulate(aveHit2HistList,idx,plane,hist2)

                aveIntegralList[idx][plane]   += integral
                aveIntegral2List[idx][plane]  += integral * integral
                aveSummedADCList[idx][plane]  += summedADC
                aveSummedADC2List[idx][plane] += summedADC * summedADC
                aveHitCountList[idx][plane]   += 1



Now plot the averaged waveforms and hit shapes for each plane, and print the average hit integral and summed ADC.

In [None]:
# One panel per plane; all waveform types and the averaged hit shape are overlaid
fig,ax = plt.subplots(3,1,figsize=(10,12),facecolor='gray')

fmtVec   = ['g.:','b.:','m.:','r.:']
ecolVec  = ['g','b','m','r']
planeStr = ['First Induction','Middle Induction','Collection']

# Position in WF_LABELS of the waveform type whose tick axis the hit shapes use
hitAxisIdx = 2

# fC/ADC = 0.011, num electrons / fC = 6242, integral in ADC units
electronsPerADC = 0.011 * 6242


def _meanAndError(sumVals,sumSquares,count):
    """Return (mean, RMS/sqrt(count)) from accumulated sums of values and of squares."""
    mean     = sumVals / count
    meanSq   = sumSquares / count
    # Rounding can leave the variance of a (nearly) constant bin slightly below
    # zero, which would turn the error into NaN; clamp it at zero.
    variance = np.maximum(meanSq - np.multiply(mean,mean),0.)
    return mean, np.true_divide(np.sqrt(variance),np.sqrt(count))


# This pass through draws the waveforms
for idx,label in enumerate(WF_LABELS):
    for plane in [0,1,2]:
        subPlot  = ax[plane]
        numWaves = aveWaveCountList[idx][plane]
        if numWaves > 0:
            step    = (histRanges[idx][1]-histRanges[idx][0]) / numBins[idx]
            histX   = np.arange(histRanges[idx][0],histRanges[idx][1],step) + step/2
            # Compute the errors as the RMS / sqrt(N) for each bin
            histY,yErrors = _meanAndError(aveWaveHistList[idx][plane],aveWave2HistList[idx][plane],numWaves)
            subPlot.errorbar(histX,histY,xerr=0.5*step,yerr=yErrors,fmt=fmtVec[idx],ecolor=ecolVec[idx],label=label)
        else:
            # Nothing was accumulated: the sums are still None and the count is zero
            print('No waveforms accumulated for',label,', plane',plane,'- nothing drawn')
        subPlot.set_facecolor('0.9')
        subPlot.set_ylabel('ADC counts',size=12)
        subPlot.set_xlabel('Ticks',size=12)
        subPlot.xaxis.set_label_coords(0.94,-0.1)
        subPlot.set_title('Waveform '+planeStr[plane],size=14)
        subPlot.set_xlim(histRanges[0])
        subPlot.grid(color='gray', linestyle='--', linewidth=1)

# One more pass to draw the fit hits
for idx,label in enumerate(HIT_LABELS):
    for plane in [0,1,2]:
        subPlot = ax[plane]
        numHits = aveHitCountList[idx][plane]
        if numHits > 0:
            step    = (histRanges[hitAxisIdx][1]-histRanges[hitAxisIdx][0]) / numBins[hitAxisIdx]
            histX   = np.arange(histRanges[hitAxisIdx][0],histRanges[hitAxisIdx][1],step) + step/2
            # Compute the errors as the RMS / sqrt(N) for each bin
            histY,yErrors = _meanAndError(aveHitHistList[idx][plane],aveHit2HistList[idx][plane],numHits)
            subPlot.errorbar(histX,histY,xerr=0.5*step,yerr=yErrors,fmt=fmtVec[3],ecolor=ecolVec[3],label=label)

        plotLegend = subPlot.legend(fontsize=12)
        leg_frame  = plotLegend.get_frame()
        leg_frame.set_facecolor('white')

        if numHits == 0:
            # No hit was accumulated for this plane, so there is nothing to average
            print('** Hit producer: ',label,', plane: ',plane,'- no hits accumulated')
            print("")
            continue

        aveIntegral,aveIntegralErr   = _meanAndError(aveIntegralList[idx][plane],aveIntegral2List[idx][plane],numHits)
        numElectronIntegral          = electronsPerADC * aveIntegral
        aveSummedADC,aveSummedADCErr = _meanAndError(aveSummedADCList[idx][plane],aveSummedADC2List[idx][plane],numHits)
        numElectronSummedADC         = electronsPerADC * aveSummedADC

        print('** Hit producer: ',label,', plane: ',plane)
        print("   Average Integral:  {0:6.1f}+/-{1:3.1f}, or # electrons: {2:8.1f}+/-{3:3.1f}".format(aveIntegral,aveIntegralErr,numElectronIntegral,electronsPerADC * aveIntegralErr))
        print("   Average SummedADC: {0:6.1f}+/-{1:3.1f}, or # electrons: {2:8.1f}+/-{3:3.1f}".format(aveSummedADC,aveSummedADCErr,numElectronSummedADC,electronsPerADC * aveSummedADCErr))
        print("")


fig.tight_layout()
fig.set_size_inches(10.0, 12.0)
fig.savefig('AveWaveforms.png', dpi='figure')

