In [None]:
import uproot
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cbook as cbook
import numpy as np
import pandas as pd
import landau
from scipy.optimize import curve_fit

In [None]:
# SBND detector geometry and readout timing constants.

# Active volume (AV) bounds of the two TPCs: entry 0 spans x in [-200, 0],
# entry 1 spans x in [0, 200].
# NOTE(review): coordinates are presumably in cm -- confirm against the geometry.
SBND_AV = [
    dict(xmin=-200, xmax=0, ymin=-200, ymax=200, zmin=0, zmax=500),
    dict(xmin=0, xmax=200, ymin=-200, ymax=200, zmin=0, zmax=500),
]

# Fiducial volume (FV) of each TPC: the matching AV shrunk inwards by a
# per-face margin. The x margins differ between the two TPCs.
SBND_FV = [
    dict(
        xmin=SBND_AV[0]["xmin"] + 40,
        xmax=SBND_AV[0]["xmax"] - 25,
        ymin=SBND_AV[0]["ymin"] + 25,
        ymax=SBND_AV[0]["ymax"] - 25,
        zmin=SBND_AV[0]["zmin"] + 25,
        zmax=SBND_AV[0]["zmax"] - 25,
    ),
    dict(
        xmin=SBND_AV[1]["xmin"] + 20,
        xmax=SBND_AV[1]["xmax"] - 40,
        ymin=SBND_AV[1]["ymin"] + 25,
        ymax=SBND_AV[1]["ymax"] - 25,
        zmin=SBND_AV[1]["zmin"] + 25,
        zmax=SBND_AV[1]["zmax"] - 25,
    ),
]

# Readout timing
tickwindow = 2600          # ticks
tickwindow_relaxed = 2500  # ticks
ticks2time = 0.0005        # ms per tick
pretrigger = 0.2           # ms: pretrigger time included before the drift window
sim_etime = 10             # ms: simulated lifetime

In [None]:
# Branches of the CaloSkim ntuple that this notebook reads.
branches = [
    # per-hit charge, position and time
    "hits2.dqdx",
    "hits2.h.p.x", "hits2.h.p.y", "hits2.h.p.z",
    "hits2.h.time",
    # track-level quantities used for the drift time and the track selection
    "t0", "selected",
    # earliest/latest hit time of the track in each TPC
    "hit_min_time_p2_tpcE", "hit_max_time_p2_tpcE",
    "hit_min_time_p2_tpcW", "hit_max_time_p2_tpcW",
]

In [None]:
# Open the input ROOT file and load the selected branches into pandas.

fname = "./hists_reco1_Reco2-20220315T170725.root"  # input file, relative to the notebook
folder = "caloskim"                                 # directory inside the ROOT file
tname = "TrackCaloSkim"                             # tree inside that directory

root_dir = uproot.open(fname)[folder]
tree = root_dir[tname]

# Only the branches listed in `branches` are read.
df = tree.arrays(branches, library="pd")

In [None]:
# Map the ntuple branch names onto the short column names used in the rest of
# the notebook. "t0" and "selected" already carry their final names, so they
# need no entry. Index 0 / 1 in the time columns refers to tpcE / tpcW.
short_names = {
    "hits2.dqdx": "dqdx",
    "hits2.h.time": "hittime",
    "hits2.h.p.x": "x",
    "hits2.h.p.y": "y",
    "hits2.h.p.z": "z",
    "hit_min_time_p2_tpcE": "mintime0",
    "hit_max_time_p2_tpcE": "maxtime0",
    "hit_min_time_p2_tpcW": "mintime1",
    "hit_max_time_p2_tpcW": "maxtime1",
}
df = df.rename(columns=short_names)

In [None]:
# Apply the fiducial-volume (FV) cut separately for TPC0 and TPC1.

def _inside_fv(hits, fv):
    """Return a boolean mask of the rows of `hits` lying strictly inside `fv`.

    `hits` needs 'x', 'y' and 'z' columns; `fv` is one entry of SBND_FV.
    All six bounds are exclusive.
    """
    x, y, z = hits['x'], hits['y'], hits['z']
    return ((x > fv['xmin']) & (x < fv['xmax'])
            & (y > fv['ymin']) & (y < fv['ymax'])
            & (z > fv['zmin']) & (z < fv['zmax']))

df0 = df.loc[_inside_fv(df, SBND_FV[0])]
df1 = df.loc[_inside_fv(df, SBND_FV[1])]

In [None]:
# Apply the remaining track selection:
#   * selected == 1 keeps a2c tracks
#   * (max hit time - min hit time) > tickwindow_relaxed (2500 ticks) keeps
#     only tracks that fully cross the TPC
#
# Each comparison is parenthesised on purpose: `&` binds tighter than `==`, so
# `selected == 1 & (cond)` is parsed as `selected == (1 & cond)`, which also
# keeps rows with selected == 0 that FAIL the time cut.
#
# .copy() makes each result an independent frame, so columns added in later
# cells do not write into a view of `df`.

df0 = df0.loc[(df0['selected'] == 1)
              & (df0['maxtime0'] - df0['mintime0'] > tickwindow_relaxed)].copy()

df1 = df1.loc[(df1['selected'] == 1)
              & (df1['maxtime1'] - df1['mintime1'] > tickwindow_relaxed)].copy()

In [None]:
# Drift time in ms: the hit time converted from ticks to ms, minus the
# pretrigger offset, minus the track t0 converted from ns to ms.
#
# .assign() builds a new frame instead of setting a column on the filtered
# frame in place, which avoids pandas' SettingWithCopyWarning and keeps the
# cell safe to re-run.

df0 = df0.assign(drifttime=df0['hittime']*ticks2time - pretrigger - df0['t0']/1000000)
df1 = df1.assign(drifttime=df1['hittime']*ticks2time - pretrigger - df1['t0']/1000000)

In [None]:
#DROP COLUMNS THAT ARE NO LONGER USED

#df0 = df0[['dqdx','drifttime']]
#df1 = df1[['dqdx','drifttime']]

In [None]:
#DEFINE LANGAU FIT FUNCTION

def landau_gaus(X, *p):
    """Evaluate the langau fit function at X.

    Thin wrapper around ``landau.landau.gauss_landau`` with the signature
    that ``scipy.optimize.curve_fit`` expects.

    Parameters
    ----------
    X : values at which the function is evaluated.
    *p : exactly four parameters, in the order (mpv, eta, sigma, A).

    Returns
    -------
    Whatever ``landau.landau.gauss_landau`` returns for those arguments.

    Raises
    ------
    ValueError
        If ``p`` does not hold exactly four values.
    """
    mpv, eta, sigma, A = p
    # When sigma reaches 100 * eta it is replaced by eta.
    # NOTE(review): presumably keeps the fit out of a region where the
    # Gaussian width is huge compared to the Landau width -- confirm.
    width = eta if sigma >= 100*eta else sigma
    return landau.landau.gauss_landau(X, mpv, eta, width, A)

def langau_chi2(x, y, yerr, popt):
    """Chi-square of the langau model against binned data.

    Parameters
    ----------
    x : points at which the model is evaluated.
    y : observed values at those points.
    yerr : uncertainties on ``y``.
    popt : langau parameters (mpv, eta, sigma, A) passed on to ``landau_gaus``.

    Returns
    -------
    Sum over all points of ((model - y) / yerr)**2.
    """
    pulls = (landau_gaus(x, *popt) - y) / yerr
    return np.sum(pulls**2)

def fit_langau(df, fit_scale = 1, plotFig = True):
    """Histogram dQ/dx values and fit a langau around the peak.

    Parameters
    ----------
    df : array-like of dQ/dx values (callers in this notebook pass a pandas
        Series); histogrammed into 99 equal bins between 0 and 2000.
    fit_scale : factor applied to the bin centres before fitting.
    plotFig : when True, draw the fitted part of the histogram and the fit.

    Returns
    -------
    (popt, perr, chi2)
        ``popt``: fitted parameters in the order (mpv, eta, sigma, A);
        ``perr``: square roots of the diagonal of the covariance matrix;
        ``chi2``: chi-square of the fit over the fitted bins.
    """
    bins = np.linspace(0, 2000, 100)
    bin_centers = (bins[1:] + bins[:-1]) / 2.
    x_scaled = bin_centers * fit_scale

    # `bins` is an explicit array of edges, so np.histogram's `range`
    # argument would be ignored; it is therefore not passed.
    N, _ = np.histogram(df, bins=bins)
    # sqrt(N) errors, floored at 1 so that empty bins do not get zero error.
    Nerr = np.maximum(np.sqrt(N), 1)

    # only fit near peak: bins fewer than 25 bins away from the highest one
    maxbin = np.argmax(N)
    when_fit = np.abs(np.arange(len(bin_centers)) - maxbin) < 25
    # NOTE(review): landau_gaus orders its parameters (mpv, eta, sigma, A), so
    # this seeds mpv with 10% of the peak position and eta with the peak
    # position itself. That looks swapped; left unchanged so fit results stay
    # the same -- confirm before changing.
    p0 = [bin_centers[maxbin]/100*10, bin_centers[maxbin], 0.4*fit_scale, 20*np.max(N)]

    popt, pcov = curve_fit(landau_gaus,
                           x_scaled[when_fit],
                           N[when_fit],
                           p0=p0, maxfev=100_000,
                           sigma=Nerr[when_fit])

    perr = np.sqrt(np.diag(pcov))
    chi2 = langau_chi2(x_scaled[when_fit], N[when_fit], Nerr[when_fit], popt)

    if plotFig:
        # Data and fit share the scaled x-axis so they overlay for any fit_scale.
        plt.plot(x_scaled[when_fit], N[when_fit])
        plt.plot(x_scaled[when_fit], landau_gaus(x_scaled[when_fit], *popt), 'r-', label='MPV=%5.3f, Chi2=%5.3f' % (popt[0], chi2))
        plt.ylabel('Entries/Bin')
        plt.xlabel('dQ/dx [ADC/cm]')
        plt.legend()
        plt.show()
    return popt, perr, chi2

def fit_langau_by_tdrift_bin(df, tdlos, tdhis, fit_scale = 1, plotFig = True):
    """Fit a langau to dQ/dx separately in each drift-time bin.

    The per-bin histogramming, fitting and plotting is delegated to
    ``fit_langau`` so both entry points share one implementation.

    Parameters
    ----------
    df : DataFrame with 'dqdx' and 'drifttime' columns.
    tdlos, tdhis : matching sequences of lower and upper drift-time edges;
        both edges are exclusive.
    fit_scale : forwarded to ``fit_langau``.
    plotFig : when True, print the bin range and let ``fit_langau`` draw the
        histogram and fit for each bin.

    Returns
    -------
    (langau_params, langau_param_errs, chi2_fit)
        Three lists with one entry per drift-time bin, holding the fitted
        parameters, their errors and the fit chi-square.
    """
    langau_params = []
    langau_param_errs = []
    chi2_fit = []

    for tdlo, tdhi in zip(tdlos, tdhis):
        in_bin = (df['drifttime'] > tdlo) & (df['drifttime'] < tdhi)
        dffit = df['dqdx'].loc[in_bin]

        if plotFig:
            print('dQ/dx histogram for %5.3f < time drift < %5.3f' % (tdlo, tdhi))

        popt, perr, chi2 = fit_langau(dffit, fit_scale=fit_scale, plotFig=plotFig)

        langau_params.append(popt)
        langau_param_errs.append(perr)
        chi2_fit.append(chi2)

    return langau_params, langau_param_errs, chi2_fit

In [None]:
# Fit a langau to dQ/dx in each drift-time bin, for both TPCs.

# Fitting region and binning in drift time.
# `nbins` is the number of bin EDGES handed to linspace, so the fit runs over
# nbins - 1 drift-time bins.
nbins = 6
tdriftmin = 0.25 #ms
tdriftmax = 1.05 #ms
tdriftwindow = 1.3 #ms

td = np.linspace(tdriftmin, tdriftmax, nbins)

tdlos, tdhis = td[:-1], td[1:]

# Bin centres and half-widths
TD = (tdhis + tdlos)/2.
TD_width = (tdhis - tdlos) / 2.

# Run the fits and convert each returned list to an array
# (one row / entry per drift-time bin).
langau0, langauerr0, chi2_fit0 = map(
    np.array, fit_langau_by_tdrift_bin(df0, tdlos, tdhis, plotFig = True ))
langau1, langauerr1, chi2_fit1 = map(
    np.array, fit_langau_by_tdrift_bin(df1, tdlos, tdhis, plotFig = True ))

In [None]:
#CHECK FIT PARAMETERS STABILITY

#PLOT PARAMETERS AND THEIR FIT ERRS
def plot_fit_stability(langau, TD):
    """Plot every langau fit parameter as a function of drift time.

    One figure is drawn per parameter column.

    Parameters
    ----------
    langau : 2-D array-like with one row per drift-time bin and one column
        per fit parameter, in the order (MPV, eta, sigma, area).
    TD : drift-time bin centres, one per row of ``langau``.
    """
    param_labels = ['MPV', 'Landau Eta', 'Gaussian Sigma', 'Area']
    langau = np.asarray(langau)

    # Loop over the parameter columns (not over the drift-time bins) and plot
    # the array that was passed in, not a notebook-level global.
    for i in range(langau.shape[1]):
        if i < len(param_labels):
            plt.ylabel(param_labels[i])
        plt.xlabel('Drift Time [ms]')
        plt.plot(TD, langau[:, i])
        plt.show()
        
#plot_fit_stability(langau0, TD)
#plot_fit_stability(langau1, TD)
#plot_fit_stability(langauerr0, TD)
#plot_fit_stability(langauerr1, TD)

# Langau fit chi2 in each drift-time bin, for both TPCs. The curves are
# labelled so they can be told apart.
plt.plot(TD, chi2_fit0, label='TPC0')
plt.plot(TD, chi2_fit1, label='TPC1')
plt.xlabel('Drift Time [ms]')
plt.ylabel('Langau fit Chi2')
plt.legend()
plt.show()

In [None]:
#DEFINE EXPONENTIAL FIT FUNCTION TO FIND ELECTRON LIFETIME
def exp_decay(X, *p):
    """Exponential model ``A * exp(X / tau)``.

    Parameters
    ----------
    X : scalar or array at which the model is evaluated.
    *p : exactly two parameters, in the order (A, tau). The exponent is
        ``X / tau`` with no minus sign, so a decay corresponds to tau < 0.

    Raises
    ------
    ValueError
        If ``p`` does not hold exactly two values.
    """
    amplitude, tau = p
    exponent = X/tau
    return amplitude * np.exp(exponent)

def exp_chi2(x, y, yerr, popt):
    """Chi-square of the exponential model against data.

    Parameters
    ----------
    x : points at which the model is evaluated.
    y : observed values at those points.
    yerr : uncertainties on ``y``.
    popt : model parameters (A, tau) passed on to ``exp_decay``.

    Returns
    -------
    Sum over all points of ((model - y) / yerr)**2.
    """
    pulls = (exp_decay(x, *popt) - y) / yerr
    return np.sum(pulls**2)

def fit_etime(data, edata, TD, plotFig = True):
    """Fit ``exp_decay`` to values versus drift time and report the lifetime.

    Parameters
    ----------
    data : values to fit, one per drift-time point (the notebook passes the
        langau MPV of each drift-time bin).
    edata : uncertainties on ``data``, used as ``sigma`` in the fit.
    TD : drift-time points matching ``data``.
    plotFig : when True, draw the points with error bars and the fitted curve.

    Returns
    -------
    (lifetime, lifetime_err, chi2)
        ``lifetime`` is the absolute value of the fitted tau, so the sign of
        tau is not visible to the caller; ``lifetime_err`` is the fit error
        on tau; ``chi2`` is the chi-square of the fit.
    """
    # Seed: first data point as amplitude, and the simulated lifetime with a
    # negative sign because exp_decay uses exp(X / tau).
    seed = [data[0], -sim_etime]
    best, cov = curve_fit(exp_decay, TD, data,
                          sigma=edata, p0=seed, maxfev=100_000)

    errs = np.sqrt(np.diag(cov))
    chi2 = exp_chi2(TD, data, edata, best)

    lifetime = abs(best[1])
    lifetime_err = errs[1]

    print("FITTED ELECTRON LIFETIME = %5.3f +/- %5.3f" % (lifetime, lifetime_err))

    if plotFig:
        plt.errorbar(TD, data, yerr =edata, fmt = 'o')
        plt.plot(TD, exp_decay((TD), *best), 'r-', label='tau=%5.3f, Chi2=%5.3f' % (lifetime, chi2))
        plt.xlabel('Drift Time [ms]')
        plt.ylabel('dQ/dx [ADC/cm]')
        plt.legend()
        plt.show()
    return lifetime, lifetime_err, chi2

In [None]:
# Electron-lifetime fit for each TPC.
# Column 0 of the langau results is the MPV; the same column of the error
# array is its fit error. One value per drift-time bin.
mpv0, empv0 = langau0[:,0], langauerr0[:,0]
mpv1, empv1 = langau1[:,0], langauerr1[:,0]

etime0, etimeerr0, etimechi2_fit0 = fit_etime(mpv0, empv0, TD)
etime1, etimeerr1, etimechi2_fit1 = fit_etime(mpv1, empv1, TD)

In [None]:
# Apply the electron-lifetime correction to dQ/dx.
#
# .assign() builds new frames instead of setting columns on the filtered
# frames in place, which avoids pandas' SettingWithCopyWarning and keeps the
# cell safe to re-run.

# Lifetime correction: dqdxcorr = dqdx * exp(drifttime / lifetime)
df0 = df0.assign(dqdxcorr=df0['dqdx']*np.exp(df0['drifttime']/etime0))
df1 = df1.assign(dqdxcorr=df1['dqdx']*np.exp(df1['drifttime']/etime1))

# Error on the corrected dQ/dx from the lifetime fit error:
# |d(dqdxcorr)/d(lifetime)| * lifetime_err = dqdxcorr * drifttime * lifetime_err / lifetime**2
df0 = df0.assign(edqdxcorr=df0['dqdxcorr']*df0['drifttime']*etimeerr0/(etime0*etime0))
df1 = df1.assign(edqdxcorr=df1['dqdxcorr']*df1['drifttime']*etimeerr1/(etime1*etime1))

In [None]:
#DEFINE VERIFICATION PLOTTING FUNCTIONS

def plot_verificaiton_graphs(df):
    """Draw the verification plots for the lifetime correction of one TPC.

    Four figures are produced: a langau fit to dQ/dx before the correction,
    a langau fit after it, a histogram of the absolute correction error and a
    histogram of the percentage correction error.

    Parameters
    ----------
    df : DataFrame with 'dqdx', 'dqdxcorr' and 'edqdxcorr' columns.
    """
    # dQ/dx before the correction, then after it; the fit results are not used.
    for column in ('dqdx', 'dqdxcorr'):
        fit_langau(df[column], plotFig = True )

    # Absolute fit error on the corrected dQ/dx
    abs_err = df['edqdxcorr']
    abs_err.plot.hist(bins=50, alpha=0.5, edgecolor='k', density=True, label='Mean=%5.3f ADC/cm' % (abs_err.mean()))
    plt.xlabel('edQ/dx [ADC/cm]')
    plt.legend()
    plt.title('Absolute Fitting Errors on dQ/dx lifetime correction')
    plt.show()

    # Fit error as a percentage of the corrected dQ/dx
    pct_err = df['edqdxcorr']/df['dqdxcorr']*100
    pct_err.plot.hist(bins=50, alpha=0.5, edgecolor='k', density=True, label='Mean=%5.3f %%' % (pct_err.mean()))
    plt.xlabel('% edQ/dx')
    plt.legend()
    plt.title('Percentage Fitting Errors on dQ/dx lifetime correction')
    plt.show()

In [None]:
# Verification plots for TPC0, then TPC1.
for tpc_hits in (df0, df1):
    plot_verificaiton_graphs(tpc_hits)