In [2]:
import ROOT
import pandas as pd
import os
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
from scipy import interpolate
from uncertainties import ufloat
from uncertainties.umath import *
# Get all target polarizations from RCDB
import glob
import sys
sys.path.append("/work/clas12/users/gmat/packages/clas12root/rcdb/python/")
import rcdb
from rcdb.provider import RCDBProvider
from rcdb.model import ConditionType

In [14]:
def DF(A,nA,nMT,nCH,nC,nF,sA,sMT,sCH,sC,sF):
    """Evaluate the dilution-factor expression for one target and propagate errors.

    Parameters
    ----------
    A : str
        Target label; must be "NH3" or "ND3". It only selects the value of rhoA.
    nA, nMT, nCH, nC, nF : float
        Central values entering the formula. The caller in this notebook passes
        counts divided by the accumulated fcupgated for the ammonia, empty,
        CH2 (or CD2-scaled), and carbon samples, and 0 for nF.
    sA, sMT, sCH, sC, sF : float
        Standard deviations attached to the corresponding central values.

    Returns
    -------
    tuple (value, error)
        Nominal value and standard deviation of numerator/denominator, with the
        uncertainty propagated by the ``uncertainties`` package.
        If ``A`` is not recognised, an error is printed and the scalar -1 is
        returned instead of a tuple (kept for backward compatibility; callers
        that unpack two values will fail on it).
    """
    # Validate the target first so that an invalid label is rejected before
    # any other work is done.
    if(A=="NH3"):
        rhoA=0.92/17
    elif(A=="ND3"):
        rhoA=0.92/20
    else:
        print("ERROR: In DF(), A must equal either NH3 or ND3...Aborting...")
        return -1

    # Attach the uncertainties so that arithmetic below propagates them.
    nA=ufloat(nA,sA)
    nMT=ufloat(nMT,sMT)
    nCH=ufloat(nCH,sCH)
    nC=ufloat(nC,sC)
    nF=ufloat(nF,sF)

    # Geometry / material constants of the formula.
    # NOTE(review): presumably lengths in cm and densities in mol/cm^3
    # (mass density over molar mass) — TODO confirm units and sources.
    lC=1.5
    L=5.86
    lCH=3
    rhoC=2/12
    rhoCH=1/14

    numerator = 9*(nA-nMT)*rhoA*(lC*L*(-nCH+nMT)*rhoC+lC*lCH*(nF-nMT)*(rhoC-rhoCH)+lCH*L*(nC-nMT)*rhoCH)
    denominator = nA*(9*lC*L*(-nCH+nMT)*rhoA*rhoC+2*lCH*L*(nC-nMT)*rhoA*rhoCH+lC*lCH*(nF-nMT)*(9*rhoA*rhoC-2*(rhoA+3*rhoC)*rhoCH))

    # Divide once and read both the nominal value (.n) and the std. dev. (.s).
    ratio = numerator/denominator
    return ratio.n , ratio.s

In [15]:
# Grab location of rcdb.csv
def get_rcdb(rootdir = ""):
    """Return the path of the ``rcdb.csv`` file inside ``rootdir``.

    Parameters
    ----------
    rootdir : str or path-like
        Directory expected to contain ``rcdb.csv``. A trailing path separator
        is optional. The empty string means the current working directory.

    Returns
    -------
    str or int
        The path to ``rcdb.csv`` if it exists; otherwise an error is printed
        and -1 is returned.
    """
    # os.path.join only inserts a separator when one is missing, so both
    # "/data/dir" and "/data/dir/" resolve to "/data/dir/rcdb.csv".
    rcdb_file = os.path.join(rootdir, "rcdb.csv")
    if(not os.path.exists(rcdb_file)):
        print("ERROR: get_rcdb() cannot find file",rcdb_file,". Aborting...")
        return -1
    return rcdb_file

# Extract the run number from root file name
def get_run_from_root(file):
    """Extract the integer run number from a ``run<NUMBER>.<ext>`` file path.

    Only the final path component is inspected, so directory names that
    happen to contain "/run" or a dot do not interfere with the parsing.

    Parameters
    ----------
    file : str or path-like
        Path whose file name starts with "run", followed by the run number
        and then a ".".

    Returns
    -------
    int
        The run number found between "run" and the first "." of the file name.

    Raises
    ------
    ValueError
        If the file name does not start with "run", contains no ".", or the
        text in between is not an integer.
    """
    basename = os.path.basename(str(file))
    prefix = "run"
    if(not basename.startswith(prefix)):
        raise ValueError("get_run_from_root() expected a file name starting with 'run', got {!r}".format(basename))
    # str.index raises ValueError when there is no '.', matching int()'s
    # exception type for a malformed number.
    end = basename.index('.', len(prefix))
    runNumber = int(basename[len(prefix):end])
    return runNumber

# Extract other column value based on runNumber
def get_colval_from_run(rcdb_csv,runNumber,col = ''):
    """Return the value of column ``col`` for the first row whose Run matches.

    ``rcdb_csv`` is a pandas DataFrame with a ``Run`` column; an IndexError
    is raised by the final lookup when no row has ``Run == runNumber``.
    """
    selector = 'Run=={}'.format(runNumber)
    matching_rows = rcdb_csv.query(selector)
    column_values = matching_rows[col].values
    return column_values[0]

In [94]:
def get_total_fcupgated(runList, scalerDir="/volatile/clas12/users/gmat/clas12analysis.sidis.data/rgc-scaler8.3.2"):
    """Sum the ``fcupgated_33ms`` column over the scaler CSV files of many runs.

    Parameters
    ----------
    runList : iterable
        Run numbers; for each one the file ``run<RUN>_HELScaler-all.csv`` is
        read from ``scalerDir``.
    scalerDir : str or path-like, optional
        Directory holding the per-run scaler CSV files. Defaults to the
        location previously hard-coded in this function.

    Returns
    -------
    number
        Sum of ``fcupgated_33ms`` over all listed runs; 0 for an empty list.
    """
    total=0
    for r in runList:
        scaler_file = os.path.join(scalerDir, "run{}_HELScaler-all.csv".format(r))
        df_hel = pd.read_csv(scaler_file)
        total += df_hel.fcupgated_33ms.sum()
    return total

In [95]:
def get_files(rootdir = "", target = "",runMin = -1,runMax = -1, runList = [], consecutive=True, monteCarlo=False):
    """List the ``*.root`` files in ``rootdir`` that hold a ``tree_postprocess`` key.

    Parameters
    ----------
    rootdir : str
        Directory scanned (non-recursively) for ``*.root`` files.
    target : str
        Not used by this function.
    runMin, runMax : int
        Inclusive run-number window. Both negative (the default) means no
        window is applied.
    runList : list
        Explicit run numbers to accept. The default list is never modified.
    consecutive : bool
        When True the returned paths are ordered by run number.
    monteCarlo : bool
        When True every file is accepted regardless of its run number.

    Returns
    -------
    list of str
        Paths of the accepted files. A file is accepted if no selection was
        given at all, or its run number is inside the window, or it is in
        ``runList``, or ``monteCarlo`` is True — and it contains the key
        ``tree_postprocess``. Files lacking the key are reported and skipped.
    """
    # With neither a window nor a list, every run found is accepted.
    select_all = (runMin < 0 and runMax < 0 and runList==[])

    files = []
    runs = []
    for path in Path(rootdir).glob("*.root"):
        runNumber = get_run_from_root(str(path))
        wanted = (select_all
                  or (runNumber >= runMin and runNumber <= runMax)
                  or (runNumber in runList)
                  or monteCarlo==True)
        if(not wanted):
            continue

        tfile = ROOT.TFile(str(path),"READ")
        try:
            has_tree = tfile.GetListOfKeys().Contains("tree_postprocess")
        finally:
            # Release the file handle; otherwise one stays open per scanned file.
            tfile.Close()

        if(has_tree):
            files.append(str(path))
            runs.append(runNumber)
        else:
            print("ERROR: get_files() found a .root file (run",runNumber,") without a tree_postprocess...skipping...")

    # Sort by runNumber if specified
    if(consecutive==True):
        files = [x for y, x in sorted(zip(runs, files))]

    return files

In [105]:
def extract_dilution_factors(rootdir="",
                             cookType="TBT",
                             ft="On",
                             debug=False):
    """Compute NH3 and ND3 dilution factors in bins of x and Q2.

    The run table ``rcdb.csv`` in ``rootdir`` is filtered by ``cookType`` and
    by run number (``ft=="On"`` keeps Run <= 16843, ``ft=="Off"`` keeps
    Run > 16843). Runs are grouped by their ``Target`` value ("NH3", "ND3",
    "C", "CH2", "Empty"). For each group the ``tree_postprocess`` trees are
    histogrammed in (x, Q2) after a DIS cut, the bin counts are divided by the
    summed fcupgated of the group, and ``DF`` is evaluated per bin.
    For ND3 the CH2 counts are scaled by the "CD2/CH2" factor read from
    CH_CD.csv before being passed to ``DF``.

    Parameters
    ----------
    rootdir : str
        Directory holding ``rcdb.csv`` and the per-run ROOT files.
    cookType : str
        "TBT" or "HBT"; selects rows of the run table via its CookType column.
    ft : str
        "On" or "Off"; selects the run-number range described above.
    debug : bool
        When True the NH3 sample is handled run by run (one RDataFrame,
        histogram, fcupgated and dilution factor per NH3 file) and the output
        table has per-run NH3 columns instead of combined ones.

    Returns
    -------
    pandas.DataFrame
        One row per (x, Q2) bin for which CH_CD.csv has an entry at the bin's
        x centre and none of the compared counts is zero.

    Raises
    ------
    AssertionError
        If ``cookType``/``ft`` are invalid, a target has no runs, or a summed
        fcupgated is not positive.
    """
    assert(cookType in ["TBT","HBT"])
    assert(ft in ["On","Off"])
    
    # Locate and load the rcdb.csv stored in rootdir.
    # (Named rcdb_file so the imported `rcdb` module is not shadowed.)
    rcdb_file = get_rcdb(rootdir)
    df_rcdb = pd.read_csv(rcdb_file)
    df_rcdb=df_rcdb[df_rcdb.CookType==cookType]
    if(ft=="Off"):
        df_rcdb=df_rcdb[df_rcdb.Run>16843]
    else:
        df_rcdb=df_rcdb[df_rcdb.Run<=16843]
    df_rcdb=df_rcdb.sort_values(by="Run")
    
    # Generate list of runs for the analysis
    runList_NH3 = df_rcdb[df_rcdb.Target=="NH3"].Run.to_list()
    runList_ND3 = df_rcdb[df_rcdb.Target=="ND3"].Run.to_list()
    runList_C = df_rcdb[df_rcdb.Target=="C"].Run.to_list()
    runList_CH = df_rcdb[df_rcdb.Target=="CH2"].Run.to_list()
    runList_MT = df_rcdb[df_rcdb.Target=="Empty"].Run.to_list()
    assert(runList_NH3!=[])
    assert(runList_ND3!=[])
    assert(runList_C!=[])
    assert(runList_CH!=[])
    assert(runList_MT!=[])

    # Get files for each target type
    files_NH3=get_files(rootdir,runList=runList_NH3)
    files_ND3=get_files(rootdir,runList=runList_ND3)
    files_C=get_files(rootdir,runList=runList_C)
    files_CH=get_files(rootdir,runList=runList_CH)
    files_MT=get_files(rootdir,runList=runList_MT)
    
    # Generate RDataframes for each file list
    df_NH3=0
    # -----------------------------------------
    if(debug):
        # One RDataFrame per NH3 file.
        # NOTE(review): these are later zipped with per-run fcupgated values
        # built from runList_NH3; the two only line up if every listed run
        # has an accepted file — confirm.
        df_NH3 = [ROOT.RDataFrame("tree_postprocess",f) for f in files_NH3]
    else:
        df_NH3 = ROOT.RDataFrame("tree_postprocess",files_NH3)
    # -----------------------------------------
    df_ND3 = ROOT.RDataFrame("tree_postprocess",files_ND3)
    df_C = ROOT.RDataFrame("tree_postprocess",files_C)
    df_CH = ROOT.RDataFrame("tree_postprocess",files_CH)
    df_MT = ROOT.RDataFrame("tree_postprocess",files_MT)
    
    # Basic DIS cut
    dis_cut = "p_e > 2.6 && th_e > 0.14 && th_e < 0.611 && vz_e > -9.5 && vz_e < 0.5"
    
    # Get total fcupgated for each target
    fcg_NH3=0
    if(debug):
        fcg_NH3=[get_total_fcupgated([r]) for r in runList_NH3]
    else:
        fcg_NH3 = get_total_fcupgated(runList_NH3); print("NH3 fcupgated calculated")
    fcg_ND3 = get_total_fcupgated(runList_ND3); print("ND3 fcupgated calculated")
    fcg_C = get_total_fcupgated(runList_C); print("C fcupgated calculated")
    fcg_CH = get_total_fcupgated(runList_CH); print("CH fcupgated calculated")
    fcg_MT = get_total_fcupgated(runList_MT); print("MT fcupgated calculated")
    if(not debug):
        assert(fcg_NH3>0)
    assert(fcg_ND3>0)
    assert(fcg_C>0)
    assert(fcg_CH>0)
    assert(fcg_MT>0)
    
    # Get the x-Q2 binning based on Sebastian
    df_seb = pd.read_csv("/work/clas12/users/gmat/CLAS12Analysis/macros/analysis/rg-c/toolkit/sebastian_A_LL_p.txt",sep=" ",names=["xmin","xmax","Q2min","Q2max","A_LL"])
    xbins = np.unique(np.array(df_seb["xmin"].to_list()+df_seb["xmax"].to_list()))
    Q2bins = np.unique(np.array(df_seb["Q2min"].to_list()+df_seb["Q2max"].to_list()))

    # Table of CD2/CH2 ratios used to turn CH2 counts into CD2 counts.
    # Read once here: it does not depend on the bin being processed.
    chcd=pd.read_csv("/work/clas12/users/gmat/CLAS12Analysis/macros/analysis/rg-c/toolkit/CH_CD.csv",sep="\t")

    # for each possible x-Q2 binning, calculate the DF
    cols=[]
    # -----------------------------------------
    if(debug):
        # NOTE(review): unlike the non-debug table, this layout has no
        # "df_err_ND3" column and its last column is named after the *value*
        # of cookType ("TBT"/"HBT") rather than "cookType" — confirm intent.
        cols=["xmin","xmax","Q2min","Q2max"]
        for i in range(len(fcg_NH3)):
            cols+=["df_NH3_{}".format(i),"df_err_NH3_{}".format(i)]
        cols+=["df_ND3"]
        for i in range(len(fcg_NH3)):
            cols+=["N_NH3_{}".format(i)]
        cols+=["N_ND3","N_C","N_CH","N_MT","N_F"]
        for i in range(len(fcg_NH3)):
            cols+=["fcupgated_NH3_{}".format(i)]
        cols+=["fcupgated_ND3","fcupgated_C","fcupgated_CH","fcupgated_MT","fcupgated_F"]
        for i in range(len(fcg_NH3)):
            cols+=["n_NH3_{}".format(i)]
        cols+=["n_ND3","n_C","n_CH","n_MT","n_F",cookType]
    else:
        cols=["xmin","xmax","Q2min","Q2max","df_NH3","df_err_NH3","df_ND3","df_err_ND3",
                                   "N_NH3","N_ND3","N_C","N_CH","N_MT","N_F",
                                   "fcupgated_NH3","fcupgated_ND3","fcupgated_C","fcupgated_CH","fcupgated_MT","fcupgated_F",
                                   "n_NH3","n_ND3","n_C","n_CH","n_MT","n_F",
                                   "cookType"]
    # -----------------------------------------
    df_out = pd.DataFrame(columns=cols)
    
    # Book the (x, Q2) histograms of events passing the DIS cut.
    h_NH3=0
    # -----------------------------------------
    if(debug):
        h_NH3 = [d.Filter(dis_cut).Histo2D(("h_NH3","h_NH3",len(xbins)-1,xbins,len(Q2bins)-1,Q2bins),"x","Q2") for d in df_NH3]
    else:
        h_NH3 = df_NH3.Filter(dis_cut).Histo2D(("h_NH3","h_NH3",len(xbins)-1,xbins,len(Q2bins)-1,Q2bins),"x","Q2")
    # -----------------------------------------
    h_ND3 = df_ND3.Filter(dis_cut).Histo2D(("h_ND3","h_ND3",len(xbins)-1,xbins,len(Q2bins)-1,Q2bins),"x","Q2")
    h_C = df_C.Filter(dis_cut).Histo2D(("h_C","h_C",len(xbins)-1,xbins,len(Q2bins)-1,Q2bins),"x","Q2")
    h_CH = df_CH.Filter(dis_cut).Histo2D(("h_CH","h_CH",len(xbins)-1,xbins,len(Q2bins)-1,Q2bins),"x","Q2")
    h_MT = df_MT.Filter(dis_cut).Histo2D(("h_MT","h_MT",len(xbins)-1,xbins,len(Q2bins)-1,Q2bins),"x","Q2")
    for i in range(len(xbins)-1):
        for j in range(len(Q2bins)-1):
            xmin=xbins[i]
            xmax=xbins[i+1]
            Q2min=Q2bins[j]
            Q2max=Q2bins[j+1]
            
            # -----------------------------------------
            # Raw counts in this bin (histogram bins are indexed from 1).
            N_NH3=0
            if(debug):
                N_NH3 = [h.GetBinContent(h.GetBin(i+1,j+1)) for h in h_NH3]
            else:
                N_NH3 = h_NH3.GetBinContent(h_NH3.GetBin(i+1,j+1))
            N_ND3 = h_ND3.GetBinContent(h_ND3.GetBin(i+1,j+1))
            
            N_C = h_C.GetBinContent(h_C.GetBin(i+1,j+1))
            N_CH = h_CH.GetBinContent(h_CH.GetBin(i+1,j+1))
            # DEBUG AS OFF 10/29/2022
            # Use Sebastian's hydrogen/deuteron ratios for N_CH
            # -----------------------------------------
            xc = 0.5*(xmin+xmax)
            yc = 0.5*(Q2min+Q2max)
            # Rows whose x equals the bin centre exactly (float equality);
            # bins without such a row are skipped.
            df1=chcd[chcd.x==xc]
            if(len(df1)==0):
                continue
            # Of those rows, take the one whose Q2 is closest to the bin centre.
            df2=df1.iloc[(df1['Q2']-yc).abs().argsort()[:1]]
            factor=df2["CD2/CH2"].to_numpy()[0]
            N_CD = N_CH*factor
            # -----------------------------------------
        
            N_MT = h_MT.GetBinContent(h_MT.GetBin(i+1,j+1))
            # -----------------------------------------
            
            # Skip bins where any sample is empty.
            # NOTE(review): in debug mode N_NH3 is a list, so `N_NH3==0` is
            # always False and per-run zero counts are not filtered — confirm.
            if(N_NH3==0 or N_ND3==0 or N_C==0 or N_CH==0 or N_MT==0):
                continue
            
            # -----------------------------------------
            # Counts divided by the accumulated fcupgated.
            n_NH3=0
            if(debug):
                n_NH3 = [N/fcg for N,fcg in zip(N_NH3,fcg_NH3)]
            else:
                n_NH3 = N_NH3/fcg_NH3
            n_ND3 = N_ND3/fcg_ND3
            n_C = N_C/fcg_C
            n_CH = N_CH/fcg_CH
            n_CD = N_CD/fcg_CH # DEBUG 10/29/2022 not sure what fcg to use
            n_MT = N_MT/fcg_MT
            
            # sqrt(N) uncertainties, divided by the same fcupgated.
            s_NH3=0
            if(debug):
                s_NH3 = [np.sqrt(N)/fcg for N,fcg in zip(N_NH3,fcg_NH3)]
            else:
                s_NH3 = np.sqrt(N_NH3)/fcg_NH3
            s_ND3 = np.sqrt(N_ND3)/fcg_ND3
            s_C = np.sqrt(N_C)/fcg_C
            s_CH = np.sqrt(N_CH)/fcg_CH
            s_CD = np.sqrt(N_CD)/fcg_CH
            s_MT = np.sqrt(N_MT)/fcg_MT
            
            # -----------------------------------------
            
            # -----------------------------------------
            dilution_NH3=0
            if(debug):
                # One (value, error) pair per NH3 run, split into two arrays.
                dilution_NH3,dilution_err_NH3 = np.array([DF("NH3",n,n_MT,n_CH,n_C,0,s,s_MT,s_CH,s_C,0) for n,s in zip(n_NH3,s_NH3)]).transpose()
            else:
                dilution_NH3,dilution_err_NH3 = DF("NH3",n_NH3,n_MT,n_CH,n_C,0,s_NH3,s_MT,s_CH,s_C,0)
                
            
            #dilution_ND3,dilution_err_ND3 = DF("ND3",n_ND3,n_MT,n_CH,n_C,0,s_ND3,s_MT,s_CH,s_C,0)
            # DEBUG 10/29/2022
            # Using N_CD
            dilution_ND3,dilution_err_ND3 = DF("ND3",n_ND3,n_MT,n_CD,n_C,0,s_ND3,s_MT,s_CD,s_C,0)
            # -----------------------------------------
            
            # -----------------------------------------
            # Assemble the output row in the same order as `cols`.
            row=0
            if(debug):
                row = [xmin,xmax,Q2min,Q2max]
                for d,de in zip(dilution_NH3,dilution_err_NH3):
                    row+=[d,de]
                row+=[dilution_ND3]
                row+=[N for N in N_NH3]
                row+=[N_ND3,N_C,N_CH,N_MT,0]
                row+=[fcg for fcg in fcg_NH3]
                row+=[fcg_ND3,fcg_C,fcg_CH,fcg_MT,0]
                row+=[n for n in n_NH3]
                row+=[n_ND3,n_C,n_CH,n_MT,0,cookType]
            else:
                row = [xmin,xmax,Q2min,Q2max,dilution_NH3,dilution_err_NH3,dilution_ND3,dilution_err_ND3,
                       N_NH3,N_ND3,N_C,N_CH,N_MT,0,
                       fcg_NH3,fcg_ND3,fcg_C,fcg_CH,fcg_MT,0,
                       n_NH3,n_ND3,n_C,n_CH,n_MT,0,cookType]
            # -----------------------------------------
            df_out.loc[len(df_out.index)] = row
            
    print("NH3 Runs:",runList_NH3)
    print("ND3 Runs:",runList_ND3)
    print("C Runs:",runList_C)
    print("CH Runs:",runList_CH)
    print("MT Runs:",runList_MT)
    return df_out

In [17]:
def f(x):
    """Return the pair (x, x+2); scratch helper for testing tuple unpacking."""
    shifted = x + 2
    return (x, shifted)

In [20]:
# Stack the (x, x+2) pairs row-wise, then transpose so that each output
# position becomes its own array.
a, b = np.array([f(i) for i in range(3)]).T

In [22]:
# Display the two unpacked arrays.
(a, b)

(array([0, 1, 2]), array([2, 3, 4]))

In [28]:
# A list of tuples, i.e. the shape of input that gets transposed when
# collecting several (value, error) results.
[(k, k + 2) for k in range(2)]

[(0, 2), (1, 3)]