In [1]:
# Imports 
import os
import matplotlib.pyplot as plt
from array import array
from ROOT import TFile, TTree
import numpy as np
plt.rc('text', usetex=True)
import ROOT
import datetime
from tqdm import tqdm

Matplotlib created a temporary config/cache directory at /tmp/matplotlib-c2gq3nwr because the default path (/home/jovyan/.cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


Welcome to JupyROOT 6.22/06


In [2]:
def get_dirs(PROJECT_NAME,MLmethod,
             pathtowork="/work/clas12/users/gmat/scipio/projects",
             pathtovolatile="/volatile/clas12/users/gmat/clas12analysis.sidis.data/rga/ML/projects"):
    """Locate the binned post-processing directories and the plot output directory.

    Parameters
    ----------
    PROJECT_NAME : str
        Project sub-directory name under both roots.
    MLmethod : str
        ML method sub-directory name (e.g. "catboost").
    pathtowork : str, optional
        Root under which the output directory path is built.
    pathtovolatile : str, optional
        Root under which ``postprocess_binned`` is listed.

    Returns
    -------
    (list[str], str)
        ``bindirs``: every entry of
        ``{pathtovolatile}/{PROJECT_NAME}/{MLmethod}/postprocess_binned`` whose
        name contains neither "bru" nor "sdbnd", as full paths in sorted order.
        ``outdir``: ``{pathtowork}/{PROJECT_NAME}/{MLmethod}/acceptancePlots``
        (the path is only built here, not created).

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` if the binned directory does not exist.
    """
    binned_root = f"{pathtovolatile}/{PROJECT_NAME}/{MLmethod}/postprocess_binned"
    # os.listdir order is arbitrary; sort so repeated runs visit bins identically.
    bindirs = [
        f"{binned_root}/{entry}"
        for entry in sorted(os.listdir(binned_root))
        if "bru" not in entry and "sdbnd" not in entry
    ]
    outdir = f"{pathtowork}/{PROJECT_NAME}/{MLmethod}/acceptancePlots"
    return bindirs,outdir

In [3]:
def _axis_spec(lo, hi, xtitle, nbins=100):
    """One histogram axis description: range, bin count and ROOT-LaTeX title."""
    return {"min": lo, "max": hi, "bins": nbins, "xtitle": xtitle}


def _truth_variants(specs):
    """Mirror each entry as "true<key>" with the same range and a "True " title prefix."""
    return {
        "true" + key: dict(spec, xtitle="True " + spec["xtitle"])
        for key, spec in specs.items()
    }


def _build_pltfmt():
    """Assemble the branch -> axis-format table used when booking histograms."""
    # (branch prefix, ROOT-LaTeX label, energy axis maximum in GeV)
    particles = (
        ("e", "e'", 10),
        ("g1", "#gamma_{1}", 1),
        ("g2", "#gamma_{2}", 1),
        ("pi", "#pi^{+}", 5),
    )

    # Per-particle branches, grouped by quantity: energies, then theta, then phi.
    particle_specs = {}
    for tag, label, emax in particles:
        particle_specs[tag + "E"] = _axis_spec(0, emax, f"E({label}) [GeV]")
    for tag, label, _ in particles:
        particle_specs[tag + "th"] = _axis_spec(0, 40*np.pi/180, f"#theta({label})")
    for tag, label, _ in particles:
        particle_specs[tag + "phi"] = _axis_spec(-np.pi, np.pi, f"#phi({label})")

    # Event-level branches.
    event_specs = {
        "x": _axis_spec(0, 1, "x_{B}"),
        "Q2": _axis_spec(0, 10, "Q^{2} [GeV^{2}]"),
        "Mx": _axis_spec(-1, 4, "M_{miss} [GeV]"),
        "z": _axis_spec(0, 1, "z"),
        "Mgg": _axis_spec(0, 0.4, "M_{#gamma#gamma} [GeV]"),
        "Mh": _axis_spec(0, 2, "M_{#pi^{0}#pi^{+}} [GeV]"),
        "phi_h": _axis_spec(-np.pi, np.pi, "#phi_{h}"),
        "phi_R0": _axis_spec(-np.pi, np.pi, "#phi_{R}"),
        "phi_R1": _axis_spec(-np.pi, np.pi, "#phi_{R}"),
        "th": _axis_spec(0, np.pi, "#theta_{COM}"),
    }

    # Key order: particle, true particle, event-level, true event-level.
    table = {}
    table.update(particle_specs)
    table.update(_truth_variants(particle_specs))
    table.update(event_specs)
    table.update(_truth_variants(event_specs))
    return table


# Maps a TTree branch name to its histogram range ("min"/"max"), number of
# bins ("bins") and x-axis title ("xtitle").
pltfmt = _build_pltfmt()

In [4]:
def get_cut_from_datatype(datatypes):
    """Translate dataset labels into TTree selection strings.

    Parameters
    ----------
    datatypes : str or list[str] (a tuple is also accepted)
        One label or a sequence of labels. Recognised labels are "all" / "",
        "Fall2018_inbending", "Fall2018_outbending", "Spring2019_inbending",
        "MC_inbending" and "MC_outbending". An unknown label prints an error
        and is treated as having no run cut.

    Returns
    -------
    str or list[str]
        A single cut string when exactly one label was given, otherwise one
        cut string per label, in input order. Every cut includes the diphoton
        mass window ``Mgg>0.07&&Mgg<0.22``. If ANY label contains "MC", every
        cut additionally requires ``MCmatch==1``.
    """
    # Normalise BEFORE scanning for MC: iterating a bare string would test
    # single characters, so "MC" would never be detected for scalar input.
    if not isinstance(datatypes, (list, tuple)):
        datatypes = [datatypes]
    isMC = any("MC" in datatype for datatype in datatypes)

    run_cuts = {
        "all": "",
        "": "",
        "Fall2018_inbending": "run>=5032 && run<=5332",
        "Fall2018_outbending": "run>=5333 && run<=5666",
        "Spring2019_inbending": "run>=6616 && run<=6783",
        "MC_inbending": "run==-11",
        "MC_outbending": "run==11",
    }
    mgg_window = "Mgg>0.07&&Mgg<0.22"

    cuts = []
    for datatype in datatypes:
        if datatype in run_cuts:
            cut = run_cuts[datatype]
        else:
            print("ERROR in get_cut_from_datatype: Unknown datatype",datatype,"...setting no cut...")
            cut = ""

        # The exact joiner text (including the stray space in the non-MC
        # case) is kept so existing cut strings stay byte-identical.
        if isMC:
            extra = "MCmatch==1&&" + mgg_window
            cut = cut + "&&" + extra if cut else extra
        else:
            cut = cut + "&& " + mgg_window if cut else mgg_window
        cuts.append(cut)

    if len(datatypes) == 1:
        return cuts[0]
    return cuts

In [5]:
def get_sorted_plots(plots,skipEdges=False):
    """Return copies of ``plots`` with every per-bin entry ordered by bin centre.

    Each element of ``plots`` is a dict holding a "centers" 2-D array (one row
    per bin), a "name" entry, the histogram lists "hists" / "hists_ML" /
    "hists_noML", and any further numpy arrays indexed by bin. The ordering
    comes from ``np.lexsort`` over the columns of "centers", so the LAST
    column is the primary sort key.

    With ``skipEdges=True`` the first and last bin of every row along the
    first dimension are dropped after sorting.
    """
    histogram_keys = ("hists", "hists_ML", "hists_noML")
    reordered_plots = []

    for entry in plots:
        centers = entry["centers"]
        order = np.lexsort(tuple(centers[:, col] for col in range(centers.shape[1])))

        if skipEdges:
            # Bins sharing the first bin's x-centre -> number of rows.
            n_rows = np.count_nonzero(centers[:, 0] == centers[0][0])
            per_row = int(len(centers) / n_rows)

            kept = list(order)
            del kept[::per_row]               # first bin of every row
            del kept[per_row-2::per_row-1]    # last bin of every (now shorter) row
            order = np.array(kept)

        reordered = {}
        for key, value in entry.items():
            if key == 'name':
                # The name is not per-bin data; carry it over untouched.
                reordered['name'] = value
            elif key in histogram_keys:
                # Plain Python lists: reorder element by element.
                reordered[key] = [value[pos] for pos in order]
            else:
                # numpy arrays: fancy-index with the ordering.
                reordered[key] = value[order]
        reordered_plots.append(reordered)

    return reordered_plots

In [35]:
def _fill_hist(ttree, branch, hname, selection):
    """Book a TH1F for ``branch`` with its ``pltfmt`` range and fill it from ``ttree``."""
    fmt = pltfmt[branch]
    hist = ROOT.TH1F(hname, "", fmt["bins"], fmt["min"], fmt["max"])
    # "goff": fill the named histogram without drawing anything.
    ttree.Draw(f"{branch}>>{hname}", selection, "goff")
    # Detach from the current ROOT directory so the histogram is not deleted
    # together with the TFile it was booked under.
    hist.SetDirectory(0)
    hist.Sumw2()
    return hist


def collect_many(input_dir,prefix,datatypes):
    """Histogram every ``pltfmt`` branch of every binned TTree under ``input_dir``.

    Parameters
    ----------
    input_dir : str
        Directory scanned for ``.root`` files whose name starts with ``prefix``.
    prefix : str
        File-name prefix; it is also stripped (as ``"{prefix}_"``) from the key
        names before the trees are fetched.
    datatypes : str or list[str]
        Dataset labels understood by ``get_cut_from_datatype``. One set of
        histograms is produced per label.

    Returns
    -------
    (list[dict], list[dict])
        ``(full_plots, abbrev_plots)`` as produced by ``get_sorted_plots`` with
        ``skipEdges`` False / True. Each dict describes one binning scheme:
        "name" (binning variable names), "leftEdge" / "rightEdge" / "centers"
        (arrays with one row per bin) and "hists" / "hists_ML" / "hists_noML"
        (one dict per bin, keyed ``"{branch}_{datatype}"``). The "_ML"
        histograms add ``prob_g1>0.9&&prob_g2>0.9`` to the cut; the "_noML"
        histograms add ``isGoodEventWithoutML==1``.

    Notes
    -----
    Tree names are parsed as repeated ``<variable>_<min>_<max>`` triplets.
    """
    # get_cut_from_datatype returns a bare string for a single label; without
    # this normalisation the loops below would iterate over its characters.
    if not isinstance(datatypes, (list, tuple)):
        datatypes = [datatypes]
    datatypes = list(datatypes)
    cuts = get_cut_from_datatype(datatypes)
    if isinstance(cuts, str):
        cuts = [cuts]

    name_list = []
    min_list = []
    max_list = []
    center_list = []
    hlist = []
    hlist_ML = []
    hlist_noML = []

    for file in os.listdir(f"{input_dir}/"):
        # Only analyze .root files carrying the requested prefix
        if not (file.endswith(".root") and file.startswith(prefix)):
            continue

        tfile = ROOT.TFile(f"{input_dir}/{file}","READ")
        print("Reading TFile",file)
        try:
            # Distinct key names, sorted, as plain Python strings.
            treenames = sorted({key.GetName().replace(f"{prefix}_","") for key in tfile.GetListOfKeys()})

            for i,treename in enumerate(treenames):
                ttree = tfile.Get(treename)
                # From the tree name determine the binning
                treename = ttree.GetName()
                splitname = treename.split('_')
                name_list.append(splitname[0::3])
                min_list.append(np.array(splitname[1::3],dtype=float))
                max_list.append(np.array(splitname[2::3],dtype=float))
                center_list.append(0.5*(min_list[-1]+max_list[-1]))

                # Only use branches declared in pltfmt
                branches = [branch.GetName() for branch in ttree.GetListOfBranches()]
                branches = [branch for branch in branches if branch in pltfmt]

                hists={}
                hists_ML={}
                hists_noML={}
                with tqdm(total=len(cuts)*len(branches),desc=f"TTree {i+1} of {len(treenames)}") as pbar:
                    for cut,suffix in zip(cuts,datatypes):
                        for branch in branches:
                            pbar.update(1)
                            # The timestamp keeps ROOT object names unique across calls.
                            NOW = datetime.datetime.now()
                            stem = f"{branch}_{treename}_{NOW}"
                            if(cut):
                                base_name = f"{stem}_{cut}"
                                key = f"{branch}_{suffix}"
                                sel_ML = cut+"&&prob_g1>0.9&&prob_g2>0.9"
                                sel_noML = cut+"&&isGoodEventWithoutML==1"
                            else:
                                base_name = f"{stem}_all"
                                key = f"{branch}_all"
                                sel_ML = "prob_g1>0.9&&prob_g2>0.9"
                                sel_noML = "isGoodEventWithoutML==1"

                            hists[key] = _fill_hist(ttree, branch, base_name, cut)
                            hists_ML[key] = _fill_hist(ttree, branch, f"{stem}_{cut}_ML", sel_ML)
                            hists_noML[key] = _fill_hist(ttree, branch, f"{stem}_{cut}_noML", sel_noML)
                hlist.append(hists)
                hlist_ML.append(hists_ML)
                hlist_noML.append(hists_noML)
        finally:
            # Every histogram was detached with SetDirectory(0), so closing the
            # file does not invalidate anything that is returned.
            tfile.Close()

    plots=[]
    for uname in set(tuple(sublist) for sublist in name_list):
        name=list(uname)
        selected = [idx for idx,n in enumerate(name_list) if n == name]
        # Arrays are built per binning scheme so that schemes of different
        # dimensionality found in one directory never share a ragged array.
        plots.append({
            "name": name,
            "leftEdge": np.array([min_list[idx] for idx in selected]),
            "rightEdge": np.array([max_list[idx] for idx in selected]),
            "centers": np.array([center_list[idx] for idx in selected]),
            "hists": [hlist[idx] for idx in selected],
            "hists_ML": [hlist_ML[idx] for idx in selected],
            "hists_noML": [hlist_noML[idx] for idx in selected],
        })

    full_plots=get_sorted_plots(plots,False)
    abbrev_plots=get_sorted_plots(plots,True)
    return full_plots, abbrev_plots

In [36]:
#ROOT.EnableImplicitMT(4)

In [37]:
# Run configuration: which project / ML method to read, and which dataset
# labels to histogram.
PROJECT_NAME = "pipluspi0_noresonance_micro"
#PROJECT_NAME="pipluspi0_prelim"
MLmethod = "catboost"

bin_dirs, outdir = get_dirs(PROJECT_NAME, MLmethod)

# Every branch that has an axis format is plotted.
histnames = list(pltfmt.keys())

DATATYPE = [
    "all",
    "Fall2018_inbending",
    "Fall2018_outbending",
    "Spring2019_inbending",
]
# The MC labels currently mirror the data labels one-to-one.
MCTYPE = list(DATATYPE)

In [28]:
#full_plots_data[1]["hists_noML"][5]["x_all"].GetEntries()

In [29]:


def th1_clone(h):
    """Return a detached TH1F copy of ``h`` named ``<name>_clone``.

    The copy has the same title, bin count and axis range (uniform binning
    between ``GetXmin()`` and ``GetXmax()``), and the content and error of
    bins 1..N. Underflow and overflow bins are not copied.

    The clone is also appended to the module-level list ``clonehist`` so that
    a Python reference to it stays alive while it is drawn. That list is
    created on demand, so this function can be called before
    ``make_binned_acceptance_plots`` has ever run.
    """
    nbins = h.GetNbinsX()
    axis = h.GetXaxis()

    hclone = ROOT.TH1F(h.GetName()+"_clone",h.GetTitle(),nbins,axis.GetXmin(),axis.GetXmax())
    hclone.SetDirectory(0)
    for ibin in range(1,nbins+1):
        hclone.SetBinContent(ibin,h.GetBinContent(ibin))
        hclone.SetBinError(ibin,h.GetBinError(ibin))

    # Previously a bare `clonehist.append(...)`, which raised NameError when
    # the global had not been initialised elsewhere.
    globals().setdefault("clonehist", []).append(hclone)
    return hclone

In [30]:
def make_binned_acceptance_plots(params_data=0,params_MC=0,histname="",histname_data="",histname_MC="",OUTDIR="",skipEdges=False,dodiff=False,rescale_y=True,boxy=True):
    """Draw one grid of small histogram pads per binning scheme and save it as PNG.

    Parameters
    ----------
    params_data : list[dict]
        Plot dictionaries as returned by ``collect_many`` (data side). Their
        bin edges define the grid; their ``histname_data`` histograms define
        the per-pad x-axis range and, with ``rescale_y``, the y-axis range.
    params_MC : list[dict]
        Plot dictionaries supplying the drawn histograms. The entry whose
        "name" equals the data entry's "name" is used.
    histname : str
        Branch name (a key of ``pltfmt``). Names containing "true" are skipped.
    histname_data, histname_MC : str
        Keys into the per-bin histogram dicts (``"{branch}_{datatype}"``).
    OUTDIR : str
        Base output directory. The file is written to
        ``OUTDIR/dim<N>_<names>/<ratio|overlay>/<abbrev|full>/<rescale|default>/``.
    skipEdges : bool
        Only selects the "abbrev"/"full" output sub-directory.
    dodiff : bool
        If True, draw ``hists_ML`` and ``hists_noML`` divided by ``hists`` on a
        log y-axis; otherwise overlay the "true"+histname_MC histogram with
        the ``hists``, ``hists_ML`` and ``hists_noML`` histograms.
    rescale_y : bool
        If True, the y-axis decoration range follows the largest data histogram.
    boxy : bool
        If True, all pads get equal width and the axis labels show the true
        bin edges; the file name gets a "boxy_" prefix.

    Returns
    -------
    None
    """
    if("true" in histname):
        return
    if(not params_data):
        return
    if(not params_MC):
        # Every drawn histogram comes from params_MC; without it the original
        # code failed later with a NameError.
        print("ERROR in make_binned_acceptance_plots: params_MC is required...Aborting...")
        return

    # th1_clone stores its clones in this global so they outlive the call.
    global clonehist
    clonehist=[]
    ROOT.gStyle.SetOptStat(0)

    # Canvas user-coordinate window and the region of it used by the pad grid.
    cleft=-20
    cright=20
    cup=0.6
    cdown=-1
    xaxis_h = -0.8
    xaxis_r = 14
    padtop=0.8
    padbot=0.2
    padtopscale=(cup-cdown)*padtop+cdown
    padbotscale=(cup-cdown)*padbot+cdown
    xsizeperbox=200
    ysizeperbox=250

    for plot in params_data:

        if(len(plot["hists"])==0):
            print("No bins to draw for",plot["name"],"...skipping...")
            continue

        dim=len(plot["leftEdge"][0])
        if(dim>2):
            print("Can only do Mdiphoton plotting for dim<=2...Aborting...")
            break

        # The MC list may be ordered differently from the data list, so match by name.
        mc_plot=None
        for candidate in params_MC:
            if(candidate["name"]==plot["name"]):
                mc_plot=candidate
        if(mc_plot is None):
            # Previously this case silently reused the histograms of the
            # preceding plot (or raised NameError on the first one).
            print("No MC plot matches",plot["name"],"...skipping...")
            continue

        # Make output directory
        namelist = "dim{}".format(len(plot["name"]))
        for name in plot["name"]:
            namelist+=f"_{name}"
        outdir=OUTDIR+"/"+namelist+"/"+("ratio" if dodiff else "overlay")+"/"+("abbrev" if skipEdges else "full")+"/"+("rescale" if rescale_y else "default")
        os.makedirs(outdir,exist_ok=True)

        # Bin edges along x (and y for 2D binnings)
        leftEdge=np.unique(plot["leftEdge"][:,0])
        rightEdge=np.unique(plot["rightEdge"][:,0])
        xmin=leftEdge[0]
        xmax=rightEdge[-1]
        nxbins=len(leftEdge)
        nybins=1
        if(dim==2):
            bottomEdge=np.unique(plot["leftEdge"][:,1])
            topEdge=np.unique(plot["rightEdge"][:,1])
            grid_ymin = bottomEdge[0]
            grid_ymax = topEdge[-1]
            ybins = sorted(np.unique(np.concatenate([bottomEdge,topEdge])))
            nybins=len(topEdge)

        if(dim==1):
            xaxis_l = -18
        else:
            xaxis_l = -15

        #Create TCanvas
        if(dim==2):
            c=ROOT.TCanvas("c","c",xsizeperbox*nxbins,ysizeperbox*nybins)
        else:
            c=ROOT.TCanvas("c","c",2000,800)
        c.Range(cleft,cdown,cright,cup)

        # Set TPad box size
        boxheight=(padtop-padbot)/nybins
        boxheightscale=(padtopscale-padbotscale)/nybins

        # Get bins
        truebins = np.unique(np.concatenate([leftEdge,rightEdge]))
        bins = truebins
        if(boxy):
            # Equal-width pads; the true edges are shown through the labels.
            bins = np.linspace(truebins[0],truebins[-1],len(truebins),endpoint=True)

        # Create axes
        if(boxy):
            xaxis=ROOT.TGaxis(xaxis_l,xaxis_h,xaxis_r,xaxis_h,0,nxbins,500+nxbins,"<")
            for ib,b in enumerate(truebins):
                xaxis.ChangeLabel(ib+1,-1,-1,-1,-1,-1,str(np.round(b,3)))
        else:
            xaxis=ROOT.TGaxis(xaxis_l,xaxis_h,xaxis_r,xaxis_h,xmin,xmax,510,"<")
        xaxis.SetTitle(plot["name"][0])
        xaxis.Draw()

        # If dimension == 2, make a y axis as well
        if(dim==2):
            if(boxy):
                yaxis=ROOT.TGaxis(xaxis_l-1.75,padbotscale,xaxis_l-1.75,padtopscale,0,nybins,500+nybins,"")
                for ib,b in enumerate(ybins):
                    yaxis.ChangeLabel(ib+1,-1,-1,-1,-1,-1,str(np.round(b,3)))
            else:
                yaxis=ROOT.TGaxis(xaxis_l-1.75,padbotscale,xaxis_l-1.75,padtopscale,grid_ymin,grid_ymax,508,"")
            if(plot["name"][1]=="Mh"):
                yaxis.SetTitle("M_{#pi^{+}#pi^{0}}[GeV]")
            else:
                yaxis.SetTitle(plot["name"][1])
            yaxis.Draw("same")

        # Scale bins to axes: canvas user coordinates (for the TGaxis objects)
        # and the same positions as NDC fractions (for the TPad corners).
        bins_scaled_v2 =( (bins - np.min(bins)) * (xaxis_r - (xaxis_l)) / (np.max(bins) - np.min(bins)) + (xaxis_l))
        bins_scaled =(bins_scaled_v2 - cleft)/ (cright-cleft)

        latex=ROOT.TLatex()

        # y range shown on the per-pad axis decorations
        ymin=0
        ymax=1
        if(rescale_y):
            for hh in plot["hists"]:
                h=hh[histname_data]
                if(h.GetMaximum()>ymax):
                    ymax=h.GetMaximum()*1.1
        if(dodiff):
            ymin=1e-3
            ymax=2

        # Keep a Python reference to everything drawn until the canvas is saved.
        xxaxis=[]
        yyaxis=[]
        lines=[]
        pads=[]
        ix,iy=0,0
        for hh_idx,hh in enumerate(plot["hists"]):
            h_mc=th1_clone(mc_plot["hists"][hh_idx][histname_MC]) # w/ detector smearing
            h_mc_gen=th1_clone(mc_plot["hists"][hh_idx]["true"+histname_MC]) # w/o detector smearing
            h_mc_ML=th1_clone(mc_plot["hists_ML"][hh_idx][histname_MC])
            h_mc_noML=th1_clone(mc_plot["hists_noML"][hh_idx][histname_MC])

            # The first histogram drawn in a pad defines its frame; strip its decorations.
            frame_hist = h_mc_ML if dodiff else h_mc_gen
            frame_hist.SetTitle("")
            frame_hist.GetXaxis().SetLabelSize(0)
            frame_hist.GetXaxis().SetNdivisions(0)
            frame_hist.GetYaxis().SetNdivisions(0)

            h=hh[histname_data]
            # Pads are laid out row by row: wrap to the next row after nxbins pads.
            if(ix==nxbins):
                ix=0
                iy+=1

            x_axis=ROOT.TGaxis(bins_scaled_v2[ix],padbotscale,bins_scaled_v2[ix+1],padbotscale,h.GetXaxis().GetXmin(),h.GetXaxis().GetXmax(),506,"S")
            # "G" draws the ratio axis on a log scale to match the log pad.
            y_axis=ROOT.TGaxis(bins_scaled_v2[ix],padbotscale+boxheightscale*iy,bins_scaled_v2[ix],padbotscale+boxheightscale*(iy+1),ymin,ymax,505,"SG" if dodiff else "S")
            xxaxis.append(x_axis)
            yyaxis.append(y_axis)
            x_axis.SetLabelSize(0.03)
            y_axis.SetLabelFont(42)
            y_axis.SetLabelSize(0)
            y_axis.SetTickLength(0.02)
            if(ix!=nxbins-1):
                # Only the right-most column shows x labels.
                x_axis.SetLabelSize(0)

            tpad = ROOT.TPad("","",bins_scaled[ix],padbot+boxheight*iy,bins_scaled[ix+1],padbot+boxheight*(iy+1))
            pads.append(tpad)
            tpad.Draw("same")
            tpad.cd()
            tpad.SetLeftMargin(0)
            tpad.SetRightMargin(0)
            tpad.SetTopMargin(0)
            tpad.SetBottomMargin(0)

            if(dodiff):
                # Ratios are taken with respect to the histname_MC histogram
                # of "hists"; an empty denominator is left undivided.
                if(h_mc.Integral()!=0):
                    h_mc_ML.Divide(h_mc)
                    h_mc_noML.Divide(h_mc)
                h_mc_ML.SetLineColor(ROOT.kViolet+1)
                h_mc_noML.SetLineColor(ROOT.kGreen+2)
                h_mc_ML.Draw("hist E1")
                h_mc_noML.Draw("hist E1 same")
                h_mc_ML.GetYaxis().SetRangeUser(ymin,ymax)
                ROOT.gPad.SetLogy()
                unity=ROOT.TLine(h_mc_ML.GetXaxis().GetXmin(),1,h_mc_ML.GetXaxis().GetXmax(),1)
                lines.append(unity)
                unity.SetLineStyle(7)
                unity.SetLineColor(8)
                unity.Draw("same")
            else:
                h_mc.SetLineColor(ROOT.kOrange-3)
                h_mc_gen.SetLineColor(ROOT.kOrange+4)
                h_mc_ML.SetLineColor(ROOT.kAzure-3)
                h_mc_noML.SetLineColor(ROOT.kAzure+3)
                # Pad range covers the taller of the generated and smeared histograms.
                MAX = 0
                for k in range(h_mc_gen.GetNbinsX()):
                    MAX=max(MAX,h_mc_gen.GetBinContent(k+1),h_mc.GetBinContent(k+1))

                h_mc_gen.GetYaxis().SetRangeUser(ymin,MAX*1.1)
                h_mc_gen.Draw("hist")
                h_mc.Draw("hist same")
                h_mc_ML.Draw("hist same")
                h_mc_noML.Draw("hist same")

            c.cd()
            x_axis.SetLabelFont(42)
            x_axis.SetTickLength(0.06)
            x_axis.Draw("same")
            if(dodiff):
                if(ix==0 and iy==0):
                    # Only the first pad carries y labels in ratio mode.
                    y_axis.SetLabelSize(0.03)
            y_axis.Draw("same")

            ix+=1

        latex.SetTextFont(42)
        latex.DrawLatexNDC(0.87,padbot-0.02,pltfmt[histname]["xtitle"])
        legend=ROOT.TLegend(0.86,padbot+0.05,0.99,padbot+0.2)
        # Legend entries use thicker-lined copies so the drawn histograms are untouched.
        h_mcclone=h_mc.Clone()
        h_mc_genclone=h_mc_gen.Clone()
        h_mc_MLclone=h_mc_ML.Clone()
        h_mc_noMLclone=h_mc_noML.Clone()
        h_mcclone.SetLineWidth(2)
        h_mc_genclone.SetLineWidth(2)
        h_mc_MLclone.SetLineWidth(2)
        h_mc_noMLclone.SetLineWidth(2)
        legend.SetTextSize(0.04)
        if(dodiff==False):
            legend.AddEntry(h_mc_genclone,"True MC","l")
            legend.AddEntry(h_mcclone,"Smear MC ","l")
            legend.AddEntry(h_mc_MLclone,"CatBoost Signal","l")
            legend.AddEntry(h_mc_noMLclone,"Traditional Signal ","l")
        else:
            legend.AddEntry(h_mc_MLclone,"ML sig/Data","l")
            legend.AddEntry(h_mc_noMLclone,"Trad. sig/Data","l")
        legend.SetBorderSize(0)
        legend.Draw("same")

        HISTNAME=histname
        if(boxy):
            HISTNAME="boxy_"+histname
        c.SaveAs(f"{outdir}/{HISTNAME}.png")

In [31]:
# Run configuration
PROJECT_NAME="pipluspi0_noresonance_micro"
MLmethod="catboost"
bin_dirs, outdir = get_dirs(PROJECT_NAME,MLmethod)
histnames = list(pltfmt.keys())
DATATYPE=["all", "Fall2018_inbending", "Fall2018_outbending", "Spring2019_inbending"]
MCTYPE=["all", "Fall2018_inbending", "Fall2018_outbending", "Spring2019_inbending"]

for d in bin_dirs:
    full_plots_data, abbrev_plots_data = collect_many(d,
                                                      "nSidis",
                                                      DATATYPE)
    # A separate MC collection (prefix "MC", labels MCTYPE) is currently
    # disabled; the nSidis plots are passed as params_MC as well.

    for datatype,mctype in zip(DATATYPE,MCTYPE):
        os.makedirs(f"{outdir}/{datatype}",exist_ok=True)
        for histname in histnames:
            for rescale in [False]: # Whether to rescale the y axis for all plots
                # One loop over boxy: the former B and X loops toggled the same
                # option (one per call), so every figure was rendered twice.
                for boxy in [False, True]: # Whether to make each plot an equal grid size
                    for plots,skip_edges in ((full_plots_data,False),(abbrev_plots_data,True)):
                        make_binned_acceptance_plots(params_data=plots,
                                                     params_MC=plots,   # could refactor later, but just using params_MC
                                                     histname=histname,
                                                     histname_data=histname+"_"+datatype,
                                                     histname_MC=histname+"_"+mctype,
                                                     OUTDIR=outdir+"/"+datatype,
                                                     skipEdges=skip_edges, # matches full / abbreviated params
                                                     dodiff=True,
                                                     rescale_y=rescale,boxy=boxy)

TTree 1 of 24: 100%|███████████████████████| 176/176 [00:00<00:00, 1507.82it/s]
TTree 2 of 24:   0%|                                   | 0/176 [00:00<?, ?it/s]

Reading TFile nSidis_x_Mh_binned.root


TTree 2 of 24: 100%|███████████████████████| 176/176 [00:00<00:00, 1564.32it/s]
TTree 3 of 24: 100%|███████████████████████| 176/176 [00:00<00:00, 1639.44it/s]
TTree 4 of 24: 100%|███████████████████████| 176/176 [00:00<00:00, 1492.06it/s]
TTree 5 of 24: 100%|███████████████████████| 176/176 [00:00<00:00, 1468.14it/s]
TTree 6 of 24: 100%|███████████████████████| 176/176 [00:00<00:00, 1601.40it/s]
TTree 7 of 24: 100%|███████████████████████| 176/176 [00:00<00:00, 1388.31it/s]
TTree 8 of 24: 100%|███████████████████████| 176/176 [00:00<00:00, 1415.14it/s]
TTree 9 of 24: 100%|███████████████████████| 176/176 [00:00<00:00, 1654.17it/s]
TTree 10 of 24: 100%|██████████████████████| 176/176 [00:00<00:00, 1472.03it/s]
TTree 11 of 24: 100%|██████████████████████| 176/176 [00:00<00:00, 1456.23it/s]
TTree 12 of 24: 100%|██████████████████████| 176/176 [00:00<00:00, 1647.16it/s]
TTree 13 of 24: 100%|██████████████████████| 176/176 [00:00<00:00, 1328.33it/s]
TTree 14 of 24: 100%|███████████████████

Reading TFile nSidis_z_Mh_binned.root


TTree 2 of 24: 100%|███████████████████████| 176/176 [00:00<00:00, 1210.87it/s]
TTree 3 of 24: 100%|███████████████████████| 176/176 [00:00<00:00, 1636.15it/s]
TTree 4 of 24: 100%|███████████████████████| 176/176 [00:00<00:00, 1298.06it/s]
TTree 5 of 24: 100%|███████████████████████| 176/176 [00:00<00:00, 1295.49it/s]
TTree 6 of 24: 100%|███████████████████████| 176/176 [00:00<00:00, 1633.83it/s]
TTree 7 of 24: 100%|███████████████████████| 176/176 [00:00<00:00, 1470.44it/s]
TTree 8 of 24: 100%|███████████████████████| 176/176 [00:00<00:00, 1399.50it/s]
TTree 9 of 24: 100%|███████████████████████| 176/176 [00:00<00:00, 1608.48it/s]
TTree 10 of 24: 100%|██████████████████████| 176/176 [00:00<00:00, 1524.60it/s]
TTree 11 of 24: 100%|██████████████████████| 176/176 [00:00<00:00, 1498.92it/s]
TTree 12 of 24: 100%|██████████████████████| 176/176 [00:00<00:00, 1652.81it/s]
TTree 13 of 24: 100%|██████████████████████| 176/176 [00:00<00:00, 1523.64it/s]
TTree 14 of 24: 100%|███████████████████

Reading TFile nSidis_z_binned.root


TTree 1 of 8: 100%|█████████████████████████| 176/176 [00:00<00:00, 798.49it/s]
TTree 2 of 8: 100%|████████████████████████| 176/176 [00:00<00:00, 1034.40it/s]
TTree 3 of 8: 100%|████████████████████████| 176/176 [00:00<00:00, 1207.31it/s]
TTree 4 of 8: 100%|████████████████████████| 176/176 [00:00<00:00, 1288.16it/s]
TTree 5 of 8: 100%|████████████████████████| 176/176 [00:00<00:00, 1268.76it/s]
TTree 6 of 8: 100%|████████████████████████| 176/176 [00:00<00:00, 1378.63it/s]
TTree 7 of 8: 100%|████████████████████████| 176/176 [00:00<00:00, 1421.43it/s]
TTree 8 of 8: 100%|████████████████████████| 176/176 [00:00<00:00, 1435.77it/s]
TTree 1 of 8: 100%|████████████████████████| 176/176 [00:00<00:00, 1354.51it/s]
TTree 2 of 8:   0%|                                    | 0/176 [00:00<?, ?it/s]

Reading TFile nSidis_x_binned.root


TTree 2 of 8: 100%|████████████████████████| 176/176 [00:00<00:00, 1395.84it/s]
TTree 3 of 8: 100%|████████████████████████| 176/176 [00:00<00:00, 1223.91it/s]
TTree 4 of 8: 100%|████████████████████████| 176/176 [00:00<00:00, 1192.38it/s]
TTree 5 of 8: 100%|████████████████████████| 176/176 [00:00<00:00, 1025.17it/s]
TTree 6 of 8: 100%|█████████████████████████| 176/176 [00:00<00:00, 931.15it/s]
TTree 7 of 8: 100%|█████████████████████████| 176/176 [00:00<00:00, 935.33it/s]
TTree 8 of 8: 100%|████████████████████████| 176/176 [00:00<00:00, 1471.73it/s]
TTree 1 of 9: 100%|████████████████████████| 176/176 [00:00<00:00, 1014.30it/s]
TTree 2 of 9:   0%|                                    | 0/176 [00:00<?, ?it/s]

Reading TFile nSidis_Mh_binned.root


TTree 2 of 9: 100%|█████████████████████████| 176/176 [00:00<00:00, 919.13it/s]
TTree 3 of 9: 100%|████████████████████████| 176/176 [00:00<00:00, 1000.34it/s]
TTree 4 of 9: 100%|████████████████████████| 176/176 [00:00<00:00, 1065.27it/s]
TTree 5 of 9: 100%|████████████████████████| 176/176 [00:00<00:00, 1195.60it/s]
TTree 6 of 9: 100%|████████████████████████| 176/176 [00:00<00:00, 1352.08it/s]
TTree 7 of 9: 100%|████████████████████████| 176/176 [00:00<00:00, 1535.49it/s]
TTree 8 of 9: 100%|████████████████████████| 176/176 [00:00<00:00, 1613.47it/s]
TTree 9 of 9: 100%|████████████████████████| 176/176 [00:00<00:00, 1645.66it/s]
Info in <TCanvas::Print>: png file /work/clas12/users/gmat/scipio/projects/pipluspi0_noresonance_micro/catboost/acceptancePlots/all/dim2_z_Mh/ratio/full/default/eE.png has been created
Info in <TCanvas::Print>: png file /work/clas12/users/gmat/scipio/projects/pipluspi0_noresonance_micro/catboost/acceptancePlots/all/dim2_x_Mh/ratio/full/default/eE.png has bee

In [12]:
# Debug inspection: display the collected full-plot parameters.
# NOTE(review): in the recorded run this cell raised
# "NameError: name 'full_plots_data' is not defined". The only visible
# assignments to this name are in the plotting cells further down, so on a
# fresh kernel this cell runs before the name exists — confirm whether it
# should be moved below those cells or removed.
full_plots_data

NameError: name 'full_plots_data' is not defined

In [None]:
# Binned acceptance plots for the "piminuspi0_prelim" project (catboost).
# For every binning directory returned by get_dirs, the "nSidis" parameters are
# collected once and then plotted for each dataset, each histogram in pltfmt,
# and each boxy setting.
PROJECT_NAME="piminuspi0_prelim"
MLmethod="catboost"
bin_dirs, outdir = get_dirs(PROJECT_NAME,MLmethod)
histnames = list(pltfmt)  # one set of plots per entry of the pltfmt table
DATATYPE=["all", "Fall2018_inbending", "Fall2018_outbending", "Spring2019_inbending"]
MCTYPE=["all", "Fall2018_inbending", "Fall2018_outbending", "Spring2019_inbending"]

for d in bin_dirs:
    # collect_many returns the full parameter set and an abbreviated one.
    full_plots_data, abbrev_plots_data = collect_many(d,
                                                      "nSidis",
                                                      DATATYPE)
    # A separate "MC" collection is disabled for now; the same parameter set is
    # passed as both params_data and params_MC in the calls below.
#     full_plots_MC, abbrev_plots_MC = collect_many(d,
#                                                   "MC",
#                                                   MCTYPE)

    for datatype,mctype in zip(DATATYPE,MCTYPE):
        # One output directory per dataset; exist_ok makes the call idempotent
        # without a separate existence check.
        dataset_outdir = outdir+"/"+datatype
        os.makedirs(dataset_outdir, exist_ok=True)
        for histname in histnames:
            for rescale_y in [False]: # Determine whether to rescale the y axis for all plots
                # A single loop over boxy: each (parameter set, boxy) combination
                # is plotted exactly once.
                for boxy in [False, True]: # Determine whether to make each plot an equal grid size
                    # (params, skipEdges): edges are kept for the full set and
                    # skipped for the abbreviated set.
                    for params, skip_edges in ((full_plots_data, False),
                                               (abbrev_plots_data, True)):
                        make_binned_acceptance_plots(params_data=params,
                                                     params_MC=params,   # could refactor later, but just using params_MC
                                                     histname=histname,
                                                     histname_data=histname+"_"+datatype,
                                                     histname_MC=histname+"_"+mctype,
                                                     OUTDIR=dataset_outdir,
                                                     skipEdges=skip_edges,
                                                     dodiff=True,
                                                     rescale_y=rescale_y,
                                                     boxy=boxy)

Reading TFile nSidis_Mh_binned.root
	TTree 1 of 9
	TTree 2 of 9
	TTree 3 of 9
	TTree 4 of 9
	TTree 5 of 9
	TTree 6 of 9
	TTree 7 of 9
	TTree 8 of 9
	TTree 9 of 9
Reading TFile nSidis_x_binned.root
	TTree 1 of 8
	TTree 2 of 8
	TTree 3 of 8
	TTree 4 of 8
	TTree 5 of 8
	TTree 6 of 8
	TTree 7 of 8
	TTree 8 of 8
Reading TFile nSidis_z_binned.root
	TTree 1 of 8
	TTree 2 of 8
	TTree 3 of 8
	TTree 4 of 8
	TTree 5 of 8
	TTree 6 of 8
	TTree 7 of 8
	TTree 8 of 8
Reading TFile nSidis_z_Mh_binned.root
	TTree 1 of 24
	TTree 2 of 24
	TTree 3 of 24
	TTree 4 of 24
	TTree 5 of 24
	TTree 6 of 24
	TTree 7 of 24
	TTree 8 of 24
	TTree 9 of 24
	TTree 10 of 24
	TTree 11 of 24
	TTree 12 of 24
	TTree 13 of 24
	TTree 14 of 24
	TTree 15 of 24
	TTree 16 of 24
	TTree 17 of 24
	TTree 18 of 24
	TTree 19 of 24
	TTree 20 of 24
	TTree 21 of 24
	TTree 22 of 24
	TTree 23 of 24
	TTree 24 of 24
Reading TFile nSidis_x_Mh_binned.root
	TTree 1 of 24
	TTree 2 of 24
	TTree 3 of 24
	TTree 4 of 24
	TTree 5 of 24
	TTree 6 of 24
	TT

In [None]:
# Binned acceptance plots for the "pipluspi0_prelim" project (catboost).
# For every binning directory returned by get_dirs, the "nSidis" parameters are
# collected once and then plotted for each dataset, each histogram in pltfmt,
# and each boxy setting.
PROJECT_NAME="pipluspi0_prelim"
MLmethod="catboost"
bin_dirs, outdir = get_dirs(PROJECT_NAME,MLmethod)
histnames = list(pltfmt)  # one set of plots per entry of the pltfmt table
DATATYPE=["all", "Fall2018_inbending", "Fall2018_outbending", "Spring2019_inbending"]
MCTYPE=["all", "Fall2018_inbending", "Fall2018_outbending", "Spring2019_inbending"]

for d in bin_dirs:
    # collect_many returns the full parameter set and an abbreviated one.
    full_plots_data, abbrev_plots_data = collect_many(d,
                                                      "nSidis",
                                                      DATATYPE)
    # A separate "MC" collection is disabled for now; the same parameter set is
    # passed as both params_data and params_MC in the calls below.
#     full_plots_MC, abbrev_plots_MC = collect_many(d,
#                                                   "MC",
#                                                   MCTYPE)

    for datatype,mctype in zip(DATATYPE,MCTYPE):
        # One output directory per dataset; exist_ok makes the call idempotent
        # without a separate existence check.
        dataset_outdir = outdir+"/"+datatype
        os.makedirs(dataset_outdir, exist_ok=True)
        for histname in histnames:
            for rescale_y in [False]: # Determine whether to rescale the y axis for all plots
                # A single loop over boxy: each (parameter set, boxy) combination
                # is plotted exactly once.
                for boxy in [False, True]: # Determine whether to make each plot an equal grid size
                    # (params, skipEdges): edges are kept for the full set and
                    # skipped for the abbreviated set.
                    for params, skip_edges in ((full_plots_data, False),
                                               (abbrev_plots_data, True)):
                        make_binned_acceptance_plots(params_data=params,
                                                     params_MC=params,   # could refactor later, but just using params_MC
                                                     histname=histname,
                                                     histname_data=histname+"_"+datatype,
                                                     histname_MC=histname+"_"+mctype,
                                                     OUTDIR=dataset_outdir,
                                                     skipEdges=skip_edges,
                                                     dodiff=True,
                                                     rescale_y=rescale_y,
                                                     boxy=boxy)

TTree 1 of 8:   1%|▎                           | 2/176 [00:03<05:08,  1.77s/it]

Reading TFile nSidis_x_binned.root


TTree 1 of 8: 100%|██████████████████████████| 176/176 [08:16<00:00,  2.82s/it]
TTree 2 of 8: 100%|██████████████████████████| 176/176 [15:20<00:00,  5.23s/it]
TTree 3 of 8: 100%|██████████████████████████| 176/176 [18:43<00:00,  6.39s/it]
TTree 4 of 8:  84%|█████████████████████▊    | 148/176 [16:12<02:34,  5.52s/it]