In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
import mplhep as hep
plt.style.use([hep.style.ROOT, hep.style.firamath])
from scipy.special import erf,erfinv
import ROOT
import os
import itertools
from scipy.optimize import curve_fit
from scipy.interpolate import UnivariateSpline
from scipy.interpolate import LinearNDInterpolator

#no statistics box and no histogram title on the ROOT canvases
ROOT.gStyle.SetOptStat(0)
ROOT.gStyle.SetOptTitle(0)

#base directory of the input jet files
_basedir='/eos/cms/store/cmst3/group/hgcal/CMG_studies/psilva/DigiTester/2023Jun19/'

#directory receiving all plots and summary files
#os.makedirs is used instead of shelling out to `mkdir -p`: no sub-shell is spawned
#for an interpolated path and a failure raises instead of being silently ignored
_outdir='/eos/user/p/psilva/www/HGCal/TimingStudies/2023Oct21'
os.makedirs(_outdir,exist_ok=True)

#LaTeX labels of the particle species, indexed by PDG id
pdgid_dict={22:r'$\gamma$',130:r'$K_L^0$',211:r'$\pi^\pm$'}

#scenario used for the parameterizations, with the matching file tag and plot title
isStartUp=False
agetag='startup' if isStartUp else '3iab'
agetitle='Startup' if isStartUp else r'EOL (3 ab$^{-1}$)'

# 1. Parametrization of the time tagging efficiencies

In [None]:
from toa_param import _params as toa_param

def _evalParam(qtoa,eta,key=('SingleK0L','eff_W'),isStartUp=True):

    """Evaluates one parameter of the ToA parameterization on a (q_TOA,eta) grid.

    The parameterization is only tabulated for eta in [1.8,2.0,2.5,2.8]: each
    requested eta is assigned the value of the closest tabulated one (the first
    one in case of a tie).

    Parameters
    ----------
    qtoa : iterable of charge thresholds passed to toa_param as `q`
    eta : iterable of pseudo-rapidities
    key : (shower,parameter name) used to index the output of toa_param
    isStartUp : forwarded to toa_param

    Returns
    -------
    xy : list of (q,eta) tuples, with q varying slowest
    z : numpy array with the parameter value for each entry of xy
    """

    shower,pname=key

    #pseudo-rapidities for which the parameterization is available
    available_eta=np.array([1.8,2.0,2.5,2.8])

    xy,z=[],[]
    for ix in qtoa:

        #the table only depends on the charge threshold: evaluate it once per q
        #and not once per (q,eta) pair (assumes toa_param has no side effects)
        param_vs_eta=toa_param(q=ix, isStartUp=isStartUp)[shower][pname]

        for iy in eta:
            xy.append( (ix,iy) )
            idx = (np.abs(iy - available_eta)).argmin()
            z.append( param_vs_eta[available_eta[idx]] )

    return xy,np.array(z)

#build the dict of interpolators: (shower,parameter name) -> LinearNDInterpolator in (q_TOA,eta)
#a control plot is saved for each of them
interp={}
qtoa=np.linspace(12,100,100)
eta=np.array([1.5,1.8,2.0,2.2,2.5,2.6,2.8,2.9,3.0])
for pdgid,pname in itertools.product([22,130,211],
                                     ['eff_M','eff_O','eff_W','res_A','res_B','res_C','res_minE']):

    shower='SinglePhoton' if pdgid==22 else ('SingleK0L' if pdgid==130 else 'SinglePion')

    #interpolate
    xy,z = _evalParam(qtoa,eta,key=(shower,pname),isStartUp=isStartUp)
    interp[(shower,pname)] = LinearNDInterpolator(xy, z)
    X = np.linspace(qtoa[0],qtoa[-1])
    Y = np.linspace(1.5,3.0)
    X, Y = np.meshgrid(X, Y)  # 2D grid for interpolation
    Z = interp[(shower,pname)](X, Y)

    #show plot: tabulated values (left) and interpolated map (right)
    fig,ax=plt.subplots(1,2,figsize=(16,8))

    for ieta in [1.8,2.0,2.5,2.8]:
        pvals=[toa_param(iq, isStartUp=isStartUp)[shower][pname][ieta] for iq in qtoa]
        ax[0].plot(qtoa,pvals,label=rf'$\eta={ieta:3.1f}$')
    ax[0].grid()
    ax[0].legend(title=shower)
    ax[0].set_xlabel(r'$q_{TOA}$ [fC]')

    #turn e.g. eff_M into \epsilon_{M}
    #raw strings are needed: '\e' and '\s' are invalid escape sequences in a normal string literal
    ptitle=pname.replace('_','_{')+'}'
    ptitle=ptitle.replace('eff',r'\epsilon')
    ptitle=ptitle.replace('res',r'\sigma')
    ax[0].set_ylabel(rf"${ptitle}$")

    img=ax[1].pcolormesh(X, Y, Z, shading='auto')
    plt.colorbar(img)
    ax[1].grid()
    ax[1].set_xlabel(r'$q_{TOA}$ [fC]')
    ax[1].set_ylabel('Pseudo-rapidity')
    hep.cms.label(loc=0,rlabel='', ax=ax[0])
    ax[1].text(1.0,1.0,agetitle,va='bottom',ha='right',transform=ax[1].transAxes)
    fig.tight_layout()
    plt.savefig(f'{_outdir}/{shower}_{pname}_{agetag}.png')
    plt.close()

In [None]:
def timeTagEff(en,q=12,h=2.0,pdgid=22):

    """Parameterized efficiency for a ToA measurement.

    Evaluates 0.5*M*(1+erf(((en-O)/W+1)*erfinv(0.9))) where M, O and W are the
    eff_M, eff_O and eff_W interpolators evaluated at (q,h).
    pdgid 22 selects the photon parameters, 130 the K0L ones and any other
    value the pion ones.
    """

    shower={22:'SinglePhoton',130:'SingleK0L'}.get(pdgid,'SinglePion')
    plateau,offset,width=[interp[(shower,pname)](q,h) for pname in ('eff_M','eff_O','eff_W')]

    arg=((en-offset)/width + 1)*erfinv(0.9)
    return 0.5*plateau*(1+erf(arg))


def timeResol(en,q=12,h=2.0,pdgid=22):

    """Parameterized ToA resolution.

    Returns sqrt(A^2/en + (B/en)^2 + C^2) where A, B and C are the res_A, res_B
    and res_C interpolators evaluated at (q,h).
    pdgid 22 selects the photon parameters, 130 the K0L ones and any other
    value the pion ones.
    """

    if pdgid==22:
        shower='SinglePhoton'
    elif pdgid==130:
        shower='SingleK0L'
    else:
        shower='SinglePion'

    a,b,c=[interp[(shower,f'res_{term}')](q,h) for term in 'ABC']

    return np.sqrt(a*a/en + (b/en)**2 + c**2)


def timeResolWgt(en,q=12,h=2.0,pdgid=22):

    """Weight derived from the parameterized time resolution.

    The resolution returned by timeResol is turned into
    0.5*(1+erf((90-resolution)/20)): the weight is 0.5 for a resolution of 90
    and decreases for worse resolutions.
    """

    eff_plateau=1.0
    resol=timeResol(en,q=q,h=h,pdgid=pdgid)
    return 0.5*(1+eff_plateau*erf((90.-resol)/20))

In [None]:
def drawParameterizedWeights(draw_eta=2.0):

    """Draws the parameterized weights and resolutions at a fixed pseudo-rapidity.

    Two figures with one panel per particle species of pdgid_dict are saved in _outdir:
    - timetageff_<10*eta>_<agetag>.png : timeTagEff (solid) and timeResolWgt (dashed)
      as function of the energy, for charge thresholds of 12, 50 and 75 fC
    - toaresol_<10*eta>_<agetag>.png : timeResol as function of the energy for
      12, 24 and 48 fC, with a reference dashed line drawn at 90
    """

    eta_label=rf' $|\eta|$={draw_eta:3.1f}'

    #tag for the file names: no field width is used on purpose, as '3.0f' pads
    #two-digit values with a blank and yields names such as 'timetageff_ 18_...'
    eta_tag=f'{draw_eta*10:.0f}'

    #weights
    fig,ax=plt.subplots(1,3,figsize=(18,6),sharey=True)
    for iax,(pdgid,label) in zip(ax,pdgid_dict.items()):
        xlin=np.linspace(0.5,50 if pdgid==22 else 75,500)
        for q0 in [12,50,75]:
            g=iax.plot(xlin,timeTagEff(xlin,q0,draw_eta,pdgid),label=rf'$\varepsilon_{{{q0}~fC}}$')[0]
            #same color as the efficiency curve of this threshold
            iax.plot(xlin,timeResolWgt(xlin,q0,draw_eta,pdgid),ls='--',c=g.get_color(),label=rf'$\delta t_{{{q0}~fC}}$')
        iax.legend(title=label + eta_label, loc='upper center',ncol=3,fontsize=16)
        iax.grid()
        iax.set_xlabel(r'Energy [GeV]')
    ax[0].set_ylabel('Weight')
    hep.cms.label(loc=0,rlabel='', ax=ax[0])
    ax[-1].text(1.0,1.0,agetitle,va='bottom',ha='right',transform=ax[-1].transAxes)
    plt.ylim(0,1.5)
    plt.tight_layout()
    plt.savefig(f'{_outdir}/timetageff_{eta_tag}_{agetag}.png')
    plt.close()

    #resolutions
    fig,ax=plt.subplots(1,3,figsize=(18,6),sharey=True)
    for iax,(pdgid,label) in zip(ax,pdgid_dict.items()):
        xlin=np.linspace(0.5,25,500)
        for q0 in [12,24,48]:
            iax.plot(xlin,timeResol(xlin,q0,draw_eta,pdgid),label=f'{q0} fC')
        title=label + ' showers' + eta_label
        iax.legend(title=title,loc='upper right',fontsize=16)
        iax.grid()
        iax.set_xlabel(r'Energy [GeV]')
        iax.plot(xlin,90*np.ones_like(xlin),ls='--',c='gray')

    ax[0].set_ylabel('ToA resolution')
    hep.cms.label(loc=0,rlabel='', ax=ax[0])
    ax[-1].text(1.0,1.0,agetitle,va='bottom',ha='right',transform=ax[-1].transAxes)
    plt.ylim(0,200)
    plt.tight_layout()
    plt.savefig(f'{_outdir}/toaresol_{eta_tag}_{agetag}.png')
    plt.close()

#one set of plots for each pseudo-rapidity where the parameterization is tabulated
for draw_eta in [1.8,2.0,2.5,2.8]:
    drawParameterizedWeights(draw_eta)

In [None]:
#generate the 2D graph map to be used in the jet analysis:
#for each particle species and charge threshold two TGraph2D are filled on a
#(energy,eta) grid, one with timeTagEff and the other with timeResolWgt
gr2d_eff = {}
for pdgid in pdgid_dict.keys():
    for q0 in [12,20,24,30,40,50,60,70,80,100]:
        grkey=f'pdg{pdgid}_{q0:d}'
        gr_tag=ROOT.TGraph2D()
        gr_resol=ROOT.TGraph2D()
        gr2d_eff[f'{grkey}_timetagwgt']=gr_tag
        gr2d_eff[f'{grkey}_timeresolwgt']=gr_resol
        for en,h in itertools.product(np.logspace(-1,3,100),np.linspace(1.5,3.0,100)):
            #both graphs are filled in parallel: they share the point index
            n=gr_tag.GetN()
            gr_tag.SetPoint(n,en,h,timeTagEff(en,q0,h,pdgid))
            gr_resol.SetPoint(n,en,h,timeResolWgt(en,q0,h,pdgid))

#save all to file to use in jet analysis (each graph is written under its dict key)
fOut=ROOT.TFile.Open(f'{_outdir}/timeeff_{agetag}.root','RECREATE')
fOut.cd()
for k in gr2d_eff:
    gr2d_eff[k].Write(k)
fOut.Close()

# 2. Performance plots for the jet analysis

In [None]:
def compareConstituentsSpectra(tags=['pu','sk','fulltt_sk'],
                               tagTitles=['Raw','Soft-Killer','SK+Time tag'],
                               r=4,
                               qtoa=12,
                               basedir=_basedir+'/StartUp'):

    """Compares the energy spectra of the jet constituents for different jet definitions.

    For each constituent category (chf, emf, nhf) one canvas is created where, for
    every tag, the histogram '<tag><category>_en' (solid) and the histogram
    '<tag>pu<category>_en' scaled by 1/10 (dashed) are overlaid.
    The histograms are read from '<basedir>/jets_ak<r>_vbfhgg_<qtoa>fC.root' and the
    canvases are saved as png and pdf in _outdir.

    Parameters
    ----------
    tags : prefixes of the histograms to compare (at most 5, one color each)
    tagTitles : legend titles, one per tag
    r : jet distance parameter times 10, as used in the file name
    qtoa : charge threshold in fC, as used in the file name
    basedir : directory with the input file

    Returns
    -------
    (canvas,garbage) : list of canvases and list of all objects drawn on them,
    to be kept alive by the caller if the canvases are to be displayed

    Raises
    ------
    OSError if the input file can not be opened
    """

    def _cloneAndFormat(h,name,title,width,color,ls):
        """returns a rebinned clone of h, detached from the input file, with the line style applied"""
        h=h.Clone(name)
        h.Rebin()
        h.SetDirectory(0)
        h.SetTitle(title)
        h.SetLineWidth(width)
        h.SetLineColor(color)
        h.SetLineStyle(ls)
        return h

    canvas=[]
    garbage=[]
    color_list=['#969696','#1a9641','#fdae61','#2c7bb6','#ffffff']

    #the same file serves all categories: open it once and make sure it is closed at the end
    url=f'{basedir}/jets_ak{r}_vbfhgg_{qtoa}fC.root'
    fIn=ROOT.TFile.Open(url)
    if not fIn or fIn.IsZombie():
        raise OSError(f'Unable to open {url}')

    try:
        for c,ctitle in [('chf','Charged hadrons'),('emf','e.m. particles'),('nhf','Neutral hadrons')]:

            canvas.append( ROOT.TCanvas(c,c,800,800))
            p=canvas[-1].cd()

            frame=None
            leg=None
            for i,tag in enumerate(tags):

                color=ROOT.TColor.GetColor(color_list[i])
                title=tagTitles[i]

                #NOTE(review): the histograms are rebinned here and once more in _cloneAndFormat,
                #so the drawn spectra have 4 original bins merged - confirm this is intended
                hsig=fIn.Get(f'{tag}{c}_en')
                hsig.Rebin()
                hpu=fIn.Get(f"{tag}pu{c}_en")
                hpu.Rebin()

                if frame is None:

                    #empty histogram defining the axes (named after the category)
                    frame=hpu.Clone(f'frame{c}')
                    frame.Reset('ICE')
                    frame.SetDirectory(0)
                    frame.Draw()
                    frame.GetYaxis().SetRangeUser(1,1e5)
                    frame.GetYaxis().SetTitleSize(0.04)
                    frame.GetXaxis().SetTitleSize(0.04)
                    garbage.append(frame)
                    p.SetLogy()

                    leg=ROOT.TLegend(0.6,0.65,0.9,0.92,ctitle)
                    leg.SetBorderSize(0)
                    leg.SetFillStyle(0)
                    garbage.append(leg)

                garbage.append( _cloneAndFormat(hsig,f'{hsig.GetName()}{r}','Sig', 3, color,1) )
                garbage[-1].Draw('histsame')
                leg.AddEntry(garbage[-1],title,'l')
                garbage.append( _cloneAndFormat(hpu,f'{hpu.GetName()}{r}','PU / 10', 3, color,7) )
                garbage[-1].Scale(1./10)
                garbage[-1].Draw('histsame')
                leg.AddEntry(garbage[-1],title+'(PU)','l')
                p.SetGridx()
                p.SetGridy()
                p.SetLeftMargin(0.15)
                p.SetRightMargin(0.05)
                p.SetTopMargin(0.05)

            #the canvas is not divided: cd() (and not cd(1)) selects it
            p=canvas[-1].cd()
            if leg is not None:
                leg.Draw()
            txt=ROOT.TLatex()
            txt.SetNDC()
            txt.SetTextFont(43)
            txt.SetTextSize(24)
            garbage.append(txt)
            garbage.append(txt.DrawLatexNDC(0.15,0.96,'#bf{CMS} #it{Simulation Preliminary}'))
            garbage.append(txt.DrawLatexNDC(0.25,0.89,'VBF H#rightarrow #gamma#gamma, <PU>=140'))
            garbage.append(txt.DrawLatexNDC(0.25,0.83,f'q={qtoa}fC R=0.{r}'))
            txt.SetTextAlign(30)
            garbage.append(txt.DrawLatexNDC(0.95,0.96,'Phase-2 HGCal'))
            canvas[-1].SaveAs(f'{_outdir}/{c}spectra_{qtoa}fC.png')
            canvas[-1].SaveAs(f'{_outdir}/{c}spectra_{qtoa}fC.pdf')
    finally:
        #all drawn objects are detached clones: the file is no longer needed
        fIn.Close()

    return canvas,garbage

for qtoa in [12,24,50]:
    compareConstituentsSpectra(qtoa=qtoa)

In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
import mplhep as hep
plt.style.use([hep.style.ROOT, hep.style.firamath])
from scipy.special import erf,erfinv
import ROOT
import os
import itertools
from scipy.optimize import curve_fit
from scipy.interpolate import UnivariateSpline
from scipy.interpolate import LinearNDInterpolator

#NOTE(review): this cell repeats the configuration of the first cell of the notebook,
#presumably so that the jet analysis below can be run from a fresh kernel - keep both in sync

#no statistics box and no histogram title on the ROOT canvases
ROOT.gStyle.SetOptStat(0)
ROOT.gStyle.SetOptTitle(0)

#base directory of the input jet files
_basedir='/eos/cms/store/cmst3/group/hgcal/CMG_studies/psilva/DigiTester/2023Jun19/'

#directory receiving all plots and summary files
#os.makedirs is used instead of shelling out to `mkdir -p`: no sub-shell is spawned
#for an interpolated path and a failure raises instead of being silently ignored
_outdir='/eos/user/p/psilva/www/HGCal/TimingStudies/2023Oct21'
os.makedirs(_outdir,exist_ok=True)

#LaTeX labels of the particle species, indexed by PDG id
pdgid_dict={22:r'$\gamma$',130:r'$K_L^0$',211:r'$\pi^\pm$'}

#scenario used for the parameterizations, with the matching file tag and plot title
isStartUp=False
agetag='startup' if isStartUp else '3iab'
agetitle='Startup' if isStartUp else r'EOL (3 ab$^{-1}$)'

In [None]:
%%time

def getDataFrameFrom(url=f'{_basedir}/StartUp/jets_ak4_vbfhgg.root',R=0.4):

    """Reads the 'Events' tree of a jet file into a flat pandas DataFrame.

    Only events with at least one jet with NoPuJet_pt>20 and 1.6<|GenJet_eta|<2.8
    are kept. For each jet collection (NoPuJet, PuJet, Jet, NeutralTimeTagJet,
    FullTimeTagJet) the di-jet quantities (mjj, detajj, dphijj) and the
    en, pt, eta, R (energy ratio to NoPuJet) and deta (eta difference to NoPuJet)
    of the first two jets are stored; -9999 is used when there are not enough
    good jets in the event.

    Parameters
    ----------
    url : path of the ROOT file
    R : not used in the function body

    Returns
    -------
    pandas DataFrame with one row per selected event
    """

    rdf=ROOT.RDataFrame('Events',url)
    rdf=rdf.Define('GoodJet','NoPuJet_pt>20 && abs(GenJet_eta)>1.6 && abs(GenJet_eta)<2.8')
    rdf=rdf.Define('nGoodJet','Sum(GoodJet)')
    rdf=rdf.Filter('nGoodJet>0')

    columns=['nPU','toaThr','sk_thr','nGoodJet']
    for tag in ['NoPu','Pu','','NeutralTimeTag','FullTimeTag']:

        #per-collection quantities: the order matters as later definitions use earlier ones
        definitions=[
            (f'{tag}Jet_R',    f'{tag}Jet_en/NoPuJet_en'),
            (f'{tag}Jet_deta', f'{tag}Jet_eta-NoPuJet_eta'),
            (f'{tag}Jet_pz',   f'{tag}Jet_pt*sinh({tag}Jet_eta)'),
            (f'{tag}Jet_mass', f'sqrt({tag}Jet_en*{tag}Jet_en-{tag}Jet_pt*{tag}Jet_pt-{tag}Jet_pz*{tag}Jet_pz)'),
            (f'{tag}mjj',      f'InvariantMass({tag}Jet_pt,{tag}Jet_eta,{tag}Jet_phi,{tag}Jet_mass)'),
            (f'{tag}detajj',   f'nGoodJet>1 ? {tag}Jet_eta[1]-{tag}Jet_eta[0] : -9999'),
            (f'{tag}dphijj',   f'nGoodJet>1 ? TVector2::Phi_mpi_pi({tag}Jet_phi[1]-{tag}Jet_phi[0]) : -9999'),
        ]
        for name,expr in definitions:
            rdf=rdf.Define(name,expr)
        columns.extend([f'{tag}mjj',f'{tag}detajj',f'{tag}dphijj'])

        #flatten the first two jets of the collection
        for i in [0,1]:
            for x in ['pt','en','eta','R','deta']:
                rdf=rdf.Define(f'{tag}Jet{i+1}_{x}',f'nGoodJet>{i} ? {tag}Jet_{x}[{i}] : -9999')
            columns.extend([f'{tag}Jet{i+1}_{x}' for x in ['en','pt','eta','R','deta']])

    return pd.DataFrame(rdf.AsNumpy(columns=columns))


#read all charge thresholds and aging scenarios, flag the aged ones
#and store the merged summary in an HDF5 file
frames=[]
for q,aging in itertools.product([12,24,50],['StartUp','Aged']):
    frame=getDataFrameFrom(f'{_basedir}/{aging}/jets_ak4_vbfhgg_{q}fC.root',0.4)
    frame['isAged']=(aging=='Aged')
    frames.append(frame)
jdf=pd.concat(frames)
jdf.to_hdf(f'{_outdir}/jetsummary.h5',key='Events',mode='w')
jdf.head()

In [None]:
#jet collection prefix in the data frame -> title used in the plots
#(the plotting functions iterate over the dict: the order below is the drawing order)
jet_dict=dict([
    ('PuJet',             'Raw'),
    ('Jet',               'SK'),
    ('NeutralTimeTagJet', 'SK+Neut. time tag'),
    ('FullTimeTagJet',    'SK+Time tag'),
    ('NoPuJet',           'PU=0'),
])

In [None]:
def displayReponse(df,toaThr=12,isAged=False,
                   proc=r'VBF $H\rightarrow\gamma\gamma$',
                   outname=None):

    """Draws the inclusive pT, response, |eta| and delta-eta distributions of the jets.

    The first two jets of each event are used if the corresponding NoPuJet has
    pT>20. One histogram is drawn per jet collection of jet_dict; the NoPuJet
    reference is drawn in black and only enters the pT and |eta| panels.

    Parameters
    ----------
    df : jet summary data frame
    toaThr, isAged : select the charge threshold and aging scenario
    proc : process title
    outname : if given the figure is saved as <_outdir>/<outname>.png, otherwise it is shown
    """

    #binning: pT, response, |eta|, delta eta
    pt_bins=np.linspace(0,200,25)
    r_bins=np.linspace(0.,4,50)
    eta_bins=np.linspace(1.6,2.8,20)
    deta_bins=np.linspace(0,0.2,60)

    fig,ax=plt.subplots(2,2,figsize=(16,16))
    (ax_pt,ax_r),(ax_eta,ax_deta)=ax

    #the selection does not depend on the jet collection
    jet_sel=[(ijet,(df['toaThr']==toaThr) & (df['isAged']==isAged) & (df[f'NoPuJet{ijet}_pt']>20))
             for ijet in [1,2]]

    for tag,tagTitle in jet_dict.items():

        def _firstTwoJets(var):
            """values of var for the selected leading jets followed by the sub-leading ones"""
            return np.concatenate([df[sel][f'{tag}{ijet}_{var}'].values for ijet,sel in jet_sel])

        pt=_firstTwoJets('pt')
        eta=np.abs(_firstTwoJets('eta'))
        r=_firstTwoJets('R')
        deta=_firstTwoJets('deta')

        hist_fmt={'histtype':'step','lw':3}
        isRef=(tag=='NoPuJet')
        if isRef:
            hist_fmt['ec']='k'
        ax_pt.hist(pt,label=tagTitle,**hist_fmt,bins=pt_bins)
        ax_eta.hist(eta,**hist_fmt,bins=eta_bins)
        if not isRef:
            ax_r.hist(r,**hist_fmt,bins=r_bins)
            ax_deta.hist(deta,**hist_fmt,bins=deta_bins)

    ax_pt.legend(fontsize=18,ncol=2)
    ax_pt.set_ylim(0,800)
    ax_r.text(1,1,proc,ha='right',va='bottom',transform=ax_r.transAxes)
    ax_pt.set_xlabel('Transverse momentum [GeV]')
    ax_r.set_xlabel(r'Response = $E / E_{ref}$')
    ax_eta.set_xlabel(r'$|\eta|$')
    ax_deta.set_xlabel(r'$\Delta\eta = \eta - \eta_{ref}$')
    for iax in [ax_pt,ax_r,ax_eta,ax_deta]:
        iax.set_ylabel('Jets')
        iax.grid()

    hep.cms.label(loc=0,rlabel=r'3 ab$^{-1}$' if isAged else 'Startup',ax=ax_pt)
    fig.tight_layout()
    if outname is not None:
        plt.savefig(f'{_outdir}/{outname}.png')
        plt.close()
    else:
        plt.show()

displayReponse(jdf,outname='response_inclusive_startup',isAged=False)
displayReponse(jdf,outname='response_inclusive_aged',isAged=True)

In [None]:
def show2Dresponse(df,qtoa=12,isAged=False,outname=None):

    """Draws the jet energy response versus the pT and the |eta| of the reference jet.

    For each jet collection of jet_dict except NoPuJet, two 2D histograms of the
    response of the first two jets are drawn, as function of NoPuJet pT and of
    NoPuJet |eta|. Only jets whose NoPuJet has pT>20 are used.

    Parameters
    ----------
    df : jet summary data frame
    qtoa, isAged : select the charge threshold and aging scenario
    outname : if given the figures are saved as <_outdir>/<outname>_<collection>_vspt.png
              and <_outdir>/<outname>_<collection>_vsabseta.png, otherwise they are shown
    """

    rlabel=f'{qtoa}fC ' + (r'3 ab$^{-1}$' if isAged else 'Startup')
    mask=(df['toaThr']==qtoa) & (df['isAged']==isAged)
    jet_sel=[(ijet,mask & (df[f'NoPuJet{ijet}_pt']>20)) for ijet in [1,2]]

    def _firstTwoJets(prefix,var):
        """values of var for the selected leading jets followed by the sub-leading ones"""
        return np.concatenate([df[sel][f'{prefix}{ijet}_{var}'].values for ijet,sel in jet_sel])

    def _draw(xvals,rvals,bins2d,xtitle,tag,tagTitle,postfix):
        """fills, draws and saves (or shows) one 2D response histogram"""
        fig, ax = plt.subplots(figsize=(8,8))
        H, xedges, yedges = np.histogram2d(xvals,rvals,bins=bins2d)
        hep.hist2dplot(H, xedges, yedges, labels=False,cmin=1)
        ax.set_xlabel(xtitle)
        ax.set_ylabel('E(PU=140) / E(PU=0)')
        hep.cms.label(loc=0,rlabel=rlabel)
        ax.text(0.9,0.9,tagTitle,ha='right',transform=ax.transAxes)
        ax.grid()
        fig.tight_layout()
        if outname is not None:
            plt.savefig(f'{_outdir}/{outname}_{tag}_{postfix}.png')
            plt.close()
        else:
            plt.show()

    #the abscissa are taken from the reference jets: same for all collections
    pt=_firstTwoJets('NoPuJet','pt')
    eta=np.abs(_firstTwoJets('NoPuJet','eta'))

    for tag,tagTitle in jet_dict.items():

        if tag=='NoPuJet' : continue

        #wider response range for the raw jets
        rbins=np.linspace(0,4 if tag=='PuJet' else 2,50)
        r=_firstTwoJets(tag,'R')

        _draw(pt,r,(np.linspace(0,300,30),rbins),'Transverse momentum [GeV]',tag,tagTitle,'vspt')
        _draw(eta,r,(np.linspace(1.5,3.0,30),rbins),'Pseudo-rapidity',tag,tagTitle,'vsabseta')


show2Dresponse(jdf,qtoa=12,isAged=False,outname='response2d_12fC_startup')
show2Dresponse(jdf,qtoa=12,isAged=True,outname='response2d_12fC_aged')

In [None]:
from scipy.stats import crystalball

def response_model(x,a, mu,sigma, beta,m):
    """Crystal Ball shape: a times the scipy crystalball pdf (parameters beta, m) evaluated at (x-mu)/sigma."""
    return a*crystalball.pdf((x - mu)/sigma, beta, m)

"""
def gaussian(x, a, rinv, sigma):
    t = (x - rinv)/sigma
    return (a/(sigma*np.sqrt(2*np.pi))) * np.exp( - 0.5*(t**2) )
"""

def fitResponseWithModel(r):

    """Summarizes a response distribution and fits it with response_model.

    NaN values and values not larger than 0.2 are discarded. The distribution is
    histogrammed in 19 bins between max(median/2,5th percentile) and
    min(2*median,99th percentile) and the bin contents, excluding the first and
    last bin, are fitted with response_model (bounded fit).

    Parameters
    ----------
    r : numpy array with the response values

    Returns
    -------
    dict with
      popt, pcov : result of curve_fit (None if the fit failed)
      hist, bin_edges : histogram used in the fit
      median : median of the distribution
      sigmaeff : effective width, half of the 84th-16th percentile difference
      isgood : False if the fit raised an exception
    """

    #build the histogram
    r=r[~np.isnan(r)]
    r=r[r>0.2]

    q=np.percentile(r,q=[5,16,50,84,99])
    med=q[2]
    wid=0.5*(q[3]-q[1])
    bins=np.linspace(max(med*0.5,q[0]),min(med*2,q[-1]),20)

    h, e = np.histogram(r, bins=bins)
    c = e[:-1] + np.diff(e) / 2

    #fit the model to the bin centers
    #(normalization, mean, sigma, beta, m)
    #beta is the point in which the power law <-> gaussian transition occurs
    #m is the power of the power law 1/(B-x)**m
    bounds=((0,   max(med*0.8,q[1]), wid*0.8, q[1],  1),
            (1e6, min(med*1.2,q[3]), wid*1.2, q[-1], 10))
    try:
        popt,pcov = curve_fit(response_model, c[1:-1], h[1:-1],bounds=bounds,method='trf')
        isgood = True
    except Exception:
        #a failed fit is not fatal: the median and the effective width are still reported
        #(Exception, and not a bare except, so that a KeyboardInterrupt is not swallowed)
        popt, pcov, isgood = None, None, False

    fit_report={'popt':popt,
                'pcov':pcov,
                'hist':h,
                'bin_edges':e,
                'median':med,
                'sigmaeff':wid,
                'isgood':isgood}
    return fit_report


def calibrateEnergyResponse(df, toaThr,isAged, algo,
                            profq=np.linspace(0,95,15), doPlots=True, 
                            title='',outname=None):

    """Derives a simple jet energy correction and the resolution after applying it.

    The first two jets of each event with pT>20 are profiled in bins of their
    reconstructed energy (bin edges are the profq percentiles). In each bin the
    median response to the NoPuJet energy defines the correction 1/R. The
    corrections are smoothed with a spline versus the mean reconstructed energy,
    applied to the jets, and the effective width of the corrected response is
    taken as the resolution.

    Parameters
    ----------
    df : jet summary data frame
    toaThr, isAged : select the charge threshold and aging scenario
    algo : jet collection prefix (e.g. 'PuJet')
    profq : percentiles of the reconstructed energy defining the bin edges
    doPlots : draw the response before/after the correction in each bin (2 rows of len(profq)/2 panels)
    title : title of the jet collection used in the plots
    outname : if given the figure is saved as <_outdir>/<outname>.png, otherwise it is shown

    Returns
    -------
    pandas DataFrame with one row per energy bin: toaThr, isAged, mean and std of the
    NoPuJet pT, NoPuJet energy and reconstructed energy, Rinv and sigma with uncertainties
    """

    mask=(df['toaThr']==toaThr) & (df['isAged']==isAged)

    #collect the leading and sub-leading jets
    #the profiling variable is the reconstructed energy
    x,xref,ptref,r=[],[],[],[]
    for ijet in [1,2]:
        sel=df[mask & (df[f'{algo}{ijet}_pt']>20)]
        x.append( sel[f'{algo}{ijet}_en'].values )
        xref.append( sel[f'NoPuJet{ijet}_en'].values )
        ptref.append( sel[f'NoPuJet{ijet}_pt'].values )
        r.append( sel[f'{algo}{ijet}_R'].values )
    x,xref,ptref,r=[np.concatenate(v) for v in (x,xref,ptref,r)]
    bins=np.percentile(x,q=profq)
    nbins=len(bins)-1

    #correct the jet response as function of the reconstructed energy
    if doPlots:
        ncols=int(len(profq)/2)
        fig,ax=plt.subplots(2,ncols,figsize=(30,10),sharex=True,sharey=True)

        def _axFor(i):
            """panel of the i-th energy bin (row-major)"""
            irow,icol=divmod(i,ncols)
            return ax[irow][icol]

    fit_results=[]

    #divide equally the data to calibrate
    ymax=1
    for i in range(nbins):

        mask_prof=(x>=bins[i]) & (x<bins[i+1])

        #fit response
        fit_report = fitResponseWithModel(r=r[mask_prof])
        ymax=max(ymax,fit_report['hist'].max())
        goodFit = fit_report['isgood']
        Ravg = fit_report['median']
        #statistical uncertainty from the number of jets in this energy bin
        #(and not from the size of the full sample)
        Ravg_unc = fit_report['sigmaeff']/np.sqrt(mask_prof.sum())
        fit_results.append([i,
                            toaThr,isAged,
                            ptref[mask_prof].mean(),ptref[mask_prof].std(),
                            xref[mask_prof].mean(),xref[mask_prof].std(),
                            x[mask_prof].mean(),x[mask_prof].std(),
                            1/Ravg, Ravg_unc/(Ravg**2), 0, 0])

        #plot histogram
        if not doPlots: continue
        iax=_axFor(i)
        bin_edges=fit_report['bin_edges']
        hep.histplot(fit_report['hist'], bin_edges, lw=1, color='gray', histtype='fill', ax=iax)
        if goodFit:
            iax.plot(bin_edges,response_model(bin_edges,*fit_report['popt']),color='blue',lw=1,ls='--')
        iax.text(0.1,0.9,rf'${bins[i]:3.0f}<E<{bins[i+1]:3.0f}$',ha='left',transform=iax.transAxes,fontsize=18)
        iax.text(0.1,0.8,f'1/R={1./Ravg:3.3f}',ha='left',transform=iax.transAxes,fontsize=18)       
        iax.set_xlabel(fr'$R=E_{{{title}}}/E_{{PU=0}}$')


    #derive simple jet energy scale corrections
    jes=pd.DataFrame( fit_results,
                      columns=['i','toaThr','isAged',
                               'NoPuJet_pt_mean','NoPuJet_pt_std',
                               'NoPuJet_en_mean','NoPuJet_en_std',
                               'Jet_en_mean','Jet_en_std',
                               'Rinv','Rinv_unc','sigma','sigma_unc'] ) 
    #NOTE(review): scipy documents the spline weights as the inverse of the standard deviation
    #of y while the uncertainty itself is passed here - left unchanged as the smoothing factor s
    #would have to be re-tuned together with the weights
    spl = UnivariateSpline(x=jes['Jet_en_mean'], y=jes['Rinv'], w=jes['Rinv_unc'], s=2, ext=3)

    #re-fit calibrated estimator to get final resolution
    jes['sigma']=0.
    jes['sigma_unc']=0. 
    xcalib=x*spl(x)
    rcalib=xcalib/xref
    for i in range(nbins):

        mask_jes = (jes['i']==i)
        if mask_jes.sum()!=1 : continue

        mask_prof=(x>=bins[i]) & (x<bins[i+1])
        fit_report = fitResponseWithModel(rcalib[mask_prof])  
        ymax=max(ymax,fit_report['hist'].max())
        goodFit = fit_report['isgood']
        sigmaR = fit_report['sigmaeff']
        #uncertainty on the width, again from the number of jets in this energy bin
        sigmaR_unc =  np.sqrt((fit_report['sigmaeff']**2)/(2*mask_prof.sum()))
        jes.loc[mask_jes,'sigma'] = sigmaR
        jes.loc[mask_jes,'sigma_unc'] = sigmaR_unc

        if not doPlots: continue

        #show calibrated response
        iax=_axFor(i)
        bin_edges=fit_report['bin_edges']
        hep.histplot(fit_report['hist'], bin_edges, lw=2, color='black', histtype='step', ax=iax)
        if goodFit:
            iax.plot(bin_edges,response_model(bin_edges,*fit_report['popt']),color='red',lw=2,ls='-')
        iax.text(0.1,0.7,rf'$\sigma={sigmaR:3.3f}$',ha='left',transform=iax.transAxes,fontsize=18)

    if doPlots:

        #ensure same scale for all abscissa
        xmin=min([_axFor(i).get_xlim()[0] for i in range(nbins)])
        xmax=max([_axFor(i).get_xlim()[1] for i in range(nbins)])
        for i in range(nbins):
            _axFor(i).set_xlim(xmin,xmax)
            _axFor(i).grid()
            _axFor(i).set_ylim(0,ymax*1.5)
        hep.cms.label(loc=0,rlabel='',ax=ax[0][0])
        ax[0][1].text(0,1.1,title,transform=ax[0][1].transAxes)
        #math mode is needed for the subscript to be rendered
        ax[0][2].text(0,1.1,rf'$q_{{TOA}}>{toaThr}$ fC',transform=ax[0][2].transAxes)
        ageTitle=r'3 ab$^{-1}$' if isAged else 'Startup'
        ax[0][-1].text(0,1.1,ageTitle,transform=ax[0][-1].transAxes)

        plt.subplots_adjust(wspace=0, hspace=0)
        fig.tight_layout()
        if outname:
            plt.savefig(f'{_outdir}/{outname}.png')
            plt.close()
        else:
            plt.show()

    #return JES summary
    jes=jes.drop(columns=['i'])
    return jes

#run the calibration for every charge threshold and jet collection (start-up scenario only)
#the PU=0 jets are the reference and are not calibrated
calib_algos=[algo for algo in jet_dict.keys() if algo!='NoPuJet']
jec_list=[]
for toaThr,isAged,algo in itertools.product([12,24,50],[False],calib_algos):

    print(f'Running naive JEC for {toaThr}fC aged={isAged} algo={algo}')

    outname=f'jec_{toaThr}_{isAged}_{algo}'
    jec_summary=calibrateEnergyResponse(jdf,toaThr,isAged,algo,doPlots=True,title=jet_dict[algo],outname=outname)
    jec_summary['toaThr']=toaThr
    jec_summary['isAged']=isAged
    jec_summary['algo']=algo
    jec_list.append(jec_summary)

#join all summaries and store them
jec=pd.concat(jec_list)
fname=f'{_outdir}/jec_summary.h5'
jec.to_hdf(fname,key='jec')
print(f'\t...output in {fname}')

In [None]:
#quick look at the first rows of the calibration summary
jec.head()

In [None]:
#plot response
def plotFinal(jec,jec_keys, labels, outname=None,ylim=None,var='r'):

    """Draws the response or the resolution profiles versus the mean NoPuJet pT.

    Parameters
    ----------
    jec : calibration summary data frame
    jec_keys : list of (algo,toaThr,isAged) selecting the rows to draw (at most 5, one marker each)
    labels : legend labels, one per key
    outname : if given the figure is saved as <_outdir>/<outname>.png and .pdf, otherwise it is shown
    ylim : y-axis range, only used if var is not 'r'
    var : 'r' draws the response 1/Rinv on a broken y-axis (1.5-6 on top, 0.8-1.1 below);
          any other value draws sigma on a single axis
    """

    drawResponse=(var=='r')
    if drawResponse:
        fig, (axup,ax) = plt.subplots(2, 1, sharex=True,figsize=(8,8),gridspec_kw={'height_ratios': [1, 1.5]})
    else:
        fig,ax=plt.subplots(figsize=(8,8)) 

    markers=['o','^','v','D','P']
    for i,k in enumerate(jec_keys):

        algo,toaThr,isAged=k
        mask=(jec['algo']==algo) & (jec['toaThr']==toaThr) & (jec['isAged']==isAged)
        if mask.sum()==0: 
            print(f'Something wrong with {k}')
            continue

        ebar_style={'marker':markers[i],'ms':8,'elinewidth':1,'capsize':1,'ls':'none'}

        sel=jec[mask]
        x=sel['NoPuJet_pt_mean']
        if drawResponse:
            y=1./sel['Rinv']
            yerr=y*(sel['Rinv_unc']/sel['Rinv'])
            axup.set_ylabel('R = $E(PU=140)~/~E_{ref}$',labelpad=15)
            #the same points are drawn in both panels of the broken axis
            axup.errorbar(x,y,yerr=yerr,**ebar_style,label=labels[i])
        else:
            y=sel['sigma']
            yerr=sel['sigma_unc']
            ax.set_ylabel(r'Gaussian $\sigma_E/E$')
        ax.errorbar(x,y,yerr=yerr,**ebar_style,label=labels[i])

    if drawResponse:
        ax.set_xlabel(r'<CaloJet $p_{T}$> (PU=0) [GeV]')
        for iax in [ax,axup]:
            iax.grid()
        hep.cms.label(loc=0,label='Preliminary', rlabel='Phase-2 HGCal',ax=axup, fontsize=18)

        axup.set_ylim(1.5, 6.) 
        ax.set_ylim(0.8, 1.1)  

        # hide the spines between ax and axup
        axup.spines['bottom'].set_visible(False)
        ax.spines['top'].set_visible(False)
        axup.xaxis.tick_top()
        axup.tick_params(labeltop=False)  # don't put tick labels at the top
        ax.xaxis.tick_bottom()
        axup.legend(loc='upper right',ncol=2,fontsize=18)
        ax.plot(ax.get_xlim(),[1,1],ls='-',color='gray',lw=2)

        # diagonal marks of the axis break, in axes coordinates
        d = .01
        kwargs = dict(transform=axup.transAxes, color='k', clip_on=False)
        for xmark in [(-d, +d),(1 - d, 1 + d)]:
            axup.plot(xmark, (-d, +d), **kwargs)         # lower edge of the upper panel

        kwargs.update(transform=ax.transAxes)  # switch to the bottom axes
        for xmark in [(-d, +d),(1 - d, 1 + d)]:
            ax.plot(xmark, (1 - d, 1 + d), **kwargs)     # upper edge of the lower panel

    else:
        ax.legend(ncol=2,loc='upper left')
        if ylim is not None:
            ax.set_ylim(*ylim)

    fig.tight_layout()
    if not outname:
        plt.show()
    else:
        for ext in ['png','pdf']:
            print(f'{_outdir}/{outname}.{ext}')
            plt.savefig(f'{_outdir}/{outname}.{ext}')
        plt.close()


#compare the jet collections at 12 fC
jec_keys=[(algo,12,False) for algo in ['PuJet','Jet','NeutralTimeTagJet','FullTimeTagJet']]
labels=[jet_dict[x[0]] for x in jec_keys]
plotFinal(jec,jec_keys,labels,outname='rinv_profile_algos',ylim=(0,3),var='r')
plotFinal(jec,jec_keys,labels,outname='sigma_profile_algos',ylim=(0,0.5),var='sigma')

#compare the charge thresholds for the fully time-tagged jets
jec_keys=[('FullTimeTagJet',q0,False) for q0 in [12,24,50]]
labels=['12 fC','24 fC','50 fC']
plotFinal(jec,jec_keys,labels,outname='rinv_profile_qtoa',ylim=(0.8,1.1),var='r')
plotFinal(jec,jec_keys,labels,outname='sigma_profile_qtoa',ylim=(0,0.2),var='sigma')

In [None]:
#profile jet energy resolution as function of the generator jet energy
#FIXME(review): df_dict is not defined in any of the cells above, nor are the
#puMode, toaWgtCat and CalibPuJet_en columns produced there - this cell looks like
#a left-over of a previous version of the analysis and can not run from a fresh kernel
jer={}
refjet='GenJet'
for r,df in df_dict.items():
    
    print(f'Running naive JER for {r}')
    
    jer[r]=[]
    mask_list=[(0,r[1],1)]
    if r in [(0.2,12,False),(0.4,12,False)]:
        mask_list.append( (0,-1,-1) )
    
    for imask_def,df_data in df.groupby(['puMode','toaThr','toaWgtCat']):
        if not imask_def in mask_list: continue
        puMode,toaThr,toaWgtCat = imask_def

        #10 bins with equal population in the reference jet energy
        enbins=np.percentile(df_data[f'{refjet}_en'].values,q=np.linspace(0,100,11))
        for i in range(len(enbins)-1):
            mask=(df_data[f'{refjet}_en']>=enbins[i]) & (df_data[f'{refjet}_en']<enbins[i+1])
            x=df_data[mask]['CalibPuJet_en']
            x0=df_data[mask][f'{refjet}_en']

            #fitResponseWithModel takes the array of responses as single argument
            fit_report = fitResponseWithModel(r=(x/x0).values)
            sigmaR = fit_report['sigmaeff']
            sigmaR_unc =  np.sqrt((fit_report['sigmaeff']**2)/(2*mask.sum()))
            jer[r].append( [
                puMode,toaThr,toaWgtCat,
                df_data[mask][f'{refjet}_en'].mean(),
                sigmaR,
                sigmaR_unc] )
    jer[r] = pd.DataFrame(jer[r],columns=['puMode','toaThr','toaWgtCat',f'{refjet}_en_mean','sigma','sigma_unc'])

In [None]:
#plot resolution
def plotFinalResol(jer_keys, raw_mode, labels, refjet='GenJet', outname=None):

    """Draws the effective width of the response versus the mean reference jet energy.

    The profiles are read from the global jer dict.

    Parameters
    ----------
    jer_keys : keys of the jer dict to draw; the second element of each key is the charge threshold
    raw_mode : one flag per key; if True the rows with toaThr=-1 and toaWgtCat=-1 are drawn
               (in black), otherwise the ones with the charge threshold of the key and toaWgtCat=1
    labels : legend labels, one per key
    refjet : prefix of the reference jet energy column
    outname : if given the figure is saved as <_outdir>/<outname>.png, otherwise it is shown
    """

    fig,ax=plt.subplots(figsize=(8,8)) 
    
    for i,k in enumerate(jer_keys):
        ijer=jer[k]
        ebar_style={'marker':'o','elinewidth':1,'capsize':1,'ls':'none'}
        if raw_mode[i]:
            mask=(ijer['puMode']==0) & (ijer['toaThr']==-1) & (ijer['toaWgtCat']==-1) 
            ebar_style['color']='k'
        else:
            mask=(ijer['puMode']==0) & (ijer['toaThr']==k[1]) & (ijer['toaWgtCat']==1) 
        ax.errorbar(ijer[mask][f'{refjet}_en_mean'],
                    ijer[mask]['sigma'], 
                    yerr=ijer[mask]['sigma_unc'], 
                    **ebar_style, 
                    label=labels[i])

    plt.ylim(0.,0.4)
    #the effective width is computed from the 84th and 16th percentiles in fitResponseWithModel
    plt.ylabel(r'Eff. width = $1/2\cdot(q_{84}-q_{16})$')
    plt.xlabel(r'GenJet energy [GeV]')
    plt.legend(ncol=2,loc='upper left')
    plt.grid()
    hep.cms.label(loc=0,rlabel='(14 TeV)')
    ax.set_xlim(100,700)
    fig.tight_layout()
    if outname:
        plt.savefig(f'{_outdir}/{outname}.png')
        plt.close()
    else:
        plt.show()

#one plot per jet distance parameter: raw jets compared to the three charge thresholds
raw_mode=[True,False,False,False]
for cone,conetag in [(0.2,'02'),(0.4,'04')]:
    jer_keys=[(cone,12,False),(cone,12,False),(cone,24,False),(cone,50,False)]
    labels=[f'Raw (R={cone})',r'$w_{12}$',r'$w_{24}$',r'$w_{50}$']
    plotFinalResol(jer_keys,raw_mode,labels,outname=f'jer_profile_{conetag}')