# File: exercise_1b.py
## Analyze the following Run I CMS $H \rightarrow 4 \ell$ data

 - Created: 18-Dec-2015 CMSDAS 2016, LPC Fermilab HBP
 - Updated: 10-Jan-2019 CMSDAS 2019, LPC Fermilab JMD
 - Updated: 03-Jan-2022 CMSDAS 2022, LPC Fermilab DRY
 - Updated: 07-Jan-2024 CMSDAS 2024, LPC Fermilab HBP

```
   N        = 25
   b_hat_zz =  6.8 +/- 0.3
   b_hat_zx =  2.6 +/- 0.4
   s_hat    = 17.3 +/- 1.3 (mH = 125 GeV)
            = 19.6 +/- 1.3 (mH = 126 GeV)
```
 which pertains to data in the range $121.5 \le m_{4\ell} \le 130.5\text{ GeV}$
 for 7 and 8 TeV data. The main backgrounds are $p p \rightarrow ZZ \rightarrow 4\ell$ and $p p \rightarrow Z + X \rightarrow 4\ell$, where $X$ is typically one or more jets.
 
## Introduction

See **exercise_1a**.

## Statistical Model

As before, we'll model the event count $X$ with a $\text{Poisson}(X, n)$ distribution, where the parameter $n$ is the mean count. In this version of **exercise_1**, we'll keep things simple and model the signal and background estimates with Gaussians. This is potentially problematic because signal and background estimates are constrained to be $\ge 0$, while the domain of a $\text{Gaussian}(x, \mu, \sigma)$ is $-\infty < x < +\infty$.
What saves us is that the estimated signal and background uncertainties are small compared with the estimates themselves and, therefore, almost all of the probability mass of each Gaussian lies far enough from zero to avoid problems with negative estimates.

The statistical model in this version of the single-count analysis is given by


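\begin{align}
    p(X, Y \mid \mu, \nu) & = \text{Poisson}(X, \mu  s + b_{ZZ} + b_{ZX}) \nonumber\\
                    & \times \text{Gaussian}(\hat{s}, s, \delta s) \nonumber\\
                    & \times \text{Gaussian}(\hat{b}_{ZZ}, b_{ZZ}, \delta b_{ZZ}) \nonumber\\
                    & \times \text{Gaussian}(\hat{b}_{ZX}, b_{ZX}, \delta b_{ZX}) ,
\end{align}

where $X = N$ is the observed count, $Y = (\hat{s}, \hat{b}_{ZZ}, \hat{b}_{ZX})$ are the signal and background estimates, $\mu$ is the signal strength (the parameter of interest), and $\nu = (s, b_{ZZ}, b_{ZX})$ are the nuisance parameters.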

In [1]:
import os, sys
import ROOT

Welcome to JupyROOT 6.28/06


In [2]:
def createWorkspace(wsname, wsfilename):
    """Build the single-count H -> 4l workspace and write it to a ROOT file.

    The workspace contains:
      * the observed count N and the (constant) signal/background
        estimates with their uncertainties,
      * the nuisance parameters s, b_zz, b_zx and the parameter of
        interest mu,
      * the pdf 'model' = Poisson(N, mu*s + b_zz + b_zx) times one
        Gaussian constraint per nuisance parameter,
      * a uniform pdf 'prior' for the Bayesian calculation,
      * the named sets 'obs', 'poi', 'nuis', a dataset 'data' holding
        the current value of N, and a RooStats ModelConfig 'cfg'.

    Args:
        wsname:     name given to the RooWorkspace.
        wsfilename: name of the ROOT file the workspace is written to.
    """
    # The most convenient way to use RooFit/RooStats is to
    # make a workspace so that we can use its factory method
    wspace = ROOT.RooWorkspace(wsname)

    #-----------------------------------------------------
    # Create parameters
    #
    # Use the factory method of the RooWorkspace to create
    # parameters
    #
    # syntax:
    #        <name>[value, min-value, max-value]
    #-----------------------------------------------------
    # observations
    observations = [('N',       25,     0,  50)]

    # estimates and their uncertainties; these are held constant
    estimates = [
        # ZZ background estimate
        ('b_hat_zz', 6.8,   0,  15),
        ('db_zz',    0.3,   0,   5),
        # Z+X background estimate: 2.6 +/- 0.4
        ('b_hat_zx', 2.6,   0,  15),
        ('db_zx',    0.4,   0,   5),
        # signal estimate (mH=125 GeV)
        ('s_hat',   17.3,   0,  25),
        ('ds',       1.3,   0,   5),
    ]

    # nuisance parameters
    nuisances = [('b_zx',    2.6,    0,  10),
                 ('b_zz',    6.8,    0,  15),
                 ('s',      17.3,    0,  25)]

    # parameter of interest
    interest = [('mu',      1.0,    0,   4)]

    for spec in observations + estimates + nuisances + interest:
        wspace.factory('{}[{}, {}, {}]'.format(*spec))
    wspace.var('mu').SetTitle('#mu')

    # Fix the signal and background estimates and their uncertainties.
    # The explicit list avoids relying on positions within a combined
    # parameter list.
    for name, _, _, _ in estimates:
        print('=> make {:8s} = {:5.1f} constant'.format(name, wspace.var(name).getVal()))
        wspace.var(name).setConstant()

    #-----------------------------------------------------
    # Create expression for the mean count n of the
    # Poisson distribution
    #
    # syntax:
    #        expr::<name>("expression", var1, var2, ...)
    #-----------------------------------------------------
    wspace.factory('expr::n("mu*s + b_zz + b_zx", mu, s, b_zz, b_zx)')

    #-----------------------------------------------------
    # Create pdfs
    #
    # syntax:
    #        pdf_name::<name>(var1, var2, ...)
    #
    # where the "Roo" prefix is dropped in pdf_name, e.g.
    # "Gaussian" stands for RooGaussian
    #-----------------------------------------------------
    pdfs = [('Poisson', 'pN',    '(N, n)'),
            # Gaussian constraints, truncated by the parameter ranges
            # (in general a poor choice for non-negative quantities)
            ('Gaussian', 'pB_zz', '(b_zz, b_hat_zz, db_zz)'),
            ('Gaussian', 'pB_zx', '(b_zx, b_hat_zx, db_zx)'),
            ('Gaussian', 'pS',    '(s, s_hat, ds)'),
           ]

    for pdf_type, pdf_name, pdf_args in pdfs:
        wspace.factory('{}::{}{}'.format(pdf_type, pdf_name, pdf_args))

    # multiply the pdfs together. use upper case PROD to
    # do this
    pdf_names = ', '.join(pdf_name for _, pdf_name, _ in pdfs)
    wspace.factory('PROD::model({})'.format(pdf_names))

    # create a prior, since one is needed in Bayesian
    # calculations
    wspace.factory('Uniform::prior({mu, s, b_zz, b_zx})')

    #-----------------------------------------------------
    # Define a few useful sets. We'll treat the signal
    # and background estimates as constants rather than
    # as auxiliary observations
    #-----------------------------------------------------
    sets = [('obs',  'N'),           # observations
            ('poi',  'mu'),          # parameter of interest
            ('nuis', 's,b_zz,b_zx')] # nuisance parameters (leave no spaces)
    for set_name, members in sets:
        wspace.defineSet(set_name, members)

    #-----------------------------------------------------
    # create a dataset with a single entry: the current
    # value of the observable N
    #-----------------------------------------------------
    data = ROOT.RooDataSet('data', 'data', wspace.set('obs'))
    data.add(wspace.set('obs'))

    # import dataset into workspace
    wspace.Import(data)

    #-----------------------------------------------------
    # Create model configuration. This is needed for the
    # statistical analyses
    #-----------------------------------------------------
    cfg = ROOT.RooStats.ModelConfig('cfg')
    cfg.SetWorkspace(wspace)
    cfg.SetPdf(wspace.pdf('model'))
    cfg.SetPriorPdf(wspace.pdf('prior'))
    cfg.SetParametersOfInterest(wspace.set('poi'))
    cfg.SetNuisanceParameters(wspace.set('nuis'))

    # import model configuration into workspace
    wspace.Import(cfg)

    wspace.Print()

    # write out workspace
    wspace.writeToFile(wsfilename)

In [None]:
# Build the workspace named 'CMSDAS' and store it in single_count_1.2.root
createWorkspace(wsname='CMSDAS', wsfilename='single_count_1.2.root')

In [5]:
def analyzeWorkspace(wsname, wsfilename):
    """Fit the stored model to the data and compute intervals on mu.

    Reads the workspace `wsname` from the ROOT file `wsfilename`, fits
    the pdf 'model' to the dataset 'data', and then computes, using the
    model configuration 'cfg':
      * a 68.3% CL interval and a 95% upper limit on mu from the
        profile likelihood,
      * a 68.3% CL interval and a 95% upper limit on mu from the
        Bayesian calculator.
    The results are printed, and the corresponding plots are drawn on
    two canvases that are saved as PNG files.

    Args:
        wsname:     name of the RooWorkspace stored in the file.
        wsfilename: name of the ROOT file containing the workspace.

    Returns:
        (plccanvas, bccanvas): the profile-likelihood and Bayesian canvases.

    Raises:
        OSError:      if the file cannot be opened.
        RuntimeError: if the workspace, the dataset 'data' or the model
                      configuration 'cfg' cannot be found.
    """
    # Open workspace file; fail early with a clear message instead of
    # a null-pointer error further down
    wsfile = ROOT.TFile.Open(wsfilename)
    if not wsfile or wsfile.IsZombie():
        raise OSError("cannot open workspace file '{}'".format(wsfilename))

    # Get workspace
    wspace = wsfile.Get(wsname)
    if not wspace:
        raise RuntimeError("workspace '{}' not found in '{}'".format(wsname, wsfilename))

    # Get data
    data = wspace.data('data')
    if not data:
        raise RuntimeError("dataset 'data' not found in workspace '{}'".format(wsname))

    # Get model configuration
    cfg = wspace.obj('cfg')
    if not cfg:
        raise RuntimeError("model configuration 'cfg' not found in workspace '{}'".format(wsname))

    #-----------------------------------------------------
    # Fit model to data
    #-----------------------------------------------------
    results = wspace.pdf('model').fitTo(data, ROOT.RooFit.Save())
    results.Print()

    #-----------------------------------------------------
    # Compute interval based on profile likelihood
    #-----------------------------------------------------
    # suppress some (apparently) innocuous warnings
    # (this changes the global message level for the rest of the session)
    msgservice = ROOT.RooMsgService.instance()
    msgservice.setGlobalKillBelow(ROOT.RooFit.FATAL)

    print('compute interval using profile likelihood')
    plc = ROOT.RooStats.ProfileLikelihoodCalculator(data, cfg)
    CL = 0.683
    plc.SetConfidenceLevel(CL)
    plcInterval = plc.GetInterval()
    lowerLimit = plcInterval.LowerLimit(wspace.var('mu'))
    upperLimit = plcInterval.UpperLimit(wspace.var('mu'))

    print('\tPL {:4.1f}% CL interval = [{:5.2f}, {:5.2f}]'.format(100*CL, lowerLimit, upperLimit))

    plcplot = ROOT.RooStats.LikelihoodIntervalPlot(plcInterval)
    plccanvas = ROOT.TCanvas('fig_PL_1.2', 'PL', 800, 400)
    plccanvas.Divide(2, 1)
    plccanvas.cd(1)
    plcplot.SetRange(0, 4)
    plcplot.SetMaximum(3)
    plcplot.Draw()

    #-----------------------------------------------------
    # compute a 95% upper limit on mu by
    # computing a 90% two-sided interval and
    # ignoring the lower limit
    #-----------------------------------------------------
    intervalCL = 0.90   # confidence level requested from the calculator
    CL = 0.95           # confidence level quoted for the upper limit
    plc.SetConfidenceLevel(intervalCL)
    plcInterval2 = plc.GetInterval()
    upperLimit = plcInterval2.UpperLimit(wspace.var('mu'))

    print('\tPL {:4.1f}% upper limit = {:5.2f}\n'.format(100*CL, upperLimit))

    plccanvas.cd(2)
    plcplot2 = ROOT.RooStats.LikelihoodIntervalPlot(plcInterval2)
    plcplot2.SetRange(0, 4)
    plcplot2.SetMaximum(3)
    plcplot2.Draw()
    plccanvas.Update()

    #-----------------------------------------------------
    # Compute interval based on Bayesian calculator
    #-----------------------------------------------------
    print('compute interval using Bayesian calculator')
    bc = ROOT.RooStats.BayesianCalculator(data, cfg)
    CL = 0.683
    bc.SetConfidenceLevel(CL)
    bcInterval = bc.GetInterval()
    lowerLimit = bcInterval.LowerLimit()
    upperLimit = bcInterval.UpperLimit()

    print('\tBayes {:4.1f}% CL interval = [{:5.2f}, {:5.2f}]'.format(100*CL, lowerLimit, upperLimit))

    # calculate posterior density at 50 points
    print("\t\t...be patient...!")
    bc.SetScanOfPosterior(50)
    bcplot = bc.GetPosteriorPlot()
    bccanvas = ROOT.TCanvas('fig_Bayes_1.2', 'Bayes', 800, 400)
    bccanvas.Divide(2, 1)
    bccanvas.cd(1)
    bcplot.Draw()
    bccanvas.Update()

    # compute a 95% upper limit on mu
    CL = 0.950
    bc.SetConfidenceLevel(CL)
    # 0   = upper limit
    # 0.5 = central limits (default)
    # 1   = lower limit
    bc.SetLeftSideTailFraction(0)
    bcInterval2 = bc.GetInterval()
    upperLimit = bcInterval2.UpperLimit()

    print('\tBayes {:4.1f}% upper limit = {:5.2f}\n'.format(100*CL, upperLimit))

    # calculate posterior density at 50 points
    bc.SetScanOfPosterior(50)
    bcplot2 = bc.GetPosteriorPlot()
    bccanvas.cd(2)
    bcplot2.Draw()
    bccanvas.Update()

    # save canvases; a file name that starts with a dot is prefixed
    # with the canvas name, giving fig_PL_1.2.png and fig_Bayes_1.2.png
    plccanvas.Draw()
    bccanvas.Draw()
    plccanvas.SaveAs('.png')
    bccanvas.SaveAs('.png')
    return plccanvas, bccanvas

In [None]:
# Run the analysis on the stored workspace; keep the returned canvases
plccanvas, bccanvas = analyzeWorkspace(wsname='CMSDAS',
                                       wsfilename='single_count_1.2.root')