# File: exercise_1.py
Description: Analyze the following Run I CMS H -> 4l data:
```
   N        = 25
   b_hat_zz =  6.8 +/- 0.3
   b_hat_zx =  2.6 +/- 0.4
   s_hat    = 17.3 +/- 1.3 (mH = 125 GeV)
            = 19.6 +/- 1.3 (mH = 126 GeV)
```
 which pertains to events in the four-lepton mass window 121.5 <= m4l <= 130.5 GeV
 in the combined 7 and 8 TeV data.
 
Make sure to follow the [twiki](https://twiki.cern.ch/twiki/bin/viewauth/CMS/SWGuideCMSDataAnalysisSchoolLPC2022StatisticsExercise#Exercise_1_Analyzing_a_single_co) along with this notebook!
 
 - Created: 18-Dec-2015 CMSDAS 2016, LPC Fermilab HBP
 - Updated: 10-Jan-2019 CMSDAS 2019, LPC Fermilab JMD
 - Updated: 03-Jan-2022 CMSDAS 2022, LPC Fermilab DRY

In [None]:
import os, sys, re
from time import sleep
import math
import ROOT

In [None]:
#-------------------------------------------------------------        
def createWorkspace(wsname, wsfilename):
    """Build the single-count H -> 4l model and write it to a ROOT file.

    The workspace contains

      * the model pdf ``model``:
            Poisson(N | mu*s + b_zz + b_zx)
              x Gaussian(b_zz | b_hat_zz, db_zz)
              x Gaussian(b_zx | b_hat_zx, db_zx)
              x Gaussian(s    | s_hat,    ds)
      * a uniform prior ``prior`` in (mu, s, b_zz, b_zx),
      * the named sets ``obs``, ``poi`` and ``nuis``,
      * a one-entry dataset ``data`` holding the observed count N,
      * a RooStats ModelConfig named ``cfg`` tying these together.

    Args:
        wsname: name given to the RooWorkspace.
        wsfilename: path of the ROOT file the workspace is written to.

    Returns:
        None. The workspace is printed and saved as a side effect.
    """
    # The most convenient way to use RooFit/RooStats is to
    # make a workspace so that we can use its factory method
    wspace = ROOT.RooWorkspace(wsname)

    #-----------------------------------------------------
    # Create parameters
    #
    # Use the factory method of the RooWorkspace to create
    # parameters
    #
    # syntax:
    #        <name>[value, min-value, max-value]
    #-----------------------------------------------------
    # observations
    observed = [('N', 25, 0, 50)]

    # estimates and their uncertainties; these are inputs to the
    # constraint terms and are held constant
    estimates = [
        # ZZ background estimate: 6.8 +/- 0.3
        ('b_hat_zz', 6.8, 0, 15),
        ('db_zz',    0.3, 0,  5),
        # Z+X background estimate: 2.6 +/- 0.4
        ('b_hat_zx', 2.6, 0, 15),
        ('db_zx',    0.4, 0,  5),
        # signal estimate (mH=125 GeV): 17.3 +/- 1.3
        ('s_hat',   17.3, 0, 25),
        ('ds',       1.3, 0,  5),
    ]

    # nuisance parameters
    nuisances = [
        ('b_zx',  2.6, 0, 10),
        ('b_zz',  6.8, 0, 15),
        ('s',    17.3, 0, 25),
    ]

    # parameter of interest
    poi = [('mu', 1.0, 0, 4)]

    for spec in observed + estimates + nuisances + poi:
        wspace.factory('{}[{}, {}, {}]'.format(*spec))
    wspace.var('mu').SetTitle('#mu')

    # fix all background and signal estimates. The group is named
    # explicitly rather than sliced out of one long list, so adding
    # a parameter cannot silently change what is held constant.
    for name, _value, _lo, _hi in estimates:
        print('=> make {:8s} = {:5.1f} constant'.format(name, wspace.var(name).getVal()))
        wspace.var(name).setConstant()

    #-----------------------------------------------------
    # Create expressions
    #
    # syntax:
    #        expr::<name>("expression", var1, var2, ...)
    #
    # The commented-out entries are the effective counts and
    # scale factors needed by the alternative scaled-Poisson
    # constraints listed (also commented out) further down.
    #-----------------------------------------------------
    exprs = [
        #'B_zz("(b_hat_zz/db_zz)^2", b_hat_zz, db_zz)',
        #'tau_zz("b_hat_zz/db_zz^2", b_hat_zz, db_zz)',

        #'B_zx("(b_hat_zx/db_zx)^2", b_hat_zx, db_zx)',
        #'tau_zx("b_hat_zx/db_zx^2", b_hat_zx, db_zx)',

        #'S("(s_hat/ds)^2",   s_hat, ds)',
        #'tau_s("s_hat/ds^2", s_hat, ds)',

        #'tau_zzb_zz("tau_zz*b_zz", tau_zz, b_zz)',
        #'tau_zxb_zx("tau_zx*b_zx", tau_zx, b_zx)',
        #'tau_ss("tau_s*s", tau_s, s)',

        # expected count entering the Poisson term
        'n("mu*s + b_zz + b_zx", mu, s, b_zz, b_zx)',
    ]

    for expr in exprs:
        wspace.factory('expr::{}'.format(expr))

    #print('\neffective counts and scale factors')
    #print('B_zz = {:8.2f}, tau_zz = {:8.2f}'.format(wspace.function('B_zz').getVal(),
    #                                        wspace.function('tau_zz').getVal()))

    #print('B_zx = {:8.2f}, tau_zx = {:8.2f}'.format(wspace.function('B_zx').getVal(),
    #                                        wspace.function('tau_zx').getVal()))

    #print('S    = {:8.2f}, tau_s  = {:8.2f}'.format(wspace.function('S').getVal(),
    #                                        wspace.function('tau_s').getVal()))

    #-----------------------------------------------------
    # Create pdfs
    #
    # syntax:
    #        pdf_name::<name>(var1, var2, ...)
    #
    # where the "Roo" prefix is dropped in pdf_name, e.g.
    # RooPoisson -> Poisson, RooGaussian -> Gaussian
    #-----------------------------------------------------
    pdfs = [
        ('Poisson',  'pN',    '(N, n)'),
        # Truncated Gaussian constraints (in general bad!)
        ('Gaussian', 'pB_zz', '(b_zz, b_hat_zz, db_zz)'),
        ('Gaussian', 'pB_zx', '(b_zx, b_hat_zx, db_zx)'),
        ('Gaussian', 'pS',    '(s, s_hat, ds)'),
        # scaled Poisson constraints (allowing non-integer B_zz)
        #('Poisson', 'pB_zz', '(B_zz, tau_zzb_zz, 1)'),
        #('Poisson', 'pB_zx', '(B_zx, tau_zxb_zx, 1)'),
        #('Poisson', 'pS',    '(S,    tau_ss,     1)'),
    ]

    for pdftype, pdfname, pdfargs in pdfs:
        wspace.factory('{}::{}{}'.format(pdftype, pdfname, pdfargs))

    # multiply the pdfs together. use upper case PROD to
    # do this
    prodpdf = ', '.join(pdfname for _pdftype, pdfname, _pdfargs in pdfs)
    wspace.factory('PROD::model({})'.format(prodpdf))

    # create a prior, since one is needed in Bayesian
    # calculations
    wspace.factory('Uniform::prior({mu, s, b_zz, b_zx})')

    #-----------------------------------------------------
    # Define a few useful sets. Now we need to decide
    # whether or not to include B and S in the set obs of
    # observations.
    #-----------------------------------------------------
    sets = [('obs',  'N'),           # observations
            ('poi',  'mu'),          # parameter of interest
            ('nuis', 's,b_zz,b_zx')] # nuisance parameters (leave no spaces)
    for setname, parlist in sets:
        wspace.defineSet(setname, parlist)

    #-----------------------------------------------------
    # create a dataset: a single entry holding the current
    # value of every variable in the set obs (here just N)
    #-----------------------------------------------------
    data = ROOT.RooDataSet('data', 'data', wspace.set('obs'))
    data.add(wspace.set('obs'))
    # import dataset into workspace
    # ("import" is a Python keyword, hence the getattr.)
    # need last argument to workaround a PyROOT "feature".
    # the last argument ensures the correct version of
    # the import method is called.
    getattr(wspace, 'import')(data, ROOT.RooCmdArg())

    #-----------------------------------------------------
    # Create model configuration. This is needed for the
    # statistical analyses
    #-----------------------------------------------------
    cfg = ROOT.RooStats.ModelConfig('cfg')
    cfg.SetWorkspace(wspace)
    cfg.SetPdf(wspace.pdf('model'))
    cfg.SetPriorPdf(wspace.pdf('prior'))
    cfg.SetParametersOfInterest(wspace.set('poi'))
    cfg.SetNuisanceParameters(wspace.set('nuis'))

    # import model configuration into workspace
    getattr(wspace, 'import')(cfg)

    wspace.Print()

    # write out workspace
    wspace.writeToFile(wsfilename)

In [None]:
def analyzeWorkspace(wsname, wsfilename):
    """Fit the saved model and compute intervals and limits on mu.

    Reads the workspace ``wsname`` from ``wsfilename`` together with
    its dataset ``data`` and model configuration ``cfg``, fits the pdf
    ``model`` to the data, and then computes, for the parameter ``mu``,

      * a 68.3% CL interval and a 95% upper limit from the
        profile likelihood, and
      * a 68.3% CL interval and a 95% upper limit from the
        Bayesian calculator.

    The results are printed, and the corresponding plots are drawn on
    two canvases that are also saved as PNG files.

    Args:
        wsname: name of the RooWorkspace stored in the file.
        wsfilename: path of the ROOT file to read.

    Returns:
        Tuple ``(plccanvas, bccanvas)``: the profile-likelihood and
        Bayesian canvases.

    Raises:
        IOError: if the file cannot be opened.
        RuntimeError: if the workspace, the dataset ``data`` or the
            model configuration ``cfg`` is not found.
    """
    # Open workspace file. Fail with a clear message instead of
    # dereferencing a null pointer further down.
    wsfile = ROOT.TFile.Open(wsfilename)
    if not wsfile or wsfile.IsZombie():
        raise IOError("cannot open workspace file '{}'".format(wsfilename))

    # Get workspace
    wspace = wsfile.Get(wsname)
    if not wspace:
        raise RuntimeError("workspace '{}' not found in '{}'".format(wsname, wsfilename))

    # Get data
    data = wspace.data('data')
    if not data:
        raise RuntimeError("dataset 'data' not found in workspace '{}'".format(wsname))

    # Get model configuration
    cfg = wspace.obj('cfg')
    if not cfg:
        raise RuntimeError("model configuration 'cfg' not found in workspace '{}'".format(wsname))

    mu = wspace.var('mu')

    #-----------------------------------------------------
    # Fit model to data
    #-----------------------------------------------------
    results = wspace.pdf('model').fitTo(data, ROOT.RooFit.Save())
    results.Print()

    #-----------------------------------------------------
    # Compute interval based on profile likelihood
    #-----------------------------------------------------
    # suppress some (apparently) innocuous warnings
    # NOTE: this is a global setting of the message service and
    # it is not restored when this function returns.
    msgservice = ROOT.RooMsgService.instance()
    msgservice.setGlobalKillBelow(ROOT.RooFit.FATAL)

    print('compute interval using profile likelihood')
    plc = ROOT.RooStats.ProfileLikelihoodCalculator(data, cfg)
    CL  = 0.683
    plc.SetConfidenceLevel(CL)
    plcInterval = plc.GetInterval()
    lowerLimit = plcInterval.LowerLimit(mu)
    upperLimit = plcInterval.UpperLimit(mu)

    print('\tPL {:4.1f}% CL interval = [{:5.2f}, {:5.2f}]'.format(100*CL, lowerLimit, upperLimit))

    plcplot = ROOT.RooStats.LikelihoodIntervalPlot(plcInterval)
    plccanvas = ROOT.TCanvas('fig_PL', 'PL', 500, 10, 850, 400)
    plccanvas.Divide(2, 1)
    plccanvas.cd(1)
    plcplot.SetRange(0, 4)
    plcplot.SetMaximum(3)
    plcplot.Draw()

    # compute a 95% upper limit on mu by
    # computing a 90% central interval and
    # ignoring the lower limit (a central 90% interval
    # leaves 5% in each tail, so its upper edge is quoted
    # as a one-sided 95% limit)
    CL = 0.90
    plc.SetConfidenceLevel(CL)
    plcInterval = plc.GetInterval()
    upperLimit = plcInterval.UpperLimit(mu)

    CL = 0.95
    print('\tPL {:4.1f}% upper limit = {:5.2f}\n'.format(100*CL, upperLimit))

    plccanvas.cd(2)
    plcplot2 = ROOT.RooStats.LikelihoodIntervalPlot(plcInterval)
    plcplot2.SetRange(0, 4)
    plcplot2.SetMaximum(3)
    plcplot2.Draw()
    plccanvas.Update()

    #-----------------------------------------------------
    # Compute interval based on Bayesian calculator
    #-----------------------------------------------------
    print('compute interval using Bayesian calculator')
    bc = ROOT.RooStats.BayesianCalculator(data, cfg)
    CL  = 0.683
    bc.SetConfidenceLevel(CL)
    bcInterval = bc.GetInterval()
    lowerLimit = bcInterval.LowerLimit()
    upperLimit = bcInterval.UpperLimit()

    print('\tBayes {:4.1f}% CL interval = [{:5.2f}, {:5.2f}]'.format(100*CL, lowerLimit, upperLimit))

    # calculate posterior density at 50 points
    print("\t\t...be patient...!")
    bc.SetScanOfPosterior(50)
    bcplot = bc.GetPosteriorPlot()
    bccanvas = ROOT.TCanvas('fig_Bayes', 'Bayes', 500, 10, 850, 400)
    bccanvas.Divide(2, 1)
    bccanvas.cd(1)
    bcplot.Draw()
    bccanvas.Update()

    # compute a 95% upper limit on mu
    CL  = 0.950
    bc.SetConfidenceLevel(CL)
    # 0   = upper limit
    # 0.5 = central limits (default)
    # 1   = lower limit
    bc.SetLeftSideTailFraction(0)
    bcInterval = bc.GetInterval()
    upperLimit = bcInterval.UpperLimit()

    print('\tBayes {:4.1f}% upper limit = {:5.2f}\n'.format(100*CL, upperLimit))

    # calculate posterior density at 50 points
    bc.SetScanOfPosterior(50)
    bcplot2 = bc.GetPosteriorPlot()
    bccanvas.cd(2)
    bcplot2.Draw()
    bccanvas.Update()

    # save canvases. A file name that starts with a dot gets the
    # canvas name put in front of it, i.e. fig_PL.png and
    # fig_Bayes.png
    plccanvas.Draw()
    bccanvas.Draw()
    plccanvas.SaveAs('.png')
    bccanvas.SaveAs('.png')
    return plccanvas, bccanvas

In [None]:
# Build the model workspace and save it to single_count.root
createWorkspace(wsname='CMSDAS', wsfilename='single_count.root')

In [None]:
# Read the saved workspace back and run the analysis; keep the
# returned canvases in notebook-level variables
plccanvas, bccanvas = analyzeWorkspace(wsname='CMSDAS',
                                       wsfilename='single_count.root')