## Estimating individual systematic contributions

The effect of any individual systematic uncertainty is somewhat complicated by its mutual covariance with the POI (i.e., the W branching fractions) and with any other systematic uncertainty.  To get a rough idea of the percent-wise contribution to the total uncertainty from each individual systematic, I use the following scheme:

   * the fit is carried out as in the nominal case and $\sigma_{0}$ is estimated
   * the fit is repeated for each of the $n$ nuisance parameters $\theta$, with that parameter held fixed, and $\sigma_{\theta}$ is estimated
   * the difference between the nominal case and the $n-1$ case is calculated,
   * this quantity is normalized to $\sum_{\theta} \sigma_{\theta}^{2}$

In [1]:
## imports and configuration
# NOTE(review): absolute, user-specific path -- the notebook only runs as-is on
# the original machine.  The relative 'local_data/...' paths used further down
# are resolved from this directory, and presumably so is the local `scripts`
# package imported below (confirm).
%cd '/home/naodell/work/wbr/analysis'
#%load_ext autoreload

import pickle
from functools import partial
from multiprocessing import Pool

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from tqdm.notebook import tqdm

import scripts.fit_helpers as fh
import scripts.plot_tools as pt

# Print numpy arrays with three digits of precision.
np.set_printoptions(precision=3)

# Notebook-wide matplotlib style, pushed into the global rcParams once.
rc_params = dict([
    # figure and axes
    ('figure.figsize',   (10, 10)),
    ('axes.labelsize',   20),
    ('axes.facecolor',   'white'),
    ('axes.titlesize',   'x-large'),
    # legend and tick labels
    ('legend.fontsize',  20),
    ('xtick.labelsize',  18),
    ('ytick.labelsize',  18),
    # fonts
    ('font.size',        18),
    ('font.sans-serif',  ['Arial', 'sans-serif']),
    ('mathtext.sf',      'Arial'),
    # markers and lines
    ('lines.markersize', 8.),
    ('lines.linewidth',  2.5),
])
matplotlib.rcParams.update(rc_params)

%connect_info

/home/naodell/work/wbr/analysis
{
  "shell_port": 47665,
  "iopub_port": 45897,
  "stdin_port": 34439,
  "control_port": 41775,
  "hb_port": 59995,
  "ip": "127.0.0.1",
  "key": "b1af99e8-22621fb4710c87176d42c32e",
  "transport": "tcp",
  "signature_scheme": "hmac-sha256",
  "kernel_name": ""
}

Paste the above JSON into a file, and connect with:
    $> jupyter <app> --existing <file>
or, if you are local, you can connect with just:
    $> jupyter <app> --existing kernel-cd56101a-37e8-4a42-a9bd-a2dc100f0478.json
or even just:
    $> jupyter <app> --existing
if this is the most recent Jupyter kernel you have started.


In [2]:
# configure, get the input data, and do any additional processing that is needed
# initialize fit data
scenario = 'fake_factorization_tight'

# NOTE(review): unpickling can execute arbitrary code stored in the file; only
# load fit data that was produced locally by a trusted job.
# The context manager closes the handle even if unpickling raises.
with open(f'local_data/fit_data_{scenario}.pkl', 'rb') as infile:
    fit_data = pickle.load(infile)

# get fit parameters (a copy, so edits made here do not touch fit_data._parameters)
parameters = fit_data._parameters.copy()

# fit configuration #
# Options handed to scipy.optimize.minimize through its `options` argument.
# Earlier experiments with per-parameter finite-difference steps
# (finite_diff_rel_step), `verbose` and `eps` are disabled and not part of the
# active configuration.
min_options = {
    'gtol': 1e-3,   # gradient-norm threshold for successful termination
    'disp': True,   # print the convergence summary after each minimization
}

# configure the objective
# The objective and its jacobian are bound to the same keyword arguments so
# that the minimizer only has to supply the parameter vector.
sample = None
fobj, fobj_jac = (
    partial(method, data=sample, do_bb_lite=True, lu_test=2)
    for method in (fit_data.objective, fit_data.objective_jacobian)
)

# Sanity check: rerun the nominal fit starting from the initial values of the
# unmasked parameters; the first four fitted values (printed in percent) should
# reproduce the stored parameters.val_fit.
res_mle = minimize(
    fobj,
    parameters.val_init.values[fit_data._pmask],
    method='BFGS',
    jac=fobj_jac,
    options=min_options,
)
print(100*res_mle.x[:4], 100*parameters.val_fit[:4].values)

         Current function value: 258.679862
         Iterations: 74
         Function evaluations: 137
         Gradient evaluations: 125
[10.832 10.935 10.768 67.464] [10.832 10.935 10.768 67.464]


In [3]:
# calculate impacts for each nuisance parameter
#
# Every active nuisance parameter (n.p.) is held fixed at its post-fit value
# plus / minus one post-fit uncertainty, the remaining parameters are
# re-minimized, and the shift of the full parameter vector with respect to the
# nominal best fit is stored per n.p. (up and down separately).

# initialize parameter data
#fit_data._pmask     = parameters['active'].values.astype(bool)
#fit_data._pval_init = parameters['val_fit'].values.copy()

n_poi = 4  # the leading rows are the parameters of interest; n.p. follow them

impacts_up, impacts_down = dict(), dict()
# NOTE: `mask` and `p_init` are references to fit_data's own arrays (no copy),
# so the in-place edits below are visible to fit_data -- presumably this is how
# the objective learns which parameter is fixed and at what value (confirm in
# scripts.fit_helpers).  Both are restored after every parameter.
mask   = fit_data._pmask
p_init = fit_data._pval_fit
p_fit  = parameters.val_fit.values
p_err  = parameters.err_fit.values

nuisance = parameters.iloc[n_poi:]
# the bar advances for every row that is iterated (inactive rows included), so
# the total is the number of rows rather than the number of active parameters
progress = tqdm(nuisance.iterrows(), total=nuisance.shape[0])
for iparam, (pname, pdata) in enumerate(progress, start=n_poi):
    if not pdata.active:
        continue

    tqdm.write(f'{iparam}, {pname}, {pdata.val_init:.3f}, {pdata.err_init:.3f}, {pdata.val_fit:.3f}, {pdata.err_fit:.3f}, {fobj(p_init[mask]):.3f}')

    # calculate impacts from up/down variations of n.p. on p.o.i.
    p_tmp = p_init.copy()
    p_nominal = p_init[iparam]  # remembered so the shared array is restored exactly
    mask[iparam] = False        # remove the n.p. from the set of free parameters
    try:
        for sign, impacts in ((+1, impacts_up), (-1, impacts_down)):
            p_init[iparam] = pdata.val_fit + sign*pdata.err_fit
            res = minimize(fobj, p_fit[mask],
                           jac     = fobj_jac,
                           #hess    = 'cs',
                           #method  = 'trust-constr',
                           method  = 'BFGS',
                           options = min_options,
                          )
            p_tmp[mask] = res.x
            impacts[pname] = p_tmp - p_fit
            tqdm.write(f'{(res.x[:n_poi] - p_fit[:n_poi])/p_err[:n_poi]}, {res.fun}')
    finally:
        # always undo the in-place edits, even if a minimization fails, and put
        # back the value that was there before (not val_init, which would
        # silently overwrite the stored best-fit entry)
        mask[iparam] = True
        p_init[iparam] = p_nominal
    
# convert impacts to dataframes
# One row per fit parameter (parameters.index) and one column per nuisance
# parameter, with the variation direction appended to the column name.
df_up, df_down = (
    pd.DataFrame(shifts, index=parameters.index).add_suffix(suffix)
    for shifts, suffix in ((impacts_up, '_up'), (impacts_down, '_down'))
)

  0%|          | 0/120 [00:00<?, ?it/s]

4, br_tau_e, 0.177, 0.000, 0.177, 0.000, 948.794
         Current function value: 259.180292
         Iterations: 4
         Function evaluations: 61
         Gradient evaluations: 49
[-0.007 -0.001 -0.004  0.005], 259.18029181528436
         Current function value: 259.180195
         Iterations: 7
         Function evaluations: 61
         Gradient evaluations: 49
[-0.005  0.01  -0.01   0.006], 259.18019479227775
5, br_tau_mu, 0.173, 0.000, 0.173, 0.000, 948.794
         Current function value: 259.190198
         Iterations: 5
         Function evaluations: 57
         Gradient evaluations: 47
[-0.004 -0.    -0.006  0.006], 259.19019794917904
         Current function value: 259.186276
         Iterations: 7
         Function evaluations: 52
         Gradient evaluations: 40
[-0.005  0.006 -0.002  0.001], 259.18627587203247
6, br_tau_h, 0.650, 0.001, 0.649, 0.001, 948.794
         Current function value: 259.202908
         Iterations: 4
         Function evaluations: 66
         Gr

In [4]:
# save impacts
# Up and down shifts are placed side by side and written out per scenario.
impacts_np = pd.concat((df_up, df_down), axis='columns')
impacts_np.to_csv(f'local_data/impacts_{scenario}.csv')

# Display settings: four decimals, and enough rows/columns to show a table
# with one entry per fit parameter without truncation.
pd.set_option('display.float_format', lambda value: f'{value:.4f}')
pd.set_option('display.max_columns', len(parameters))
pd.set_option('display.max_rows', len(parameters))

# shifts induced by the electron reconstruction efficiency n.p., scaled by 100
100*impacts_np[['eff_reco_e_up', 'eff_reco_e_down']]

Unnamed: 0_level_0,eff_reco_e_up,eff_reco_e_down
name,Unnamed: 1_level_1,Unnamed: 2_level_1
beta_e,-0.0526,0.05
beta_mu,0.0111,-0.0102
beta_tau,0.0051,0.0093
beta_h,0.0364,-0.049
br_tau_e,0.0,0.0003
br_tau_mu,0.0002,-0.0001
br_tau_h,-0.0003,-0.0003
lumi,-0.0884,0.178
xs_gjets,0.5015,-1.1367
xs_diboson,1.3701,-0.2622


In [5]:
# convert to errors
# NOTE(review): `errs` and `p_labels` are not defined in any cell visible in
# this notebook; the recorded output of this cell is
# "NameError: name 'errs' is not defined".  Presumably `errs` holds one
# uncertainty vector per nuisance parameter from fits with that parameter
# fixed -- TODO restore the cell that produces it (and `p_labels`).
err_no_bb = parameters['err_no_bb'].values[mask]
# each row `e` gets a 0 inserted at position i+4, i.e. the row is padded by one
# entry at the slot offset by four from its own index
errs_np = np.array([np.concatenate([e[:i+4], [0], e[i+4:]]) for i, e in enumerate(errs)])
# difference of squares; the clipping and square root below are disabled, so
# the table holds squared differences rather than uncertainties
errs_np = err_no_bb**2 - errs_np**2
#errs_np[errs_np < 0] = 0
#errs_np = np.sqrt(errs_np)
##errs_np = np.vstack([errs_np, err_stat, err_mc_stat, err_syst, stderr])
#
# keep only the first four columns and label them
errs_np = pd.DataFrame(errs_np[:,:4], columns=['beta_e', 'beta_mu', 'beta_tau', 'beta_h'], index=list(p_labels))
##errs_np = pd.DataFrame(errs_np[:,:4], columns=['beta_e', 'beta_mu', 'beta_tau', 'beta_h'], index=list(p_labels_fancy[4:]) + ['stat.', 'MC stat.', 'syst. total', 'total'])
##beta_stderr = stderr.iloc[:,:4].multiply(100)/params_init[:4]
100*errs_np

NameError: name 'errs' is not defined

In [None]:
# print table
# NOTE(review): `beta_errs` is not defined in any cell visible in this
# notebook -- TODO confirm where it is produced.

#beta_errs.divide(params_init[:4]/100, axis=1).to_latex('local_data/errors.tex')
#beta_errs.divide(params_init[:4]/100, axis=1).to_csv('local_data/errors.csv')
#beta_errs.divide(params_init[:4]/100, axis=1)
# NOTE(review): the summary cell further down writes `summary_errs` to the same
# 'local_data/summary_errors.tex' path, so the file written here is overwritten
# when the notebook is run top to bottom -- confirm the intended file name.
beta_errs.to_latex('local_data/summary_errors.tex', escape=False)
beta_errs

In [None]:
# Build the summary error table: related sources (b-tag, JES, jet->tau
# misidentification) are merged in quadrature into one row each, all other
# rows of `beta_errs` are kept as they are.
#
# NOTE(review): `beta_errs` and `params_init` are not defined in any cell
# visible in this notebook -- TODO confirm where they are produced.

def quadrature_sum(errs):
    """Combine the rows of an error table in quadrature, column by column.

    Parameters
    ----------
    errs : pandas.DataFrame
        One row per uncertainty source, one column per quantity.

    Returns
    -------
    pandas.Series
        sqrt(sum of squares) over the rows, indexed by the columns of `errs`.
    """
    # explicit axis=0: reduce over rows only, independent of the default axis
    # that a numpy-level reduction of a DataFrame would pick
    return np.sqrt((errs**2).sum(axis=0))


# boolean row selectors (dtype forced to bool so `~` also works on an empty index)
row_names = list(beta_errs.index)
jes_mask = np.array(['jes' in pname and 'btag' not in pname for pname in row_names], dtype=bool)
btag_mask = np.array(['btag' in pname for pname in row_names], dtype=bool)
tau_misid_mask = np.array(
    ['misid_tau' in pname and pname not in ('misid_tau_e', 'misid_tau_h') for pname in row_names],
    dtype=bool,
)

btag_errs = beta_errs[btag_mask]
jes_errs = beta_errs[jes_mask]
tau_misid_errs = beta_errs[tau_misid_mask]

# rows that belong to none of the merged groups
summary_errs = beta_errs[~(btag_mask | jes_mask | tau_misid_mask)].copy()
# use the label stored in fit_data._parameters where the row name is a known parameter
summary_errs.index = [fit_data._parameters.loc[p].label if p in fit_data._parameters.index else p for p in summary_errs.index]
summary_errs.loc['b-tag',:] = quadrature_sum(btag_errs)
summary_errs.loc['JES',:]  = quadrature_sum(jes_errs)
summary_errs.loc[r'$\sf jet\rightarrow\tau$',:]  = quadrature_sum(tau_misid_errs)

# express each column as a percentage of the corresponding entry of params_init[:4]
summary_errs = summary_errs.divide(params_init[:4]/100, axis=1)
summary_errs.to_latex('local_data/summary_errors.tex', escape=False)
summary_errs.to_csv('local_data/summary_errors.csv')
summary_errs