# bias studies with full systematics

To assess the impact of the various sources of systematic uncertainty, we will rely on an Asimov dataset.

In [1]:
## imports and configuration
# NOTE(review): hardcoded absolute path to one user's checkout. Relative paths
# used later in the notebook (e.g. 'local_data/templates/test_new/') are
# presumably resolved against this directory, so the notebook only runs where
# this path exists.
%cd '/home/naodell/work/wbr/analysis'
# the autoreload extension is left disabled
#%load_ext autoreload

from functools import partial
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
from scipy.optimize import minimize
from scipy.stats import poisson, norm, chi2
from tqdm import tqdm_notebook

import scripts.plot_tools as pt
import scripts.fit_helpers as fh
from nllfit.nllfitter import ScanParameters

# print numpy arrays with four digits of precision
np.set_printoptions(precision=4)

# matplotlib defaults applied to every figure drawn after this cell
rc_params = dict([
    # figure and axes
    ('figure.figsize', (10, 10)),
    ('axes.labelsize', 20),
    ('axes.facecolor', 'white'),
    ('axes.titlesize', 'x-large'),
    # legend and tick labels
    ('legend.fontsize', 20),
    ('xtick.labelsize', 20),
    ('ytick.labelsize', 20),
    # fonts
    ('font.size', 18),
    ('font.sans-serif', ['Arial', 'sans-serif']),
    ('mathtext.sf', 'Arial'),
    # markers and lines
    ('lines.markersize', 8.),
    ('lines.linewidth', 2.5),
])
matplotlib.rcParams.update(rc_params)

# Prints the running kernel's connection details so that another Jupyter
# front end can attach to it.
# NOTE(review): the printed JSON includes the kernel's signing key; clear this
# cell's output before sharing or committing the notebook.
%connect_info

/home/naodell/work/wbr/analysis
{
  "shell_port": 45759,
  "iopub_port": 43783,
  "stdin_port": 35009,
  "control_port": 44465,
  "hb_port": 46823,
  "ip": "127.0.0.1",
  "key": "94b4cf33-d67d0aa6b745c60dd62dadb1",
  "transport": "tcp",
  "signature_scheme": "hmac-sha256",
  "kernel_name": ""
}

Paste the above JSON into a file, and connect with:
    $> jupyter <app> --existing <file>
or, if you are local, you can connect with just:
    $> jupyter <app> --existing kernel-f3afdb77-1936-4582-b005-c0b4c99f41ac.json
or even just:
    $> jupyter <app> --existing
if this is the most recent Jupyter kernel you have started.


In [2]:
# configure, get the input data, and do any additional processing that is needed

# input directory handed to the fit helper (relative to the working directory)
input_dir = 'local_data/templates/test_new/'

# process names handed to the fit helper
processes = ['ttbar', 't', 'ww', 'wjets', 'zjets_alt', 'diboson', 'fakes']

# selection names handed to the fit helper
selections = [
    'ee', 'emu', 'mumu',
    'mutau', 'etau',
    'mu4j', 'e4j',
]

# initialize fit data
# NOTE(review): the meaning of process_cut=0.1 is defined inside fh.FitData and
# is not visible from this notebook -- document it here once confirmed
fit_data = fh.FitData(input_dir, selections, processes, process_cut=0.1)

In [None]:
# generate toy data and fit w/ alt and null hypotheses
params = fit_data._parameters
params_pre = fit_data.get_params_init().values
nparams = params.shape[0]

# initial values and uncertainties of the parameters
# (not used further in this cell)
pval, perr = params['val_init'].values, params['err_init'].values

# toy configuration; the generator is seeded so that re-running the cell
# reproduces the same toys
ntoys = 100
toy_seed = 42
toy_rng = np.random.RandomState(toy_seed)

# minimizer settings shared by both fits
fit_options = dict(maxiter=500, ftol=1e-6, disp=False)

# null hypothesis: lepton universality
# The starting vector keeps entry 0 and drops entries 1-3 of the full parameter
# vector; fh.objective_lu presumably expands it back before calling the wrapped
# objective -- TODO confirm against fit_helpers.
pinit_null = np.concatenate([[params_pre[0]], params_pre[4:]])
objective_null = partial(fh.objective_lu, objective=fit_data.objective)

# alt hypothesis: independent bf, every parameter starts from its initial value
pinit_alt = params_pre

# fit toy data
nll_null, nll_alt = [], []
params_null, params_alt = [], []
for itoy in tqdm_notebook(range(ntoys)):

    # generate toy data: Poisson-fluctuate the first output of the mixture model
    # in each category and carry its second output along unchanged
    sample = dict()
    for category in fit_data._model_data.keys():
        model_val, model_var = fit_data.mixture_model(params_pre, category)
        sample[category] = poisson.rvs(model_val, random_state=toy_rng), model_var

    # `args` must be a tuple; `(sample)` without the trailing comma is just the dict
    result_null = minimize(objective_null, pinit_null,
                           method='SLSQP',
                           options=fit_options,
                           args=(sample,),
                           )
    result_alt = minimize(fit_data.objective, pinit_alt,
                          method='SLSQP',
                          options=fit_options,
                          args=(sample,),
                          )

    print(fit_data.objective(params_pre, sample), result_null.fun, result_alt.fun, result_null.x[0], result_alt.x[:4])

    # keep a toy only if both minimizations report success
    # NOTE(review): `success` does not guarantee result_alt.fun <= result_null.fun.
    # If the alt model nests the null one (TODO confirm), a toy with
    # result_alt.fun > result_null.fun points to an alt fit that stopped away
    # from its minimum, and it yields a negative test statistic downstream.
    if result_alt.success and result_null.success:
        nll_null.append(result_null.fun)
        params_null.append(result_null.x)
        nll_alt.append(result_alt.fun)
        params_alt.append(result_alt.x)

nll_null = np.array(nll_null)
nll_alt = np.array(nll_alt)
params_null = np.array(params_null)
params_alt = np.array(params_alt)

# make the number of dropped toys visible instead of discarding them silently
print(f'{nll_null.size}/{ntoys} toys converged under both hypotheses')

HBox(children=(IntProgress(value=0), HTML(value='')))

205.77467961689655 191.33687022539215 189.98604930419074 0.10781761480184984 [0.107  0.108  0.1083 0.6767]
183.52666630575416 173.77743076591935 172.04847027970513 0.1079376678398209 [0.107  0.1084 0.1069 0.6777]
200.8811099336991 188.16952681067642 187.11497197441207 0.10816705270010209 [0.1078 0.1081 0.1096 0.6745]
214.21979582403605 197.35369378218013 196.78358915403834 0.10815085282409044 [0.1077 0.1082 0.1088 0.6753]
192.1878522972769 176.17239537816397 175.1954401753372 0.10871252113840822 [0.1086 0.1085 0.1103 0.6726]
162.80938307646065 144.64374237576556 144.5211350045541 0.10866307810043192 [0.1084 0.1088 0.1083 0.6745]
204.7692473451037 190.68159182222072 190.3277900257286 0.10771972606148505 [0.1074 0.1078 0.1083 0.6765]
191.0358618648506 172.22085427959425 171.62442059822763 0.10877955164610252 [0.1084 0.1088 0.1096 0.6732]
186.94299387313413 171.29508992957037 187.75127739455962 0.10765529740543568 [0.108 0.108 0.108 0.676]
174.52433095047806 157.19844129317326 156.9132999

204.82432593226903 185.40665799733145 182.92140275855672 0.10723995930880849 [0.1072 0.1076 0.1049 0.6804]
200.94199765640943 186.34206360682538 186.2990386549659 0.1083305637132189 [0.1084 0.1083 0.1087 0.6747]
195.8651885032593 180.70884674190472 180.31377028911544 0.10768884454922603 [0.1082 0.1075 0.1077 0.6766]


In [None]:
# make some plots

# test statistic per toy: twice the difference between the values stored in
# nll_null and nll_alt
test_stat = 2*(nll_null - nll_alt)

fig, ax = plt.subplots(1, 1, figsize=(10, 10), facecolor='white')

# fine grid for the reference curve
x = np.linspace(0, 10, 1000)

# 20 bins of width 0.5 covering [0, 10]; entries outside this range (including
# any negative test statistics) are not drawn
bin_edges = np.linspace(0, 10, 21)
ax.hist(test_stat, bins=bin_edges, density=True, label='toys')

# NOTE(review): the reference curve uses 2 degrees of freedom -- confirm that
# this matches the number of parameters fixed by the null hypothesis
ax.plot(x, chi2.pdf(x, 2), 'r--', label=r'$\chi^2$ (2 dof)')

ax.set_xlabel(r'$2\,(\mathrm{NLL}_{null} - \mathrm{NLL}_{alt})$')
ax.set_ylabel('probability density')
ax.legend()

plt.tight_layout()
plt.show()