# bias studies with full systematics

To assess the impact of the various sources of systematic uncertainty, we will rely on an Asimov dataset.

In [72]:
## imports and configuration
%cd '/home/naodell/work/wbr/analysis'
#%load_ext autoreload

from functools import partial
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from scipy.optimize import minimize
from tqdm import tqdm_notebook

import scripts.plot_tools as pt
import scripts.fit_helpers as fh
from nllfit.nllfitter import ScanParameters

np.set_printoptions(precision=3)
rc_params = {
             'figure.figsize': (10, 10),
             'axes.labelsize': 20,
             'axes.facecolor': 'white',
             'axes.titlesize':'x-large',
             'legend.fontsize': 20,
             'xtick.labelsize':18,
             'ytick.labelsize':18,
             'font.size':18,
             'font.sans-serif':['Arial', 'sans-serif'],
             'mathtext.sf':'Arial',
             'lines.markersize':8.,
             'lines.linewidth':2.5,
            }
matplotlib.rcParams.update(rc_params)

%connect_info

/home/naodell/work/wbr/analysis
{
  "shell_port": 34155,
  "iopub_port": 43731,
  "stdin_port": 51245,
  "control_port": 36901,
  "hb_port": 41795,
  "ip": "127.0.0.1",
  "key": "b20c3550-cabdd91c0aa753ba4d4a3801",
  "transport": "tcp",
  "signature_scheme": "hmac-sha256",
  "kernel_name": ""
}

Paste the above JSON into a file, and connect with:
    $> jupyter <app> --existing <file>
or, if you are local, you can connect with just:
    $> jupyter <app> --existing kernel-11c7b718-12d4-47a8-94a1-c10200df1996.json
or even just:
    $> jupyter <app> --existing
if this is the most recent Jupyter kernel you have started.


In [73]:
# configure, get the input data, and do any additional processing that is needed
input_dir  = f'local_data/templates/test_new/'
processes = ['ttbar', 't', 'ww', 'wjets', 'zjets_alt', 'diboson', 'fakes'] 
selections = [
              'ee', 'emu', 'mumu',  
              'mutau', 'etau', 
              'mu4j', 'e4j'
             ]
plot_labels = fh.fancy_labels

# initialize fit data
fit_data = fh.FitData(input_dir, selections, processes, process_cut=0.1)

In [None]:
# make some control plots 
params = fit_data._parameters
params_pre = fit_data.get_params_init().values

# test systematic uncertainties
ntoys = 100
pval, perr = params['val_init'].values[4:], params['err_init'].values[4:]
prand = pval + np.random.randn(pval.size, ntoys).T*perr 
prand = np.concatenate([np.outer(np.ones(ntoys), fit_data._beta_init), prand], axis=1)
toy_data = np.array([{category: fit_data.mixture_model(p, category) for category in fit_data._model_data.keys()} for p in prand]) 

# calculate covari ance matrix from the inverse of the Hessian of the NLL
results = []
for i, sample in enumerate(tqdm_notebook(toy_data)):
    result = minimize(fit_data.objective, params_pre, method = 'TNC', args = (sample))
    results.append(result.x)
    #dchi2.append(fit_data.objective(p_init, sample) - result.fun)
    #fobj = partial(fit_data.objective, data=sample, cost_type='poisson')
    #cov.append(fh.calculate_covariance(fobj, result.x))


HBox(children=(IntProgress(value=0), HTML(value='')))

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(30, 10), facecolor='white')

diff = (results - prand)
ax.plot(np.outer(np.ones(10), np.arange(17)), diff[:,:17], marker='o')
ax.grid(axis='y')
plt.show()