# systematics with the Asimov dataset

To assess the impact of the various sources of systematic uncertainty, we will rely on an Asimov dataset.

In [4]:
## imports and configuration
# Switch the kernel's working directory to the analysis checkout.
# NOTE(review): this is a hardcoded absolute path to one user's home directory; the
# relative paths used further down ('local_data/...', 'plots/...') and the
# `scripts.*` imports resolve against it, so the notebook only runs where this
# directory exists.
%cd '/home/naodell/work/wbr/analysis'

from functools import partial
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from scipy.optimize import minimize
from tqdm import tqdm_notebook

import scripts.plot_tools as pt
import scripts.fit_helpers as fh
from nllfit.nllfitter import ScanParameters

np.set_printoptions(precision=3)
matplotlib.style.use('default')
params = {'legend.fontsize': 20,
          'axes.labelsize': 20,
          'figure.figsize': (8, 8),
          'axes.facecolor': 'white',
          'axes.titlesize':'x-large',
          'xtick.labelsize':18,
          'ytick.labelsize':18,
         }
matplotlib.rcParams.update(params)
%matplotlib inline
%connect_info

/home/naodell/work/wbr/analysis
{
  "shell_port": 48151,
  "iopub_port": 45795,
  "stdin_port": 46073,
  "control_port": 53565,
  "hb_port": 45271,
  "ip": "127.0.0.1",
  "key": "b6aa3a10-bc09ae9d97d122698e4b917d",
  "transport": "tcp",
  "signature_scheme": "hmac-sha256",
  "kernel_name": ""
}

Paste the above JSON into a file, and connect with:
    $> jupyter <app> --existing <file>
or, if you are local, you can connect with just:
    $> jupyter <app> --existing kernel-ac776fe5-7381-400b-a4ab-3657b9c42b71.json
or even just:
    $> jupyter <app> --existing
if this is the most recent Jupyter kernel you have started.


In [5]:
# configure, get the input data, and do any additional processing that is needed
input_dir  = 'local_data/templates/updated_e_sf/'
selections = [
              'ee', 'mumu',
              'emu',
              'mutau', 'etau',
              'mu4j', 'e4j'
             ]
processes = ['ttbar', 't', 'ww', 'wjets', 'zjets_alt', 'diboson', 'fakes']
n_selection = len(selections)

# per-selection plot labels: [x-axis label, selection label]
plot_labels = dict(
                   mumu  = [r'$\sf p_{T, \mu}$', r'$\mu\mu$'],
                   ee    = [r'$\sf p_{T, e}$', r'$ee$'],
                   emu   = [r'$\sf p_{T, \ell}$', r'$e\mu$'],
                   mutau = [r'$\sf p_{T, \tau}$', r'$\mu\tau$'],
                   etau  = [r'$\sf p_{T, \tau}$', r'$e\tau$'],
                   mu4j  = [r'$\sf p_{T, \mu}$', r'$\mu$ + jets'],
                   e4j   = [r'$\sf p_{T, e}$', r'$e$ + jets']
                  )

# initialize fit data
fit_data   = fh.FitData(input_dir, selections, processes)
model_data = fit_data._model_data

# get post fit uncertainties
# (the parameter table is taken from the pulls file, not from fit_data._parameters)
parameters = pd.read_csv('local_data/pulls.csv', index_col='name')

# NOTE: 'norm'-type uncertainties are used exactly as stored; a /100 rescaling of
# them was previously disabled here.

# Tau branching-fraction uncertainties are rescaled by val_init/100 -- presumably
# they are stored as a percentage of the nominal value (TODO confirm against the
# code that writes pulls.csv).  The previous expression multiplied each error by
# *itself*/100, i.e. it squared the uncertainty instead of scaling it by the
# nominal value.
tau_br_params = ['br_tau_e', 'br_tau_mu', 'br_tau_h']
parameters.loc[tau_br_params, 'err_init'] *= parameters.loc[tau_br_params, 'val_init']/100

# 'err_fit' is deliberately left as read from the file: the plotting loop applies
# the same val_init/100 rescaling to the post-fit error of these three parameters
# at the point of use, so rescaling it here as well would apply the factor twice.

In [6]:
# plot uncertainties
#
# For every entry of `model_data` whose jet category is known to `pt.categories`,
# and for every parameter flagged for that lepton selection, save one figure
# showing the fractional change of the model prediction per bin when the
# parameter is shifted up/down by its pre-fit and post-fit uncertainty, overlaid
# on the fractional statistical uncertainties.  Figures are written to
# plots/systematics/sources/<lepton category>/<jet category>/<parameter>.png

TAU_BR_PARAMS = ('br_tau_e', 'br_tau_mu', 'br_tau_h')
# the first four rows of the parameter table are never plotted
# (presumably the parameters of interest -- TODO confirm)
N_LEADING_SKIPPED = 4


def relative_variation(ix, shift, category, nominal):
    """Fractional per-bin change of the model when parameter `ix` is moved by `shift`.

    A copy of the nominal parameter vector is evaluated with entry `ix` replaced by
    `param_init[ix] + shift`; the result is (shifted - nominal)/nominal, where
    `nominal` is the first element returned by `fit_data.mixture_model` at the
    nominal parameter values.
    """
    pval = param_init.copy()
    pval[ix] = param_init[ix] + shift
    shifted = fit_data.mixture_model(pval, category)
    return (shifted[0] - nominal)/nominal


param_init = parameters['val_init'].values
stat_bar1 = tqdm_notebook(model_data.items(), leave=True)
for category, data in stat_bar1:
    stat_bar1.set_description(category)
    lep_category, jet_category = category.split('_', 1)

    if jet_category not in pt.categories.keys():
        continue

    sdata = fit_data.get_selection_data(lep_category)
    bins = sdata[jet_category]['bins']

    # bin widths and centres; these only depend on the category
    dx = (bins[1:] - bins[:-1])
    x  = bins[:-1] + dx/2

    template_nominal = fit_data.mixture_model(param_init, category)

    # statistical uncertainties (identical for every parameter of this category):
    # element [1] is used under a square root as the MC variance and element [0]
    # as the expected yield -- presumably that is what mixture_model returns
    # (TODO confirm)
    err_stat_mc = np.sqrt(template_nominal[1])/template_nominal[0]
    err_stat_data = np.sqrt(template_nominal[0])/template_nominal[0]
    # the last bin is left out when choosing the y-range
    err_stat_max = np.max([err_stat_mc[:-1].max(), err_stat_data[:-1].max()])

    # passing `total` gives the inner bar a real length; a bare generator has none
    stat_bar2 = tqdm_notebook(enumerate(parameters.iterrows()), total=len(parameters), leave=False)
    for ix, (pname, pdata) in stat_bar2:
        stat_bar2.set_description(pname)
        if not pdata[lep_category] or ix < N_LEADING_SKIPPED:
            continue

        # prior uncertainties
        err_pre = pdata['err_init']
        var_pre_up = relative_variation(ix, err_pre, category, template_nominal[0])
        var_pre_down = relative_variation(ix, -err_pre, category, template_nominal[0])

        # post-fit uncertainties
        err_post = pdata['err_fit']
        if pname in TAU_BR_PARAMS:
            # rescale by the nominal value / 100 -- presumably a percentage to
            # absolute conversion (TODO confirm the units of err_fit)
            err_post *= param_init[ix]/100

        var_post_up = relative_variation(ix, err_post, category, template_nominal[0])
        var_post_down = relative_variation(ix, -err_post, category, template_nominal[0])

        # the y-range is driven by the statistical and pre-fit bands only
        ymin = np.min([-err_stat_max, var_pre_up.min(), var_pre_down.min()])
        ymax = np.max([err_stat_max, var_pre_up.max(), var_pre_down.max()])

        fig, ax = plt.subplots(1, 1, figsize=(10, 5), facecolor='white')
        ax.fill_between(x, -err_stat_mc, err_stat_mc, color='r', step='post', hatch='\\', alpha=0.15, label=r'$\sigma_{stat. MC}$')
        ax.fill_between(x, -err_stat_data, err_stat_data, color='r', step='post', hatch='/', alpha=0.15, label=r'$\sigma_{stat. data}$')

        # same drawing recipe for the pre-fit (C0) and post-fit (C1) variations:
        # dashed step line plus up/down triangle markers
        for (var_up, var_down), color, stage in (
                ((var_pre_up, var_pre_down), 'C0', 'pre-fit'),
                ((var_post_up, var_post_down), 'C1', 'post-fit'),
        ):
            ax.plot(x, var_up, drawstyle='steps-post', c=color, linestyle='--', linewidth=2., label=r'$\sigma_{\theta}$' + f' ({stage})')
            ax.plot(x + dx/2, var_up, c=color, marker='^', markersize=8, linestyle='None', label='_nolegend_')
            ax.plot(x, var_down, drawstyle='steps-post', c=color, linestyle='--', linewidth=2., label='_nolegend_')
            ax.plot(x + dx/2, var_down, c=color, marker='v', markersize=8, linestyle='None', label='_nolegend_')

        ax.set_xlabel(f'{plot_labels[lep_category][0]}', fontsize=20)
        ax.set_ylabel(r'$\frac{\delta N}{N}$')
        ax.set_xlim(x[0], x[-1])
        ax.set_ylim(1.1*ymin, 1.1*ymax)
        ax.set_title(pdata['label'], fontsize=24)
        ax.grid()

        txt = ax.text(0.2, 0.9, f'{plot_labels[lep_category][1]}, {pt.categories[jet_category].label}',
                      horizontalalignment='center',
                      verticalalignment='center',
                      transform=ax.transAxes,
                      fontsize=22
                     )
        txt.set_bbox(dict(facecolor='white', alpha=0.8, edgecolor='red'))

        # no legend is drawn (the ax.legend call is disabled); the labels above are
        # kept so it can be re-enabled:
        #ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=14)
        fig.tight_layout(rect=[0, 0, 0.98, 0.96])

        outdir = f'plots/systematics/sources/{lep_category}/{jet_category}'
        pt.make_directory(outdir, clear=False)
        fig.savefig(f'{outdir}/{pname}.png')
        # close this specific figure so figures do not pile up across iterations
        plt.close(fig)
    

HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


