# systematics with the Asimov dataset

To assess the impact of the various sources of systematic uncertainty, we will rely on an Asimov dataset.

In [1]:
## imports and configuration
# Switch the kernel's working directory to the analysis checkout. Later cells
# use paths relative to it ('local_data/templates/nominal_smooth/' for the
# inputs, 'plots/systematics/sources/...' for the saved figures).
# NOTE(review): this is an absolute, user-specific path, so the notebook only
# runs unchanged on the original author's machine; presumably the `scripts.*`
# and `nllfit` imports below are also resolved from this directory -- confirm.
%cd '/home/naodell/work/wbr/analysis'

from functools import partial
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from scipy.optimize import minimize
from tqdm.notebook import tqdm

import scripts.plot_tools as pt
import scripts.fit_helpers as fh
from nllfit.nllfitter import ScanParameters

np.set_printoptions(precision=3)
matplotlib.style.use('default')
params = {'legend.fontsize': 20,
          'axes.labelsize': 20,
          'figure.figsize': (8, 8),
          'axes.facecolor': 'white',
          'axes.titlesize':'x-large',
          'xtick.labelsize':18,
          'ytick.labelsize':18,
         }
matplotlib.rcParams.update(params)
%matplotlib inline
%connect_info

/home/naodell/work/wbr/analysis
{
  "shell_port": 51057,
  "iopub_port": 53481,
  "stdin_port": 40147,
  "control_port": 38815,
  "hb_port": 59849,
  "ip": "127.0.0.1",
  "key": "57ef5bbc-1a7d1712d4b1b96cbf417829",
  "transport": "tcp",
  "signature_scheme": "hmac-sha256",
  "kernel_name": ""
}

Paste the above JSON into a file, and connect with:
    $> jupyter <app> --existing <file>
or, if you are local, you can connect with just:
    $> jupyter <app> --existing kernel-212f6734-8a14-46e0-a08f-cdc09662844f.json
or even just:
    $> jupyter <app> --existing
if this is the most recent Jupyter kernel you have started.


In [2]:
# Configuration: the process names and lepton-channel selections handed to
# the FitData constructor.
processes = ['ttbar', 't', 'ww', 'wjets', 'zjets_alt', 'diboson', 'fakes']
selections = ['ee', 'mumu', 'emu', 'mutau', 'etau', 'mu4j', 'e4j']

# Build the fit inputs from the template directory and keep handles on the
# model data and on the parameter table (the same objects held by `fit_data`,
# so the in-place edit below is also seen by `fit_data`).
input_dir = 'local_data/templates/nominal_smooth/'
fit_data = fh.FitData(input_dir, selections, processes)
model_data = fit_data._model_data
parameters = fit_data._parameters

# Rescale the initial uncertainty of the three tau branching-fraction rows.
# NOTE(review): the factor on the right-hand side is `err_init` itself, so the
# result is err_init**2/100 for these rows. If the intent is a percent ->
# absolute conversion, the factor would normally be `val_init`/100 (or a plain
# division by 100) -- confirm against the parameter table before relying on
# these values. The original heading read "get post fit uncertainties", but
# only `err_init` is modified; the equivalent line for `err_fit` was disabled.
tau_br_names = ['br_tau_e', 'br_tau_mu', 'br_tau_h']
parameters.loc[tau_br_names, 'err_init'] *= parameters.loc[tau_br_names, 'err_init']/100

# Snapshot of the initial parameter values; the copy keeps later edits of the
# returned array from touching the table's own data.
param_init = parameters['val_init'].values.copy()

In [3]:
# Plot the per-bin relative change of each category's model template when one
# parameter at a time is shifted by +/- its initial uncertainty (`err_init`),
# overlaid on the MC and data statistical uncertainties. One PNG is written
# per (lepton channel, jet category, parameter) under
# plots/systematics/sources/.
#
# Post-fit bands are not drawn. The previous version of this cell re-evaluated
# the model under a "post-fit" label but with `err_init`, which reproduced the
# pre-fit variation exactly (same axis limits, twice the model evaluations).
# To overlay real post-fit bands, call `_relative_variation` with the post-fit
# error and plot the result in a second colour.
plot_labels = fh.fancy_labels
categories = model_data.keys()

# Parameters at positional index 0-3 of the parameter table are never plotted.
# NOTE(review): presumably these are the parameters of interest rather than
# systematic sources -- confirm against the parameter table.
n_leading_skipped = 4


def _relative_variation(ix, shift, category, template_nominal):
    """Return the per-bin fractional template change for one shifted parameter.

    A fresh copy of `param_init` is made, entry `ix` is moved by `shift`, and
    the model is evaluated for `category`. Element [0] of the model output is
    compared with element [0] of `template_nominal`:
    (shifted - nominal)/nominal.
    """
    pval = param_init.copy()
    pval[ix] = param_init[ix] + shift
    template_shifted = fit_data.mixture_model(pval, category)
    return (template_shifted[0] - template_nominal[0])/template_nominal[0]


def _symmetric_limit(*variations):
    """Return the largest finite absolute value in `variations`, or 0.

    Non-finite entries (NaN/inf, e.g. from a bin whose nominal content is
    zero) are ignored so that a single empty bin does not disable the axis
    scaling for the whole plot.
    """
    values = np.concatenate([np.ravel(v) for v in variations])
    values = values[np.isfinite(values)]
    return np.abs(values).max() if values.size else 0.


for category in tqdm(categories, leave=True):
    # category names look like '<lepton channel>_<jet category>'
    lep_category, jet_category = category.split('_', 1)
    if jet_category not in pt.categories.keys():
        continue

    sdata = fit_data.get_selection_data(lep_category)
    bins = sdata[jet_category]['bins']
    template_nominal = fit_data.mixture_model(param_init, category)

    # Quantities that depend only on the category, not on the parameter.
    x = bins[:-1]
    dx = bins[1:] - bins[:-1]
    # element [1] of the model output is used as a variance: its square root
    # relative to the yield is drawn as the MC statistical band
    err_stat_mc = np.sqrt(template_nominal[1])/template_nominal[0]
    # sqrt(N)/N of the nominal yield is drawn as the data statistical band
    err_stat_data = np.sqrt(template_nominal[0])/template_nominal[0]
    outdir = f'plots/systematics/sources/{lep_category}/{jet_category}'

    for ix, (pname, pdata) in tqdm(enumerate(parameters.iterrows()),
                                   desc=category,
                                   leave=False,
                                   total=parameters.shape[0]
                                  ):
        # skip parameters flagged as inactive for this lepton channel and the
        # leading block of the parameter table
        if not pdata[lep_category] or ix < n_leading_skipped:
            continue

        # +/- one initial-uncertainty variations
        err_pre = pdata['err_init']
        var_pre_up = _relative_variation(ix, err_pre, category, template_nominal)
        var_pre_down = _relative_variation(ix, -err_pre, category, template_nominal)

        # symmetric y-range driven by the largest finite variation
        ylim = _symmetric_limit(var_pre_up, var_pre_down)

        # make the plot
        fig, ax = plt.subplots(1, 1, figsize=(14, 5), facecolor='white')
        ax.fill_between(x, -err_stat_mc, err_stat_mc, edgecolor='r', facecolor='none', step='post', hatch='\\', alpha=0.5, label=r'$\sigma_{stat. MC}$')
        ax.fill_between(x, -err_stat_data, err_stat_data, edgecolor='r', facecolor='none', step='post', hatch='/', alpha=0.5, label=r'$\sigma_{stat. data}$')

        # step line at the bin edges plus a marker at each bin centre; only
        # the "up" step line carries a legend entry
        for variation, marker, label in ((var_pre_up, '^', r'$\sigma_{\theta}$ (pre-fit)'),
                                         (var_pre_down, 'v', '_nolegend_')):
            ax.plot(x, variation, drawstyle='steps-post', c='C0', linestyle='--', linewidth=2., label=label)
            ax.plot(x + dx/2, variation, c='C0', marker=marker, markersize=8, linestyle='None', label='_nolegend_')

        ax.set_xlabel(f'{plot_labels[lep_category][0]}', fontsize=20)
        ax.set_ylabel(r'$\frac{\delta N}{N}$')
        ax.set_xlim(x[0], x[-1])
        if ylim > 0.:
            ax.set_ylim(-1.25*ylim, 1.25*ylim)
        ax.set_title(pdata['label'], fontsize=24)
        ax.grid()

        txt = ax.text(0.2, 0.9, f'{plot_labels[lep_category][1]}, {pt.categories[jet_category].label}',
                      horizontalalignment='center',
                      verticalalignment='center',
                      transform=ax.transAxes,
                      fontsize=22
                     )
        txt.set_bbox(dict(facecolor='white', alpha=0.8, edgecolor='red'))

        # legend sits outside the axes on the right; leave room for it
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=14)
        fig.tight_layout(rect=[0, 0, 0.98, 0.96])

        pt.make_directory(outdir, clear=False)
        fig.savefig(f'{outdir}/{pname}.png')
        # close this figure explicitly so that figures do not accumulate
        # across the many loop iterations
        plt.close(fig)
    

HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, description='ee_cat_gt2_eq0', max=117.0, style=ProgressStyle(descripti…

HBox(children=(FloatProgress(value=0.0, description='ee_cat_gt2_eq1_b', max=117.0, style=ProgressStyle(descrip…

HBox(children=(FloatProgress(value=0.0, description='ee_cat_gt2_gt2_b', max=117.0, style=ProgressStyle(descrip…

HBox(children=(FloatProgress(value=0.0, description='mumu_cat_gt2_eq0', max=117.0, style=ProgressStyle(descrip…

HBox(children=(FloatProgress(value=0.0, description='mumu_cat_gt2_eq1_b', max=117.0, style=ProgressStyle(descr…

HBox(children=(FloatProgress(value=0.0, description='mumu_cat_gt2_gt2_b', max=117.0, style=ProgressStyle(descr…

HBox(children=(FloatProgress(value=0.0, description='emu_cat_gt2_eq0', max=117.0, style=ProgressStyle(descript…

HBox(children=(FloatProgress(value=0.0, description='emu_cat_eq0_eq0_a', max=117.0, style=ProgressStyle(descri…

HBox(children=(FloatProgress(value=0.0, description='emu_cat_eq1_eq0_a', max=117.0, style=ProgressStyle(descri…

HBox(children=(FloatProgress(value=0.0, description='emu_cat_eq1_eq1_a', max=117.0, style=ProgressStyle(descri…

HBox(children=(FloatProgress(value=0.0, description='emu_cat_gt2_eq1_a', max=117.0, style=ProgressStyle(descri…

HBox(children=(FloatProgress(value=0.0, description='emu_cat_gt2_gt2_a', max=117.0, style=ProgressStyle(descri…

HBox(children=(FloatProgress(value=0.0, description='mutau_cat_gt2_eq0', max=117.0, style=ProgressStyle(descri…

HBox(children=(FloatProgress(value=0.0, description='mutau_cat_eq0_eq0', max=117.0, style=ProgressStyle(descri…

HBox(children=(FloatProgress(value=0.0, description='mutau_cat_eq1_eq0', max=117.0, style=ProgressStyle(descri…

HBox(children=(FloatProgress(value=0.0, description='mutau_cat_eq1_eq1', max=117.0, style=ProgressStyle(descri…

HBox(children=(FloatProgress(value=0.0, description='mutau_cat_eq2_eq1', max=117.0, style=ProgressStyle(descri…

HBox(children=(FloatProgress(value=0.0, description='mutau_cat_gt3_eq1', max=117.0, style=ProgressStyle(descri…

HBox(children=(FloatProgress(value=0.0, description='mutau_cat_eq2_gt2', max=117.0, style=ProgressStyle(descri…

HBox(children=(FloatProgress(value=0.0, description='mutau_cat_gt3_gt2', max=117.0, style=ProgressStyle(descri…

HBox(children=(FloatProgress(value=0.0, description='etau_cat_gt2_eq0', max=117.0, style=ProgressStyle(descrip…

HBox(children=(FloatProgress(value=0.0, description='etau_cat_eq0_eq0', max=117.0, style=ProgressStyle(descrip…

HBox(children=(FloatProgress(value=0.0, description='etau_cat_eq1_eq0', max=117.0, style=ProgressStyle(descrip…

HBox(children=(FloatProgress(value=0.0, description='etau_cat_eq1_eq1', max=117.0, style=ProgressStyle(descrip…

HBox(children=(FloatProgress(value=0.0, description='etau_cat_eq2_eq1', max=117.0, style=ProgressStyle(descrip…

HBox(children=(FloatProgress(value=0.0, description='etau_cat_gt3_eq1', max=117.0, style=ProgressStyle(descrip…

HBox(children=(FloatProgress(value=0.0, description='etau_cat_eq2_gt2', max=117.0, style=ProgressStyle(descrip…

HBox(children=(FloatProgress(value=0.0, description='etau_cat_gt3_gt2', max=117.0, style=ProgressStyle(descrip…

HBox(children=(FloatProgress(value=0.0, description='mu4j_cat_gt4_eq1', max=117.0, style=ProgressStyle(descrip…

HBox(children=(FloatProgress(value=0.0, description='mu4j_cat_gt4_gt2', max=117.0, style=ProgressStyle(descrip…

HBox(children=(FloatProgress(value=0.0, description='e4j_cat_gt4_eq1', max=117.0, style=ProgressStyle(descript…

HBox(children=(FloatProgress(value=0.0, description='e4j_cat_gt4_gt2', max=117.0, style=ProgressStyle(descript…


