# systematics with the Asimov dataset

To assess the impact of the various sources of systematic uncertainty, we will rely on an Asimov dataset.

In [1]:
## imports and configuration
%cd '/home/naodell/work/wbr/analysis'

from functools import partial
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from scipy.optimize import minimize
from tqdm import tqdm_notebook

import scripts.plot_tools as pt
import scripts.fit_helpers as fh
from nllfit.nllfitter import ScanParameters

np.set_printoptions(precision=3)
matplotlib.style.use('default')
params = {'legend.fontsize': 20,
          'axes.labelsize': 20,
          'figure.figsize': (8, 8),
          'axes.facecolor': 'white',
          'axes.titlesize':'x-large',
          'xtick.labelsize':18,
          'ytick.labelsize':18,
         }
matplotlib.rcParams.update(params)
%matplotlib inline
%connect_info

/home/naodell/work/wbr/analysis
{
  "shell_port": 32833,
  "iopub_port": 47523,
  "stdin_port": 37035,
  "control_port": 44419,
  "hb_port": 58543,
  "ip": "127.0.0.1",
  "key": "5f259790-844b651ed7eb242add05d9db",
  "transport": "tcp",
  "signature_scheme": "hmac-sha256",
  "kernel_name": ""
}

Paste the above JSON into a file, and connect with:
    $> jupyter <app> --existing <file>
or, if you are local, you can connect with just:
    $> jupyter <app> --existing kernel-5e464d4a-f6c1-490c-98d1-d9edde29b761.json
or even just:
    $> jupyter <app> --existing
if this is the most recent Jupyter kernel you have started.


In [2]:
# configure, get the input data, and do any additional processing that is needed
input_dir  = f'local_data/templates/test/'
selections = [
              'ee',   'emu',  'mumu',  
              'mutau', 'etau', 
              'mu4j', 'e4j'
             ]
n_selection = len(selections)
plot_labels = dict(
                   mumu  = [r'$\sf p_{T, \mu}$', r'$\mu\mu$'],
                   ee    = [r'$\sf p_{T, e}$', r'$ee$'],
                   emu   = [r'$\sf p_{T, \ell}$', r'$e\mu$'],
                   mutau = [r'$\sf p_{T, \tau}$', r'$\mu\tau$'],
                   etau  = [r'$\sf p_{T, \tau}$', r'$e\tau$'],
                   mu4j  = [r'$\sf p_{T, \mu}$', r'$\mu$ + jets'],
                   e4j   = [r'$\sf p_{T, e}$', r'$e$ + jets']
                  )

# initial values for W branching fraction (beta_e, beta_mu, beta_tau, beta_h) and tau branching fraction (b_e, b_mu, b_h)
br_tau    = [0.1783, 0.1741, 0.6476]
beta_init = [0.108, 0.108, 0.108, 1 - 3*0.108] 
var_beta  = [0.0009**2, 0.0009**2, 0.0009**2, 0.0031**2]

# initialize fit data
fit_data = fh.FitData(input_dir, selections)
parameters = fit_data._parameters

In [3]:
# plot the systematic sources
selection = 'mumu'
dataset = 'ttbar'
subdataset = 'wmu_wmu'

for group in groups:
    categories = ['cat_gt2_eq0', 'cat_gt2_eq1_b', 'cat_gt2_gt2_b']
    n_categories = len(categories)
    fig, axes = plt.subplots(n_categories, 1, figsize=(16, 4*n_categories), facecolor='white')
    for ix, category in enumerate(categories):

        sdata = fit_data.get_selection_data('mumu')
        templates = sdata[category]['templates']
        if subdataset == None:
            template = templates[dataset]
        else:
            template = templates[dataset][subdataset]

        y = template['val']
        mc_stat_err = np.sqrt(template['var'])
        data_stat_err = np.sqrt(y)

        bins = sdata[category]['bins'][:-1]
        dx = (bins[1:] - bins[:-1])
        dx = np.append(dx, dx[-1]) 
        x  = bins + dx/2            

        if n_categories > 1:
            ax = axes[ix]
        else:
            ax = axes

        ax.plot([x[0], x[-1]], [0, 0], drawstyle='steps-post', c='k', linestyle=':', linewidth=2., label='_nolegend_')
        ax.fill_between(x, -mc_stat_err/y, mc_stat_err/y, color='r', step='post', hatch='\\', alpha=0.15, label='MC stat.')
        ax.fill_between(x, -data_stat_err/y, data_stat_err/y, color='b', step='post', hatch='/', alpha=0.15, label='Data stat.')
        for jx, (param, param_data) in enumerate(parameter_groups.query(f'group == {group})').iterrows()):
            if param not in ['eff_id_mu', 'eff_iso_mu', 'eff_id_e', 'eff_reco_e', 'pileup']:
                continue

            if not param_data[dataset] or not param_data[selection] or f'{param}_up' not in template.columns: 
                continue

            if param_data.type == 'shape':
                color = f'C{jx%10}'
                y_up, y_down = template[f'{param}_up'] - template['val'], template[f'{param}_down'] - template['val']
                ax.plot(x, y_up/y, drawstyle='steps-post', c=color, linestyle='--', linewidth=2., label=param_data['label'])
                ax.plot(x + dx/2, y_up/y, c=color, marker='^', markersize=8, linestyle='None', label='_nolegend_')
                ax.plot(x, y_down/y, drawstyle='steps-post', c=color, linestyle='--', linewidth=2., label='_nolegend_')
                ax.plot(x + dx/2, y_down/y, c=color, marker='v', markersize=8, linestyle='None', label='_nolegend_')

        if ix == 0:
            ax.set_title(f'{plot_labels[selection][1]}: {dataset} ({subdataset})')
            ax.legend(loc='center left', bbox_to_anchor=(1.04,0.5), borderaxespad=0)

        ax.text(0.1, 0.9, category, horizontalalignment='center', verticalalignment='center', transform=ax.transAxes, fontsize=14)
        ax.set_xlabel(plot_labels[selection][0])
        ax.set_ylabel(r'$\frac{\delta N}{N}$')
        ax.set_xlim(x[0], x[-2])
        ax.set_ylim(-0.1, 0.1)
        ax.grid()

    plt.tight_layout()
    plt.show()

NameError: name 'groups' is not defined