# ttbar systematics with the Asimov dataset

To assess the impact of the various sources of systematic uncertainty, we will rely on an Asimov dataset.

In [1]:
## imports and configuration
# NOTE(review): absolute, user-specific working directory. The relative paths used
# later in this notebook (local_data/..., data/..., plots/...) are resolved against
# it, so the notebook only runs on a machine where this directory exists.
%cd '/home/naodell/work/wbr/analysis'

from functools import partial
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from scipy.optimize import minimize
from tqdm.notebook import tqdm
import statsmodels.api as sm    
lowess = sm.nonparametric.lowess

import scripts.plot_tools as pt
import scripts.fit_helpers as fh
from nllfit.nllfitter import ScanParameters

np.set_printoptions(precision=3)
matplotlib.style.use('default')
params = {'legend.fontsize': 20,
          'axes.labelsize': 20,
          'figure.figsize': (8, 8),
          'axes.facecolor': 'white',
          'axes.titlesize':'x-large',
          'xtick.labelsize':18,
          'ytick.labelsize':18,
         }
matplotlib.rcParams.update(params)
%matplotlib inline
%connect_info

/home/naodell/work/wbr/analysis
{
  "shell_port": 51421,
  "iopub_port": 55919,
  "stdin_port": 41243,
  "control_port": 57193,
  "hb_port": 40425,
  "ip": "127.0.0.1",
  "key": "c6207323-a1fd9c179caaf11f54b4f794",
  "transport": "tcp",
  "signature_scheme": "hmac-sha256",
  "kernel_name": ""
}

Paste the above JSON into a file, and connect with:
    $> jupyter <app> --existing <file>
or, if you are local, you can connect with just:
    $> jupyter <app> --existing kernel-6c68bf69-e384-469e-991f-beabd497af4a.json
or even just:
    $> jupyter <app> --existing
if this is the most recent Jupyter kernel you have started.


In [2]:
# configure, get the input data, and do any additional processing that is needed

# directory holding the input templates (passed to fh.FitData)
input_dir = 'local_data/templates/nominal_nosmooth/'

# channels to process, in processing order
selections = ['ee', 'mumu', 'emu', 'mutau', 'etau', 'mu4j', 'e4j']
n_selection = len(selections)

# process names passed to fh.FitData
processes = ['ttbar', 't', 'ww', 'wjets', 'zjets_alt', 'diboson', 'fakes']

# per-channel LaTeX labels: [p_T label, channel label]
plot_labels = {
    'mumu':  [r'$\sf p_{T, \mu}$', r'$\mu\mu$'],
    'ee':    [r'$\sf p_{T, e}$', r'$ee$'],
    'emu':   [r'$\sf p_{T, \ell}$', r'$e\mu$'],
    'mutau': [r'$\sf p_{T, \tau}$', r'$\mu\tau$'],
    'etau':  [r'$\sf p_{T, \tau}$', r'$e\tau$'],
    'mu4j':  [r'$\sf p_{T, \mu}$', r'$\mu$ + jets'],
    'e4j':   [r'$\sf p_{T, e}$', r'$e$ + jets'],
}

# initialize fit data
fit_data = fh.FitData(input_dir, selections, processes)
model_data = fit_data._model_data

# get post fit uncertainties from the stored Asimov fit result
# (fit_data._parameters is the alternative source; it is not used here)
parameters = pd.read_csv('data/model_parameters_asimov.csv', index_col='name')

# Rescale the uncertainties of the tau branching-fraction parameters.
# The equivalent /100 rescaling of the 'norm'-type parameters is intentionally
# left disabled.
# NOTE(review): each error column is multiplied by *itself*/100 (err -> err**2/100).
# If the intent is to turn a percent error into an absolute one, the right-hand
# side should presumably be the parameter value column instead -- confirm.
tau_br_names = ['br_tau_e', 'br_tau_mu', 'br_tau_h']
for err_col in ('err_init', 'err_fit'):
    parameters.loc[tau_br_names, err_col] *= parameters.loc[tau_br_names, err_col]/100

In [7]:
# per template breakdown
#
# For every channel and category, plot how each ttbar decay-mode template shifts
# under the generator systematics.  The top row shows the raw up/down shifts
# (absolute, not relative to nominal), the bottom row the LOWESS-smoothed shifts;
# both rows carry the +/- sqrt(data) band for comparison.  One PNG per category
# is written to plots/systematics/ttbar_generator/<channel>/<category>.png.

syst_types = ['isr', 'fsr', 'hdamp', 'tune']
min_yield_frac = 0.5  # skip templates below this fraction of sqrt(data) in every bin
lowess_frac = 0.5     # fraction of the points used for each local regression

decay_map = fit_data._decay_map
for channel in tqdm(selections):
    sdata = fit_data.get_selection_data(channel)

    # decays from the rows of decay_map whose <channel> column equals 1; this only
    # depends on the channel, so evaluate the query once instead of per template
    channel_decays = decay_map.query(f'{channel} == 1').decay.values

    for category, cdata in sdata.items():
        templates = cdata['templates']['ttbar']
        data = cdata['templates']['data']['val']
        bins = cdata['bins']
        data_err = np.sqrt(data)
        x = (bins[:-1] + bins[1:])/2  # bin centres

        # The set of templates worth drawing does not depend on the systematic,
        # so select them once and skip the category before creating a figure.
        selected = []
        for dm, dmdata in templates.items():
            if np.all(dmdata['val'].values < min_yield_frac*data_err) or dm not in channel_decays:
                continue
            label = decay_map[decay_map.decay == dm]['alt_label'].values[0]
            selected.append((label, dmdata))

        if not selected:
            continue

        fig, axes = plt.subplots(2, 4, figsize=(24, 10), facecolor='white', sharey=False, sharex='col')
        fig.suptitle(f'{plot_labels[channel][1]}: {pt.categories[category].label}', fontsize=22)
        for ix, syst_type in enumerate(syst_types):
            ax_raw, ax_smooth = axes[0][ix], axes[1][ix]

            for color, (label, dmdata) in enumerate(selected):
                nominal = dmdata['val'].values
                diff_up = dmdata[f'{syst_type}_up'].values - nominal
                diff_down = dmdata[f'{syst_type}_down'].values - nominal

                # raw shifts: dashed steps plus a marker at each bin centre;
                # only the "up" step line contributes a legend entry
                ax_raw.plot(bins[:-1], diff_up, f'C{color}--', alpha=1., drawstyle='steps-post', label=f'{label}')
                ax_raw.plot(x, diff_up, c=f'C{color}', marker='^', markersize=10, linestyle='None', label='_nolegend_')
                ax_raw.plot(bins[:-1], diff_down, f'C{color}--', alpha=1., drawstyle='steps-post', label='_nolegend_')
                ax_raw.plot(x, diff_down, c=f'C{color}', marker='v', markersize=10, linestyle='None', label='_nolegend_')

                # smoothed shifts
                diff_up_smooth = lowess(diff_up, x, frac=lowess_frac, return_sorted=False)
                ax_smooth.plot(x, diff_up_smooth, c=f'C{color}', marker='^', markersize=10, linestyle=':', label='_nolegend_')
                diff_down_smooth = lowess(diff_down, x, frac=lowess_frac, return_sorted=False)
                ax_smooth.plot(x, diff_down_smooth, c=f'C{color}', marker='v', markersize=10, linestyle=':', label='_nolegend_')

            # zero line and +/- sqrt(data) band on both rows
            for ax in (ax_raw, ax_smooth):
                ax.plot(bins[[0, -1]], [0, 0], 'k-')
                ax.fill_between(bins[:-1], -data_err, data_err, hatch='/', facecolor='none', edgecolor='r', alpha=0.3, step='post', label='data error')

            ax_raw.set_title(f'{syst_type}', fontsize=20)
            ax_smooth.set_xlabel(r'$p_{T}$')
            ax_smooth.set_xlim(bins[0], bins[-2])

            # a single legend, on the last top-row panel
            if ix == len(syst_types) - 1:
                ax_raw.legend()

        outdir = f'plots/systematics/ttbar_generator/{channel}'
        pt.make_directory(outdir, clear=False)

        fig.tight_layout(h_pad=0., w_pad=0., rect=[0, 0.03, 1, 0.95])
        fig.savefig(f'{outdir}/{category}.png')
        # close explicitly so figures do not accumulate across the loop
        plt.close(fig)

HBox(children=(FloatProgress(value=0.0, max=7.0), HTML(value='')))


