# profile likelihood scans on the Asimov dataset

In this notebook we will:

   * generate the Asimov dataset
   * carry out likelihood scans for a number of nuisance parameters while minimizing the NLL w.r.t. all other nuisance parameters
   
This first part is based on `w_br_asimov.ipynb`.

In [3]:
## imports and configuration
%cd '/home/naodell/work/wbr/analysis'

from functools import partial
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from scipy.optimize import minimize
from tqdm import tqdm_notebook

import scripts.plot_tools as pt
import scripts.fit_helpers as fh
from nllfit.nllfitter import ScanParameters

np.set_printoptions(precision=5)
matplotlib.style.use('default')
params = {'legend.fontsize': 20,
          'axes.labelsize': 20,
          'figure.figsize': (8, 8),
          'axes.facecolor': 'white',
          'axes.titlesize':'x-large',
          'xtick.labelsize':18,
          'ytick.labelsize':18,
         }
matplotlib.rcParams.update(params)
%matplotlib inline
%connect_info

/home/naodell/work/wbr/analysis
{
  "shell_port": 47775,
  "iopub_port": 34627,
  "stdin_port": 37207,
  "control_port": 51197,
  "hb_port": 52649,
  "ip": "127.0.0.1",
  "key": "3d83209e-c4bbc3cda1dd9ffc718b4530",
  "transport": "tcp",
  "signature_scheme": "hmac-sha256",
  "kernel_name": ""
}

Paste the above JSON into a file, and connect with:
    $> jupyter <app> --existing <file>
or, if you are local, you can connect with just:
    $> jupyter <app> --existing kernel-83b2660f-a13a-4a00-a4e4-d8368a97addf.json
or even just:
    $> jupyter <app> --existing
if this is the most recent Jupyter kernel you have started.


In [4]:
# configure, get the input data, and do any additional processing that is needed

# Template location and the selections that are handed to the fit.
input_dir = f'local_data/templates/test_zjets_shape/'
selections = ['emu', 'mumu', 'mutau', 'mu4j', 'ee', 'etau', 'e4j']
n_selection = len(selections)

# Per-selection target variable passed to FitData.
targets = {
    'mumu': 'lepton2_pt',
    'ee': 'lepton2_pt',
    'emu': 'trailing_lepton_pt',
    'mutau': 'lepton2_pt',
    'etau': 'lepton2_pt',
    'mu4j': 'lepton1_pt',
    'e4j': 'lepton1_pt',
}

# Two LaTeX strings per selection (presumably [axis label, channel label]).
plot_labels = {
    'mumu': [r'$\sf p_{T, \mu}$', r'$\mu\mu$'],
    'ee': [r'$\sf p_{T, e}$', r'$ee$'],
    'emu': [r'$\sf p_{T, \ell}$', r'$e\mu$'],
    'mutau': [r'$\sf p_{T, \tau}$', r'$\mu\tau$'],
    'etau': [r'$\sf p_{T, \tau}$', r'$e\tau$'],
    'mu4j': [r'$\sf p_{T, \mu}$', r'$\mu$ + jets'],
    'e4j': [r'$\sf p_{T, e}$', r'$e$ + jets'],
}

# initial values for W branching fraction (beta_e, beta_mu, beta_tau, beta_h)
# and tau branching fraction (b_e, b_mu, b_h)
br_tau = [0.1783, 0.1741, 0.6476]
beta_init = [0.108] * 3 + [1 - 3*0.108]
# squared values, one per entry of beta_init (name suggests variances)
var_beta = [0.0009**2] * 3 + [0.0031**2]

# initialize fit data
fit_data = fh.FitData(input_dir, selections, targets)

In [5]:
# generate Asimov dataset
# For every selection/category, sum the expected contribution of each
# non-data template. The 'ttbar', 't' and 'wjets' templates are combined
# through fh.signal_mixture_model using the initial branching fractions;
# every other template contributes its 'val' entry directly.
toy_data = dict()
for selection in selections:
    toy_data[selection] = dict()
    sdata = fit_data.get_selection_data(selection)
    for category, bdata in sdata.items():
        templates = bdata['templates']

        # one entry fewer than the length of 'bins' (presumably bin edges)
        expected = np.zeros(bdata['bins'].size - 1)
        for dataset, template in templates.items():
            if dataset in ['ttbar', 't', 'wjets']:
                # signal component
                # DataFrame.from_items was deprecated and later removed from
                # pandas; building from a dict keeps the same column order
                # (dicts preserve insertion order).
                signal_template = pd.DataFrame(
                    {dm: t['val'] for dm, t in template.items()}
                )
                signal = fh.signal_mixture_model(
                    beta_init, br_tau, signal_template,
                    sample=False, single_w=(dataset == 'wjets')
                )
                expected += signal
            elif dataset != 'data':
                expected += template['val']

        toy_data[selection][category] = expected

  del sys.path[0]


In [6]:
# configuration for likelihood scans 

def reduced_objective(obj, p, mask, p_init):
    '''
    Evaluates the objective with the parameters selected by `mask` held fixed.

    The full parameter vector is assembled from a copy of `p_init`: entries
    where `mask` is True are set to `p`, and the remaining entries are set to
    `obj`.  Passing this function to a minimizer therefore profiles the
    objective over the unmasked parameters.

    Relies on the notebook-level names `fit_data`, `toy_data` and `cost_type`.

    Parameters
    ----------
    obj : array-like
        Values of the free (unmasked) parameters; this is the vector the
        minimizer varies.  Its length must equal the number of False entries
        in `mask`.
    p : float or array-like
        Value(s) assigned to the fixed (masked) parameters.
    mask : array-like of bool
        True for parameters that are held fixed at `p`.
    p_init : numpy.ndarray
        Full-length parameter vector used as the template; it is not modified.

    Returns
    -------
    The value returned by `fit_data.objective` for the assembled vector.
    '''
    masked_p = p_init.copy()
    masked_p[mask] = p
    # Previously the free parameters were never written into the vector, so
    # the objective was constant with respect to what the minimizer varied.
    masked_p[np.logical_not(mask)] = obj
    return fit_data.objective(masked_p, data=toy_data, cost_type=cost_type, no_shape=False)

# Starting parameter vector and settings shared by the scans below.
params_init = fit_data.get_params_init().values
cost_type = 'poisson'
nscan_points = 20

# Parameter to scan and its position in the parameter vector.
# NOTE(review): ix is hard-coded; presumably it is the index of 'lumi' in
# params_init -- confirm before scanning a different parameter.
param = fit_data._parameters.loc['lumi']
ix = 7

# Scan val_init +/- err_init on an evenly spaced grid, with the initial value
# itself added as an extra point (hence nscan_points + 1 rows).
pmin = param.val_init - param.err_init
pmax = param.val_init + param.err_init
grid = np.linspace(pmin, pmax, num=nscan_points)
scan_points = np.vstack([params_init] * (nscan_points + 1))
scan_points[:, ix] = np.sort(np.append(grid, params_init[ix]))

# Boolean mask that is True only for the scanned parameter.
mask = np.zeros(len(params_init), dtype=bool)
mask[ix] = True

In [None]:
# carry out one-dimensional profile likelihood scans
def obj_callback(params, objective, data, cache):
    '''
    Minimizer callback that records and prints the current objective value.

    Parameters
    ----------
    params : array-like
        Current parameter vector supplied by the minimizer.
    objective : callable
        Called as `objective(params, data)` to obtain the cost.
    data :
        Passed through unchanged as the second argument of `objective`.
    cache : dict
        Must hold lists under the keys 'params' and 'cost'; a copy of the
        parameters and the cost are appended to them on every call.
    '''
    # Evaluate once and reuse: the objective was previously computed twice
    # per iteration (once for the cache, once for the printout).
    cost = objective(params, data)
    # Store a copy so later in-place changes to `params` cannot alter history.
    cache['params'].append(np.array(params))
    cache['cost'].append(cost)
    print(cost)

# Full fit: minimize the objective over all parameters, starting from a
# slightly perturbed copy of the initial values.
nll_cache = dict(params = [], cost = [])

# Seeded generator so the perturbed starting point (and therefore the fit
# trajectory printed by the callback) is reproducible across runs.
rng = np.random.RandomState(42)
p = params_init + 0.01*rng.randn(params_init.size)
p[:4] = params_init[:4]  # the first four parameters start unperturbed

res = minimize(fit_data.objective, p,
               method = 'BFGS',
               callback = partial(obj_callback, objective=fit_data.objective, data=toy_data, cache=nll_cache),
               args = (toy_data,),  # trailing comma: `args` must be a tuple
               options = {'disp':True}
              )

# Reduced fit: hold the scanned parameter at 0.95 and minimize over the rest.
fobj = partial(reduced_objective, p=0.95, mask=mask, p_init=params_init)
res_reduced = minimize(fobj, np.delete(params_init, ix),
                       method = 'BFGS',
                       options = {'disp':True}
                      )

# Objective evaluated at the initial parameter values, for reference.
nll_nominal = fit_data.objective(params_init, toy_data)

-39133540.77123285
-39133650.79986652
-39133845.8912693
-39133966.54151255
-39133991.102851
-39134023.50023022
-39134031.47684568
-39134039.161774166
-39134041.49972127
-39134044.75563016
-39134047.21844243
-39134051.493265085
-39134074.24052604
-39134081.1278544
-39134081.62007253
-39134082.55709564
-39134087.43747481
-39134090.30580603
-39134093.60368712
-39134099.1868425
-39134109.39437195
-39134125.85379611
-39134155.51004343
-39134200.41604623
-39134266.74224504
-39134304.06713711
-39134355.06841983
-39134382.35418589
-39134399.95731668
-39134422.54385751
-39134430.24306857
-39134442.90144064
-39134462.4357257
-39134470.99709707
-39134484.03572634
-39134497.52395274
-39134499.57912122
-39134502.894228764
-39134505.886776716
-39134508.09735714
-39134510.1383041
-39134510.62221568
-39134510.72021797
-39134510.88652353
-39134511.72591005
-39134511.87024356
-39134512.040789686
-39134512.24024094
-39134512.37705552
-39134512.48919217
-39134512.602054365
-39134512.75903975
-39134512.941

In [None]:
# carry out one-dimensional profile likelihood scans
# For each scan point the scanned parameter is fixed and the objective is
# minimized over the remaining parameters in a worker process.
from multiprocessing import Pool

# Starting values for the free parameters (scanned parameter removed).
p = np.delete(params_init, ix)

# BFGS with a numerically estimated gradient: reduced_objective returns only
# the objective value, so options that require an analytic gradient/Hessian
# (jac=True / hess=True with trust-ncg) cannot be used here.
minimize_kwds = {'method': 'BFGS', 'options': {'disp': True}}

# The context manager guarantees the workers are cleaned up even if a fit
# raises; results are collected before leaving the block.
with Pool(processes=12) as pool:
    pending = []
    for sp in scan_points:
        fobj = partial(reduced_objective, p=sp[ix], mask=mask, p_init=params_init)
        pending.append(pool.apply_async(minimize, args=(fobj, p), kwds=minimize_kwds))
    # .get() blocks until the fit is done and re-raises any worker exception.
    results = [r.get() for r in pending]

print('All done!!!')

# One minimized objective value per row of scan_points.
nll_scan = np.array([r.fun for r in results])

In [None]:
# Compare the scanned NLL with a parabola built from the parameter's
# initial value and initial uncertainty.
fig, ax = plt.subplots(1, 1, figsize=(10, 10), facecolor='white')

scan_values = scan_points[:, ix]
# Explicit array so the subtraction does not depend on list/numpy coercion.
delta_nll = np.asarray(nll_scan) - np.min(nll_scan)

# black solid: scanned NLL relative to its minimum
ax.plot(scan_values, delta_nll, 'k-', linewidth=1.5)
# blue dashed: 0.5 * (x0 - x)^2 / err_init^2
ax.plot(scan_values, 0.5*(params_init[ix] - scan_values)**2/param.err_init**2, 'b--', linewidth=1.5)
ax.set_title(param.label)
#ax.set_xlim(-0.1, 0.1)
#ax.set_ylim(-0.1, 100)
# raw string: '\s' is not a valid escape sequence in a normal string literal
ax.set_ylabel(r'$\sf NLL - NLL_{min}$')
ax.grid()

plt.tight_layout()
plt.show()