# Testing lepton universality

We are interested in quantifying how well the data support each of several hypotheses:

   * _null_: this is the assumption of lepton universality, i.e., $B(W\rightarrow e) = B(W\rightarrow \mu) = B(W\rightarrow \tau)$
   * _alt 1_: in this case, lepton universality is violated only for third generation leptons, i.e., $B(W\rightarrow e) = B(W\rightarrow \mu) \neq B(W\rightarrow \tau)$
   * _alt 2_: in this case, none of the branching fractions are the same and lepton universality completely breaks down, i.e., $B(W\rightarrow e) \neq B(W\rightarrow \mu) \neq B(W\rightarrow \tau)$
   
To do this, we carry out the fit to data under each assumption and construct the likelihood ratios, $q$, so that we can determine $P(q|data)$.  To determine the p-values we first need the correct pdf for $q$, which can be obtained by evaluating the fit on toy data constructed assuming the null hypothesis.  Assuming the usual asymptotic approximation (Wilks' theorem) applies, the pdf for $q$ will be a $\chi^{2}_{k}$ distribution with $k=1(2)$ degrees of freedom for alternative hypothesis 1(2).

In [1]:
## imports and configuration
%cd '/home/naodell/work/wbr/analysis'
#%load_ext autoreload

from functools import partial
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from scipy.optimize import minimize
from tqdm import tqdm_notebook

import scripts.plot_tools as pt
import scripts.fit_helpers as fh
from nllfit.nllfitter import ScanParameters

# Print numpy arrays with three digits of precision.
np.set_printoptions(precision=3)

# Notebook-wide matplotlib styling: large square figures and large fonts.
rc_params = {
    # figure and axes
    'figure.figsize': (10, 10),
    'axes.labelsize': 20,
    'axes.facecolor': 'white',
    'axes.titlesize': 'x-large',
    # legend and tick labels
    'legend.fontsize': 20,
    'xtick.labelsize': 18,
    'ytick.labelsize': 18,
    # fonts
    'font.size': 18,
    'font.sans-serif': ['Arial', 'sans-serif'],
    'mathtext.sf': 'Arial',
    # line and marker defaults
    'lines.markersize': 8.,
    'lines.linewidth': 2.5,
}
matplotlib.rcParams.update(rc_params)

%connect_info

/home/naodell/work/wbr/analysis
{
  "shell_port": 52175,
  "iopub_port": 39651,
  "stdin_port": 60865,
  "control_port": 56515,
  "hb_port": 38971,
  "ip": "127.0.0.1",
  "key": "1d47d7e6-3e701372a0f8bc3f5b7e7735",
  "transport": "tcp",
  "signature_scheme": "hmac-sha256",
  "kernel_name": ""
}

Paste the above JSON into a file, and connect with:
    $> jupyter <app> --existing <file>
or, if you are local, you can connect with just:
    $> jupyter <app> --existing kernel-b4bded23-fb4e-4f3e-8bce-b0d4e4a7e000.json
or even just:
    $> jupyter <app> --existing
if this is the most recent Jupyter kernel you have started.


In [2]:
# Configuration: where the templates live, which processes enter the model,
# and which selection categories are fit.
input_dir = 'local_data/templates/nominal/'
processes = ['ttbar', 't', 'ww', 'wjets', 'zjets_alt', 'diboson', 'fakes']
selections = ['ee', 'emu', 'mumu', 'mutau', 'etau', 'mu4j', 'e4j']

# Build the fit-data container and grab the parameter table plus a private
# copy of the initial parameter values (copied so later edits cannot leak
# back into the container).
fit_data = fh.FitData(input_dir, selections, processes, process_cut=0.1)
params = fit_data._parameters
params_pre = fit_data.get_params_init().values.copy()

# One model prediction per category, evaluated at the initial parameter values.
sample = {
    category: fit_data.mixture_model(params_pre, category)
    for category in fit_data._model_data.keys()
}

In [3]:
# Fit configuration: per-parameter bounds and minimizer options.

# Bounds are listed in parameter order.  The first seven entries are
# hard-coded (presumably the W and tau branching fractions -- confirm
# against the parameter table); the remaining entries are one identical
# interval per normalization parameter and per shape parameter.
bounds = 3 * [(0.1, 0.12)] + [(0.64, 0.7)]
bounds.extend(2 * [(0.16, 0.18)] + [(0.64, 0.66)])
bounds.extend(fit_data._nnorm * [(0.2, 1.5)])
bounds.extend(fit_data._nshape * [(-2, 2)])

# Step sizes: a small fraction of each parameter's initial uncertainty, with
# a fixed tiny step for the first four parameters.
step_sizes = 0.001 * params['err_init']
step_sizes[:4] = [1e-6] * 4

# Options handed to scipy.optimize.minimize.  Other knobs that were tried and
# left disabled: eps=1e-9, xtol=1e-3, ftol=1e-9, stepmx=0.1, maxCGit=50,
# accuracy=1e-10.
min_options = {
    'maxfun': 2.5e4,
    'disp': None,
}

In [None]:
# Toy study: for each trial, generate a randomized pseudo-dataset from the
# pre-fit model and fit it under the null hypothesis and under both
# alternative lepton-universality hypotheses.  Best-fit parameter vectors and
# minimum objective values of converged fits are collected per hypothesis.
ntrials = 10
results = dict(null=[], alt1=[], alt2=[])
cost = dict(null=[], alt1=[], alt2=[])


def _fit_and_record(label, objective, x0, fit_bounds):
    """Minimize `objective` starting from `x0` and record the outcome.

    Parameters
    ----------
    label : str
        Key into `results` / `cost` ('null', 'alt1' or 'alt2').
    objective : callable
        Function of the parameter vector that returns the cost to minimize.
    x0 : array-like
        Starting parameter values.
    fit_bounds : sequence of (min, max)
        One bound pair per entry of `x0`.

    Returns
    -------
    scipy.optimize.OptimizeResult
        The minimizer result.  It is appended to `results` / `cost` only when
        the minimizer reports success; otherwise the full result is printed.
    """
    result = minimize(objective, x0,
                      method  = 'L-BFGS-B',
                      options = min_options,
                      bounds  = fit_bounds,
                     )
    print(result.fun, result.x[:4])
    if result.success:
        results[label].append(result.x)
        cost[label].append(result.fun)
    else:
        print(result)
    return result


# Parameters kept free in the reduced (alternative-hypothesis) fits.  The mask
# does not change between trials, so it is built once.
# NOTE(review): the original alt2 step only applied `mask[2] = False` on top of
# the alt1 mask, which is a no-op after `mask[1:3] = False`; alt1 and alt2
# therefore share the same free parameters.  Confirm this matches what
# fh.objective_lu expects for test_type=2.
mask = np.ones(params_pre.size, dtype=bool)
mask[1:3] = False

# `bounds` is a plain list, so it cannot be indexed with a numpy array
# (the original `bounds[mask.astype(int)]` raises TypeError); select the
# surviving bound pairs explicitly.
bounds_reduced = [pair for pair, keep in zip(bounds, mask) if keep]

for _ in tqdm_notebook(range(ntrials)):

    # generate toy dataset: draw fresh random numbers for every category and
    # re-evaluate the model with randomization switched on
    for category, rnums in fit_data._rnum_cache.items():
        fit_data._rnum_cache[category] = np.random.randn(rnums.size)
        sample[category] = fit_data.mixture_model(params_pre, category, randomize=True)

    # base objective evaluated on this toy; the alternative-hypothesis
    # objectives below both wrap THIS function (the original code rebound
    # `fobj` and so wrapped the alt1 wrapper a second time for alt2)
    fobj_null = partial(fit_data.objective, data = sample, do_mc_stat=True, randomize_templates=False)

    # carry out fit for null
    result = _fit_and_record('null', fobj_null, params_pre, bounds)

    # carry out fit for alt1
    fobj = partial(fh.objective_lu, data = sample, objective = fobj_null, test_type=1)
    result = _fit_and_record('alt1', fobj, params_pre[mask], bounds_reduced)

    # carry out fit for alt2
    fobj = partial(fh.objective_lu, data = sample, objective = fobj_null, test_type=2)
    result = _fit_and_record('alt2', fobj, params_pre[mask], bounds_reduced)

# NOTE(review): failed fits are skipped per hypothesis, so the three lists in
# `results` / `cost` can end up with different lengths and lose their
# trial-by-trial alignment; check lengths before forming per-trial ratios.

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))