# PREAMBLE
<script
  src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"
  type="text/javascript">
</script>

In [1]:
import numpy as np
import numpy.linalg as la
import pandas as pd
import yaml
from matplotlib import pyplot as plt

from validphys.api import API
from validphys.loader import FallbackLoader as Loader
# Instantiate the validphys loader (imported above as FallbackLoader -> Loader).
# NOTE(review): `l` is not referenced by any later cell visible here — confirm
# whether it is still needed before relying on or removing it.
l = Loader()

# Definition of the input

In [2]:
# Name of the fit to analyse; it is handed to the validphys API below.
fit = "NNPDF40_ht_test"

# Theory ID used for all the predictions computed in this notebook.
theory = 200

# Prior central value of the constant and size of the prior variation
# (the step enters `beta_tilde` further down).
ht_const = 1
ht_const_step_size = 0.2

# Overall rescaling of the covmat; its square root multiplies the shifts.
covmat_scaling_factor = 1

In [3]:
# Keyword arguments shared by the validphys API calls below: the datasets and
# the cuts are both taken from the fit itself.
common_dict = {
    "dataset_inputs": {"from_": "fit"},
    "fit": fit,
    "fits": [fit],
    "use_cuts": "fromfit",
    "metadata_group": "nnpdf31_process",
}

# COMPUTATION OF $\alpha_s$

In [4]:
# Locate the fit folder and read its runcard (filter.yml) to recover the PDF
# that was used to compute the theory covariance matrix.
# `yaml` comes from the notebook's import cell, so that every dependency is
# declared in one place; the encoding is explicit so that reading the runcard
# does not depend on the locale of the machine running the notebook.
fitpath = API.fit(fit=fit).path
filterpath = fitpath / 'filter.yml'
with open(filterpath, encoding="utf-8") as f:
    filterfile = yaml.safe_load(f)
pdf_ori = filterfile['theorycovmatconfig']['pdf']  # PDF used to compute theory covmat

In [5]:
# validphys inputs evaluated with the PDF that was used to build the theory
# covmat (`pdf_ori`): t0 is enabled, with the t0 PDF set and the cuts both
# resolved from the fit configuration.
inps_central = dict(
    theoryid=theory,
    pdf=pdf_ori,
    use_t0=True,
    datacuts={"from_": "fit"},
    t0pdfset={"from_": "datacuts"},
    **common_dict,
)

In [6]:
# Same theory and datasets, but evaluated with the PDF produced by the fit.
inps_central_fit = dict(
    theoryid=theory,
    pdf={"from_": "fit"},
    **common_dict,
)

In [7]:
# Experimental covariance matrix of the grouped datasets, computed with the
# `inps_central` configuration (PDF used for the theory covmat, use_t0=True).
# NOTE(review): later cells use `C.index` and `C.values`, so this is presumably
# a labelled pandas DataFrame — confirm against the validphys API.
C = API.groups_covmat_no_table(**inps_central)

LHAPDF 6.4.0 loading /Users/s2569857/venvs/Conda/nnpdf/share/LHAPDF/210718-n3fit-data-003/210718-n3fit-data-003_0000.dat
210718-n3fit-data-003 PDF set, member #0, version 1


In [8]:
# Index returned by validphys for the grouped datasets; it is used below as the
# row index of the kinematics table (`kin_df`).
dsindex=API.groups_index(**inps_central)

In [9]:
# Data/theory result tables for the same datasets, evaluated with two PDFs:
#   datth_central     -> PDF used to build the theory covmat (`pdf_ori`)
#   datth_central_fit -> PDF produced by the fit itself
# NOTE(review): later cells read the "theory_central" column of the first table
# and treat the columns from position 2 onwards of the second one as
# per-replica predictions — confirm the column layout.
datth_central = API.group_result_table_no_table(**inps_central)
datth_central_fit = API.group_result_table_no_table(**inps_central_fit)

LHAPDF 6.4.0 loading all 101 PDFs in set 210718-n3fit-data-003
210718-n3fit-data-003, version 1; 101 PDF members
LHAPDF 6.4.0 loading all 4 PDFs in set NNPDF40_ht_test
NNPDF40_ht_test, version 1; 4 PDF members


In [10]:
# Kinematics table: one row per entry of `dsindex`, three kinematic variables
# per row (kin1, kin2, kin3).
# TODO(check): confirm that the concatenated kinematics come out in the same
# order as `dsindex` — the relabelling below is purely positional.
kin_table = API.combine_by_type_ht(**inps_central).data
kin_stacked = pd.concat([pd.DataFrame(block) for block in kin_table.values()])
kin_df = pd.DataFrame(
    kin_stacked.values,
    index=dsindex,
    columns=('kin1', 'kin2', 'kin3'),
)

In [11]:
# Theory predictions of the fit as a plain array: the first two columns of the
# result table are skipped and the remaining ones are kept; later cells treat
# each remaining column as one replica.
# NOTE(review): the two skipped columns are presumably the central data and
# central theory values — confirm.
th_replicas_fit=datth_central_fit.iloc[:,2:].to_numpy()

Computation of Eqs. (3.37)–(3.38) of [arXiv:2105.05114](https://arxiv.org/pdf/2105.05114.pdf)

In [12]:
# Shifts of the constant itself under the two prior variations (+step, -step),
# normalised by sqrt(2) and rescaled by sqrt(covmat_scaling_factor).
prior_shift = np.sqrt(covmat_scaling_factor) * (ht_const_step_size / np.sqrt(2))
beta_tilde = prior_shift * np.array([1, -1])

# Prior variance of the constant: beta_tilde . beta_tilde
S_tilde = beta_tilde @ beta_tilde

In [13]:
# Shift of the theory predictions induced by `ht_const`:
#   delta = ht_const * theory_central / kin2**2 / (1 - kin1)
# NOTE(review): the division acts on pandas objects and therefore aligns them
# on their index labels — this relies on `datth_central` and `kin_df` sharing
# the same index; confirm.
unit_shift = datth_central["theory_central"] / kin_df['kin2'] ** 2 / (1 - kin_df['kin1'])
delta = ht_const * unit_shift.to_numpy()

# Upward and downward variations, with the same sqrt(covmat_scaling_factor)
# and 1/sqrt(2) factors that enter `beta_tilde`.
shift_norm = np.sqrt(covmat_scaling_factor) / np.sqrt(2)
delta_plus = shift_norm * delta
delta_minus = -shift_norm * delta
beta = [delta_plus, delta_minus]

# Cross term between the shifts of the constant and the shifts of the predictions.
S_hat = beta_tilde @ beta

In [14]:
# Theory covariance matrix induced by the variations of `ht_const`:
#   S = outer(delta_plus, delta_plus) + outer(delta_minus, delta_minus)
#     = covmat_scaling_factor * outer(delta, delta)
# The scaling factor is kept explicitly so that S stays consistent with S_hat
# and S_tilde, which are both built from the rescaled shifts. For
# covmat_scaling_factor = 1 this is identical to outer(delta, delta).
# TODO(check): confirm this normalisation against the covmat stored with the fit.
# The matrix is labelled directly with the index of C so that C + S lines up.
S = pd.DataFrame(
    covmat_scaling_factor * np.outer(delta, delta),
    index=C.index,
    columns=C.index,
)

In [15]:
# S.to_csv("alphascovmat_01190_extended_nnpdf40_without_nuclearuncs_ernfits_fixed.csv")

In [16]:
# Inverse of the total covariance matrix (experimental C + theory S).
# S was relabelled with the index of C, so the DataFrame sum is label-aligned;
# the result of la.inv is used as a plain matrix in the products below.
invcov = la.inv(C+S)

In [17]:
# Average of the theory predictions over the columns of `th_replicas_fit`
# (one column per replica). Note that this differs from the prediction of the
# mean PDF (i.e. replica0).
mean_prediction = th_replicas_fit.mean(axis=1)

In [18]:
# Covariance of the theory predictions over the fit replicas,
#   X = (1 / N_rep) * sum_k (T_k - <T>) (T_k - <T>)^T,
# where T_k is the k-th column of `th_replicas_fit`.
# A single matrix product replaces the Python loop that accumulated one dense
# outer product per replica.
deviations = th_replicas_fit - mean_prediction[:, np.newaxis]
X = deviations @ deviations.T / th_replicas_fit.shape[1]

# X is combined with the inverse of (C + S) below, so it must have the shape of C.
assert X.shape == C.shape, "X and C must describe the same data points"

Final result

In [19]:
# Pseudodata replicas read for the fit selected in `common_dict`; the next cell
# iterates over them and reads the `.pseudodata` table of each element.
pseudodata = API.read_pdf_pseudodata(**common_dict)

In [20]:
# Central data, taken as the average over the pseudodata replicas (the author's
# "BUG" note asked for dat_central to become exactly this average).
# Every replica is first reordered to follow the index of `datth_central`.
# Previous alternative, kept for reference:
#   dat_central = datth_central["data_central"]
target_index = datth_central.index.to_list()
replica_values = [
    replica.pseudodata.reindex(target_index).to_numpy().flatten()
    for replica in pseudodata
]
dat_central = np.mean(replica_values, axis=0)

In [21]:
# Posterior shift and variance of the constant (see the equations of
# arXiv:2105.05114 quoted in the markdown above).
residual = dat_central - mean_prediction
row_weights = S_hat.T @ invcov

# Shift of the central value driven by the data-theory residual.
delta_T_tilde = S_hat @ invcov @ residual

# Variance: contribution of the fit replicas plus the reduced prior variance.
replica_term = row_weights @ X @ invcov @ S_hat
prior_term = S_tilde - row_weights @ S_hat
P_tilde = replica_term + prior_term

pred = ht_const + delta_T_tilde
unc = np.sqrt(P_tilde)

In [22]:
# Cross-check before reporting: the theory covmat reconstructed above (S) must
# agree with the one stored with the fit. The stored table can live under
# either of two file names; they are tried in order and the first one found is
# read with identical parsing options.
covmat_tables = (
    'tables/datacuts_theory_theorycovmatconfig_user_covmat.csv',
    'tables/datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv',
)
for covmat_table in covmat_tables:
    try:
        stored_covmat = pd.read_csv(
            fitpath / covmat_table,
            sep='\t', encoding='utf-8', index_col=2, header=3, skip_blank_lines=False,
        )
    except FileNotFoundError:
        continue
    break
else:
    raise FileNotFoundError(
        f"No stored theory covmat found under {fitpath}; tried: {', '.join(covmat_tables)}"
    )

# The first two columns of the parsed table are skipped before comparing.
# NOTE(review): presumably they hold the remaining index levels — confirm.
stored_values = stored_covmat.to_numpy()[:, 2:].astype('float64')

# NOTE(review): the label below says alpha_s although the value is built from
# `ht_const`; left unchanged here — confirm the intended wording.
if np.allclose(S.to_numpy(), stored_values, atol=1e-1):
    print(rf"Prediction for $\alpha_s$: {pred:.5f} ± {unc:.5f}")
else:
    print("Reconstructed theory covmat, S, is not the same as the stored covmat!")

Reconstructed theory covmat, S, is note the same as the stored covmat!
