In [1]:
from validphys.api import API
import numpy as np
import pandas as pd

# Name of the fit under study; it is passed to the validphys API calls in this
# notebook, both directly and through the shared input dictionary.
fitname = "htcovmatfit"

In [2]:
# Read the ht coefficient from the `theorycovmatconfig` section of the fit's
# input configuration.
fit_runcard = API.fit(fit=fitname).as_input()
ht_coeff = fit_runcard["theorycovmatconfig"]["ht_coeff"]

# Inputs shared by the validphys calls in this notebook. They are used both for
# the theory predictions that enter the construction of the theory covmat and
# for the theory predictions of the fit performed with the ht covmat (i.e. the
# predictions that should be compared to data).
common_dict = {
    "dataset_inputs": {"from_": "fit"},
    "fit": fitname,
    "fits": [fitname],
    "use_cuts": "fromfit",
    "metadata_group": "nnpdf31_process",
    "theory": {"from_": "fit"},
    "theoryid": {"from_": "theory"},
}

In [3]:
# Theory predictions of the input PDF - these are used to construct the ht covmat.
# The PDF is resolved through the fit's `theorycovmatconfig`, and t0 is switched
# on with the t0 PDF set resolved through the fit's `datacuts`.
S_dict = {
    "theorycovmatconfig": {"from_": "fit"},
    "pdf": {"from_": "theorycovmatconfig"},
    "use_t0": True,
    "datacuts": {"from_": "fit"},
    "t0pdfset": {"from_": "datacuts"},
}

# Merge the two input sets; on a key clash the entry of `common_dict` wins.
covmat_inputs = {**S_dict, **common_dict}
preds_ht_cov_construction = API.group_result_table_no_table(**covmat_inputs)

LHAPDF 6.5.0 loading /home/roy/miniconda3/envs/nnpdf/share/LHAPDF/210718-n3fit-data-003/210718-n3fit-data-003_0000.dat
210718-n3fit-data-003 PDF set, member #0, version 1
LHAPDF 6.5.0 loading all 101 PDFs in set 210718-n3fit-data-003
210718-n3fit-data-003, version 1; 101 PDF members


In [4]:
# Theory predictions of the fit performed with the ht covmat (the PDF is taken
# from the fit itself) - these are the predictions that get compared to data.
fit_pdf_inputs = {"pdf": {"from_": "fit"}, **common_dict}
preds = API.group_result_table_no_table(**fit_pdf_inputs)

In [5]:
# Kinematic information of the data points, requested with the same inputs that
# were used for the covmat-construction predictions.
process_info = API.combine_by_type_ht(**{**S_dict, **common_dict})
kin_table = process_info.data

# Turn every entry of the kinematics mapping into a frame and stack them.
per_entry_frames = [pd.DataFrame(kinematics) for kinematics in kin_table.values()]
kin_df = pd.concat(per_entry_frames)

# Re-label the stacked rows with the grouped data index. The relabelling is
# purely positional - assumes both are ordered identically; TODO confirm.
dsindex = API.groups_index(**common_dict)
kin_df = pd.DataFrame(
    kin_df.values,
    index=dsindex,
    columns=("kin1", "kin2", "kin3"),
)

In [34]:
# NOTE(review): this re-defines `common_dict` with exactly the same entries as
# the definition in the earlier cell, and its execution count (34) is out of
# sequence with the surrounding cells. An assignment produces no cell output,
# so the MultiIndex shown below this cell is presumably stale output from an
# earlier version of the cell - consider removing the cell and re-running the
# notebook from a fresh kernel.
common_dict = dict(
    dataset_inputs={"from_": "fit"},
    fit=fitname,
    fits=[fitname],
    use_cuts="fromfit",
    metadata_group="nnpdf31_process",
    theory={"from_": "fit"},
    theoryid={"from_": "theory"},
)

MultiIndex([('DIS NC',   'HERACOMBNCEP920', 108),
            ('DIS NC',   'HERACOMBNCEP920', 109),
            ('DIS NC',   'HERACOMBNCEP920', 110),
            ('DIS NC',   'HERACOMBNCEP920', 111),
            ('DIS NC',   'HERACOMBNCEP920', 112),
            ('DIS NC',   'HERACOMBNCEP920', 113),
            ('DIS NC',   'HERACOMBNCEP920', 114),
            ('DIS NC',   'HERACOMBNCEP920', 115),
            ('DIS NC',   'HERACOMBNCEP920', 116),
            ('DIS NC',   'HERACOMBNCEP920', 117),
            ...
            ('DIS CC', 'CHORUSNUPb_dw_ite', 596),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 597),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 598),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 599),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 600),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 601),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 602),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 604),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 605),
            ('DIS CC', 'CHORUSNUPb

Compute delta_T_tilde (Eq. 3.37) and P_tilde (Eq. 3.38) of arXiv:2105.05114

In [8]:
# Per-replica theory predictions, one column per replica. The first two columns
# of `preds` are skipped - presumably the data and theory central values;
# TODO confirm against the layout of the result table.
preds_onlyreplicas = preds.iloc[:, 2:].to_numpy()
mean_prediction = np.mean(preds_onlyreplicas, axis=1)

# Covariance of the predictions over replicas, normalised by the number of
# replicas:
#     X_ij = 1/N_rep * sum_k (T_i^(k) - <T_i>) * (T_j^(k) - <T_j>)
# Computed as a single matrix product of the centred replica matrix rather than
# accumulating one (N_dat x N_dat) outer product per replica in a Python loop.
replica_deviations = preds_onlyreplicas - mean_prediction[:, np.newaxis]
X = (replica_deviations @ replica_deviations.T) / preds_onlyreplicas.shape[1]

In [9]:
# Pseudodata used by the fit, one entry per replica.
pseudodata = API.read_pdf_pseudodata(**common_dict)

# Reorder each replica's pseudodata to follow the rows of `preds`, flatten it
# to a 1-D array and average over replicas to obtain the central values.
prediction_order = preds.index.to_list()
pseudodata_per_replica = [
    replica.pseudodata.reindex(prediction_order).to_numpy().flatten()
    for replica in pseudodata
]
dat_central = np.mean(pseudodata_per_replica, axis=0)

In [11]:
# Theory covariance matrix.
# The shift of each prediction is the central theory prediction divided by
# kin2**2 and by (1 - kin1), scaled by the ht coefficient; S is the outer
# product of that shift vector with itself, labelled by the data index.
central_theory = preds_ht_cov_construction["theory_central"]
ht_shape = central_theory / kin_df["kin2"] ** 2 / (1 - kin_df["kin1"])
delta_pred = ht_coeff * ht_shape.to_numpy()
S = pd.DataFrame(np.outer(delta_pred, delta_pred), index=dsindex, columns=dsindex)

# Experimental covariance matrix.
C = API.groups_covmat_no_table(**common_dict)

In [12]:
# Two-point (+/-) variation of the ht coefficient and the corresponding shifts
# of the predictions.
# NOTE(review): with beta = [delta_pred, -delta_pred] the sum of beta beta^T is
# 2 * outer(delta_pred, delta_pred), whereas the S entering `invcov` is
# outer(delta_pred, delta_pred); likewise beta_tilde scales as sqrt(ht_coeff)
# while delta_pred scales as ht_coeff. Confirm the normalisation against the
# definitions in arXiv:2105.05114.
beta_tilde = np.array([1, -1]) * np.sqrt(ht_coeff / 2)
S_tilde = beta_tilde @ beta_tilde
beta = [delta_pred, -delta_pred]
S_hat = beta_tilde @ beta

# Inverse of the total (experimental + theory) covariance matrix.
invcov = np.linalg.inv(C + S)

# Central value of the ht coefficient around which the shift is computed.
ht_coeff_central = 0.0

# S_hat is one-dimensional, so no transpose is needed; the common left factor
# S_hat @ invcov is evaluated once and reused.
shat_invcov = S_hat @ invcov

# Shift of the coefficient, delta_T_tilde (Eq. 3.37 of arXiv:2105.05114).
delta_T_tilde = shat_invcov @ (dat_central - mean_prediction)

# Its variance, P_tilde (Eq. 3.38 of arXiv:2105.05114).
P_tilde = shat_invcov @ X @ invcov @ S_hat + (S_tilde - shat_invcov @ S_hat)

pred = ht_coeff_central + delta_T_tilde
unc = np.sqrt(P_tilde)

In [166]:
# Cross-check: load the theory covmat that was stored with the fit and only
# report the result if the matrix S reconstructed in this notebook agrees with it.
fitpath = API.fit(fit=fitname).path
try:
    # First try the user covmat table.
    # NOTE(review): in this branch the table is compared to S as read, without
    # being reindexed to C.index - confirm its row/column order matches S.
    stored_covmat = pd.read_csv(
        fitpath / "tables/datacuts_theory_theorycovmatconfig_user_covmat.csv",
        sep="\t",
        encoding="utf-8",
        index_col=2,
        header=3,
        skip_blank_lines=False,
    )
except FileNotFoundError:
    # Fall back to the custom theory covmat table, which carries a three-level
    # index on both axes; missing entries are filled with zeros.
    stored_covmat = pd.read_csv(
        fitpath / "tables/datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv",
        index_col=[0, 1, 2],
        header=[0, 1, 2],
        sep="\t|,",
        engine="python",
    ).fillna(0)
    # Rebuild the index with an integer point id and use it for both axes, so
    # that the table can be aligned with the index of C.
    storedcovmat_index = pd.MultiIndex.from_tuples(
        [(aa, bb, np.int64(cc)) for aa, bb, cc in stored_covmat.index],
        names=['group', 'dataset', 'id']
    )
    stored_covmat = pd.DataFrame(
        stored_covmat.values, index=storedcovmat_index, columns=storedcovmat_index
    )
    # Bring rows and columns into the order of the experimental covmat.
    stored_covmat = stored_covmat.reindex(C.index).T.reindex(C.index)

if np.allclose(S, stored_covmat):
    # The quantity determined in this notebook is the higher-twist coefficient
    # (ht_coeff), not alpha_s, so the message is labelled accordingly.
    print(f"Prediction for the higher-twist coefficient: {pred:.5f} ± {unc:.5f}")
else:
    print("Reconstructed theory covmat, S, is not the same as the stored covmat!")