In [1]:
from validphys.api import API
import numpy as np
import pandas as pd

# Name of the fit analysed in this notebook; it is passed to the validphys API calls below.
fitname = "htcovmatfit"

In [2]:
# Read the `ht_coeff` entry from the `theorycovmatconfig` section of the fit's input.
fit_input = API.fit(fit=fitname).as_input()
ht_coeff = fit_input["theorycovmatconfig"]["ht_coeff"]

# Settings shared by the API calls in this notebook. They serve two purposes:
#   * producing the theory predictions from which the theory covmat is constructed, and
#   * producing the theory predictions of the fit performed with the ht covmat
#     (i.e. the predictions that should be compared to data).
common_dict = {
    "dataset_inputs": {"from_": "fit"},
    "fit": fitname,
    "fits": [fitname],
    "use_cuts": "fromfit",
    "metadata_group": "nnpdf31_process",
    "theory": {"from_": "fit"},
    "theoryid": {"from_": "theory"},
}

In [3]:
# Theory predictions of the input PDF (the one named in the fit's theorycovmatconfig).
# These predictions are the ones used to construct the ht covmat.
S_dict = {
    "theorycovmatconfig": {"from_": "fit"},
    "pdf": {"from_": "theorycovmatconfig"},
    "use_t0": True,
    "datacuts": {"from_": "fit"},
    "t0pdfset": {"from_": "datacuts"},
}
# Merge the two configurations; on a key clash the value from common_dict wins.
preds_ht_cov_construction = API.group_result_table_no_table(**{**S_dict, **common_dict})

LHAPDF 6.5.0 loading /home/roy/miniconda3/envs/nnpdf/share/LHAPDF/210718-n3fit-data-003/210718-n3fit-data-003_0000.dat
210718-n3fit-data-003 PDF set, member #0, version 1
LHAPDF 6.5.0 loading all 101 PDFs in set 210718-n3fit-data-003
210718-n3fit-data-003, version 1; 101 PDF members


In [4]:
# Calculate theory predictions of the fit with ht covmat - this will be compared to data
# preds = API.group_result_table_no_table(pdf={"from_": "fit"}, **common_dict)

In [5]:
process_info = API.combine_by_type_ht(**{**S_dict, **common_dict})
kin_table = process_info.data
dsindex = API.groups_index(**common_dict)

# Stack one DataFrame per entry of kin_table, in the mapping's iteration order, then
# relabel the rows with dsindex and the three columns as kin1/kin2/kin3.
# NOTE(review): this assumes the stacked rows come out in the same order as dsindex - confirm.
kin_stacked = pd.concat([pd.DataFrame(block) for block in kin_table.values()])
kin_df = pd.DataFrame(kin_stacked.values, index=dsindex, columns=("kin1", "kin2", "kin3"))

In [35]:
# Inspect the index returned by API.groups_index.
dsindex

MultiIndex([('DIS NC',   'HERACOMBNCEP920', 108),
            ('DIS NC',   'HERACOMBNCEP920', 109),
            ('DIS NC',   'HERACOMBNCEP920', 110),
            ('DIS NC',   'HERACOMBNCEP920', 111),
            ('DIS NC',   'HERACOMBNCEP920', 112),
            ('DIS NC',   'HERACOMBNCEP920', 113),
            ('DIS NC',   'HERACOMBNCEP920', 114),
            ('DIS NC',   'HERACOMBNCEP920', 115),
            ('DIS NC',   'HERACOMBNCEP920', 116),
            ('DIS NC',   'HERACOMBNCEP920', 117),
            ...
            ('DIS CC', 'CHORUSNUPb_dw_ite', 596),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 597),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 598),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 599),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 600),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 601),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 602),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 604),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 605),
            ('DIS CC', 'CHORUSNUPb

In [34]:
# `common_dict` is already defined near the top of the notebook. Reuse it here instead of
# rebuilding an identical copy, so the two definitions cannot drift apart.
# The result can be compared by eye with `dsindex` (built with API.groups_index).
API.procs_index(**common_dict)

MultiIndex([('DIS NC',   'HERACOMBNCEP920', 108),
            ('DIS NC',   'HERACOMBNCEP920', 109),
            ('DIS NC',   'HERACOMBNCEP920', 110),
            ('DIS NC',   'HERACOMBNCEP920', 111),
            ('DIS NC',   'HERACOMBNCEP920', 112),
            ('DIS NC',   'HERACOMBNCEP920', 113),
            ('DIS NC',   'HERACOMBNCEP920', 114),
            ('DIS NC',   'HERACOMBNCEP920', 115),
            ('DIS NC',   'HERACOMBNCEP920', 116),
            ('DIS NC',   'HERACOMBNCEP920', 117),
            ...
            ('DIS CC', 'CHORUSNUPb_dw_ite', 596),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 597),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 598),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 599),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 600),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 601),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 602),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 604),
            ('DIS CC', 'CHORUSNUPb_dw_ite', 605),
            ('DIS CC', 'CHORUSNUPb

In [6]:
# Inspect the `namelist` attribute of the object returned by API.combine_by_type_ht.
process_info.namelist

defaultdict(list,
            {'DIS NC': ['HERACOMBNCEP920', 'NMCPD_dw_ite'],
             'DIS CC': ['NTVNBDMNFe_dw_ite', 'CHORUSNUPb_dw_ite']})

In [7]:
# Inspect the `sizes` attribute of the object returned by API.combine_by_type_ht
# (presumably the number of data points per dataset - TODO confirm).
process_info.sizes

defaultdict(list,
            {'HERACOMBNCEP920': 377,
             'NMCPD_dw_ite': 121,
             'NTVNBDMNFe_dw_ite': 37,
             'CHORUSNUPb_dw_ite': 416})

Compute delta_T_tilde (Eq. 3.37) and P_tilde (Eq. 3.38) of arXiv:2105.05114

In [8]:
# preds_onlyreplicas = preds.iloc[:, 2:].to_numpy()
# mean_prediction = np.mean(preds_onlyreplicas,axis=1)

# X = np.zeros((preds.shape[0],preds.shape[0]))
# for i in range(preds_onlyreplicas.shape[1]):
#     X += np.outer(
#         (preds_onlyreplicas[:, i] - mean_prediction), (preds_onlyreplicas[:, i] - mean_prediction)
#     )
# X *= 1 / preds_onlyreplicas.shape[1]

In [9]:
# pseudodata = API.read_pdf_pseudodata(**common_dict)
# dat_central = np.mean(
#     [i.pseudodata.reindex(preds.index.to_list()).to_numpy().flatten() for i in pseudodata],
#     axis=0,
# )

In [10]:
# Count the positions at which the index of the central predictions differs from dsindex.
# A result of 0 means the two indices agree entry by entry.
sum(preds_ht_cov_construction["theory_central"].index != dsindex)

0

In [11]:
# Theory covariance matrix: outer product of the shift vector `delta_pred` with itself,
# labelled on both axes by dsindex.
central_pred = preds_ht_cov_construction["theory_central"]
scaled_pred = central_pred / kin_df["kin2"] ** 2 / (1 - kin_df["kin1"])
delta_pred = ht_coeff * scaled_pred.to_numpy()
S = pd.DataFrame(np.outer(delta_pred, delta_pred), index=dsindex, columns=dsindex)

# Experimental covariance matrix
C = API.groups_covmat_no_table(**common_dict)

In [12]:
# beta_tilde = np.sqrt(ht_coeff / 2) * np.array([1, -1])
# S_tilde = beta_tilde @ beta_tilde
# beta = [delta_pred, -delta_pred]
# S_hat = beta_tilde @ beta

# invcov = np.linalg.inv(C+S)

# ht_coeff_central = 0.0
# delta_T_tilde = S_hat @ invcov @ (dat_central - mean_prediction)
# P_tilde = S_hat.T @ invcov @ X @ invcov @ S_hat + (S_tilde - S_hat.T @ invcov @ S_hat)
# pred = ht_coeff_central + delta_T_tilde
# unc = np.sqrt(P_tilde)

In [166]:
# Load the theory covmat stored in the fit's tables folder and check that the matrix S
# reconstructed in this notebook reproduces it.
fitpath = API.fit(fit=fitname).path
try:
    # First choice: the "user_covmat" table.
    stored_covmat = pd.read_csv(
        fitpath / "tables/datacuts_theory_theorycovmatconfig_user_covmat.csv",
        sep="\t",
        encoding="utf-8",
        index_col=2,
        header=3,
        skip_blank_lines=False,
    )
except FileNotFoundError:
    # Fallback: the "theory_covmat_custom" table, which has three index and three header levels.
    stored_covmat = pd.read_csv(
            fitpath / "tables/datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv",
            index_col=[0, 1, 2],
            header=[0, 1, 2], 
            sep="\t|,",  # regex separator (tab or comma); requires the python engine
            engine="python",
            # converters={'id': np.int64}
        ).fillna(0)
    # Rebuild the index with integer ids and use it for both rows and columns.
    # NOTE(review): the cast presumably makes the labels comparable with C.index - confirm.
    storedcovmat_index = pd.MultiIndex.from_tuples(
        [(aa, bb, np.int64(cc)) for aa, bb, cc in stored_covmat.index],
        names=['group', 'dataset', 'id']
    )
    stored_covmat = pd.DataFrame(stored_covmat.values, index=storedcovmat_index, columns=storedcovmat_index)
    # Put rows and columns in the same order as the experimental covmat C.
    stored_covmat = stored_covmat.reindex(C.index).T.reindex(C.index)
if np.allclose(S, stored_covmat):
    # The original message printed `pred` and `unc`, which are only assigned in a cell that
    # is currently commented out, so this branch raised NameError on a fresh kernel.
    print("Reconstructed theory covmat, S, matches the stored covmat.")
else:
    print("Reconstructed theory covmat, S, is not the same as the stored covmat!")

In [172]:
# Element-wise comparison of S and stored_covmat within np.allclose's default tolerances.
np.allclose(S, stored_covmat)

True

In [168]:
# Display the theory covmat reconstructed in this notebook.
S

Unnamed: 0_level_0,Unnamed: 1_level_0,group,DIS NC,DIS NC,DIS NC,DIS NC,DIS NC,DIS NC,DIS NC,DIS NC,DIS NC,DIS NC,...,DIS CC,DIS CC,DIS CC,DIS CC,DIS CC,DIS CC,DIS CC,DIS CC,DIS CC,DIS CC
Unnamed: 0_level_1,Unnamed: 1_level_1,dataset,HERACOMBNCEP920,HERACOMBNCEP920,HERACOMBNCEP920,HERACOMBNCEP920,HERACOMBNCEP920,HERACOMBNCEP920,HERACOMBNCEP920,HERACOMBNCEP920,HERACOMBNCEP920,HERACOMBNCEP920,...,CHORUSNUPb_dw_ite,CHORUSNUPb_dw_ite,CHORUSNUPb_dw_ite,CHORUSNUPb_dw_ite,CHORUSNUPb_dw_ite,CHORUSNUPb_dw_ite,CHORUSNUPb_dw_ite,CHORUSNUPb_dw_ite,CHORUSNUPb_dw_ite,CHORUSNUPb_dw_ite
Unnamed: 0_level_2,Unnamed: 1_level_2,id,108,109,110,111,112,113,114,115,116,117,...,596,597,598,599,600,601,602,604,605,606
group,dataset,id,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3
DIS NC,HERACOMBNCEP920,108,0.032029,0.032168,0.032261,0.032289,0.032270,0.031387,0.029545,0.027721,0.025708,0.023829,...,6.087842e-04,2.518271e-04,1.341826e-04,1.243875e-03,2.648796e-04,1.073352e-04,5.635695e-05,1.057020e-04,4.185017e-05,2.163131e-05
DIS NC,HERACOMBNCEP920,109,0.032168,0.032307,0.032401,0.032429,0.032410,0.031523,0.029674,0.027841,0.025819,0.023933,...,6.114255e-04,2.529197e-04,1.347648e-04,1.249271e-03,2.660288e-04,1.078009e-04,5.660146e-05,1.061606e-04,4.203174e-05,2.172516e-05
DIS NC,HERACOMBNCEP920,110,0.032261,0.032401,0.032496,0.032523,0.032504,0.031615,0.029760,0.027922,0.025895,0.024003,...,6.132081e-04,2.536571e-04,1.351577e-04,1.252913e-03,2.668044e-04,1.081152e-04,5.676648e-05,1.064701e-04,4.215428e-05,2.178850e-05
DIS NC,HERACOMBNCEP920,111,0.032289,0.032429,0.032523,0.032551,0.032532,0.031642,0.029785,0.027946,0.025916,0.024023,...,6.137240e-04,2.538704e-04,1.352714e-04,1.253968e-03,2.670288e-04,1.082061e-04,5.681423e-05,1.065596e-04,4.218975e-05,2.180683e-05
DIS NC,HERACOMBNCEP920,112,0.032270,0.032410,0.032504,0.032532,0.032513,0.031623,0.029768,0.027929,0.025901,0.024009,...,6.133657e-04,2.537223e-04,1.351925e-04,1.253236e-03,2.668730e-04,1.081430e-04,5.678107e-05,1.064974e-04,4.216512e-05,2.179410e-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
DIS CC,CHORUSNUPb_dw_ite,601,0.000107,0.000108,0.000108,0.000108,0.000108,0.000105,0.000099,0.000093,0.000086,0.000080,...,2.040168e-06,8.439273e-07,4.496752e-07,4.168494e-06,8.876690e-07,3.597036e-07,1.888644e-07,3.542303e-07,1.402490e-07,7.249123e-08
DIS CC,CHORUSNUPb_dw_ite,602,0.000056,0.000057,0.000057,0.000057,0.000057,0.000055,0.000052,0.000049,0.000045,0.000042,...,1.071201e-06,4.431087e-07,2.361044e-07,2.188691e-06,4.660755e-07,1.888644e-07,9.916428e-08,1.859905e-07,7.363851e-08,3.806192e-08
DIS CC,CHORUSNUPb_dw_ite,604,0.000106,0.000106,0.000106,0.000107,0.000106,0.000104,0.000098,0.000091,0.000085,0.000079,...,2.009124e-06,8.310858e-07,4.428328e-07,4.105065e-06,8.741619e-07,3.542303e-07,1.859905e-07,3.488402e-07,1.381149e-07,7.138818e-08
DIS CC,CHORUSNUPb_dw_ite,605,0.000042,0.000042,0.000042,0.000042,0.000042,0.000041,0.000039,0.000036,0.000034,0.000031,...,7.954647e-07,3.290486e-07,1.753290e-07,1.625302e-06,3.461035e-07,1.402490e-07,7.363851e-08,1.381149e-07,5.468330e-08,2.826444e-08


In [169]:
# Display the covmat read from the fit's tables folder, for comparison with S.
stored_covmat

Unnamed: 0_level_0,Unnamed: 1_level_0,group,DIS NC,DIS NC,DIS NC,DIS NC,DIS NC,DIS NC,DIS NC,DIS NC,DIS NC,DIS NC,...,DIS CC,DIS CC,DIS CC,DIS CC,DIS CC,DIS CC,DIS CC,DIS CC,DIS CC,DIS CC
Unnamed: 0_level_1,Unnamed: 1_level_1,dataset,HERACOMBNCEP920,HERACOMBNCEP920,HERACOMBNCEP920,HERACOMBNCEP920,HERACOMBNCEP920,HERACOMBNCEP920,HERACOMBNCEP920,HERACOMBNCEP920,HERACOMBNCEP920,HERACOMBNCEP920,...,CHORUSNUPb_dw_ite,CHORUSNUPb_dw_ite,CHORUSNUPb_dw_ite,CHORUSNUPb_dw_ite,CHORUSNUPb_dw_ite,CHORUSNUPb_dw_ite,CHORUSNUPb_dw_ite,CHORUSNUPb_dw_ite,CHORUSNUPb_dw_ite,CHORUSNUPb_dw_ite
Unnamed: 0_level_2,Unnamed: 1_level_2,id,108,109,110,111,112,113,114,115,116,117,...,596,597,598,599,600,601,602,604,605,606
group,dataset,id,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3
DIS NC,HERACOMBNCEP920,108,0.032029,0.032168,0.032261,0.032289,0.032270,0.031387,0.029545,0.027721,0.025708,0.023829,...,6.087842e-04,2.518271e-04,1.341826e-04,1.243875e-03,2.648796e-04,1.073352e-04,5.635695e-05,1.057020e-04,4.185017e-05,2.163131e-05
DIS NC,HERACOMBNCEP920,109,0.032168,0.032307,0.032401,0.032429,0.032410,0.031523,0.029674,0.027841,0.025819,0.023933,...,6.114254e-04,2.529197e-04,1.347648e-04,1.249271e-03,2.660288e-04,1.078009e-04,5.660146e-05,1.061606e-04,4.203174e-05,2.172516e-05
DIS NC,HERACOMBNCEP920,110,0.032261,0.032401,0.032496,0.032523,0.032504,0.031615,0.029760,0.027922,0.025895,0.024003,...,6.132081e-04,2.536570e-04,1.351577e-04,1.252913e-03,2.668044e-04,1.081152e-04,5.676648e-05,1.064701e-04,4.215428e-05,2.178850e-05
DIS NC,HERACOMBNCEP920,111,0.032289,0.032429,0.032523,0.032551,0.032532,0.031642,0.029785,0.027946,0.025916,0.024023,...,6.137239e-04,2.538704e-04,1.352714e-04,1.253968e-03,2.670288e-04,1.082061e-04,5.681423e-05,1.065596e-04,4.218975e-05,2.180683e-05
DIS NC,HERACOMBNCEP920,112,0.032270,0.032410,0.032504,0.032532,0.032513,0.031623,0.029768,0.027929,0.025901,0.024009,...,6.133657e-04,2.537222e-04,1.351924e-04,1.253236e-03,2.668730e-04,1.081430e-04,5.678107e-05,1.064974e-04,4.216512e-05,2.179410e-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
DIS CC,CHORUSNUPb_dw_ite,601,0.000107,0.000108,0.000108,0.000108,0.000108,0.000105,0.000099,0.000093,0.000086,0.000080,...,2.040168e-06,8.439272e-07,4.496752e-07,4.168494e-06,8.876690e-07,3.597036e-07,1.888644e-07,3.542303e-07,1.402490e-07,7.249123e-08
DIS CC,CHORUSNUPb_dw_ite,602,0.000056,0.000057,0.000057,0.000057,0.000057,0.000055,0.000052,0.000049,0.000045,0.000042,...,1.071201e-06,4.431087e-07,2.361044e-07,2.188691e-06,4.660755e-07,1.888644e-07,9.916428e-08,1.859905e-07,7.363851e-08,3.806192e-08
DIS CC,CHORUSNUPb_dw_ite,604,0.000106,0.000106,0.000106,0.000107,0.000106,0.000104,0.000098,0.000091,0.000085,0.000079,...,2.009124e-06,8.310857e-07,4.428327e-07,4.105064e-06,8.741619e-07,3.542303e-07,1.859905e-07,3.488402e-07,1.381149e-07,7.138818e-08
DIS CC,CHORUSNUPb_dw_ite,605,0.000042,0.000042,0.000042,0.000042,0.000042,0.000041,0.000039,0.000036,0.000034,0.000031,...,7.954647e-07,3.290486e-07,1.753290e-07,1.625302e-06,3.461035e-07,1.402490e-07,7.363851e-08,1.381149e-07,5.468331e-08,2.826444e-08
