In [None]:
from validphys.api import API
import numpy as np
import pandas as pd

# Name of the fit analysed in this notebook; it is passed to the validphys API calls below.
fitname = "231215-01-rs-ht-tcm-disonly"

In [None]:
# Read the higher-twist coefficient from the "theorycovmatconfig" section of the fit's input.
fit_input = API.fit(fit=fitname).as_input()
ht_coeff = fit_input["theorycovmatconfig"]["ht_coeff"]

# Keyword arguments shared by the API calls in this notebook. They are used both for the theory
# predictions that enter the construction of the theory covmat and for the theory predictions
# of the fit performed with the ht covmat (i.e. the predictions that should be compared to data).
common_dict = {
    "dataset_inputs": {"from_": "fit"},
    "fit": fitname,
    "fits": [fitname],
    "use_cuts": "fromfit",
    "metadata_group": "nnpdf31_process",
    "theory": {"from_": "fit"},
    "theoryid": {"from_": "theory"},
}

In [None]:
# Theory predictions of the input PDF, i.e. the PDF named in the fit's theorycovmatconfig.
# These predictions are the ones used to construct the ht covmat.
S_dict = {
    "theorycovmatconfig": {"from_": "fit"},
    "pdf": {"from_": "theorycovmatconfig"},
    "use_t0": True,
    "datacuts": {"from_": "fit"},
    "t0pdfset": {"from_": "datacuts"},
}
# The two dicts share no keys, so the order of the union does not matter here.
covmat_construction_kwargs = S_dict | common_dict
preds_ht_cov_construction = API.group_result_table_no_table(**covmat_construction_kwargs)

In [None]:
# Theory predictions obtained with the PDF of the fit that used the ht covmat; these are the
# predictions that get compared to data further down.
fitted_pdf = {"from_": "fit"}
preds = API.group_result_table_no_table(pdf=fitted_pdf, **common_dict)

In [None]:
# Count how many rows of `preds` carry each distinct value of the second index level
# (presumably the dataset name -- confirm against the index layout).
# np.unique is called once and both the labels and their counts are unpacked from it.
second_level_labels = [entry[1] for entry in preds.index]
unique_labels, label_counts = np.unique(second_level_labels, return_counts=True)
for label, count in zip(unique_labels, label_counts):
    print(label, count)


In [None]:
# Kinematics of every data point, as returned by combine_by_type_ht.
process_info = API.combine_by_type_ht(**(S_dict | common_dict))
kin_table = process_info.data
dsindex = API.groups_index(**common_dict)

# Stack the per-entry kinematic tables and relabel the rows with the groups index.
# NOTE(review): the values are relabelled positionally, so this assumes the stacked rows are
# already in the same order as dsindex -- confirm.
kin_frames = [pd.DataFrame(values) for values in kin_table.values()]
kin_df = pd.DataFrame(
    pd.concat(kin_frames).values,
    index=dsindex,
    columns=('kin1', 'kin2', 'kin3'),
)

In [None]:
# NOTE(review): this repeats, key for key, the common_dict defined near the top of the notebook.
# It is kept so that the cells below can be re-run from here, but the two definitions must be
# kept in sync (or this cell removed).
common_dict = {
    "dataset_inputs": {"from_": "fit"},
    "fit": fitname,
    "fits": [fitname],
    "use_cuts": "fromfit",
    "metadata_group": "nnpdf31_process",
    "theory": {"from_": "fit"},
    "theoryid": {"from_": "theory"},
}

In [None]:
# Keep only the per-replica predictions: the first two columns of `preds` are skipped
# (presumably the data and theory central values -- confirm against the table layout).
preds_onlyreplicas = preds.iloc[:, 2:].to_numpy()
mean_prediction = np.mean(preds_onlyreplicas, axis=1)

# Covariance of the predictions over replicas, normalised by the number of replicas (not N-1).
# The deviations from the replica mean are formed once (one column per replica) and a single
# matrix product replaces the Python loop that summed one outer product per replica.
replica_deviations = preds_onlyreplicas - mean_prediction[:, np.newaxis]
X = replica_deviations @ replica_deviations.T / preds_onlyreplicas.shape[1]

In [None]:
# Central data values: average, over all entries returned by read_pdf_pseudodata, of the
# pseudodata reordered to follow the row order of `preds`.
pseudodata = API.read_pdf_pseudodata(**common_dict)
preds_row_order = preds.index.to_list()
pseudodata_per_replica = [
    replica.pseudodata.reindex(preds_row_order).to_numpy().flatten() for replica in pseudodata
]
dat_central = np.mean(pseudodata_per_replica, axis=0)

In [None]:
# Number of positions at which the index of the predictions differs from dsindex (0 = same order).
(preds_ht_cov_construction.index != dsindex).sum()

In [None]:
# NOTE: preds_ht_cov_construction is ordered the same way as C and stored_covmat. Thus no
# reordering of the index should be necessary, since S constructed from these predictions should
# already be in the same order as stored_covmat. Yet this is not the case...

In [None]:
# Shift of each theory prediction induced by the higher-twist coefficient:
#   ht_coeff * theory_central / kin2**2 / (1 - kin1)
delta_pred_unordered = ht_coeff * (
    preds_ht_cov_construction["theory_central"] / kin_df['kin2'] ** 2 / (1 - kin_df['kin1'])
).to_numpy()

# covmap[i] gives the position that point i must occupy after the reordering.
covmap = API.covmap(use_ht_uncertainties=True, **(common_dict|S_dict))

# Reorder the shifts, and the matching index entries, according to covmap.
# The buffer is float64: the previous float32 buffer silently truncated the shifts before they
# entered the theory covmat S and the inversion of C + S.
n_points = delta_pred_unordered.size
delta_pred = np.zeros(n_points, dtype=np.float64)
pred_index = [0] * n_points
for source_pos in range(n_points):
    target_pos = covmap[source_pos]
    delta_pred[target_pos] = delta_pred_unordered[source_pos]
    pred_index[target_pos] = dsindex[source_pos]
# NOTE(review): pred_index is built alongside the reordered shifts but is not used by any later
# cell visible in this notebook -- confirm whether it should label the reordered quantities.
pred_index = pd.MultiIndex.from_tuples(pred_index)


In [None]:
# Theory covariance matrix: outer product of the prediction shifts, labelled with dsindex
# on both rows and columns.
S = pd.DataFrame(np.outer(delta_pred, delta_pred), index=dsindex, columns=dsindex)

# Experimental covariance matrix
C = API.groups_covmat_no_table(**common_dict)

Compute delta_T_tilde (Eq. 3.37) and P_tilde (Eq. 3.38) of arXiv:2105.05114

In [None]:
# NOTE: check the normalizations here

# beta_tilde holds the +/- shifts of the ht coefficient and beta the corresponding +/- shifts of
# the predictions. The factors 0.5 normalise for the fact that beta provides information about
# the theoretical uncertainties along two directions.
beta_tilde = ht_coeff * np.array([1, -1])
beta = np.array([delta_pred, -delta_pred])
S_tilde = 0.5 * beta_tilde @ beta_tilde
S_hat = 0.5 * beta_tilde @ beta

invcov = np.linalg.inv(C + S)

# S_hat @ invcov appears in every term below. S_hat is one-dimensional (assuming ht_coeff is a
# scalar), so S_hat.T is S_hat itself and the same projection can be reused.
S_hat_invcov = S_hat @ invcov

# Central value around which the shift of the ht coefficient is quoted.
ht_coeff_central = 0.0
delta_T_tilde = S_hat_invcov @ (dat_central - mean_prediction)
P_tilde = S_hat_invcov @ X @ invcov @ S_hat + (S_tilde - S_hat_invcov @ S_hat)
pred = ht_coeff_central + delta_T_tilde
unc = np.sqrt(P_tilde)

In [None]:
# Load the theory covmat that was stored with the fit and check that the matrix S reconstructed
# in this notebook reproduces it before quoting the result for ht_coeff.
fitpath = API.fit(fit=fitname).path
try:
    # First choice: the user-covmat table.
    stored_covmat = pd.read_csv(
        fitpath / "tables/datacuts_theory_theorycovmatconfig_user_covmat.csv",
        sep="\t",
        encoding="utf-8",
        index_col=2,
        header=3,
        skip_blank_lines=False,
    )
except FileNotFoundError:
    # Fallback: the custom theory covmat table, which has a three-level index and header.
    stored_covmat = pd.read_csv(
            fitpath / "tables/datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv",
            index_col=[0, 1, 2],
            header=[0, 1, 2], 
            sep="\t|,",
            engine="python",
        ).fillna(0)
    # Rebuild the index with an integer last level, then use it for both rows and columns.
    storedcovmat_index = pd.MultiIndex.from_tuples(
        [(group, dataset, np.int64(point_id)) for group, dataset, point_id in stored_covmat.index],
        names=['group', 'dataset', 'id']
    )
    stored_covmat = pd.DataFrame(stored_covmat.values, index=storedcovmat_index, columns=storedcovmat_index)
    # Bring rows and columns into the same order as the experimental covmat C.
    stored_covmat = stored_covmat.reindex(C.index).T.reindex(C.index)
if np.allclose(S, stored_covmat):
    # "\u00b1" is the plus-minus sign; the escape keeps the message intact regardless of the
    # encoding the notebook file is read with (the previous literal had been mangled to "Â±").
    print(f"Prediction for ht_coeff: {pred:.5f} \u00b1 {unc:.5f}")
else:
    print("Reconstructed theory covmat, S, is not the same as the stored covmat!")