# PREAMBLE
<script
  src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-AMS-MML_HTMLorMML"
  type="text/javascript">
</script>

In [None]:

import numpy as np
import pandas as pd
from validphys.api import API
from validphys.loader import FallbackLoader

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# NOTE(review): `l` is not referenced again in the visible cells — confirm
# whether this loader instance is still needed before removing it.
l = FallbackLoader()

# Definition of the input

In [None]:
# Name of the fit to analyse; it is resolved through API.fit further down.
fitname = "240502-rs-alphas-tcm"
# Only referenced by commented-out code in the visible cells.
mhou_fit = False
# Its square root multiplies both the alpha_s step size (beta_tilde) and the
# shift in the theory predictions (beta).
covmat_scaling_factor = 0.5

# COMPUTATION OF $\alpha_s$

In [None]:
# Resolve the fit; fit.as_input() supplies the PDF used for the prior predictions.
fit = API.fit(fit=fitname)

prior_pdf = fit.as_input()["theorycovmatconfig"]["pdf"]
# prior_pdf = "240409-01-rs-symm_pos_pseudodata"

# Keys shared by every API call below.
common_dict = {
    "dataset_inputs": {"from_": "fit"},
    "fit": fit.name,
    "fits": [fit.name],
    "use_cuts": "fromfit",
    "metadata_group": "nnpdf31_process",
}

# Theories of the point prescription declared in the fit's theorycovmatconfig.
theoryids_spec = {
    "point_prescription": {"from_": "theorycovmatconfig"},
    "theoryids": {"from_": "scale_variation_theories"},
    "theoryid": {"from_": "theory"},
    "theory": {"from_": "fit"},
    "theorycovmatconfig": {"from_": "fit"},
}
theoryids = API.theoryids(**{**theoryids_spec, **common_dict})

# Entry 0 is used as the central theory, entries 1 and 2 as the plus and
# minus variations.
theory_mid = theoryids[0].id
theory_plus = theoryids[1].id
theory_min = theoryids[2].id

# Central, plus and minus theory inputs, all evaluated with the prior PDF;
# they are used to construct the alphas covmat.
inps_central = {"theoryid": theory_mid, "pdf": prior_pdf, **common_dict}
inps_plus = {"theoryid": theory_plus, "pdf": prior_pdf, **common_dict}
inps_minus = {"theoryid": theory_min, "pdf": prior_pdf, **common_dict}

# Inputs for the predictions of the fit itself (cov = C + S, with S built
# from inps_central, inps_plus and inps_minus): central theory, fitted PDF.
inps_central_fit = {"theoryid": theory_mid, "pdf": {"from_": "fit"}, **common_dict}

In [None]:
# Central-theory predictions with the prior PDF: drop the first two columns of
# the result table and average the remaining ones for each data point.
prior_theorypreds_central = (
    API.group_result_table_no_table(**inps_central)
    .iloc[:, 2:]
    .mean(axis=1)
)

In [None]:
# Plus-theory predictions with the prior PDF: drop the first two columns of
# the result table and average the remaining ones for each data point.
prior_theorypreds_plus = (
    API.group_result_table_no_table(**inps_plus)
    .iloc[:, 2:]
    .mean(axis=1)
)

In [None]:
# Minus-theory predictions with the prior PDF: drop the first two columns of
# the result table and average the remaining ones for each data point.
prior_theorypreds_minus = (
    API.group_result_table_no_table(**inps_minus)
    .iloc[:, 2:]
    .mean(axis=1)
)

In [None]:
# Symmetric second difference of the prior predictions about the central
# theory (plus + minus - 2 * central). In the visible cells it is only
# referenced from commented-out code.
gamma = prior_theorypreds_plus + prior_theorypreds_minus - 2 * prior_theorypreds_central

In [None]:
# Read the "alphas" entry of each theory's info table, in the order
# plus, central, minus.
alphas_plus, alphas_central, alphas_min = (
    API.theory_info_table(theory_db_id=theory_id).loc["alphas"].iloc[0]
    for theory_id in (theory_plus, theory_mid, theory_min)
)

# The upward and downward shifts must agree to within 1e-6; otherwise a single
# step size is not well defined.
delta_alphas_plus = alphas_plus - alphas_central
delta_alphas_min = alphas_central - alphas_min
if abs(delta_alphas_min - delta_alphas_plus) > 1e-6:
    raise ValueError("alphas shifts in both directions is not symmetric")
alphas_step_size = delta_alphas_min

In [None]:
# Display the central alphas value read from the central theory's info table.
alphas_central

In [None]:
# Scaled alpha_s step and its square; both are scalars.
beta_tilde = np.sqrt(covmat_scaling_factor) * alphas_step_size
S_tilde = beta_tilde * beta_tilde

In [None]:
# delta_plus = (np.sqrt(covmat_scaling_factor) / np.sqrt(2)) * (
#     prior_theorypreds_plus - prior_theorypreds_central
# )
# delta_minus = (np.sqrt(covmat_scaling_factor) / np.sqrt(2)) * (
#     prior_theorypreds_minus - prior_theorypreds_central
# )

# beta = [delta_plus, delta_minus]
# S_hat = beta_tilde @ beta

# S = np.outer(delta_plus, delta_plus) + np.outer(delta_minus, delta_minus)
# S = pd.DataFrame(S, index=delta_minus.index, columns=delta_minus.index)

In [None]:
# Scaled difference between the plus and minus prior predictions.
prediction_shift = prior_theorypreds_plus - prior_theorypreds_minus
beta = np.sqrt(covmat_scaling_factor) * prediction_shift

# Cross term between the alpha_s step and the prediction shift.
S_hat = beta_tilde * beta

# Outer product of beta with itself, labelled with the data-point index.
S = pd.DataFrame(np.outer(beta, beta), index=beta.index, columns=beta.index)

In [None]:
# Load the theory covmat stored with the fit. The user-covmat table is tried
# first; if that file does not exist, the custom theory-covmat table is read
# instead and brought onto the same index as S.
user_covmat_path = fit.path / "tables/datacuts_theory_theorycovmatconfig_user_covmat.csv"
custom_covmat_path = fit.path / "tables/datacuts_theory_theorycovmatconfig_theory_covmat_custom.csv"

try:
    stored_covmat = pd.read_csv(
        user_covmat_path,
        sep="\t",
        encoding="utf-8",
        index_col=2,
        header=3,
        skip_blank_lines=False,
    )
except FileNotFoundError:
    stored_covmat = pd.read_csv(
        custom_covmat_path,
        sep="\t|,",
        engine="python",
        index_col=[0, 1, 2],
        header=[0, 1, 2],
    ).fillna(0)
    # Rebuild the index with an integer third level so that it matches the
    # index of S.
    storedcovmat_index = pd.MultiIndex.from_tuples(
        [(group, dataset, np.int64(point)) for group, dataset, point in stored_covmat.index],
        names=["group", "dataset", "id"],
    )
    stored_covmat = pd.DataFrame(
        stored_covmat.values, index=storedcovmat_index, columns=storedcovmat_index
    )
    # Reorder rows and columns to follow S.
    stored_covmat = stored_covmat.reindex(S.index).T.reindex(S.index)

# Compare the rescaled reconstruction with the stored matrix; a mismatch is
# reported but does not stop the notebook.
rescale_alphas_covmat = fit.as_input()["theorycovmatconfig"]["rescale_alphas_covmat"]
covmats_agree = np.allclose(rescale_alphas_covmat * S, stored_covmat)
if not covmats_agree:
    print("Reconstructed theory covmat, S, is not the same as the stored covmat!")

In [None]:
# Central-theory predictions of the fitted PDF: keep every column after the
# first two of the result table (no averaging here).
theorypreds_fit = (
    API.group_result_table_no_table(**inps_central_fit)
    .iloc[:, 2:]
)

In [None]:
# Experimental covariance matrix, requested with use_t0=True for the central
# theory and with data cuts and t0 PDF set taken from the fit.
covmat_inputs = {
    "use_t0": True,
    "datacuts": {"from_": "fit"},
    "t0pdfset": {"from_": "datacuts"},
    "theoryid": theory_mid,
    **common_dict,
}
C = API.groups_covmat(**covmat_inputs)

In [None]:
# # MHOU covmat saved as user uncertainties
# try:
#     mhou_fit = fit.as_input()["theorycovmatconfig"]["use_user_uncertainties"]
#     if mhou_fit:
#         mhou_covmat = API.user_covmat(**(inps_central_fit|fit.as_input()['theorycovmatconfig']))
#         exp_covmat = C # we don't use exp_covmat, but may be useful to keep
#         C = C + mhou_covmat
# except:
#     pass

In [None]:
# Different from the prediction of the mean PDF (i.e. replica0)
mean_prediction = theorypreds_fit.mean(axis=1)

# Covariance of the fit predictions about their mean, normalised by the number
# of columns of theorypreds_fit (1/N, not 1/(N-1)). A single matrix product
# replaces the per-column accumulation of outer products; the result is the
# same up to floating-point rounding.
deviations = (
    theorypreds_fit.to_numpy(dtype=float)
    - mean_prediction.to_numpy(dtype=float)[:, np.newaxis]
)
X = deviations @ deviations.T
X *= 1 / theorypreds_fit.shape[1]
del deviations  # large intermediate, not needed afterwards

# X is later combined with the inverse of C + z*S, so it must have C's shape.
if X.shape != C.shape:
    raise ValueError(
        f"Shape of X {X.shape} does not match the experimental covmat {C.shape}"
    )

In [None]:
# Load the pseudodata of the fit; the next cell reads a `.pseudodata`
# attribute from each element of the result.
pseudodata = API.read_pdf_pseudodata(**common_dict)

In [None]:
# Put every replica's pseudodata on the index of the prior predictions and
# collect them side by side, one column per replica.
target_index = prior_theorypreds_central.index
dat_reps = pd.concat(
    [replica.pseudodata.reindex(target_index) for replica in pseudodata],
    axis=1,
)
# dat_central = API.group_result_central_table_no_table(**inps_central)["data_central"]

In [None]:
# Instead of loading, generate the pseudodata used in the fit (and maybe a bit more if so desired)

# regenerate_pseudodata=API.pseudodata_table(
#     nreplica=200,
#     fit=prior_pdf,
#     dataset_inputs={"from_": "fit"},
#     use_cuts="fromfit",
#     theoryid=theory_mid,
#     mcseed={"from_": "fit"},
#     genrep={"from_": "fit"},
#     separate_multiplicative=False,
# )
# regenerate_pseudodata.index = pd.MultiIndex.from_tuples([i[1:3] for i in regenerate_pseudodata.index], names=["dataset", "id"])
# regenerate_pseudodata = regenerate_pseudodata.reindex(pd.MultiIndex.from_tuples([i[1:3] for i in prior_theorypreds_central.index], names=["dataset", "id"]))

In [None]:
# Force a garbage-collection pass; the cell displays the value returned by
# gc.collect().
import gc

gc.collect()

In [None]:
# Rescaling factor of the alphas covmat, read from the fit's
# theorycovmatconfig rather than recomputed (earlier formula kept below).
# z = 1 / (gamma @ np.linalg.inv(C) @ (prior_theorypreds_central - dat_central) + 1)
z = fit.as_input()["theorycovmatconfig"]["rescale_alphas_covmat"]
# Display the value.
z

In [None]:
# z_vals = []
# repindices = API.fitted_replica_indexes(pdf=fitname)
# Cinv = np.linalg.inv(C)
# datainfo = pd.concat(
#     [i.pseudodata.reindex(prior_theorypreds_central.index) for i in pseudodata], axis=1
# )
# datcentral = API.group_result_central_table_no_table(**inps_central)["data_central"]
# for _ in range(int(1e4)):
#     reps = np.random.randint(1,500,size=500)
#     mask1 = [f'replica {repindices[r-1]}' for r in reps]
#     mask2 = [f"rep_{r:05d}" for r in reps]
#     z_vals.append(1 / (gamma.loc[:,mask2].mean(axis=1) @ Cinv @ (prior_theorypreds_central.loc[:,mask2].mean(axis=1) - datainfo.loc[:,mask1].mean(axis=1)) + 1))

In [None]:
# in case we'd like to save the covmat to be used in a fit
# savethiscovmat=z*S
# savethiscovmat.to_csv("alphas_covmat-114_118_122-240401-01-rs-nnpdf40like-baseline.csv")

In [None]:
# Inverse of the total covariance C + z*S, reused in every projection below.
invcov = np.linalg.inv(C + z*S)

# Shift of alpha_s relative to the central theory value, driven by the
# difference between the mean fit prediction and the mean pseudodata.
residual = mean_prediction - dat_reps.mean(axis=1)
delta_T_tilde = - z * S_hat @ invcov @ residual

# The three contributions to the variance, each evaluated left to right.
term_X = z**2 * S_hat @ invcov @ X @ invcov @ S_hat
term_S_tilde = z * S_tilde
term_S_hat = z**2 * S_hat @ invcov @ S_hat
P_tilde = term_X + term_S_tilde - term_S_hat

pred = alphas_central + delta_T_tilde
unc = np.sqrt(P_tilde)
print(rf"Prediction for $\alpha_s$: {pred:.4f} ± {unc:.4f}")

In [None]:
# Display the term that is subtracted in P_tilde.
z**2 * S_hat @ invcov @ S_hat

In [None]:
# Display the z * S_tilde term that is added in P_tilde.
z * S_tilde

In [None]:
# Display the first term of P_tilde, the one that involves X.
z**2 * S_hat @ invcov @ X @ invcov @ S_hat

In [None]:
# Display the sum of the three terms; this is the same expression as P_tilde.
z**2 * S_hat @ invcov @ X @ invcov @ S_hat + z * S_tilde - z**2 * S_hat @ invcov @ S_hat

In [None]:
# Bootstrap the alpha_s shift: resample the replicas with replacement and
# recompute the shift from the resampled mean prediction and mean pseudodata.
# The stored values do not include the factor z.
delta_T_vals = []
repindices = API.fitted_replica_indexes(pdf=fitname)

# Replica labels are 1-based ("rep_00001", ...). Take the count from the
# prediction table instead of hard-coding 500.
# NOTE(review): assumes repindices has one entry per column of theorypreds_fit.
n_replicas = theorypreds_fit.shape[1]
n_bootstrap = int(1e3)

# Seeded generator so that Restart & Run All reproduces the same numbers.
rng = np.random.default_rng(0)

# Loop-invariant part of the shift, computed once.
shift_projector = - S_hat @ invcov

for _ in range(n_bootstrap):
    # The upper bound is exclusive, hence n_replicas + 1: the last replica
    # must be drawable too.
    reps = rng.integers(1, n_replicas + 1, size=n_replicas)
    mask1 = [f'replica {repindices[r-1]}' for r in reps]
    mask2 = [f"rep_{r:05d}" for r in reps]
    resampled_residual = (
        theorypreds_fit.loc[:, mask2].mean(axis=1) - dat_reps.loc[:, mask1].mean(axis=1)
    )
    delta_T_vals.append(shift_projector @ resampled_residual)
delta_T_vals = np.array(delta_T_vals)

In [None]:
# Spread and mean of the bootstrap shifts after rescaling by z.
scaled_delta_T_vals = delta_T_vals*z
print(np.std(scaled_delta_T_vals))
print(np.mean(scaled_delta_T_vals))

In [None]:
from matplotlib import pyplot as plt

# Histogram of the bootstrap shifts rescaled by z, i.e. the quantity that is
# added to alphas_central in the central result. Axes are labelled so that the
# figure can be read on its own, and plt.show() avoids echoing the raw return
# value of hist.
fig, ax = plt.subplots()
ax.hist(delta_T_vals*z)
ax.set_xlabel(r"$z\,\delta\tilde{T}$ (shift in $\alpha_s$)")
ax.set_ylabel("Number of bootstrap samples")
ax.set_title(r"Bootstrap distribution of the $\alpha_s$ shift")
plt.show()

In [None]:
# NOTE(review): 0.072 is hard-coded and its origin is not shown in this
# notebook — confirm the value before reusing it.
delta_z = 0.072

# Standard deviation of the bootstrap shifts (without the factor z), computed
# once and used in both terms below.
delta_T_std = np.std(delta_T_vals)
delta_z * delta_T_tilde + z * delta_T_std + delta_T_std * delta_z