In [1]:
import numpy as np
import pandas as pd
from scipy import interpolate as scint
from validphys.api import API
from validphys.loader import FallbackLoader
from validphys.theorycovariance.construction import compute_normalisation_by_experiment

%matplotlib inline

# validphys loader instance.
# NOTE(review): `l` is not referenced by any cell visible here — confirm it is still needed.
l = FallbackLoader()

In [2]:
# Names of the two fits compared in this notebook.
fitname, fitname_ref = (
    "240921_02_ht_preds_abmp",  # fit under study
    "240417-01-bl-abmp_cuts",   # reference fit
)

In [3]:
def _fit_settings(fit_name):
    """Build the validphys settings for one fit.

    Every entry except ``fit`` is identical for the two fits, so the mapping
    is produced by this single factory. A fresh dictionary (with fresh nested
    dictionaries) is returned on each call.
    """
    return {
        "dataset_inputs": {"from_": "fit"},
        "fit": fit_name,
        "use_cuts": "fromfit",
        "metadata_group": "nnpdf31_process",
        "theory": {"from_": "fit"},
        "theoryid": {"from_": "theory"},
        "pdf": {"from_": "fit"},
        "datacuts": {"from_": "fit"},
        "t0pdfset": {"from_": "datacuts"},
    }


# Resolve both fits through the API, then derive their settings from the fit names.
fit = API.fit(fit=fitname)
fit_ref = API.fit(fit=fitname_ref)

settings_dict = _fit_settings(fit.name)
settings_dict_ref = _fit_settings(fit_ref.name)

In [4]:
# Result table and kinematics table for the fit under study.
theorypreds_all = API.group_result_table_no_table(**settings_dict)
kinematics_all = API.group_kin_table_no_table(**settings_dict)

# Result table for the reference fit.
theorypreds_all_ref = API.group_result_table_no_table(**settings_dict_ref)

# Sanity check: the kinematics and the predictions must share the same index.
# A mismatch is reported on stdout; execution continues.
try:
    pd.testing.assert_index_equal(kinematics_all.index, theorypreds_all.index)
except AssertionError as index_mismatch:
    print("Different index", index_mismatch, sep="\n")

LHAPDF 6.5.4 loading all 501 PDFs in set 240921_02_ht_preds_abmp
240921_02_ht_preds_abmp, version 1; 501 PDF members
LHAPDF 6.5.4 loading all 501 PDFs in set 240417-01-bl-abmp_cuts
240417-01-bl-abmp_cuts, version 1; 501 PDF members


In [5]:
def _split_predictions(result_table):
    """Split a result table into (replica columns, replica mean, column 1).

    Columns from position 2 onwards are treated as the replica predictions and
    the column at position 1 as the rep0 prediction, following the variable
    names used in this notebook.
    """
    replica_columns = result_table.iloc[:, 2:]
    return replica_columns, replica_columns.mean(axis=1), result_table.iloc[:, 1]


# Fit under study (the name `theorypdres_mean` is kept as-is for compatibility).
theorypred_reps, theorypdres_mean, theorypred_rep0 = _split_predictions(theorypreds_all)

# Reference fit.
theorypred_reps_ref, theorypdres_mean_ref, theorypred_rep0_ref = _split_predictions(
    theorypreds_all_ref
)

In [6]:
# Pseudodata of the fit: one reindexed `.pseudodata` table per element returned by the
# API, aligned on the index of the theory predictions and stacked side by side.
pseudodata = API.read_pdf_pseudodata(**settings_dict)
_aligned_pseudodata = [
    replica.pseudodata.reindex(theorypreds_all.index) for replica in pseudodata
]
data_reps = pd.concat(_aligned_pseudodata, axis=1)
data_reps_mean = data_reps.mean(axis=1)

# Central data values, read from the result tables of the fit and of the reference fit.
data_exp = theorypreds_all["data_central"]
data_exp_ref = theorypreds_all_ref["data_central"]

In [7]:
def _t0_and_exp_covmats(settings):
    """Return the pair (use_t0=True covmat, use_t0=False covmat) for one set of settings."""
    with_t0 = API.groups_covmat(use_t0=True, **settings)
    without_t0 = API.groups_covmat(use_t0=False, **settings)
    return with_t0, without_t0


# Covariance matrices of the fit under study and of the reference fit.
Ct0, Cexp = _t0_and_exp_covmats(settings_dict)
Ct0_ref, Cexp_ref = _t0_and_exp_covmats(settings_dict_ref)

LHAPDF 6.5.4 loading /opt/homebrew/Caskroom/miniconda/base/envs/nnpdf/share/LHAPDF/210715-n3fit-1000-001/210715-n3fit-1000-001_0000.dat
210715-n3fit-1000-001 PDF set, member #0, version 1


In [8]:
# Invert every covariance matrix once; the chi2 cells reuse these inverses.
Cinvt0, Cinvexp = (np.linalg.inv(covmat) for covmat in (Ct0, Cexp))
Cinvt0_ref, Cinvexp_ref = (np.linalg.inv(covmat) for covmat in (Ct0_ref, Cexp_ref))

In [9]:
# HT table: a single zero-filled 'ht' column defined on the index of the rep0 predictions.
ht_theory = pd.Series(
    np.zeros_like(theorypred_rep0.to_numpy()),
    index=theorypred_rep0.index,
).to_frame(name='ht')

In [10]:
# Concatenate kinematics to HT dataframe.
# The concatenation starts from the 'ht' column only, so re-running this cell replaces the
# kinematics columns instead of appending a duplicate set (duplicated column names would
# make attribute access such as `.kin_1` return a DataFrame rather than a Series).
try:
    pd.testing.assert_index_equal(kinematics_all.index, ht_theory.index)
except AssertionError as e:
    # Indices disagree: report it and leave ht_theory untouched.
    print("Different index")
    print(e)
else:
    ht_theory = pd.concat([ht_theory[['ht']], kinematics_all], axis=1)

In [11]:
# Define ABMP HT: tabulated values of H_2 and H_T at fixed knots in x
x_knots = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1]
y_h2 = [0.023, -0.032, -0.005, 0.025, 0.051, 0.003, 0.0]
y_ht = [-0.319, -0.134, -0.052, 0.071, 0.030, 0.003, 0.0]

# Cubic splines through the knots. CubicSpline objects already evaluate element-wise on
# scalars and arrays of any size (including empty ones), so they are used directly: an
# np.vectorize wrapper would only add a Python-level loop and fail on size-0 input.
H_2 = scint.CubicSpline(x_knots, y_h2)
H_T = scint.CubicSpline(x_knots, y_ht)


def H_L(x):
    """Reconstruct HL from HT and H2.

    Parameters
    ----------
    x : float or array-like
        Point(s) at which to evaluate the function.

    Returns
    -------
    numpy.ndarray
        ``H_2(x) - x**0.05 * H_T(x)``, with the same shape as ``x``.
    """
    x = np.asarray(x, dtype=float)
    return H_2(x) - np.power(x, 0.05) * H_T(x)

## Compute the HT

In [12]:
# Silence the pandas PerformanceWarning triggered by the df.loc assignments in the loop
import warnings
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

# Process groups that receive an HT shift, and per-process datasets to be left untouched
included_proc = ['DIS NC']
excluded_exp = {"DIS NC" : []}

for process_name, process_group in ht_theory.groupby(level='group'):
    for exp_name, exp_group in process_group.groupby(level='dataset'):
        # Skip datasets outside the selected processes or explicitly excluded
        if process_name not in included_proc or exp_name in excluded_exp[process_name]:
            continue

        # Kinematic columns of this dataset: kin_1 -> x, kin_2 -> q2, kin_3 -> y
        x, q2, y = (exp_group[column].to_numpy() for column in ('kin_1', 'kin_2', 'kin_3'))
        N2, NL = compute_normalisation_by_experiment(exp_name, x, y, q2)

        # Shift = normalised H_2 and H_L terms, each divided by q2
        PC_2 = N2 * H_2(x) / q2
        PC_L = NL * H_L(x) / q2
        ht_theory.loc[(process_name, exp_name), 'ht'] = PC_2 + PC_L

## Collect process and experiment information

In [13]:
# Collect the names of the nnpdf31 processes and of the experiments, in index order.
# A name is recorded whenever it differs from the one seen on the previous index entry.
process_list, exp_list = [], []
process_name = exp_name = ''
for index_entry in theorypred_rep0.index.to_numpy():
    entry_process, entry_exp = index_entry[0], index_entry[1]

    if entry_process != process_name:
        process_name = entry_process
        process_list.append(entry_process)

    if entry_exp != exp_name:
        exp_name = entry_exp
        exp_list.append(entry_exp)

# Compute global $\chi^2$

In [14]:
def _quad_form(residual, inv_covmat):
    """Return the quadratic form r^T C^-1 r for a 1-D residual vector r."""
    r = np.asarray(residual)
    return r @ inv_covmat @ r


# Each residual is built once and reused on both sides of the quadratic form.
# Naming: "t0"/"exp" select the inverse covariance matrix (Cinvt0 / Cinvexp);
# "average" is the mean over replicas of the per-replica value; "meanT" uses the
# prediction averaged over replicas.
ndat = theorypred_rep0.size
_ht_shift = ht_theory['ht']
_data_central = data_exp.to_numpy()
_replica_mean = theorypred_reps.mean(axis=1)

# With HT
_res_rep0 = theorypred_rep0 + _ht_shift - data_exp
_res_meanT = _replica_mean + _ht_shift - data_exp
chi2t0 = _quad_form(_res_rep0, Cinvt0) / ndat
chi2t0_average = np.mean(
    [_quad_form(theorypred_reps[r] + _ht_shift - _data_central, Cinvt0) for r in theorypred_reps]
) / ndat
chi2t0_meanT = _quad_form(_res_meanT, Cinvt0) / ndat
chi2exp = _quad_form(_res_rep0, Cinvexp) / ndat

# Without HT
_res_rep0_no_ht = theorypred_rep0 - data_exp
_res_meanT_no_ht = _replica_mean - data_exp
chi2t0_no_ht = _quad_form(_res_rep0_no_ht, Cinvt0) / ndat
chi2t0_average_no_ht = np.mean(
    [_quad_form(theorypred_reps[r] - _data_central, Cinvt0) for r in theorypred_reps]
) / ndat
chi2t0_meanT_no_ht = _quad_form(_res_meanT_no_ht, Cinvt0) / ndat
chi2exp_no_ht = _quad_form(_res_rep0_no_ht, Cinvexp) / ndat

# Reference fit
ndat_ref = theorypred_rep0_ref.size
_data_central_ref = data_exp_ref.to_numpy()
_res_rep0_ref = theorypred_rep0_ref - data_exp_ref
_res_meanT_ref = theorypred_reps_ref.mean(axis=1) - data_exp_ref
chi2t0_ref = _quad_form(_res_rep0_ref, Cinvt0_ref) / ndat_ref
chi2t0_average_ref = np.mean(
    [_quad_form(theorypred_reps_ref[r] - _data_central_ref, Cinvt0_ref) for r in theorypred_reps_ref]
) / ndat_ref
chi2t0_meanT_ref = _quad_form(_res_meanT_ref, Cinvt0_ref) / ndat_ref
chi2exp_ref = _quad_form(_res_rep0_ref, Cinvexp_ref) / ndat_ref


# Disabled table: same fit with and without the HT shift.
# The value columns follow the header order (without HT first, then with HT).
if False:
    print(f"{'name':>15}{'chi2 w/o HT':>20}{'chi2 w/ HT':>15}")
    print(f"{'chi2t0':>15}{chi2t0_no_ht:>15.4f}{chi2t0:>15.4f}")
    print(f"{'chi2t0_average':>15}{chi2t0_average_no_ht:>15.4f}{chi2t0_average:>15.4f}")
    print(f"{'chi2t0_meanT':>15}{chi2t0_meanT_no_ht:>15.4f}{chi2t0_meanT:>15.4f}")
    print(f"{'chi2exp':>15}{chi2exp_no_ht:>15.4f}{chi2exp:>15.4f}")

# Enabled table: reference fit versus the fit with the HT shift.
if True:
    print(f"{'name':>15}{'Baseline no HT':>20}{'With HT':>10}")
    print(f"{'chi2t0':>15}{chi2t0_ref:>15.4f}{chi2t0:>15.4f}")
    print(f"{'chi2t0_average':>15}{chi2t0_average_ref:>15.4f}{chi2t0_average:>15.4f}")
    print(f"{'chi2t0_meanT':>15}{chi2t0_meanT_ref:>15.4f}{chi2t0_meanT:>15.4f}")
    print(f"{'chi2exp':>15}{chi2exp_ref:>15.4f}{chi2exp:>15.4f}")

           name      Baseline no HT   With HT
         chi2t0         1.2860         1.2696
 chi2t0_average         1.3090         1.2924
   chi2t0_meanT         1.2859         1.2694
        chi2exp         1.2182         1.2007


## Compute $\chi^2$ per process

In [15]:
# chi2 per process, computed from the rep0 predictions and the inverse of the experimental
# covariance matrix. Each entry of chi2_dict holds the value with the HT shift ("HT"),
# without it ("NO_HT") and for the reference fit ("ref").
#
# NOTE(review): Cinvexp_PROC is the diagonal block of the inverse of the full matrix. This
# equals the inverse of the process block only if the covariance matrix carries no
# correlations between processes — TODO confirm for groups_covmat.
chi2_dict = {}
collector = 0  # running offset of the current process block inside the inverse covmats
for name in process_list:
    theorypred_rep0_PROC = theorypred_rep0.loc[[name]].to_numpy()
    data_exp_PROC = data_exp.loc[[name]].to_numpy()
    ht_PROC = ht_theory.loc[[name], 'ht'].to_numpy()

    # For reference fit
    theorypred_rep0_PROC_ref = theorypred_rep0_ref.loc[[name]].to_numpy()
    data_exp_PROC_ref = data_exp_ref.loc[[name]].to_numpy()

    # Shape problems are reported (with the offending shapes) rather than raised here
    if not (theorypred_rep0_PROC.shape == data_exp_PROC.shape == ht_PROC.shape):
        print("Problem with the shape")
        print(theorypred_rep0_PROC.shape, data_exp_PROC.shape, ht_PROC.shape)

    # For reference fit
    if theorypred_rep0_PROC_ref.shape != data_exp_PROC_ref.shape:
        print("Problem with the shape")
        print(theorypred_rep0_PROC_ref.shape, data_exp_PROC_ref.shape)

    Ndata_PROC = theorypred_rep0_PROC.shape[0]
    Ndata_PROC_ref = theorypred_rep0_PROC_ref.shape[0]

    # Both inverses are sliced with the same offset, so the two fits must contribute the
    # same number of points; otherwise the reference block would be misaligned. Stop here
    # instead of producing wrong numbers.
    if Ndata_PROC_ref != Ndata_PROC:
        raise ValueError(
            "Fit and reference fit do not have the same number of points "
            f"for {name}: {Ndata_PROC} vs {Ndata_PROC_ref}"
        )

    block = slice(collector, collector + Ndata_PROC)
    Cinvexp_PROC = Cinvexp[block, block]
    Cinvexp_PROC_ref = Cinvexp_ref[block, block]
    collector += Ndata_PROC

    # Residuals are built once and reused on both sides of the quadratic form
    diff_ht = theorypred_rep0_PROC + ht_PROC - data_exp_PROC
    diff_no_ht = theorypred_rep0_PROC - data_exp_PROC
    diff_ref = theorypred_rep0_PROC_ref - data_exp_PROC_ref

    chi2exp_PROC = diff_ht @ Cinvexp_PROC @ diff_ht / Ndata_PROC
    chi2exp_no_ht_PROC = diff_no_ht @ Cinvexp_PROC @ diff_no_ht / Ndata_PROC

    # For reference fit
    chi2exp_PROC_ref = diff_ref @ Cinvexp_PROC_ref @ diff_ref / Ndata_PROC_ref

    chi2_dict[name] = {"HT": chi2exp_PROC, "NO_HT": chi2exp_no_ht_PROC, "ref": chi2exp_PROC_ref}

# Disabled table: same fit without and with the HT shift
if False:
    print(f"{'name':>10}{'chi2 w/o HT':>20}{'chi2 w/ HT':>15}")
    for name in chi2_dict.keys():
        print(f"{name:>10}{chi2_dict[name]['NO_HT']:>15.4f}{chi2_dict[name]['HT']:>15.4f}")

# Enabled table: reference fit versus the fit with the HT shift
if True:
    print(f"{'name':>15}{'Baseline no HT':>20}{'With HT':>10}")
    for name in chi2_dict.keys():
        print(f"{name:>14}{chi2_dict[name]['ref']:>15.4f}{chi2_dict[name]['HT']:>15.4f}")


           name      Baseline no HT   With HT
        DIS NC         1.2450         1.2474
        DIS CC         1.0122         1.0146
         DY NC         1.2882         1.2313
         DY CC         1.6504         1.8411
           TOP         1.5540         1.3374
          JETS         1.1094         1.0018
         DIJET         2.2559         2.1151
        PHOTON         0.8130         0.7513
     SINGLETOP         0.3491         0.3727


## Compute $\chi^2$ per experiment

In [16]:
# chi2 per experiment (dataset), computed from the rep0 predictions and the inverse of the
# experimental covariance matrix. Each entry of chi2_dict_exp holds the value with the HT
# shift ("HT"), without it ("NO_HT"), for the reference fit ("ref"), and a "show" flag that
# is True when the dataset belongs to the 'DIS NC' group.
#
# NOTE(review): Cinvexp_exp is the diagonal block of the inverse of the full matrix. This
# equals the inverse of the dataset block only if the dataset is uncorrelated with every
# other dataset in the covariance matrix — TODO confirm this is the intended definition.
chi2_dict_exp = {}
collector = 0  # running offset of the current dataset block inside the inverse covmats
for name in exp_list:
    theorypred_rep0_exp = theorypred_rep0.xs(name, level="dataset").to_numpy()
    data_exp_exp = data_exp.xs(name, level="dataset").to_numpy()
    ht_rows = ht_theory.xs(name, level='dataset')['ht']
    ht_exp = ht_rows.to_numpy()

    # Shape problems are reported (with the offending shapes) rather than raised here
    if not (theorypred_rep0_exp.shape == ht_exp.shape == data_exp_exp.shape):
        print("Problem with the shape")
        print(theorypred_rep0_exp.shape, ht_exp.shape, data_exp_exp.shape)

    # For reference fit
    theorypred_rep0_exp_ref = theorypred_rep0_ref.xs(name, level="dataset").to_numpy()
    data_exp_exp_ref = data_exp_ref.xs(name, level="dataset").to_numpy()

    ndata_exp = theorypred_rep0_exp.shape[0]
    ndata_exp_ref = theorypred_rep0_exp_ref.shape[0]

    # Both inverses are sliced with the same offset, so the two fits must contribute the
    # same number of points; otherwise the reference block would be misaligned.
    if ndata_exp_ref != ndata_exp:
        raise ValueError(
            "Fit and reference fit do not have the same number of points "
            f"for {name}: {ndata_exp} vs {ndata_exp_ref}"
        )

    block = slice(collector, collector + ndata_exp)
    Cinvexp_exp = Cinvexp[block, block]
    Cinvexp_exp_ref = Cinvexp_ref[block, block]
    collector += ndata_exp

    # Residuals are built once and reused on both sides of the quadratic form
    diff_ht = theorypred_rep0_exp + ht_exp - data_exp_exp
    diff_no_ht = theorypred_rep0_exp - data_exp_exp
    diff_ref = theorypred_rep0_exp_ref - data_exp_exp_ref

    chi2exp_exp = diff_ht @ Cinvexp_exp @ diff_ht / ndata_exp
    chi2exp_no_ht_exp = diff_no_ht @ Cinvexp_exp @ diff_no_ht / ndata_exp
    chi2exp_no_ht_exp_ref = diff_ref @ Cinvexp_exp_ref @ diff_ref / ndata_exp_ref

    chi2_dict_exp[name] = {
        "HT": chi2exp_exp,
        "NO_HT": chi2exp_no_ht_exp,
        "ref": chi2exp_no_ht_exp_ref,
        # After xs() on the dataset level, the first remaining index level is the group
        "show": ht_rows.index[0][0] == 'DIS NC',
    }

# Disabled table: same fit without and with the HT shift
if False:
    print(f"{'name':>30}{'chi2 w/o HT':>30}{'chi2 w/ HT':>15}")
    for name in chi2_dict_exp.keys():
        if chi2_dict_exp[name]['show']:
            print(f"{name:>40}{chi2_dict_exp[name]['NO_HT']:>15.4f}{chi2_dict_exp[name]['HT']:>15.4f}")

# Enabled table: reference fit versus the fit with the HT shift
if True:
    print(f"{'name':>40}{'Baseline no HT':>20}{'With HT':>10}")
    for name in chi2_dict_exp.keys():
        if chi2_dict_exp[name]['show']:
            print(f"{name:>40}{chi2_dict_exp[name]['ref']:>15.4f}{chi2_dict_exp[name]['HT']:>15.4f}")

                                    name      Baseline no HT   With HT
                NMC_NC_NOTFIXED_DW_EM-F2         0.8809         0.9447
           NMC_NC_NOTFIXED_P_EM-SIGMARED         1.5983         1.4989
             SLAC_NC_NOTFIXED_P_DW_EM-F2         1.1067         0.7105
             SLAC_NC_NOTFIXED_D_DW_EM-F2         0.7556         0.7225
            BCDMS_NC_NOTFIXED_P_DW_EM-F2         1.6784         1.6928
            BCDMS_NC_NOTFIXED_D_DW_EM-F2         1.4883         1.6271
              HERA_NC_318GEV_EM-SIGMARED         1.5036         1.4338
              HERA_NC_225GEV_EP-SIGMARED         1.3805         1.2975
              HERA_NC_251GEV_EP-SIGMARED         1.1664         1.0964
              HERA_NC_300GEV_EP-SIGMARED         1.5225         1.4120
              HERA_NC_318GEV_EP-SIGMARED         1.4357         1.3632
      HERA_NC_318GEV_EAVG_CHARM-SIGMARED         2.0776         2.1650
     HERA_NC_318GEV_EAVG_BOTTOM-SIGMARED         1.4436         3.5583


# Compute $\chi^2$ against the mean of the pseudodata replicas

In [17]:
# chi2 of the theory predictions (no HT shift) against the pseudodata averaged over replicas.
_pseudo_res_rep0 = theorypred_rep0 - data_reps_mean
_pseudo_res_meanT = theorypred_reps.mean(axis=1) - data_reps_mean
_pseudo_mean_values = data_reps_mean.to_numpy()

# rep0 prediction, t0 covariance matrix
chi2t0_pseudodata = _pseudo_res_rep0 @ Cinvt0 @ _pseudo_res_rep0 / ndat

# mean over replicas of the per-replica value, t0 covariance matrix
_per_replica_pseudo = [
    (theorypred_reps[r] - _pseudo_mean_values) @ Cinvt0 @ (theorypred_reps[r] - _pseudo_mean_values)
    for r in theorypred_reps
]
chi2t0_average_pseudodata = np.mean(_per_replica_pseudo) / ndat

# replica-averaged prediction, t0 covariance matrix
chi2t0_meanT_pseudodata = _pseudo_res_meanT @ Cinvt0 @ _pseudo_res_meanT / ndat

# rep0 prediction, experimental covariance matrix
chi2exp_pseudodata = _pseudo_res_rep0 @ Cinvexp @ _pseudo_res_rep0 / ndat