In [1]:
# Location of the folder holding the new-format datafiles.
# The path used so far is kept as the default so existing setups keep working, but it
# can be overridden without editing the notebook by exporting NNPDF_BUILDMASTER_PATH
# before the kernel starts (an empty value is treated as "not set").
import os
from pathlib import Path

_DEFAULT_BUILDMASTER_PATH = "/mount/storage/Academic_Workspace/NNPDF/src/nnpdf/buildmaster"
BUILDMASTER_PATH = Path(
    os.environ.get("NNPDF_BUILDMASTER_PATH") or _DEFAULT_BUILDMASTER_PATH
).expanduser()  # expanduser lets the override use "~"; it is a no-op for the default

from validphys.core import CommonDataSpec
from reportengine.compat import yaml
import pandas as pd

In [2]:
# Rough sketch of what the loader will do when it is handed a dataset_input.
# It is meant as a (more or less) stand-in for commondataparser.parse_commondata;
# how the work is split between CommonDataSpec and parse_commondata can be decided later.

# Pick the dataset_input to experiment with
dataset_input = dict(dataset="NMCPD", variant="shifted")

# Loader
setname, variant = dataset_input["dataset"], dataset_input["variant"]

# Each dataset lives in its own folder, with the metadata file directly inside it
setdir = BUILDMASTER_PATH.joinpath(setname)
metadatafile = setdir.joinpath("metadata.yaml")

# Fail early, naming the dataset, when there is no metadata to build the spec from
if not metadatafile.exists():
    raise FileNotFoundError(f"Metadata not found for {setname}")

cd_spec = CommonDataSpec(setname, variant, metadatafile)

In [3]:
# Let's look at the kinematics:
# the spec exposes a kinematics object, and get_kintable() renders it as a table
# with avg/min/max entries for each kinematic variable (x, Q2 and y for this dataset).
kk = cd_spec.kinematics
kk.get_kintable()

Unnamed: 0_level_0,x,x,x,Q2,Q2,Q2,y,y,y
Unnamed: 0_level_1,avg,min,max,avg,min,max,avg,min,max
0,0.0015,0.0015,0.0015,0.16,0.16,0.16,0.0,0.0,0.0
1,0.0015,0.0015,0.0015,0.25,0.25,0.25,0.0,0.0,0.0
2,0.0015,0.0015,0.0015,0.35,0.35,0.35,0.0,0.0,0.0
3,0.0015,0.0015,0.0015,0.45,0.45,0.45,0.0,0.0,0.0
4,0.0015,0.0015,0.0015,0.60,0.60,0.60,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
255,0.6750,0.6750,0.6750,26.49,26.49,26.49,0.0,0.0,0.0
256,0.6750,0.6750,0.6750,35.40,35.40,35.40,0.0,0.0,0.0
257,0.6750,0.6750,0.6750,47.03,47.03,47.03,0.0,0.0,0.0
258,0.6750,0.6750,0.6750,63.53,63.53,63.53,0.0,0.0,0.0


In [4]:
# And now the uncertainties: nsys as reported by the spec
# (presumably the number of systematic uncertainties, i.e. of sys_* columns — TODO confirm)
print(cd_spec.nsys)

4


In [5]:
# Load the data described by the spec and show the full commondata_table:
# one row per datapoint, with the kinematic variables, the central value (data),
# the statistical uncertainty (stat) and one column per systematic.
cd = cd_spec.load()
cd.commondata_table

Unnamed: 0,x,Q2,y,data,stat,sys_1_ADD_CORR,sys_2_ADD_CORR,sys_3_ADD_CORR,sys_5_ADD_CORR
0,0.0015,0.16,0.0,0.9815,0.0203,0.000981,0.000000,0.010797,0.000000
1,0.0015,0.25,0.0,1.0030,0.0212,0.001003,0.000000,0.013039,0.001003
2,0.0015,0.35,0.0,0.9675,0.0205,0.001935,0.000000,0.010642,0.000000
3,0.0015,0.45,0.0,1.0330,0.0258,0.001033,0.000000,0.019627,0.000000
4,0.0015,0.60,0.0,0.9912,0.0176,0.000991,0.000000,0.011894,0.000000
...,...,...,...,...,...,...,...,...,...
255,0.6750,26.49,0.0,0.6717,0.0235,0.000672,0.002687,0.000000,0.001343
256,0.6750,35.40,0.0,0.7194,0.0330,0.000719,0.002158,0.000000,0.002158
257,0.6750,47.03,0.0,0.6959,0.0373,0.000696,0.000696,0.000696,0.002088
258,0.6750,63.53,0.0,0.7020,0.0513,0.000702,0.000000,0.000702,0.002106


In [6]:
# Only the systematics: the sys_* columns of the loaded commondata
cd.systematics_table

Unnamed: 0,sys_1_ADD_CORR,sys_2_ADD_CORR,sys_3_ADD_CORR,sys_5_ADD_CORR
0,0.000981,0.000000,0.010797,0.000000
1,0.001003,0.000000,0.013039,0.001003
2,0.001935,0.000000,0.010642,0.000000
3,0.001033,0.000000,0.019627,0.000000
4,0.000991,0.000000,0.011894,0.000000
...,...,...,...,...
255,0.000672,0.002687,0.000000,0.001343
256,0.000719,0.002158,0.000000,0.002158
257,0.000696,0.000696,0.000696,0.002088
258,0.000702,0.000000,0.000702,0.002106


In [8]:
# Select data with cuts: keep only the datapoints listed in the cut mask.
# NOTE(review): cuts 32, 67, 89 yield rows labelled 33, 68, 90 in the output —
# presumably an offset between cut positions and table index labels; confirm this is intended.
loaded_cd_with_cuts = cd.with_cuts([32,67,89])
loaded_cd_with_cuts.commondata_table

Unnamed: 0,x,Q2,y,data,stat,sys_1_ADD_CORR,sys_2_ADD_CORR,sys_3_ADD_CORR,sys_5_ADD_CORR
33,0.0125,0.16,0.0,0.9683,0.0543,0.001937,-0.00581,0.001937,0.0
68,0.025,1.74,0.0,0.9802,0.0076,0.00196,0.0,0.00098,0.0
90,0.035,11.45,0.0,0.9572,0.0107,0.000957,0.0,0.000957,0.0


In [11]:
# Central values (the `data` column) of the datapoints that survived the cuts
loaded_cd_with_cuts.central_values

33    0.9683
68    0.9802
90    0.9572
Name: data, dtype: float64