In [1]:
# Path to the buildmaster folder where the new datafiles are.
# Set the NNPDF_BUILDMASTER_PATH environment variable to point at your own checkout;
# when it is unset (or empty) the original hard-coded location is used as a fallback.
import os
from pathlib import Path

BUILDMASTER_PATH = Path(
    os.environ.get("NNPDF_BUILDMASTER_PATH")
    or "/mount/storage/Academic_Workspace/NNPDF/src/nnpdf/buildmaster"
)

from validphys.core import CommonDataSpec
from reportengine.compat import yaml
import pandas as pd

In [2]:
if False:
    # Playground for kinematics (disabled): builds one table out of kinematics.yaml
    kinfile = BUILDMASTER_PATH / "NMCPD/kinematics.yaml"
    # Read explicitly as UTF-8 so that parsing does not depend on the locale's default encoding
    kinyaml = yaml.safe_load(kinfile.read_text(encoding="utf-8"))
    kin_data = []
    keys = []
    for key, data in kinyaml.items():
        # Each top-level entry is a list of records carrying an "index" field
        kin_data.append(pd.DataFrame.from_records(data, index="index"))
        # Strip the "kin_" prefix so that the column label is the bare variable name
        keys.append(key.replace("kin_", ""))
    # Concatenate side by side (keys form the outer column level), move the keys to the
    # inner level and finally order the columns by the values found in the row labelled 1
    kin_df = (
        pd.concat(kin_data, axis=1, keys=keys)
        .swaplevel(0, 1, axis=1)
        .sort_values(1, axis="columns")
    )
    
if False:     # Playground for uncertainties
    # Disabled scratch code: builds one table out of uncertainties.yaml,
    # with one column per top-level entry of the file.
    unfile = BUILDMASTER_PATH / "NMCPD/uncertainties.yaml"
    # Read explicitly as UTF-8 so that parsing does not depend on the locale's default encoding
    unyaml = yaml.safe_load(unfile.read_text(encoding="utf-8"))
    unc_data = []
    keys = []
    for key, data in unyaml.items():
        if key == "stat":
            # The "stat" entry is directly a list of records
            unc_data.append(pd.DataFrame.from_records(data, index="index"))
            keys.append(key)
        else:
            # Any other entry keeps its records under "errors" and a list of labels
            # under "mode"; the labels are appended to the column name
            unc_data.append(pd.DataFrame.from_records(data["errors"], index="index"))
            keys.append(f"{key}_" + "_".join(data["mode"]))
    # The keys become the outer column level; the inner level (the record field names) is dropped
    unc_df = pd.concat(unc_data, axis=1, keys=keys).droplevel(1, axis=1)
    
if False:
    # Playground for the central values (disabled): load data.yaml and expose
    # the "value" field of each record under the column name "data"
    _data_file = BUILDMASTER_PATH / "NMCPD/data.yaml"
    _raw_yaml = _data_file.read_text(encoding="utf-8")
    datayaml = yaml.safe_load(_raw_yaml)
    data_df = pd.DataFrame.from_records(
        datayaml["data_central"], index="index"
    ).rename(columns={"value": "data"})

In [3]:
# This cell is (more or less) what the loader will do when asked for a dataset_input,
# i.e. a rough substitute for commondataparser.parse_commondata.
# Which parts end up inside CommonDataSpec and which inside parse_commondata
# can be decided a posteriori.

# Write here the dataset_input you want to play with
dataset_input = {"dataset": "NMCPD", "variant": "shifted"}

# Loader: unpack the request
setname, variant = dataset_input["dataset"], dataset_input["variant"]

# The metadata is looked up at <BUILDMASTER_PATH>/<setname>/metadata.yaml
setdir = BUILDMASTER_PATH.joinpath(setname)
metadatafile = setdir.joinpath("metadata.yaml")

if metadatafile.exists():
    cd_spec = CommonDataSpec(setname, variant, metadatafile)
else:
    # Fail early with an explicit message instead of handing a missing file to CommonDataSpec
    raise FileNotFoundError(f"Metadata not found for {setname}")

In [4]:
# Let's look at the kinematics:
# the kinematics object is taken from the spec and its get_kintable() result is shown as the
# cell output (in the output below: avg/max/min sub-columns for each of x, y and q2)
kk = cd_spec.kinematics
kk.get_kintable()

Unnamed: 0_level_0,x,x,x,y,y,y,q2,q2,q2
Unnamed: 0_level_1,avg,max,min,avg,max,min,avg,max,min
index,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
1,0.0015,0.0015,0.0015,0.120852,0.120852,0.120852,0.16,0.16,0.16
2,0.0015,0.0015,0.0015,0.188831,0.188831,0.188831,0.25,0.25,0.25
3,0.0015,0.0015,0.0015,0.264363,0.264363,0.264363,0.35,0.35,0.35
4,0.0015,0.0015,0.0015,0.339895,0.339895,0.339895,0.45,0.45,0.45
5,0.0015,0.0015,0.0015,0.453194,0.453194,0.453194,0.60,0.60,0.60
...,...,...,...,...,...,...,...,...,...
256,0.6750,0.6750,0.6750,0.044463,0.044463,0.044463,26.49,26.49,26.49
257,0.6750,0.6750,0.6750,0.059419,0.059419,0.059419,35.40,35.40,35.40
258,0.6750,0.6750,0.6750,0.078940,0.078940,0.078940,47.03,47.03,47.03
259,0.6750,0.6750,0.6750,0.106635,0.106635,0.106635,63.53,63.53,63.53


In [5]:
# And now let's look at the uncertainties
# NOTE(review): nsys prints 1 for this dataset/variant — presumably the number of
# systematic uncertainties; confirm against CommonDataSpec
print(cd_spec.nsys)

1


In [6]:
# Load the full commondata_table
# (in the output below it gathers the kinematics x, y, q2, the central value "data",
# the "stat" error and the systematic column in a single table)
cd = cd_spec.load()
cd.commondata_table

Unnamed: 0_level_0,x,y,q2,data,stat,sys_1_ADD_CORR
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,0.0015,0.120852,0.16,0.9815,0.0203,0.0109
2,0.0015,0.188831,0.25,1.0030,0.0212,0.0134
3,0.0015,0.264363,0.35,0.9675,0.0205,0.0112
4,0.0015,0.339895,0.45,1.0330,0.0258,0.0195
5,0.0015,0.453194,0.60,0.9912,0.0176,0.0121
...,...,...,...,...,...,...
256,0.6750,0.044463,26.49,0.6717,0.0235,0.0034
257,0.6750,0.059419,35.40,0.7194,0.0330,0.0033
258,0.6750,0.078940,47.03,0.6959,0.0373,0.0026
259,0.6750,0.106635,63.53,0.7020,0.0513,0.0029


In [7]:
# Show the systematics table of the loaded commondata
# (a single column, sys_1_ADD_CORR, in the output below)
cd.systematics_table

Unnamed: 0_level_0,sys_1_ADD_CORR
index,Unnamed: 1_level_1
1,0.0109
2,0.0134
3,0.0112
4,0.0195
5,0.0121
...,...
256,0.0034
257,0.0033
258,0.0026
259,0.0029


In [8]:
# Select data with cuts
# NOTE(review): in the output below the selected rows carry index 33, 68 and 90, so the
# cut values look like 0-based positions while the table index is 1-based — confirm
# against with_cuts
loaded_cd_with_cuts = cd.with_cuts([32,67,89])
loaded_cd_with_cuts.commondata_table

Unnamed: 0_level_0,x,y,q2,data,stat,sys_1_ADD_CORR
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
33,0.008,0.490016,3.46,0.9924,0.0122,0.0084
68,0.025,0.062088,1.37,0.9849,0.0107,0.0027
90,0.035,0.290044,8.96,0.9686,0.0115,0.0021


In [9]:
# Central values (the "data" column) of the points kept by the cuts
loaded_cd_with_cuts.central_values

index
33    0.9924
68    0.9849
90    0.9686
Name: data, dtype: float64