In [1]:
from inspect import ismethod
from IPython.display import HTML as html_print
import numpy as np

from validphys.api import API
from validphys.commondataparser import parse_commondata_new

## Old-New comparison

The next cells use `parse_commondata` and `parse_commondata_new` to compare the `CommonData` objects returned by parsing common data files in the old and new styles.

At the moment the "new commondatas" are in a hard-coded folder called `new_data` (it is hard-coded in `commondataparser.py`), while the old ones are taken from whatever was installed in `share/NNPDF/data`. Eventually the new commondata will also live there.

The comparison checks that the data, uncertainties and kinematics are the same (or as close as they can be). The format of the dataframes is currently slightly different, but for now this is the desired result.

In [2]:
# Helper functions

def load_old_and_new(old_name, new_name, theoryid=200):
    """Return the ``(old, new)`` commondata pair for one dataset.

    The old-style commondata is obtained through the validphys ``API.dataset``
    call (without cuts) followed by ``load_commondata()``; the new-style one is
    read with ``parse_commondata_new``.

    ``theoryid`` is only passed because ``API.dataset`` requires it; it is never
    used for the comparison. It defaults to 200 on the assumption that most
    installations already have that theory available.
    """
    legacy_dataset = API.dataset(
        dataset_input={"dataset": old_name},
        use_cuts="nocuts",
        theoryid=theoryid,
    )
    old_cd = legacy_dataset.load_commondata()

    # If the new data cannot be found, edit the `_folder_data` variable at the
    # beginning of `commondataparser.py` so that it points to your data folder.
    return old_cd, parse_commondata_new(new_name)

def print_check(msg, res):
    """Display ``msg`` with the verdict ``res`` as a coloured HTML line.

    The line is rendered in green when ``res`` is truthy and in red otherwise.
    """
    if res:
        color = "green"
    else:
        color = "red"
    markup = f"<text style=color:{color}>equal={res} {msg}</text>"
    display(html_print(markup))

def check_this(cds, method, naming=None):
    """Check that all objects in ``cds`` agree on the quantity ``method``.

    The first element of ``cds`` is used as the reference; every later element
    is compared against it with ``np.allclose``. The combined verdict is
    reported through ``print_check``; nothing is returned.

    Parameters
    ----------
    cds : iterable
        Objects to compare. Each must expose ``method`` either as an attribute
        or as a bound method that can be called without arguments.
    method : str
        Name of the attribute or method to fetch from every object.
    naming : str, optional
        Human-readable label used in the report. Defaults to ``method``.
    """
    if naming is None:
        naming = method

    reference = None
    check_result = True
    for cd in cds:
        res = getattr(cd, method)
        if ismethod(res):  # since sometimes we have methods, sometimes they are attributes
            res = res()
        if reference is None:
            reference = res
            continue
        # Accumulate the verdict instead of overwriting it: a mismatch found
        # for an earlier element must not be masked by a later one that agrees.
        matches = bool(np.allclose(res, reference))
        check_result = check_result and matches

    print_check(f"Testing {naming} ({method})", check_result)


def run_checks(old_cd, new_cd):
    """Compare the old and new commondata on every quantity of interest.

    Each entry of the table below is ``(attribute or method name, label)``;
    a label of ``None`` makes ``check_this`` fall back to the attribute name.
    """
    pair = [old_cd, new_cd]
    comparisons = (
        ("get_cv", "central_values"),
        ("get_kintable", "kinematics"),
        ("multiplicative_errors", None),
        ("additive_errors", None),
        ("stat_errors", None),
    )
    for attribute, label in comparisons:
        check_this(pair, attribute, naming=label)

In [3]:
def test_group(datasets, nofail=False):
    all_cds = []
    for old_name, new_name in datasets.items():
        print(f"\nChecking {new_name} (old name: {old_name})")
        try:
            old_cd, new_cd = load_old_and_new(old_name, new_name)
            all_cds.append((old_cd, new_cd))
            print(" > Comparing values...")
            run_checks(old_cd, new_cd)
        except FileNotFoundError:
            print(f"Files for {new_name} not found")
            continue
        except Exception as e:
            if nofail:
                print(f"### FAILED for {new_name}\n")
                continue
            raise e
    return all_cds

## FT Drell-Yan

These datasets can be found in the `E605` branch [#1679](https://github.com/NNPDF/nnpdf/pull/1679)

In [4]:
# Map each old-style dataset name (key) to the name of its new-style
# commondata (value); `test_group` iterates over these pairs.
datasets = {
    "DYE605":  "E605_DY_38P8GEV_PXSEC",
    "DYE886P": "E866_DY_800GEV_PXSEC",
    "DYE886R": "E866_DY_800GEV_RATIO_PDXSECRATIO",
    "DYE906R": "E906_DY_120GEV_PDXSECRATIO",
}

# With nofail=False, `test_group` re-raises any error other than
# FileNotFoundError, so this cell stops at the first dataset that fails.
res_ft = test_group(datasets, nofail=False)


#old_cd, new_cd = load_old_and_new("DYE906R", "E906_DY_120GEV_PDXSECRATIO")
#run_checks(old_cd, new_cd)


Checking E605_DY_38P8GEV_PXSEC (old name: DYE605)


ValidationError: Problem processing key at line 19 in /Users/jumax9/Academic_Workspace/NNPDF/src/nnpdf/new_data/E605_DY_38P8GEV/metadata.yaml:
Cannot process field 'implemented_observables' of value into the corresponding field of 'SetMetaData'
Problem processing list item at line 20 in /Users/jumax9/Academic_Workspace/NNPDF/src/nnpdf/new_data/E605_DY_38P8GEV/metadata.yaml:
Cannot process list item 1.
Problem processing key at line 45 in /Users/jumax9/Academic_Workspace/NNPDF/src/nnpdf/new_data/E605_DY_38P8GEV/metadata.yaml:
Cannot process field 'theory' of value into the corresponding field of 'ObservableMetaData'
No match for any possible type:
Not a valid match for 'TheoryMeta': Cannot process field 'operation' of value into the corresponding field of 'TheoryMeta'
Not a valid match for 'NoneType': Expecting value of type 'NoneType', not CommentedMap.

In [5]:
# Pick the second-to-last (old, new) pair returned for the FT Drell-Yan group.
# NOTE(review): `test_group` skips datasets whose files are missing, so the dataset
# selected by this index depends on what was loaded — presumably DYE886R; confirm.
old_cd, new_cd = res_ft[-2]

old_cd.commondata_table

Unnamed: 0_level_0,process,kin1,kin2,kin3,data,stat,ADD,MULT
entry,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,DYP_E886R,1.52978,21.16,38.8,1.038,0.022,0.010069,0.97
2,DYP_E886R,1.220281,26.01,38.8,1.056,0.011,0.010243,0.97
3,DYP_E886R,1.02308,31.36,38.8,1.081,0.01,0.010486,0.97
4,DYP_E886R,0.884408,38.44,38.8,1.086,0.011,0.010534,0.97
5,DYP_E886R,0.749325,47.61,38.8,1.118,0.013,0.010845,0.97
6,DYP_E886R,0.648223,51.84,38.8,1.116,0.015,0.010825,0.97
7,DYP_E886R,0.551877,56.25,38.8,1.115,0.018,0.010816,0.97
8,DYP_E886R,0.471793,60.84,38.8,1.161,0.023,0.011262,0.97
9,DYP_E886R,0.414373,67.24,38.8,1.132,0.027,0.01098,0.97
10,DYP_E886R,0.355011,75.69,38.8,1.124,0.027,0.010903,0.97


In [6]:
# Table of the new-style commondata from the pair unpacked from `res_ft`,
# shown for side-by-side comparison with the old-style table.
new_cd.commondata_table

Unnamed: 0_level_0,process,kin1,kin2,kin3,data,stat,ADD
entry,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,DY,1.550133,21.16,38.763,1.038,0.022,0.010069
2,DY,1.236795,26.01,38.763,1.056,0.011,0.010243
3,DY,1.036368,31.36,38.763,1.081,0.01,0.010486
4,DY,0.894809,38.44,38.763,1.086,0.011,0.010534
5,DY,0.766403,46.24,38.763,1.118,0.013,0.010845
6,DY,0.654778,51.84,38.763,1.116,0.015,0.010825
7,DY,0.557312,56.25,38.763,1.115,0.018,0.010816
8,DY,0.476266,60.84,38.763,1.161,0.023,0.011262
9,DY,0.417904,67.24,38.763,1.132,0.027,0.01098
10,DY,0.357677,75.69,38.763,1.124,0.027,0.010903


## CMS ttbar

These datasets can be found in the `more_efficient_metadata_for_new_commondata` branch [#1684](https://github.com/NNPDF/nnpdf/pull/1684)

In [7]:
# Old-style dataset name (key) -> new-style commondata name (value).
# Note: this rebinds `datasets`, replacing the mapping used for the FT Drell-Yan group.
datasets = {
    "CMSTOPDIFF8TEVTTRAPNORM":  "CMS_TTBAR_8TEV_LJ_DIF_YTTBAR-NORM",
}

# nofail=False: any error other than FileNotFoundError is re-raised by `test_group`.
res = test_group(datasets, nofail=False)


Checking CMS_TTBAR_8TEV_LJ_DIF_YTTBAR-NORM (old name: CMSTOPDIFF8TEVTTRAPNORM)
Files for CMS_TTBAR_8TEV_LJ_DIF_YTTBAR-NORM not found
