In [1]:
from pathlib import Path
from validphys.loader import _get_nnpdf_profile
from validphys.api import API
import numpy as np
import pandas as pd
from validphys.convolution import central_predictions

# Resolve the (temporary) yamldb folder that sits inside the NNPDF data path
profile = _get_nnpdf_profile()
yaml_db = Path(profile["data_path"]).joinpath("yamldb")

The `yaml_db` folder is a temporary solution; it contains files that look like:

```yaml
conversion_factor: 1.0
operands:
- - NMC_NC_EM_D_F2
- - NMC_NC_EM_P_F2
operation: RATIO
target_dataset: NMCPD
```

This information will eventually be part of the new commondata format of course.

The `operation` is applied to the first level of the list while the second level is just concatenated. This is necessary since `pineappl` fktables might contain one layer of concatenation which is already done for the "classic" fktables.

The `pineappl` fktables will live inside the appropriate `theory_xxx` folder, under `/pineappls`.

In [2]:
# Test them all: for every dataset of the NNPDF4.0 runcard, compare the central
# predictions of the dataset built with and without the extra "oldmode" cfactor.
from yaml import safe_load

# Flip to False to skip this scan over all datasets when re-running the notebook
run_full_scan = True
# NOTE(review): absolute path into a local checkout; point it to your own copy of the runcard
runcard_path = Path("/home/juacrumar/NNPDF-testing/nnpdf/n3fit/NNPDF40_with_pineappl.yml")

if run_full_scan:
    pdf = API.pdf(pdf="NNPDF40_nnlo_as_01180")
    all_res = []
    nnpdf40_runcard = safe_load(runcard_path.read_text())
    for d in nnpdf40_runcard["dataset_inputs"]:
        target_ds = d["dataset"]
        cfac = d.get("cfac", [])
        # "oldmode" is appended to the cfactor list to get the classic (old) fktables
        old_ds = API.dataset(dataset_input={"dataset": target_ds, "cfac": cfac + ["oldmode"]}, theoryid=200, use_cuts="internal")
        ds = API.dataset(dataset_input={"dataset": target_ds, "cfac": cfac}, theoryid=200, use_cuts="internal")
        new_cp = central_predictions(ds, pdf)
        cp = central_predictions(old_ds, pdf)
        # `new_cp` is the pineappl-based result and `cp` the classic validphys one:
        # label the columns after the fktables they were actually computed from
        all_res.append(pd.concat([new_cp, cp, new_cp/cp], axis=1, keys=["pine", "vp", f"ratio for {target_ds}"]))

    # Only print the datasets whose mean ratio is neither ~1 nor ~1e-3
    for i in all_res:
        mean_ratio = i[i.columns[2]].mean()
        if not (0.9 < mean_ratio < 1.1) and not (0.9 < 1000*mean_ratio < 1.1):
            print(i)


-- Reading COMMONDATA for Dataset: NMCPD_dw_ite
nData: 260 nSys: 105
-- COMMONDATA Files for NMCPD_dw_ite successfully read.

LHAPDF 6.4.0 loading /usr/share/lhapdf/LHAPDF/NNPDF40_nnlo_as_01180/NNPDF40_nnlo_as_01180_0000.dat
NNPDF40_nnlo_as_01180 PDF set, member #0, version 1

-- Reading COMMONDATA for Dataset: NMC
nData: 292 nSys: 16
-- COMMONDATA Files for NMC successfully read.


-- Reading COMMONDATA for Dataset: SLACP_dwsh
nData: 211 nSys: 3
-- COMMONDATA Files for SLACP_dwsh successfully read.


-- Reading COMMONDATA for Dataset: SLACD_dw_ite
nData: 211 nSys: 103
-- COMMONDATA Files for SLACD_dw_ite successfully read.


-- Reading COMMONDATA for Dataset: BCDMSP_dwsh
nData: 351 nSys: 11
-- COMMONDATA Files for BCDMSP_dwsh successfully read.


-- Reading COMMONDATA for Dataset: BCDMSD_dw_ite
nData: 254 nSys: 108
-- COMMONDATA Files for BCDMSD_dw_ite successfully read.


-- Reading COMMONDATA for Dataset: CHORUSNUPb_dw_ite
nData: 607 nSys: 1014
-- COMMONDATA Files for CHORUSNUPb_dw


-- Reading COMMONDATA for Dataset: CMS_TTB_DIFF_13TEV_2016_LJ_TRAP
nData: 11 nSys: 11
-- COMMONDATA Files for CMS_TTB_DIFF_13TEV_2016_LJ_TRAP successfully read.


-- Reading COMMONDATA for Dataset: CMS_SINGLETOP_TCH_TOT_7TEV
nData: 1 nSys: 3
-- COMMONDATA Files for CMS_SINGLETOP_TCH_TOT_7TEV successfully read.


-- Reading COMMONDATA for Dataset: CMS_SINGLETOP_TCH_R_8TEV
nData: 1 nSys: 1
-- COMMONDATA Files for CMS_SINGLETOP_TCH_R_8TEV successfully read.


-- Reading COMMONDATA for Dataset: CMS_SINGLETOP_TCH_R_13TEV
nData: 1 nSys: 1
-- COMMONDATA Files for CMS_SINGLETOP_TCH_R_13TEV successfully read.


-- Reading COMMONDATA for Dataset: LHCBZ940PB
nData: 9 nSys: 11
-- COMMONDATA Files for LHCBZ940PB successfully read.


-- Reading COMMONDATA for Dataset: LHCBZEE2FB_40
nData: 17 nSys: 19
-- COMMONDATA Files for LHCBZEE2FB_40 successfully read.


-- Reading COMMONDATA for Dataset: LHCBWZMU7TEV
nData: 33 nSys: 35
-- COMMONDATA Files for LHCBWZMU7TEV successfully read.


-- Reading COMMON

In [2]:
# Single-dataset check: build the same dataset twice, with and without "oldmode"
target_ds = "ATLAS_DY_2D_8TEV_LOWMASS"
cfac = []
# The extra "oldmode" cfactor entry presumably selects the classic fktables
old_ds = API.dataset(
    dataset_input={"dataset": target_ds, "cfac": [*cfac, "oldmode"]},
    theoryid=200,
    use_cuts="internal",
)
ds = API.dataset(
    dataset_input={"dataset": target_ds, "cfac": cfac},
    theoryid=200,
    use_cuts="internal",
)

In [3]:
# Let's try to get a prediction out of it
pdf = API.pdf(pdf="NNPDF40_nnlo_as_01180")
new_cp = central_predictions(ds, pdf)  # dataset without "oldmode", i.e. the pineappl one
cp = central_predictions(old_ds, pdf)  # dataset with "oldmode", i.e. the classic one
# Label every column after the fktables it was actually computed from
pd.concat([new_cp, cp, new_cp/cp], axis=1, keys=["pine", "vp", "ratio"])


-- Reading COMMONDATA for Dataset: ATLAS_DY_2D_8TEV_LOWMASS
nData: 84 nSys: 277
-- COMMONDATA Files for ATLAS_DY_2D_8TEV_LOWMASS successfully read.



ValueError: operands could not be broadcast together with shapes (84,10000,91) (84,1) (84,10000,91) 

In [5]:
# Keep the first fkspec of each dataset: the pineappl one (from `ds`)
# and the old one (from the "oldmode" dataset)
pine_fkspec, old_fkspec = ds.fkspecs[0], old_ds.fkspecs[0]

In [6]:
import pineappl
# Read every pineappl fktable listed in the fkspec
pines = [pineappl.fk_table.FkTable.read(path.as_posix()) for path in pine_fkspec.fkpath]
# First table, used later as the representative one when reading shared metadata
pp = pines[0]
lpdf = pdf.load()

# Inspect the pineappl prediction: convolute each table with the central member
# of the PDF (2212 is the PDG id of the proton) and stack the results
res_pine = [p.convolute_with_one(2212, lpdf.central_member.xfxQ2) for p in pines]
total_pine = np.concatenate(res_pine)

LHAPDF 6.4.0 loading all 101 PDFs in set NNPDF40_nnlo_as_01180
NNPDF40_nnlo_as_01180, version 1; 101 PDF members


In [7]:
# Let's inspect the content of the old fktables, remove the cfactor for now
from validphys.fkparser import load_fktable
# NOTE(review): this mutates `old_fkspec` in place; anything that needs the original
# cfactors afterwards has to rebuild `old_ds` first.
old_fkspec.cfactors = False
# Parse the old fktable; presumably without cfactors since they were disabled above
old_fktabledata = load_fktable(old_fkspec)

In [8]:
# Summary of the metadata carried by the old fktable, one entry per line
print(
    f"hadronic?: {old_fktabledata.hadronic}",
    f"Q: {old_fktabledata.Q0}",
    f"n: {old_fktabledata.ndata}",
    f"xgrid shape: {old_fktabledata.xgrid.shape}",
    sep="\n",
)
#old_fktabledata.sigma

hadronic?: True
Q: 1.65
n: 84
xgrid shape: (50,)


In [9]:
# First read the metadata that vp `FKTableData` needs and that all subgrids share
Q0 = np.sqrt(pp.muf2())
xgrid = pp.x_grid()
# Hadronic means in practice that not all luminosity combinations are just electron X proton
hadronic = not all(-11 in i for i in pp.lumi())
# Now prepare the concatenation of grids
fktables = []
for p in pines:
    tmp = p.table().T/p.bin_normalizations()
    fktables.append(tmp.T)
fktable = np.concatenate(fktables, axis=0)
ndata = fktable.shape[0]

In [10]:
# Now let's try to join the fktable, luminosity and xgrid into a pandas dataframe
# keeping compatibility with validphys and, hopefully, 50% of my own sanity
from validphys.convolution import central_hadron_predictions
from validphys.coredata import FKTableData

# Step 1), make the luminosity into a 14x14 mask for the evolution basis
eko_numbering_scheme = (22, 100, 21, 200, 203, 208, 215, 224, 235, 103, 108, 115, 124, 135)
# note that this is the same ordering that was used in fktables
nfl = len(eko_numbering_scheme)
flavour_map = np.zeros((nfl, nfl), dtype=bool)
# Flat (row-major) position of every channel in the mask, kept in the order of `pp.lumi()`
channel_columns = []
for i, j in pp.lumi():
    idx = eko_numbering_scheme.index(i)
    jdx = eko_numbering_scheme.index(j)
    flavour_map[idx,jdx] = True
    channel_columns.append(nfl*idx + jdx)

# Step 2) prepare the indices for the dataframe
xi = np.arange(len(xgrid))
ni = np.arange(ndata)
mi = pd.MultiIndex.from_product([ni, xi, xi], names=["data", "x1", "x2"])
# Column labels follow the order of `pp.lumi()`, which is the order of the channel axis
# of `fktable`. Reading them back from the mask (np.where on the raveled mask) would
# return them sorted, which only matches when the channels happen to be sorted already.
co = np.array(channel_columns, dtype=int)

# Step 3) Now play with the array until we flatten it in the right way?
# The fktables for pineappl have this extra factor of x...
# The output of pineappl is (ndata, flavours, x, x)
lf = len(co)
xfktable = fktable.reshape(ndata, lf, -1)/(xgrid[:,None]*xgrid[None,:]).flatten()
# Move the channel axis last so that each row is one (data, x1, x2) point
fkmod = np.moveaxis(xfktable, 1, -1)
fkframe = fkmod.reshape(-1, lf)

# Uff, big
df = pd.DataFrame(fkframe, index=mi, columns=co)

# hadronic is hard-coded: the (data, x1, x2) index built above is the two-x layout
fk = FKTableData(sigma=df, ndata=ndata,  Q0=Q0, metadata=None, hadronic=True, xgrid=xgrid)
central_hadron_predictions(fk, pdf)

Unnamed: 0_level_0,0
data,Unnamed: 1_level_1
0,7315.803089
1,7315.311488
2,7291.203597
3,7283.822320
4,7269.490316
...,...
79,717.508960
80,571.652765
81,405.970202
82,233.472195


In [11]:
# Create a luminosity tensor and check that the results are correct
from validphys.pdfbases import evolution

# Names of the 14 evolution-basis flavours requested from the PDF
evol_basis = (
    "photon",
    "singlet",
    "g",
    "V",
    "V3",
    "V8",
    "V15",
    "V24",
    "V35",
    "T3",
    "T8",
    "T15",
    "T24",
    "T35",
)
# Drop the length-1 axes, keep the first entry (presumably the central member) and
# divide by x (presumably because grid_values returns x*f(x) -- TODO confirm)
total_pdf = evolution.grid_values(pdf, evol_basis, xgrid, [Q0]).squeeze()[0]/xgrid
print(total_pdf.shape)
# lumi[a, b, x1, x2] = total_pdf[a, x1] * total_pdf[b, x2]
lumi = np.einsum('ij,kl->ikjl', total_pdf, total_pdf)
# Pick the channels in the order of `pp.lumi()`, since the einsum below pairs them
# positionally with the channel axis of `fktable`. A boolean mask (`lumi[flavour_map]`)
# returns them in row-major order, which only matches if the channels are already sorted.
channel_indices = np.array(
    [(eko_numbering_scheme.index(i), eko_numbering_scheme.index(j)) for i, j in pp.lumi()],
    dtype=int,
).reshape(-1, 2)
lumi_masked = lumi[channel_indices[:, 0], channel_indices[:, 1]]
print(fktable.shape)
print(lumi_masked.shape)
# Contract channel and both x axes, leaving one number per data point
res = np.einsum('ijkl,jkl->i', fktable, lumi_masked)
#pd.concat([pd.DataFrame(res), cp, pd.DataFrame(res)/cp,  ], axis=1)

(14, 100)
(84, 91, 100, 100)
(91, 100, 100)


In [12]:
# NOTE(review): 48 does not match the number of data points (84) reported earlier; the
# reshape only succeeds because the total size happens to be divisible by 48*91.
# Possibly a transposition of 84 -- confirm before relying on this shape.
xfktable.reshape(48,91,-1).shape

(48, 91, 17500)

In [13]:
from validphys.fkparser import open_fkpath, _parse_string, _parse_header, _build_sigma
from validphys.fkparser import _parse_flavour_map, _parse_hadronic_fast_kernel
# Close the handle left open by a previous run of this cell, if there is one.
# Only the "first run" case (`f` not defined yet) is expected, so only that is silenced.
try:
    f.close()
except NameError:
    pass
# The file is left open on purpose: the stream is consumed further after the header
f = open_fkpath(old_fkspec.fkpath)
line_and_stream = enumerate(f, start=1)
lineno, header = next(line_and_stream)
# Parsed header sections, keyed by section name
res = {}
while True:
    marker, header_name = _parse_header(lineno, header)
    # Stop right at the start of the FastKernel section, leaving it unread in the stream
    if header_name == "FastKernel":
        break
    if header_name == "FlavourMap":
        out, lineno, header = _parse_flavour_map(line_and_stream)
    else:
        out, lineno, header = _parse_string(line_and_stream)
    res[header_name] = out

In [14]:
# Shape of the flavour map parsed from the header of the old fktable
res["FlavourMap"].shape

(14, 14)

In [15]:
# Parse the rest of the open stream `f` (positioned after the header) as a hadronic fast kernel
i_hate_pandas = _parse_hadronic_fast_kernel(f)

In [16]:
# Display the table parsed by hand from the old fktable file
i_hate_pandas

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0,1,2,3,4,5,6,7,8,9,...,186,187,188,189,190,191,192,193,194,195
data,x1,x2,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
0,8,9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0,8,10,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0,8,11,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0,8,12,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0,8,13,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83,49,45,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
83,49,46,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
83,49,47,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
83,49,48,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [17]:
# Display the table of the same old fktable as returned by `load_fktable`, for comparison
old_fktabledata.sigma

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,15,16,17,18,19,20,23,24,25,29,...,151,155,156,157,158,159,160,163,164,165
data,x1,x2,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
0,8,9,3.551256e-12,6.723554e-14,1.440352e-14,5.063869e-15,1.687916e-15,-1.687970e-15,1.990991e-12,6.636638e-13,-6.636638e-13,6.777992e-14,...,2.210156e-13,-6.636581e-13,-1.256423e-14,-1.686316e-15,1.686384e-15,5.621234e-16,5.921075e-15,6.630468e-13,2.210156e-13,1.623593e-12
0,8,10,1.063391e-09,-6.265803e-11,8.951103e-12,3.163000e-12,1.054332e-12,-1.054332e-12,5.930050e-10,1.976683e-10,-1.976683e-10,1.742030e-11,...,6.584724e-11,-1.960762e-10,1.168314e-11,-1.053518e-12,1.053549e-12,3.511840e-13,3.683942e-12,1.975417e-10,6.584724e-11,4.885527e-10
0,8,11,-1.153745e-08,2.671182e-10,-6.604233e-11,-2.333724e-11,-7.779053e-12,7.779080e-12,-6.416875e-09,-2.138958e-09,2.138959e-09,-1.975025e-10,...,-7.124743e-10,2.133962e-09,-4.973325e-11,7.773604e-12,-7.773457e-12,-2.591189e-12,-2.718048e-11,-2.137423e-09,-7.124743e-10,-5.274198e-09
0,8,12,8.035378e-07,1.021512e-08,-2.029926e-09,-7.136345e-10,-2.378793e-10,2.378865e-10,4.486250e-07,1.495417e-07,-1.495417e-07,1.458973e-08,...,4.980587e-08,-1.494869e-07,-1.901015e-09,2.375349e-10,-2.375408e-10,-7.918113e-11,-8.340609e-10,1.494176e-07,4.980587e-08,3.671113e-07
0,8,13,3.994267e-06,1.024851e-07,-8.110835e-09,-2.848692e-09,-9.495391e-10,9.495396e-10,2.228781e-06,7.429271e-07,-7.429272e-07,7.217351e-08,...,2.474392e-07,-7.427955e-07,-1.906113e-08,9.480188e-10,-9.480854e-10,-3.160177e-10,-3.331678e-09,7.423176e-07,2.474392e-07,1.824352e-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83,49,45,6.994065e-05,9.919801e-05,-2.470269e-08,-6.708537e-09,-2.236068e-09,2.236141e-09,5.246045e-06,1.748682e-06,-1.748682e-06,1.071540e-04,...,4.347564e-09,-1.406371e-06,-2.605165e-06,-3.788580e-12,5.088462e-12,1.696026e-12,3.111542e-11,1.304269e-08,4.347564e-09,7.609675e-08
83,49,46,6.753852e-05,9.724627e-05,-1.244625e-08,-3.675849e-09,-1.225285e-09,1.225241e-09,4.887053e-06,1.629018e-06,-1.629018e-06,1.031891e-04,...,4.045674e-09,-1.373817e-06,-2.546778e-06,-6.150348e-12,7.366137e-12,2.455353e-12,4.199674e-11,1.213702e-08,4.045674e-09,7.082937e-08
83,49,47,6.500422e-05,9.536854e-05,-3.852804e-08,-1.070439e-08,-3.568221e-09,3.568189e-09,4.500866e-06,1.500289e-06,-1.500289e-06,9.897122e-05,...,3.747365e-09,-1.342658e-06,-2.491063e-06,9.553159e-12,-8.415844e-12,-2.805294e-12,-2.653528e-11,1.124209e-08,3.747365e-09,6.561773e-08
83,49,48,6.265452e-05,9.362106e-05,-7.560318e-08,-2.055815e-08,-6.852734e-09,6.852660e-09,4.146040e-06,1.382013e-06,-1.382013e-06,9.505574e-05,...,3.495485e-09,-1.313825e-06,-2.439459e-06,2.466470e-11,-2.359942e-11,-7.866388e-12,-9.388939e-11,1.048646e-08,3.495485e-09,6.118776e-08
