Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add theory error to matching grids #47

Merged
merged 20 commits into from
Oct 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion runcards/fit_runcard.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ experiments:

# Define some kinematic cuts (Omit if not needed)
kinematic_cuts:
q2max: 30.0 # Max-Q2 value for real datasets (to be named better)
q2max: 25.0 # Max-Q2 value for real datasets (to be named better)
w2min: 3.50 # Minimum W2 value for all datasets

# chi2 history stored for each `log_freq` epochs
Expand Down
16 changes: 9 additions & 7 deletions src/nnusf/cli/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def sub_fit(model, runcard, destination):


@subcommand.command("sf")
@click.argument("source", type=click.Path(path_type=pathlib.Path, exists=True))
@click.argument("dataset", type=click.Path(path_type=pathlib.Path, exists=True))
@click.option(
"-k",
"--kind",
Expand All @@ -153,22 +153,24 @@ def sub_fit(model, runcard, destination):
default=pathlib.Path.cwd().absolute() / "plots",
help="Alternative destination path to store the resulting plots (default: $PWD/plots).",
)
def sub_sf(source, kind, destination):
def sub_sf(dataset, kind, destination):
"""Plots structure functions."""

sf.main(source, kind, destination)
sf.main(dataset, kind, destination)


@subcommand.command("matching_dataset")
@click.argument("source", type=click.Path(path_type=pathlib.Path, exists=True))
@click.argument("dataset", type=click.Path(path_type=pathlib.Path, exists=True))
@click.option(
"-d",
"--destination",
type=click.Path(path_type=pathlib.Path),
default=pathlib.Path.cwd().absolute() / "plots",
help="Alternative destination path to store the resulting plots (default: $PWD/plots).",
)
def sub_matching_dataset(source, destination):
"""Plots the matching datasets along with the actual data"""
def sub_matching_dataset(dataset, destination):
"""Plots the matching datasets along with the actual data.

matching.main(source, destination)
eg: nnu plot matching_dataset commondata/data/DATA_NUTEV_F2_MATCHING.csv
"""
matching.main(dataset, destination)
11 changes: 10 additions & 1 deletion src/nnusf/cli/theory.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,18 @@ def sub_sub_hiq(data, destination):
@option_dest
def sub_sub_all(data, destination):
"""Full datasets runcards"""
runcards.dvst(data, destination=destination)
runcards.dvst(data, destination=destination, activate_scale_var=False)


@sub_runcards.command("th_err")
@click.argument(
"data", nargs=-1, type=click.Path(exists=True, path_type=pathlib.Path)
)
@option_dest
def sub_sub_all(data, destination):
"""Full datasets runcards with 7 points perescription scale variations"""
runcards.dvst(data, destination=destination, activate_scale_var=True)

@subcommand.command("by")
@click.argument(
"observables", nargs=-1, type=click.Choice(bodek_yang.load.load().members)
Expand Down
19 changes: 14 additions & 5 deletions src/nnusf/data/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,11 +245,20 @@ def build_covariance_matrix(

"""
if "_MATCHING" in dataset_name:
dataset_name = "MATCH_" + dataset_name.removesuffix("_MATCHING")
nrep_predictions = np.load(
f"{commondata_path}/matching/{dataset_name}.npy"
)
covmat = np.cov(nrep_predictions[mask_predictions])
sv_variations = []
for variation in pathlib.Path(f"{commondata_path}/matching/").iterdir():
if dataset_name in variation.stem:
# central scale
if "xif1_xir1" in variation.stem:
nrep_predictions = np.load(variation)
else:
sv_variations.append(np.load(variation))
# build th shift
th_shift = (sv_variations - nrep_predictions[:,0]).T
# build covaraince
pdf_covmat = np.cov(nrep_predictions[mask_predictions])
th_covamt = np.cov(th_shift[mask_predictions])
covmat = np.sqrt(th_covamt ** 2 + pdf_covmat ** 2)
return clip_covmat(covmat, dataset_name)
else:
diagonal = np.power(unc_df["stat"], 2)
Expand Down
55 changes: 31 additions & 24 deletions src/nnusf/data/matching_grids.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import yaml

from .. import utils
from ..theory.predictions import pdf_error
from ..theory.predictions import pdf_error, theory_error
from .utils import (
MAP_OBS_PID,
build_obs_dict,
Expand Down Expand Up @@ -58,13 +58,14 @@ def proton_boundary_conditions(
if grids is not None:
obsdic_list = []
for grid in grids:
grid_name = grid.stem[6:-13]
grid_name = grid.stem[6:-4]
_logger.info(f"Generating BC data for '{grid_name}'.")

obstype = grid_name.split("_")[-1]
obstype = grid_name.split("_")[1]
obspid = MAP_OBS_PID[obstype]
obsdic = build_obs_dict(obstype, [None], obspid)
obsdic_list.append(obsdic)
if obsdic not in obsdic_list:
obsdic_list.append(obsdic)
main(
grid,
pdf,
Expand Down Expand Up @@ -175,13 +176,11 @@ def main(
with tempfile.TemporaryDirectory() as tmpdir:
tmpdir = pathlib.Path(tmpdir).absolute()

grid_name = grids.stem[6:-13]
obs = grid_name.split("_")[-1]
new_name = f"{grid_name}_MATCHING"
experiment = grid_name.split("_")[0]
full_grid_name = grids.stem[6:-4]
experiment, obs, _, xif, xir = full_grid_name.split("_")
new_name = f"{experiment}_{obs}_MATCHING"
new_experiment = f"{experiment}_MATCHING"

# if grids.suffix == ".tar.gz":
if str(grids).endswith(".tar.gz"):
utils.extract_tar(grids, tmpdir)
grids = tmpdir / "grids"
Expand All @@ -195,32 +194,40 @@ def main(
if "pineappl" not in gpath.name:
continue
grid = pineappl.grid.Grid.read(gpath)
prediction = pdf_error(grid, pdf, kin_grid["x"], reshape=False)
if xif == "xif1" and xir == "xir1":
prediction = pdf_error(grid, pdf, kin_grid["x"], reshape=False)
else:
prescription = [(float(xir[3:]), float(xif[3:]))]
prediction = theory_error(
grid, pdf, prescription, kin_grid["x"], reshape=False
)
full_pred.append(prediction[0])
pred = np.average(full_pred, axis=0)

# Select only predictions for Replicas_0 in data
data_pd = pd.DataFrame({"data": pred[:, 0]})
# store data only for unvaried matching grids
if xif == "xif1" and xir == "xir1":
# data_pd = pd.DataFrame({"data": np.median(pred, axis=1)})
# Select only predictions for Replicas_0 to be the Central Value
data_pd = pd.DataFrame({"data": pred[:, 0]})

# Dump the kinematics into CSV
dump_kinematics(destination, kin_grid, new_experiment, is_xsec)
# Dump the kinematics into CSV
dump_kinematics(destination, kin_grid, new_experiment, is_xsec)

# Dump the central (replica) data into CSV
central_val_folder = destination.joinpath("data")
central_val_folder.mkdir(exist_ok=True)
write_to_csv(central_val_folder, f"DATA_{new_name}", data_pd)
# Dump the central (replica) data into CSV
central_val_folder = destination.joinpath("data")
central_val_folder.mkdir(exist_ok=True)
write_to_csv(central_val_folder, f"DATA_{new_name}", data_pd)

# Dump the dummy uncertainties into CSV
dump_uncertainties(destination, new_name, n_points)
# Dump the dummy uncertainties into CSV
dump_uncertainties(destination, new_name, n_points)

# Dump the predictions for the REST of the replicas as NPY
pred_folder = destination.joinpath("matching")
pred_folder.mkdir(exist_ok=True)
mat_dest = (pred_folder / f"MATCH_{grid_name}").with_suffix(".npy")
np.save(mat_dest, pred)
np.save(pred_folder / f"{full_grid_name}.npy", pred)

msg = f"The matching/BC grid for {grid_name} are stored in "
msg += f"'{destination.absolute().relative_to(pathlib.Path.cwd())}'"
msg = f"The matching/BC grid for {full_grid_name} are stored in "
msg += f"'{destination.absolute()}'"
_logger.info(msg)


Expand Down
46 changes: 38 additions & 8 deletions src/nnusf/plot/matching.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,23 @@ def main(

data_name = "_".join(dataset.stem.split("_")[1:])
exp_name = data_name.removesuffix("_MATCHING")
data = loader.Loader(data_name, dataset.parents[1]).table
full_data = loader.Loader(data_name, dataset.parents[1])
data = full_data.table
data_exp = loader.Loader(exp_name, dataset.parents[1]).table

temp_name = "MATCH_" + data_name.removesuffix("_MATCHING")
nrep_predictions = np.load(f"{dataset.parents[1]}/matching/{temp_name}.npy")

data["std"] = nrep_predictions.std(axis=1)
# bould the sv error
sv_variations = []
for variation in pathlib.Path(f"{dataset.parents[1]}/matching/").iterdir():
if data_name in variation.stem:
# central scale
if "xif1_xir1" in variation.stem:
nrep_predictions = np.load(variation)
else:
sv_variations.append(np.load(variation))
th_shift = (sv_variations - nrep_predictions[:, 0]).T
data["sv_err"] = np.sqrt(np.diag(np.cov(th_shift)))

data["total_err"] = np.sqrt(np.diag(full_data.covariance_matrix))
# np.testing.assert_allclose(data["data"], nrep_predictions.mean(axis=1), atol=1e-6)
# np.testing.assert_allclose(nrep_predictions.T[0], nrep_predictions.mean(axis=1), atol=1e-6)

Expand All @@ -62,20 +72,40 @@ def main(

central = fixed_x_data["data"]
central_exp = fixed_x_data_exp["data"]
std = fixed_x_data["std"]
total_err = fixed_x_data["total_err"]
sv_err = fixed_x_data["sv_err"]
std_exp = np.sqrt(
fixed_x_data_exp["syst"] ** 2 + fixed_x_data_exp["stat"] ** 2
)

plt.figure()
plt.errorbar(fixed_x_data["Q2"], central, yerr=std, fmt="o")
plt.errorbar(
fixed_x_data_exp["Q2"], central_exp, yerr=std_exp, fmt="x"
fixed_x_data["Q2"],
central,
yerr=total_err,
fmt="o",
capsize=5,
label="yadism (tot err)",
)
plt.errorbar(
fixed_x_data["Q2"],
central,
yerr=sv_err,
fmt="o",
label="yadism (sv err)",
)
plt.errorbar(
fixed_x_data_exp["Q2"],
central_exp,
yerr=std_exp,
fmt="x",
label="data",
)

plt.title(f"{data_name}, x={x}. x_exp={x_exp:3f}")
plt.xlabel("$Q^2$")
plt.xscale("log")
plt.legend()
plt.tight_layout()
plt.savefig(preds_dest / f"{data_name}_x_{x}.pdf")

Expand Down
12 changes: 8 additions & 4 deletions src/nnusf/sffit/compute_expchi2.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,17 @@ def compute_exp_chi2(datainfo, nn_layers, optimizer_parameters, **kwargs):
for datname, chi2_value in chi2_values.items()
if "_loss" in datname
}
# Compute the experimental Chi2 only for real data
expreal_losses = [
chi2_real_dataset
for n, chi2_real_dataset in chi2_values.items()
if (("_MATCHING" not in n) and ("_loss" in n))
]
expreal_pt = [v for n, v in nb_dpts_dataset.items() if "_MATCHING" not in n]

normalized_chi2s["total_chi2"] = chi2_values["loss"] / sum(
nb_dpts_dataset.values()
)

# Compute the experimental Chi2 only for real data
expreal_losses = [v for n, v in chi2_values.items() if "_MATCHING" not in n]
expreal_pt = [v for n, v in nb_dpts_dataset.items() if "_MATCHING" not in n]
normalized_chi2s["tot_chi2_real"] = sum(expreal_losses) / sum(expreal_pt)
return normalized_chi2s

Expand Down
18 changes: 17 additions & 1 deletion src/nnusf/theory/defs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@

sfmap = dict(F2="F2_total", FL="FL_total", F3="F3_total")
xsmap = dict(
PROTONBC="XSCHORUSCC", CHORUS="XSCHORUSCC", NUTEV="XSNUTEVNU", CDHSW="XSCHORUSCC", FW="FW"
PROTONBC="XSCHORUSCC",
CHORUS="XSCHORUSCC",
NUTEV="XSNUTEVNU",
CDHSW="XSCHORUSCC",
FW="FW",
)
projectiles = dict(NU="neutrino", NB="antineutrino")
targets = {1: "proton", 20: "neon", 100: "marble", 56: "iron", 208: "lead"}
Expand All @@ -16,3 +20,15 @@
nine_points = list(itertools.product(three_points, three_points))
"""Nine points prescription for scale variations (as couples, referred to ``(fact,
ren)`` scales)."""

seven_points = [
(0.5, 0.5),
(1.0, 0.5),
(2.0, 1.0),
(1.0, 1.0),
(0.5, 1.0),
(1.0, 2.0),
(2.0, 2.0),
]
"""Seven points prescription for scale variations (as couples, referred to ``(fact,
ren)`` scales)."""
5 changes: 2 additions & 3 deletions src/nnusf/theory/grids.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def main(cards: pathlib.Path, destination: pathlib.Path):
"""
with tempfile.TemporaryDirectory() as tmpdir:
tmpdir = pathlib.Path(tmpdir).absolute()
full_data_name = str(cards).split("-")[1][:-4]

# extract tar content
if cards.suffix == ".tar":
Expand Down Expand Up @@ -51,10 +52,8 @@ def main(cards: pathlib.Path, destination: pathlib.Path):
output.dump_pineappl_to_file(res_path, obs)
_logger.info(f"Dumped {res_path.name}")

if "_NU" in data_name or "_NB" in data_name:
data_name = data_name[:-3]
file_name = (
f"grids-{data_name}.tar" if data_name != "" else f"grids.tar"
f"grids-{full_data_name}.tar.gz" if full_data_name != "" else f"grids.tar.gz"
)
with tarfile.open(destination / file_name, "w") as tar:
for path in grids_dest.iterdir():
Expand Down