In [1]:
import numpy as np
import pandas as pd
import pathlib
import matplotlib.pyplot as plt
import seaborn as sns

from itertools import product

%matplotlib inline

In [2]:
# Seed NumPy's global (legacy) RNG so every call to `np.random.randn` below
# produces the same pseudodata on a fresh "Restart & Run All".
np.random.seed(87654321)
# Base directory against which all input/output paths are resolved.
# `pathlib.Path()` is "." and the parent of "." is still ".", so this is the
# current working directory of the kernel.
CURR_PATH = pathlib.Path().parent

In [3]:
def read_txt_pd(otype: str = "inclusive", charge: str = "nu") -> pd.DataFrame:
    """Read a binned event/uncertainty table into a DataFrame.

    The file is looked up under
    ``CURR_PATH / "stat_syst_uncertainties"`` with the name
    ``clipped_nan_binned_sysevents_FASERv2_{otype}_{charge}.txt``.

    Parameters
    ----------
    otype:
        Observable type used to build the file name (e.g. ``"inclusive"``).
    charge:
        Projectile/charge tag used to build the file name (e.g. ``"nu"``).

    Returns
    -------
    pd.DataFrame
        One row per bin, with the 17 columns listed in ``colnames`` below.
    """
    path = f"clipped_nan_binned_sysevents_FASERv2_{otype}_{charge}.txt"
    fpath = CURR_PATH.joinpath(f"stat_syst_uncertainties/{path}")

    # Column names are assigned by position; the file's own first two lines
    # are skipped rather than parsed.
    colnames = [
        "x_lower",
        "x_upper",
        "x_avg",
        "Q2_lower",
        "Q2_upper",
        "Q2_avg",
        "E_nu_lower",
        "E_nu_upper",
        "E_nu_avg",
        "d^sigma/dxdQ2",
        "N_events",
        "N_events_errs",
        "N_sys_errs",
        "Percent_error_theta",
        "Percent_error_Elepton",
        "Percent_error_Ehadron",
        "MC_Samples",
    ]

    # `sep=r"\s+"` is the documented replacement for the deprecated
    # `delim_whitespace=True` (any run of whitespace separates fields).
    return pd.read_csv(fpath, skiprows=2, sep=r"\s+", names=colnames)

In [4]:
# Load the uncertainty table for the default configuration
# (otype="inclusive", charge="nu").
# NOTE(review): `predictions` is not referenced again in the visible cells.
predictions = read_txt_pd()

In [5]:
# Short tag appended to the dataset name for each systematic-error column.
# Every column gets its own tag so that dataset names (and the files derived
# from them) cannot collide.
MAP_ERROR_LABEL = {
    "Percent_error_Elepton": "El",
    "Percent_error_theta": "theta",
    "Percent_error_Ehadron": "Eh",
    "Percent_error_combined": "comb",
}


def load_input(
    otype: str = "inclusive",
    charge: str = "nu",
    pdfname: str = "NNPDF40_nnlo_as_01180_iso",
    error: str = "Percent_error_Elepton",
) -> dict:
    """Load central values and build statistical/systematic errors per bin.

    Central values come from
    ``CURR_PATH / "pineappl_tables" / diffxsec-FASERv2_{otype}_{charge}-a1_{pdfname}.txt``
    and the uncertainties from ``read_txt_pd(otype, charge)``.

    Parameters
    ----------
    otype:
        Observable type, used in both file names.
    charge:
        ``"nu"`` (table column 3), ``"nub"`` (table column 4) or
        ``"nochargediscrimination"`` (sum of columns 3 and 4).
    pdfname:
        PDF set name that appears in the central-value file name.
    error:
        Name of the systematic-error column to use; must be a key of
        ``MAP_ERROR_LABEL`` and a column of the uncertainty table.

    Returns
    -------
    dict
        ``x_values``, ``q2_values``, ``y_values``, ``stat_error``,
        ``syst_error``, ``comb_error``, ``sigma`` (all NumPy arrays) and
        ``dataset_name`` (str).

    Raises
    ------
    ValueError
        If ``charge`` is not one of the supported values.
    KeyError
        If ``error`` has no label in ``MAP_ERROR_LABEL`` or is not a column
        of the uncertainty table.
    AssertionError
        If the ``x`` knots of the two input files disagree (rtol=5e-3).
    """
    # Validate the arguments before touching any file.
    sigma_columns_by_charge = {
        "nu": (3,),
        "nub": (4,),
        "nochargediscrimination": (3, 4),
    }
    if charge not in sigma_columns_by_charge:
        raise ValueError(f"{charge} is not valid!")
    error_label = MAP_ERROR_LABEL[error]

    # Read and parse the central values.
    partial_dataname = f"FASERv2_{otype}_{charge}"
    data_name = f"diffxsec-{partial_dataname}-a1_{pdfname}"
    path_cv = CURR_PATH.joinpath(f"pineappl_tables/{data_name}.txt")

    # Columns 0 and 1 are x and y; the remaining selected column(s) hold the
    # cross section for the requested projectile(s).
    table = np.loadtxt(
        path_cv,
        usecols=(0, 1, *sigma_columns_by_charge[charge]),
        unpack=True,
        skiprows=1,
    )
    x_avg, y_avg = table[0], table[1]
    # One column -> that column; two columns -> nu + nub.
    sigma = table[2:].sum(axis=0)

    df_predictions = read_txt_pd(otype=otype, charge=charge)

    # Check that the two files have the same knots before combining them.
    np.testing.assert_allclose(x_avg, df_predictions["x_avg"], rtol=5e-3)

    if error not in df_predictions.columns:
        raise KeyError(
            f"Column '{error}' is not available in the uncertainty table."
        )

    # Systematic error from the *requested* column.
    # NOTE(review): the column is used as a plain multiplicative fraction of
    # sigma (no /100) -- confirm the units of the "Percent_error_*" columns.
    syst_error = sigma * df_predictions[error].to_numpy()

    # Statistical error.
    # NOTE(review): this is sigma / N_events_errs, which is the relative
    # Poisson error only if N_events_errs == sqrt(N_events) -- confirm.
    num_events_error = df_predictions["N_events_errs"].to_numpy()
    stat_error = 1.0 / num_events_error * sigma

    # Add the statistical and systematic errors in quadrature.
    comb_error = np.sqrt(syst_error**2 + stat_error**2)

    return {
        "x_values": df_predictions["x_avg"].to_numpy(),
        "q2_values": df_predictions["Q2_avg"].to_numpy(),
        "y_values": y_avg,
        "stat_error": stat_error,
        "syst_error": syst_error,
        "comb_error": comb_error,
        "sigma": sigma,
        "dataset_name": f"{partial_dataname}_{error_label}",
    }

In [6]:
def fluctuate_data(central: np.ndarray, covmat: np.ndarray) -> np.ndarray:
    """Return one pseudodata replica of ``central`` drawn according to ``covmat``.

    A vector of standard-normal numbers (from NumPy's global RNG) is multiplied
    by the lower Cholesky factor of ``covmat`` and added to ``central``.

    Parameters
    ----------
    central:
        Central values; its first-axis length sets the number of random draws.
    covmat:
        Covariance matrix passed to ``np.linalg.cholesky``.

    Returns
    -------
    np.ndarray
        ``central`` plus the correlated random shift.
    """
    # Factorise first so that a failing decomposition consumes no random numbers.
    lower_factor = np.linalg.cholesky(covmat)
    n_points = central.shape[0]
    gaussian_noise = np.random.randn(n_points)
    return central + np.matmul(lower_factor, gaussian_noise)

In [7]:
# Load the inputs for the default configuration (all `load_input` defaults).
# NOTE(review): `fasernu2_nu` is not referenced again in the visible cells.
fasernu2_nu = load_input()

In [8]:
def get_covmat(arr):
    """Build the covmat from an array: ``np.diag`` applied to ``arr`` squared element-wise."""
    squared = arr**2
    return np.diag(squared)

In [9]:
def dump_fluctuated_data(
    processes: list = ["inclusive", "charm"],
    charges: list = ["nu", "nub", "nochargediscrimination"],
):
    """Generate and write fluctuated pseudodata for every (process, charge) pair.

    For each combination, the inputs are loaded with ``load_input``, a diagonal
    covariance matrix is built from the combined error, the central cross
    section is fluctuated once, and the result is written to
    ``CURR_PATH / "fluctuated_data" / "{dataset_name}_fluctuated.txt"``.

    The output columns are, in order: ``x``, ``y``, ``Q2``, fluctuated sigma,
    statistical error, systematic error.

    Parameters
    ----------
    processes:
        Observable types to iterate over. The default list is only read,
        never modified.
    charges:
        Charge tags to iterate over. The default list is only read,
        never modified.
    """
    for proc, charge in product(processes, charges):
        load_results = load_input(otype=proc, charge=charge)

        # Compute the covariance matrix using the combined error
        covmat = get_covmat(load_results["comb_error"])

        # Fluctuate the central values
        fluctuated_sigma = fluctuate_data(
            central=load_results["sigma"],
            covmat=covmat,
        )

        # Combine everything into an array (one column per quantity)
        fluctuated_predictions = [
            load_results["x_values"],
            load_results["y_values"],
            load_results["q2_values"],
            fluctuated_sigma,
            load_results["stat_error"],
            load_results["syst_error"],
        ]

        # Dump the final results
        filename = f"{load_results['dataset_name']}_fluctuated"
        save_path = CURR_PATH.joinpath(f"fluctuated_data/{filename}.txt")
        # `np.savetxt` does not create missing directories; make sure the
        # output folder exists so a fresh checkout does not fail here.
        save_path.parent.mkdir(parents=True, exist_ok=True)
        np.savetxt(save_path, np.column_stack(fluctuated_predictions))

In [10]:
# Write one fluctuated pseudodata file per default (process, charge) pair.
dump_fluctuated_data()