In [1]:
import numpy as np
import random
import pandas as pd
import itertools

## Produce data frames for theta selections

- Making three dataframes
  - high fidelity sample for training
  - high fidelity sample for validation
  - low fidelity sample for training

- Each data frame has selection of etau, alpha and beta' with index for each variable for naming

- indices
    - etau: total 4 points
        - 0: 5 ms
        - 1: 10.4 ms
        - 2: 18 ms
        - 3: 30 ms
    - MB alpha: total 5 points
        - 0: 0.89
        - 1: 0.91
        - 2: 0.93
        - 3: 0.95
        - 4: 0.97
    - MB beta': total 5 points
        - 0: 0.208
        - 1: 0.210
        - 2: 0.212
        - 3: 0.214
        - 4: 0.216

In [2]:
array_etau = [5000, 10400, 18000, 30000]
array_alpha = [0.89, 0.91, 0.93, 0.95, 0.97]
array_beta = [0.208, 0.210, 0.212, 0.214, 0.216]

etau_idx_map  = {v: i for i, v in enumerate(array_etau)}
alpha_idx_map = {v: i for i, v in enumerate(array_alpha)}
beta_idx_map  = {v: i for i, v in enumerate(array_beta)}

In [3]:
n_samples = 44
random.seed(44)
data = {
    "etau":  [random.choice(array_etau)  for _ in range(n_samples)],
    "alpha": [random.choice(array_alpha) for _ in range(n_samples)],
    "beta":  [random.choice(array_beta)  for _ in range(n_samples)],
}

df_hf = pd.DataFrame(data)

In [4]:
df_hf[(df_hf.etau == 10.4) & (df_hf.alpha == 0.93) & (df_hf.beta == 0.212)]

Unnamed: 0,etau,alpha,beta


In [5]:
cv_row = pd.DataFrame([
    {"etau": 10.4, "alpha": 0.93, "beta": 0.212},
])

In [6]:
df_hf_train = df_hf.head(4)
df_hf_train = pd.concat([df_hf_train, cv_row], ignore_index=True)

In [7]:
df_hf_train

Unnamed: 0,etau,alpha,beta
0,30000.0,0.93,0.212
1,5000.0,0.95,0.208
2,10400.0,0.95,0.208
3,30000.0,0.97,0.214
4,10.4,0.93,0.212


In [8]:
df_hf_valid = df_hf.tail(40).reset_index(drop=True)

In [9]:
df_hf_valid

Unnamed: 0,etau,alpha,beta
0,10400,0.97,0.212
1,18000,0.91,0.21
2,5000,0.91,0.216
3,10400,0.95,0.21
4,5000,0.95,0.208
5,5000,0.93,0.212
6,5000,0.97,0.214
7,10400,0.89,0.212
8,18000,0.93,0.208
9,30000,0.91,0.216


In [10]:
combinations = list(itertools.product(array_etau, array_alpha, array_beta))

df_lf = pd.DataFrame(
    combinations,
    columns=["etau", "alpha", "beta"]
)

In [11]:
df_lf

Unnamed: 0,etau,alpha,beta
0,5000,0.89,0.208
1,5000,0.89,0.210
2,5000,0.89,0.212
3,5000,0.89,0.214
4,5000,0.89,0.216
...,...,...,...
95,30000,0.97,0.208
96,30000,0.97,0.210
97,30000,0.97,0.212
98,30000,0.97,0.214


In [12]:
df_hf_train["etau_idx"]  = df_hf_train["etau"].map(etau_idx_map)
df_hf_train["alpha_idx"] = df_hf_train["alpha"].map(alpha_idx_map)
df_hf_train["beta_idx"]  = df_hf_train["beta"].map(beta_idx_map)

df_hf_valid["etau_idx"]  = df_hf_valid["etau"].map(etau_idx_map)
df_hf_valid["alpha_idx"] = df_hf_valid["alpha"].map(alpha_idx_map)
df_hf_valid["beta_idx"]  = df_hf_valid["beta"].map(beta_idx_map)

df_lf["etau_idx"]  = df_lf["etau"].map(etau_idx_map)
df_lf["alpha_idx"] = df_lf["alpha"].map(alpha_idx_map)
df_lf["beta_idx"]  = df_lf["beta"].map(beta_idx_map)

In [13]:
df_hf_train

Unnamed: 0,etau,alpha,beta,etau_idx,alpha_idx,beta_idx
0,30000.0,0.93,0.212,3.0,2,2
1,5000.0,0.95,0.208,0.0,3,0
2,10400.0,0.95,0.208,1.0,3,0
3,30000.0,0.97,0.214,3.0,4,3
4,10.4,0.93,0.212,,2,2


In [14]:
df_hf_valid

Unnamed: 0,etau,alpha,beta,etau_idx,alpha_idx,beta_idx
0,10400,0.97,0.212,1,4,2
1,18000,0.91,0.21,2,1,1
2,5000,0.91,0.216,0,1,4
3,10400,0.95,0.21,1,3,1
4,5000,0.95,0.208,0,3,0
5,5000,0.93,0.212,0,2,2
6,5000,0.97,0.214,0,4,3
7,10400,0.89,0.212,1,0,2
8,18000,0.93,0.208,2,2,0
9,30000,0.91,0.216,3,1,4


In [15]:
df_lf

Unnamed: 0,etau,alpha,beta,etau_idx,alpha_idx,beta_idx
0,5000,0.89,0.208,0,0,0
1,5000,0.89,0.210,0,0,1
2,5000,0.89,0.212,0,0,2
3,5000,0.89,0.214,0,0,3
4,5000,0.89,0.216,0,0,4
...,...,...,...,...,...,...
95,30000,0.97,0.208,3,4,0
96,30000,0.97,0.210,3,4,1
97,30000,0.97,0.212,3,4,2
98,30000,0.97,0.214,3,4,3


In [16]:
df_hf_train.to_csv("data_hf_train_etau.csv", index=False)
df_hf_valid.to_csv("data_hf_valid_etau.csv", index=False)
df_lf.to_csv("data_lf_etau.csv", index=False)

## produce fcl files to run each sample

update

standard_g4_dune10kt_1x2x6.fcl physics:producers:tpcrawdecoder:structs:lifetime

standard_detsim_dune10kt_1x2x6.fcl services:LArG4Parameters:ModBoxA/ModBoxB

In [17]:
from pathlib import Path
g4_template_path = Path("./fcl/template/standard_g4_dune10kt_1x2x6_temp.fcl")
detsim_template_path = Path("./fcl/template/standard_detsim_dune10kt_1x2x6_temp.fcl")

g4_template_text = g4_template_path.read_text()
detsim_template_text = detsim_template_path.read_text()

hf_train_out_dir = Path("./fcl/output/hf_train/")
hf_train_out_dir.mkdir(parents=True, exist_ok=True)

hf_valid_out_dir = Path("./fcl/output/hf_valid/")
hf_valid_out_dir.mkdir(parents=True, exist_ok=True)

lf_out_dir = Path("./fcl/output/lf/")
lf_out_dir.mkdir(parents=True, exist_ok=True)

Run for df_hf_train

In [18]:
for row in df_hf_train.itertuples(index=False):
    etau = row.etau
    alpha = row.alpha
    beta = row.beta
    etau_idx = row.etau_idx
    alpha_idx = row.alpha_idx
    beta_idx = row.beta_idx

    g4_new_text = g4_template_text.replace("ModBoxA: <MODBOXA>", f"ModBoxA: {alpha}")
    g4_new_text = g4_new_text.replace("ModBoxB: <MODBOXB>", f"ModBoxB: {beta}")
    detsim_new_text = detsim_template_text.replace("lifetime: <ETAUVAL>", f"lifetime: {etau}")

    g4_out_path = hf_train_out_dir / f"standard_g4_dune10kt_1x2x6_etau{etau_idx}_alpha{alpha_idx}_beta{beta_idx}.fcl"
    g4_out_path.write_text(g4_new_text)

    detsim_out_path = hf_train_out_dir / f"standard_detsim_dune10kt_1x2x6_etau_{etau_idx}alpha_{alpha_idx}beta_{beta_idx}.fcl"
    detsim_out_path.write_text(detsim_new_text)


Run for df_hf_valid

In [19]:
for row in df_hf_valid.itertuples(index=False):
    etau = row.etau
    alpha = row.alpha
    beta = row.beta
    etau_idx = row.etau_idx
    alpha_idx = row.alpha_idx
    beta_idx = row.beta_idx

    g4_new_text = g4_template_text.replace("ModBoxA: <MODBOXA>", f"ModBoxA: {alpha}")
    g4_new_text = g4_new_text.replace("ModBoxB: <MODBOXB>", f"ModBoxB: {beta}")
    detsim_new_text = detsim_template_text.replace("lifetime: <ETAUVAL>", f"lifetime: {etau}")

    g4_out_path = hf_valid_out_dir / f"standard_g4_dune10kt_1x2x6_etau{etau_idx}_alpha{alpha_idx}_beta{beta_idx}.fcl"
    g4_out_path.write_text(g4_new_text)

    detsim_out_path = hf_valid_out_dir / f"standard_detsim_dune10kt_1x2x6_etau_{etau_idx}alpha_{alpha_idx}beta_{beta_idx}.fcl"
    detsim_out_path.write_text(detsim_new_text)


Run for df_lf

In [20]:
for row in df_lf.itertuples(index=False):
    etau = row.etau
    alpha = row.alpha
    beta = row.beta
    etau_idx = row.etau_idx
    alpha_idx = row.alpha_idx
    beta_idx = row.beta_idx

    g4_new_text = g4_template_text.replace("ModBoxA: <MODBOXA>", f"ModBoxA: {alpha}")
    g4_new_text = g4_new_text.replace("ModBoxB: <MODBOXB>", f"ModBoxB: {beta}")
    detsim_new_text = detsim_template_text.replace("lifetime: <ETAUVAL>", f"lifetime: {etau}")

    g4_out_path = lf_out_dir / f"standard_g4_dune10kt_1x2x6_etau{etau_idx}_alpha{alpha_idx}_beta{beta_idx}.fcl"
    g4_out_path.write_text(g4_new_text)

    detsim_out_path = lf_out_dir / f"standard_detsim_dune10kt_1x2x6_etau_{etau_idx}alpha_{alpha_idx}beta_{beta_idx}.fcl"
    detsim_out_path.write_text(detsim_new_text)
