In [1]:
import numpy as np
import random
import pandas as pd
import itertools

## Produce data frames for theta selections

- Making three dataframes
  - high fidelity sample for training
  - high fidelity sample for validation
  - low fidelity sample for training

- Each data frame holds a selection of etau, alpha and beta' values, plus the grid index of each variable (the indices are used for naming the output files)

- indices
    - etau: total 4 points
        - 0: 5 ms
        - 1: 10.4 ms
        - 2: 18 ms
        - 3: 30 ms
    - MB alpha: total 5 points
        - 0: 0.87
        - 1: 0.91
        - 2: 0.93
        - 3: 0.95
        - 4: 0.99
    - MB beta': total 5 points
        - 0: 0.206
        - 1: 0.210
        - 2: 0.212
        - 3: 0.214
        - 4: 0.218

In [2]:
# Model tag; substituted for the <MODEL> placeholder when the run_all scripts are generated.
model = "hn_esf"

In [3]:
# Grid values for each scanned parameter. The position of a value in its list
# is the index that later appears in the generated file names.
array_etau = [5000, 10400, 18000, 30000]
array_alpha = [0.87, 0.91, 0.93, 0.95, 0.99]
array_beta = [0.206, 0.210, 0.212, 0.214, 0.218]

# Reverse lookups: grid value -> position of that value in its list.
etau_idx_map = dict(zip(array_etau, range(len(array_etau))))
alpha_idx_map = dict(zip(array_alpha, range(len(array_alpha))))
beta_idx_map = dict(zip(array_beta, range(len(array_beta))))

In [4]:
# Draw the high-fidelity parameter points. Every column is sampled independently
# and with replacement from its grid, so the same (etau, alpha, beta) triple can
# be drawn more than once.
# NOTE(review): later cells name output files by the index triple only, so
# repeated triples would map to identical file names - confirm this is acceptable.
n_samples = 44
random.seed(44)  # fixed seed keeps the selection reproducible

# Order matters for reproducibility: all etau draws come first, then all alpha
# draws, then all beta draws.
data = {
    column: [random.choice(grid) for _ in range(n_samples)]
    for column, grid in (
        ("etau", array_etau),
        ("alpha", array_alpha),
        ("beta", array_beta),
    )
}

df_hf = pd.DataFrame(data)

In [5]:
# Look for the point etau=10400, alpha=0.93, beta=0.212 among the random draws
# (presumably the central-value point, cf. `cv_row` in the next cell - confirm).
# An empty result means it was not drawn.
df_hf[(df_hf.etau == 10400) & (df_hf.alpha == 0.93) & (df_hf.beta == 0.212)]

Unnamed: 0,etau,alpha,beta


In [6]:
# Single-row frame holding the point etau=10400, alpha=0.93, beta=0.212,
# built column-wise so it can be concatenated onto the sampled frames.
cv_row = pd.DataFrame(
    {
        "etau": [10400],
        "alpha": [0.93],
        "beta": [0.212],
    }
)

In [7]:
# Training set: the first four random draws followed by the explicitly added
# cv_row; the index is renumbered 0..4.
df_hf_train = pd.concat([df_hf.iloc[:4], cv_row], ignore_index=True)

In [8]:
df_hf_train

Unnamed: 0,etau,alpha,beta
0,30000,0.93,0.212
1,5000,0.95,0.206
2,10400,0.95,0.206
3,30000,0.99,0.214
4,10400,0.93,0.212


In [9]:
# Validation set: the last 40 draws of df_hf (with n_samples = 44 these are
# exactly the rows not taken by the training head), renumbered from 0.
df_hf_valid = df_hf.iloc[-40:].reset_index(drop=True)

In [10]:
df_hf_valid

Unnamed: 0,etau,alpha,beta
0,10400,0.99,0.212
1,18000,0.91,0.21
2,5000,0.91,0.218
3,10400,0.95,0.21
4,5000,0.95,0.206
5,5000,0.93,0.212
6,5000,0.99,0.214
7,10400,0.87,0.212
8,18000,0.93,0.206
9,30000,0.91,0.218


In [11]:
# Low-fidelity set: every point of the full grid, with etau varying slowest
# and beta varying fastest.
combinations = [
    (etau_value, alpha_value, beta_value)
    for etau_value in array_etau
    for alpha_value in array_alpha
    for beta_value in array_beta
]

df_lf = pd.DataFrame(combinations, columns=["etau", "alpha", "beta"])

In [12]:
df_lf

Unnamed: 0,etau,alpha,beta
0,5000,0.87,0.206
1,5000,0.87,0.210
2,5000,0.87,0.212
3,5000,0.87,0.214
4,5000,0.87,0.218
...,...,...,...
95,30000,0.99,0.206
96,30000,0.99,0.210
97,30000,0.99,0.212
98,30000,0.99,0.214


In [13]:
# Attach the grid index of each parameter value to all three frames.
# Columns are added in the order etau_idx, alpha_idx, beta_idx.
idx_lookups = {
    "etau": etau_idx_map,
    "alpha": alpha_idx_map,
    "beta": beta_idx_map,
}

for frame in (df_hf_train, df_hf_valid, df_lf):
    for param, lookup in idx_lookups.items():
        frame[f"{param}_idx"] = frame[param].map(lookup)

add first genie file idx

there are 10k genie files, from prefix_0.root to prefix_9999.root

each job picks its input file as "genie_1st_file_idx + jobid", so every parameter point needs its own starting index


In [14]:
# Training row i starts at GENIE file index 200 * i (0, 200, 400, ...).
HF_TRAIN_FILES_PER_POINT = 200
df_hf_train["genie_1st_file_idx"] = HF_TRAIN_FILES_PER_POINT * df_hf_train.index

In [15]:
df_hf_train

Unnamed: 0,etau,alpha,beta,etau_idx,alpha_idx,beta_idx,genie_1st_file_idx
0,30000,0.93,0.212,3,2,2,0
1,5000,0.95,0.206,0,3,0,200
2,10400,0.95,0.206,1,3,0,400
3,30000,0.99,0.214,3,4,3,600
4,10400,0.93,0.212,1,2,2,800


In [16]:
# Validation row i starts at GENIE file index 1000 + 200 * i, i.e. directly
# after the 0-999 range covered by the five training rows.
HF_VALID_FIRST_FILE = 1000
HF_VALID_FILES_PER_POINT = 200
df_hf_valid["genie_1st_file_idx"] = HF_VALID_FILES_PER_POINT * df_hf_valid.index + HF_VALID_FIRST_FILE

In [17]:
df_hf_valid

Unnamed: 0,etau,alpha,beta,etau_idx,alpha_idx,beta_idx,genie_1st_file_idx
0,10400,0.99,0.212,1,4,2,1000
1,18000,0.91,0.21,2,1,1,1200
2,5000,0.91,0.218,0,1,4,1400
3,10400,0.95,0.21,1,3,1,1600
4,5000,0.95,0.206,0,3,0,1800
5,5000,0.93,0.212,0,2,2,2000
6,5000,0.99,0.214,0,4,3,2200
7,10400,0.87,0.212,1,0,2,2400
8,18000,0.93,0.206,2,2,0,2600
9,30000,0.91,0.218,3,1,4,2800


In [18]:
# Low-fidelity row i starts at GENIE file index 9000 + 10 * i.
LF_FIRST_FILE = 9000
LF_FILES_PER_POINT = 10
df_lf["genie_1st_file_idx"] = LF_FILES_PER_POINT * df_lf.index + LF_FIRST_FILE

In [19]:
df_lf

Unnamed: 0,etau,alpha,beta,etau_idx,alpha_idx,beta_idx,genie_1st_file_idx
0,5000,0.87,0.206,0,0,0,9000
1,5000,0.87,0.210,0,0,1,9010
2,5000,0.87,0.212,0,0,2,9020
3,5000,0.87,0.214,0,0,3,9030
4,5000,0.87,0.218,0,0,4,9040
...,...,...,...,...,...,...,...
95,30000,0.99,0.206,3,4,0,9950
96,30000,0.99,0.210,3,4,1,9960
97,30000,0.99,0.212,3,4,2,9970
98,30000,0.99,0.214,3,4,3,9980


In [20]:
# Persist the three selections as CSV files (the row index is not written).
for csv_name, frame_to_save in (
    ("data_hf_train_etau.csv", df_hf_train),
    ("data_hf_valid_etau.csv", df_hf_valid),
    ("data_lf_etau.csv", df_lf),
):
    frame_to_save.to_csv(csv_name, index=False)

## produce fcl files to run each sample

Update the following parameters in the template fcl files:

- standard_g4_dune10kt_1x2x6.fcl: services:LArG4Parameters:ModBoxA / ModBoxB
- standard_detsim_dune10kt_1x2x6.fcl: physics:producers:tpcrawdecoder:structs:lifetime

In [21]:
from pathlib import Path

# Read both fcl templates once; the generation cells only do string
# substitution on these texts.
fcl_template_dir = Path("./fcl/template")
g4_template_path = fcl_template_dir / "standard_g4_dune10kt_1x2x6_temp.fcl"
detsim_template_path = fcl_template_dir / "standard_detsim_dune10kt_1x2x6_temp.fcl"

g4_template_text, detsim_template_text = (
    template_path.read_text()
    for template_path in (g4_template_path, detsim_template_path)
)

# One output directory per sample; created up front so the loops can just write.
fcl_output_root = Path("./fcl/output")
hf_train_out_dir = fcl_output_root / "hf_train"
hf_valid_out_dir = fcl_output_root / "hf_valid"
lf_out_dir = fcl_output_root / "lf"

for fcl_dir in (hf_train_out_dir, hf_valid_out_dir, lf_out_dir):
    fcl_dir.mkdir(parents=True, exist_ok=True)

Run for df_hf_train

In [22]:
# Write one g4 / detsim fcl pair per high-fidelity training point.
#
# The output directory is pinned in this cell instead of being read from
# `hf_train_out_dir`: later setup cells rebind that name to ./sh/... directories,
# so re-running this cell after them would put fcl files in the wrong place.
fcl_hf_train_dir = Path("./fcl/output/hf_train/")
fcl_hf_train_dir.mkdir(parents=True, exist_ok=True)

for row in df_hf_train.itertuples(index=False):
    # The index triple identifies the parameter point in the file names.
    theta_str = f"etau_{row.etau_idx}alpha_{row.alpha_idx}beta_{row.beta_idx}"

    # ModBoxA / ModBoxB placeholders live in the g4 template,
    # the lifetime placeholder in the detsim template.
    g4_new_text = (
        g4_template_text
        .replace("ModBoxA: <MODBOXA>", f"ModBoxA: {row.alpha}")
        .replace("ModBoxB: <MODBOXB>", f"ModBoxB: {row.beta}")
    )
    detsim_new_text = detsim_template_text.replace("lifetime: <ETAUVAL>", f"lifetime: {row.etau}")

    g4_out_path = fcl_hf_train_dir / f"standard_g4_dune10kt_1x2x6_{theta_str}.fcl"
    g4_out_path.write_text(g4_new_text)

    detsim_out_path = fcl_hf_train_dir / f"standard_detsim_dune10kt_1x2x6_{theta_str}.fcl"
    detsim_out_path.write_text(detsim_new_text)


Run for df_hf_valid

In [23]:
# Write one g4 / detsim fcl pair per high-fidelity validation point.
#
# The output directory is pinned in this cell instead of being read from
# `hf_valid_out_dir`: later setup cells rebind that name to ./sh/... directories,
# so re-running this cell after them would put fcl files in the wrong place.
fcl_hf_valid_dir = Path("./fcl/output/hf_valid/")
fcl_hf_valid_dir.mkdir(parents=True, exist_ok=True)

for row in df_hf_valid.itertuples(index=False):
    # The index triple identifies the parameter point in the file names.
    theta_str = f"etau_{row.etau_idx}alpha_{row.alpha_idx}beta_{row.beta_idx}"

    # ModBoxA / ModBoxB placeholders live in the g4 template,
    # the lifetime placeholder in the detsim template.
    g4_new_text = (
        g4_template_text
        .replace("ModBoxA: <MODBOXA>", f"ModBoxA: {row.alpha}")
        .replace("ModBoxB: <MODBOXB>", f"ModBoxB: {row.beta}")
    )
    detsim_new_text = detsim_template_text.replace("lifetime: <ETAUVAL>", f"lifetime: {row.etau}")

    g4_out_path = fcl_hf_valid_dir / f"standard_g4_dune10kt_1x2x6_{theta_str}.fcl"
    g4_out_path.write_text(g4_new_text)

    detsim_out_path = fcl_hf_valid_dir / f"standard_detsim_dune10kt_1x2x6_{theta_str}.fcl"
    detsim_out_path.write_text(detsim_new_text)


Run for df_lf

In [24]:
# Write one g4 / detsim fcl pair per low-fidelity grid point.
#
# The output directory is pinned in this cell instead of being read from
# `lf_out_dir`: later setup cells rebind that name to ./sh/... directories,
# so re-running this cell after them would put fcl files in the wrong place.
fcl_lf_dir = Path("./fcl/output/lf/")
fcl_lf_dir.mkdir(parents=True, exist_ok=True)

for row in df_lf.itertuples(index=False):
    # The index triple identifies the parameter point in the file names.
    theta_str = f"etau_{row.etau_idx}alpha_{row.alpha_idx}beta_{row.beta_idx}"

    # ModBoxA / ModBoxB placeholders live in the g4 template,
    # the lifetime placeholder in the detsim template.
    g4_new_text = (
        g4_template_text
        .replace("ModBoxA: <MODBOXA>", f"ModBoxA: {row.alpha}")
        .replace("ModBoxB: <MODBOXB>", f"ModBoxB: {row.beta}")
    )
    detsim_new_text = detsim_template_text.replace("lifetime: <ETAUVAL>", f"lifetime: {row.etau}")

    g4_out_path = fcl_lf_dir / f"standard_g4_dune10kt_1x2x6_{theta_str}.fcl"
    g4_out_path.write_text(g4_new_text)

    detsim_out_path = fcl_lf_dir / f"standard_detsim_dune10kt_1x2x6_{theta_str}.fcl"
    detsim_out_path.write_text(detsim_new_text)


## Produce Job submission code

In [25]:
# Template for the per-point run_all script, read once.
run_all_template_path = Path("./sh/template/run_all_template.sh")
run_all_template_text = run_all_template_path.read_text()

# NOTE: these three names were bound to the ./fcl/output/... directories by the
# fcl setup cell; from this cell on they refer to the run_all script directories.
sh_output_root = Path("./sh/output")
hf_train_out_dir = sh_output_root / "hf_train"
hf_valid_out_dir = sh_output_root / "hf_valid"
lf_out_dir = sh_output_root / "lf"

for script_dir in (hf_train_out_dir, hf_valid_out_dir, lf_out_dir):
    script_dir.mkdir(parents=True, exist_ok=True)

make for hf train

In [26]:
# Generate one run_all script per high-fidelity training point.
#
# The output directory is pinned in this cell instead of being read from
# `hf_train_out_dir`: that name is bound by several setup cells (fcl output,
# run_all output, submit output), so which directory it points to depends on
# which of them ran last.
# NOTE(review): the file name depends only on the index triple, so rows sharing
# a triple overwrite each other's script (and its FIRST_FILE_IDX).
run_all_hf_train_dir = Path("./sh/output/hf_train/")
run_all_hf_train_dir.mkdir(parents=True, exist_ok=True)

fidelity = "hf_train"

# <MODEL> and <FIDELITY> are identical for every row: substitute them once.
run_all_base_text = (
    run_all_template_text
    .replace("<MODEL>", model)
    .replace("<FIDELITY>", fidelity)
)

for row in df_hf_train.itertuples(index=False):
    theta_str = f"etau_{row.etau_idx}alpha_{row.alpha_idx}beta_{row.beta_idx}"

    # Names of the fcl files generated for this point.
    g4_fcl = f"standard_g4_dune10kt_1x2x6_{theta_str}.fcl"
    detsim_fcl = f"standard_detsim_dune10kt_1x2x6_{theta_str}.fcl"

    run_all_new_text = (
        run_all_base_text
        .replace("<THETA_STR>", theta_str)
        .replace("<FIRST_FILE_IDX>", str(row.genie_1st_file_idx))
        .replace("<G4FCL>", g4_fcl)
        .replace("<DETSIMFCL>", detsim_fcl)
    )

    run_all_out_path = run_all_hf_train_dir / f"run_all_{fidelity}_{theta_str}.sh"
    run_all_out_path.write_text(run_all_new_text)



make for hf valid

In [27]:
# Generate one run_all script per high-fidelity validation point.
#
# The output directory is pinned in this cell instead of being read from
# `hf_valid_out_dir`: that name is bound by several setup cells (fcl output,
# run_all output, submit output), so which directory it points to depends on
# which of them ran last.
# NOTE(review): the file name depends only on the index triple, so rows sharing
# a triple overwrite each other's script (and its FIRST_FILE_IDX). The
# validation points are drawn with replacement - check for repeated triples.
run_all_hf_valid_dir = Path("./sh/output/hf_valid/")
run_all_hf_valid_dir.mkdir(parents=True, exist_ok=True)

fidelity = "hf_valid"

# <MODEL> and <FIDELITY> are identical for every row: substitute them once.
run_all_base_text = (
    run_all_template_text
    .replace("<MODEL>", model)
    .replace("<FIDELITY>", fidelity)
)

for row in df_hf_valid.itertuples(index=False):
    theta_str = f"etau_{row.etau_idx}alpha_{row.alpha_idx}beta_{row.beta_idx}"

    # Names of the fcl files generated for this point.
    g4_fcl = f"standard_g4_dune10kt_1x2x6_{theta_str}.fcl"
    detsim_fcl = f"standard_detsim_dune10kt_1x2x6_{theta_str}.fcl"

    run_all_new_text = (
        run_all_base_text
        .replace("<THETA_STR>", theta_str)
        .replace("<FIRST_FILE_IDX>", str(row.genie_1st_file_idx))
        .replace("<G4FCL>", g4_fcl)
        .replace("<DETSIMFCL>", detsim_fcl)
    )

    run_all_out_path = run_all_hf_valid_dir / f"run_all_{fidelity}_{theta_str}.sh"
    run_all_out_path.write_text(run_all_new_text)



make for lf

In [28]:
# Generate one run_all script per low-fidelity grid point.
#
# The output directory is pinned in this cell instead of being read from
# `lf_out_dir`: that name is bound by several setup cells (fcl output,
# run_all output, submit output), so which directory it points to depends on
# which of them ran last.
run_all_lf_dir = Path("./sh/output/lf/")
run_all_lf_dir.mkdir(parents=True, exist_ok=True)

fidelity = "lf"

# <MODEL> and <FIDELITY> are identical for every row: substitute them once.
run_all_base_text = (
    run_all_template_text
    .replace("<MODEL>", model)
    .replace("<FIDELITY>", fidelity)
)

for row in df_lf.itertuples(index=False):
    theta_str = f"etau_{row.etau_idx}alpha_{row.alpha_idx}beta_{row.beta_idx}"

    # Names of the fcl files generated for this point.
    g4_fcl = f"standard_g4_dune10kt_1x2x6_{theta_str}.fcl"
    detsim_fcl = f"standard_detsim_dune10kt_1x2x6_{theta_str}.fcl"

    run_all_new_text = (
        run_all_base_text
        .replace("<THETA_STR>", theta_str)
        .replace("<FIRST_FILE_IDX>", str(row.genie_1st_file_idx))
        .replace("<G4FCL>", g4_fcl)
        .replace("<DETSIMFCL>", detsim_fcl)
    )

    run_all_out_path = run_all_lf_dir / f"run_all_{fidelity}_{theta_str}.sh"
    run_all_out_path.write_text(run_all_new_text)



Produce dirs

In [29]:
# Build a shell script that creates the anahist/ and reco2/ directories for
# every training point, then write it to mkdir_hf_train.sh.
script_lines = []
for row in df_hf_train.itertuples(index=False):
    theta_str = f"etau_{row.etau_idx}alpha_{row.alpha_idx}beta_{row.beta_idx}"
    script_lines.append(f"mkdir -p ./hf_train/{theta_str}/anahist\n")
    script_lines.append(f"mkdir -p ./hf_train/{theta_str}/reco2\n")

mkdir_text = "#!/bin/bash\n\n" + "".join(script_lines)

# Last expression: the cell displays the number of characters written.
Path("mkdir_hf_train.sh").write_text(mkdir_text)

483

In [30]:
# Build a shell script that creates the anahist/ and reco2/ directories for
# every validation point, then write it to mkdir_hf_valid.sh.
script_lines = []
for row in df_hf_valid.itertuples(index=False):
    theta_str = f"etau_{row.etau_idx}alpha_{row.alpha_idx}beta_{row.beta_idx}"
    script_lines.append(f"mkdir -p ./hf_valid/{theta_str}/anahist\n")
    script_lines.append(f"mkdir -p ./hf_valid/{theta_str}/reco2\n")

mkdir_text = "#!/bin/bash\n\n" + "".join(script_lines)

# Last expression: the cell displays the number of characters written.
Path("mkdir_hf_valid.sh").write_text(mkdir_text)

3773

In [31]:
# Build a shell script that creates the anahist/ and reco2/ directories for
# every low-fidelity grid point, then write it to mkdir_lf.sh.
script_lines = []
for row in df_lf.itertuples(index=False):
    theta_str = f"etau_{row.etau_idx}alpha_{row.alpha_idx}beta_{row.beta_idx}"
    script_lines.append(f"mkdir -p ./lf/{theta_str}/anahist\n")
    script_lines.append(f"mkdir -p ./lf/{theta_str}/reco2\n")

mkdir_text = "#!/bin/bash\n\n" + "".join(script_lines)

# Last expression: the cell displays the number of characters written.
Path("mkdir_lf.sh").write_text(mkdir_text)

8213

## Produce jobsub scripts

In [32]:
# Template for the per-point submission script, read once.
submit_template_path = Path("./sh/template/submit_template.sh")
submit_template_text = submit_template_path.read_text()

# NOTE: these three names were bound to other output directories by earlier
# setup cells; from this cell on they refer to the submit script directories.
sh_submit_root = Path("./sh/submit")
hf_train_out_dir = sh_submit_root / "hf_train"
hf_valid_out_dir = sh_submit_root / "hf_valid"
lf_out_dir = sh_submit_root / "lf"

for submit_dir in (hf_train_out_dir, hf_valid_out_dir, lf_out_dir):
    submit_dir.mkdir(parents=True, exist_ok=True)

hf_train

In [33]:
# Generate one submission script per high-fidelity training point.
#
# The output directory is pinned in this cell instead of being read from
# `hf_train_out_dir`: that name is bound by several setup cells (fcl output,
# run_all output, submit output), so which directory it points to depends on
# which of them ran last.
submit_hf_train_dir = Path("./sh/submit/hf_train/")
submit_hf_train_dir.mkdir(parents=True, exist_ok=True)

fidelity = "hf_train"
njob = 200  # same as the spacing of genie_1st_file_idx between training rows

# <NJOB> and <FIDELITY> are identical for every row: substitute them once.
submit_base_text = (
    submit_template_text
    .replace("<NJOB>", str(njob))
    .replace("<FIDELITY>", fidelity)
)

for row in df_hf_train.itertuples(index=False):
    theta_str = f"etau_{row.etau_idx}alpha_{row.alpha_idx}beta_{row.beta_idx}"

    # Name of the run_all script this submission launches.
    run_sh = f"run_all_{fidelity}_{theta_str}.sh"

    submit_template_new_text = submit_base_text.replace("<RUNSH>", run_sh)

    submit_out_path = submit_hf_train_dir / f"submit_{fidelity}_{theta_str}.sh"
    submit_out_path.write_text(submit_template_new_text)

hf_valid

In [34]:
# Generate one submission script per high-fidelity validation point.
#
# The output directory is pinned in this cell instead of being read from
# `hf_valid_out_dir`: that name is bound by several setup cells (fcl output,
# run_all output, submit output), so which directory it points to depends on
# which of them ran last.
submit_hf_valid_dir = Path("./sh/submit/hf_valid/")
submit_hf_valid_dir.mkdir(parents=True, exist_ok=True)

fidelity = "hf_valid"
njob = 200  # same as the spacing of genie_1st_file_idx between validation rows

# <NJOB> and <FIDELITY> are identical for every row: substitute them once.
submit_base_text = (
    submit_template_text
    .replace("<NJOB>", str(njob))
    .replace("<FIDELITY>", fidelity)
)

for row in df_hf_valid.itertuples(index=False):
    theta_str = f"etau_{row.etau_idx}alpha_{row.alpha_idx}beta_{row.beta_idx}"

    # Name of the run_all script this submission launches.
    run_sh = f"run_all_{fidelity}_{theta_str}.sh"

    submit_template_new_text = submit_base_text.replace("<RUNSH>", run_sh)

    submit_out_path = submit_hf_valid_dir / f"submit_{fidelity}_{theta_str}.sh"
    submit_out_path.write_text(submit_template_new_text)

lf

In [35]:
# Generate one submission script per low-fidelity grid point.
#
# The output directory is pinned in this cell instead of being read from
# `lf_out_dir`: that name is bound by several setup cells (fcl output,
# run_all output, submit output), so which directory it points to depends on
# which of them ran last.
submit_lf_dir = Path("./sh/submit/lf/")
submit_lf_dir.mkdir(parents=True, exist_ok=True)

fidelity = "lf"
njob = 10  # same as the spacing of genie_1st_file_idx between low-fidelity rows

# <NJOB> and <FIDELITY> are identical for every row: substitute them once.
submit_base_text = (
    submit_template_text
    .replace("<NJOB>", str(njob))
    .replace("<FIDELITY>", fidelity)
)

for row in df_lf.itertuples(index=False):
    theta_str = f"etau_{row.etau_idx}alpha_{row.alpha_idx}beta_{row.beta_idx}"

    # Name of the run_all script this submission launches.
    run_sh = f"run_all_{fidelity}_{theta_str}.sh"

    submit_template_new_text = submit_base_text.replace("<RUNSH>", run_sh)

    submit_out_path = submit_lf_dir / f"submit_{fidelity}_{theta_str}.sh"
    submit_out_path.write_text(submit_template_new_text)