In [85]:
import sys
# Add ../src (relative to the working directory) to the import path so modules under it, such as utils.path_utils, can be imported
sys.path.append('../src')
import os
from utils.path_utils import get_last_run_number

### Hyperparameters

In [86]:
# Detector architecture; forwarded as --model and used as a component of the runs path.
# Options noted by the author: mlp, lstm, cnn.
model = "mlp"

# Forwarded as --vocab_size; vocab_size * 100 also names the data sub-folder.
vocab_size = 0.1

# Forwarded as --seed to every python command printed below.
seed = 57

# Dataset file name; it is joined onto `data_folder` when building the --dataset argument.
dataset = "FMereani.csv"

### Folders

In [87]:
# Relative folder names used to build the paths in the printed commands.
data_folder, runs_folder = "data", "runs"

# Sub-folders (below data/<vocab folder>/) holding the adversarial-agent and detector CSV splits.
adv_folder, det_folder = "adversarial_agents", "detectors"

# File name of the vocabulary CSV stored in data/<vocab folder>/.
vocab_file_name = "vocabulary.csv"

### Scripts

In [88]:
# Script paths inserted after `python` in the printed commands.

# -- data preparation
prepare_dataset = "src/prepare_dataset.py"

# -- training
train_detector = "src/train_detector.py"
train_adversarial_agent = "src/train_adversarial_agent.py"

# -- evaluation
test_detector = "src/test_detector.py"
test_adversarial_agent = "src/test_adversarial_agent.py"

# -- validity checks on the mutated dataset
test_validity_mutated_dataset = "src/test_validity_mutated_dataset.py"
analyze_validity = "src/analyze_validity.py"

### Pipeline

In [89]:
# The data sub-folder is named after the vocabulary size expressed as a percentage (0.1 -> "10").
# NOTE(review): int() truncates, so a value such as 0.29 (0.29 * 100 == 28.999...) yields "28".
# Confirm how the dataset-preparation script names this folder before switching to round().
vocab_folder = f"{int(vocab_size * 100)}"

# Path of the vocabulary CSV inside that sub-folder.
vocab_file = os.path.join(data_folder, vocab_folder, vocab_file_name)

In [90]:
### Run Server
# Print the shell command for the server step; nothing is executed here.
command = "./serve_backend.sh"
print(command)

./serve_backend.sh


In [91]:
### Prepare dataset
# Print the command that runs the dataset-preparation script on `dataset`.
# Each argument is its own list item, so no stray trailing whitespace from a
# multi-line template can show up as double spaces in the printed command.
command = "\n".join([
    f"python {prepare_dataset}",
    f"--dataset {os.path.join(data_folder, dataset)}",
    f"--save_path {data_folder}",
    f"--seed {seed}",
    f"--vocab_size {vocab_size}",
]).strip()
# The command is kept one-argument-per-line in `command` and flattened only for display.
print(" ".join(command.splitlines()))

python src/prepare_dataset.py  --dataset data/FMereani.csv  --save_path data --seed 57 --vocab_size 0.1


In [92]:
### Train detector
# Assemble the detector-training command one argument per line, then print it on a single line.
command = "\n".join([
    f"python {train_detector}",
    f"--trainset {os.path.join(data_folder, vocab_folder, det_folder, 'train.csv')}",
    f"--valset {os.path.join(data_folder, vocab_folder, det_folder, 'val.csv')}",
    f"--vocabulary {os.path.join(data_folder, vocab_folder, vocab_file_name)}",
    f"--model {model}",
    f"--seed {seed}",
]).strip()
# print() separates its arguments with a single space, which flattens the lines.
print(*command.splitlines())

python src/train_detector.py --trainset data/10/detectors/train.csv --valset data/10/detectors/val.csv --vocabulary data/10/vocabulary.csv --model lstm --seed 57


In [93]:
### Test detector
# Leave `run_to_check` as None to use the run number reported by get_last_run_number
# (presumably the most recent run; confirm in utils.path_utils), or set it to a run number.
run_to_check = None
if run_to_check is None:
    # The lookup goes through "../" while the printed command uses the path without it.
    last_run = get_last_run_number(os.path.join(f"../{runs_folder}", model, vocab_folder))
    run_to_check = last_run
# From here on `run_to_check` holds the run folder path used by the commands below.
run_to_check = os.path.join(runs_folder, model, vocab_folder, f"run_{run_to_check}")

# Assemble the detector-test command one argument per line, then print it on a single line.
command = "\n".join([
    f"python {test_detector}",
    f"--testset {os.path.join(data_folder, vocab_folder, det_folder, 'test.csv')}",
    f"--vocab_file {os.path.join(data_folder, vocab_folder, vocab_file_name)}",
    f"--model {model}",
    f"--checkpoint_folder {run_to_check}",
    f"--seed {seed}",
]).strip()
print(*command.splitlines())

python src/test_detector.py --testset data/10/detectors/test.csv --vocab_file data/10/vocabulary.csv --model lstm --checkpoint_folder runs/lstm/10/run_0 --seed 57


In [94]:
### Train adversarial agent
# The agent is trained against the detector run selected in `run_to_check`,
# referenced through that run's config.json.
command = "\n".join([
    f"python {train_adversarial_agent}",
    f"--trainset {os.path.join(data_folder, vocab_folder, adv_folder, 'train.csv')}",
    f"--valset {os.path.join(data_folder, vocab_folder, adv_folder, 'val.csv')}",
    f"--config_detector {os.path.join(run_to_check, 'config.json')}",
    f"--seed {seed}",
]).strip()
print(*command.splitlines())

python src/train_adversarial_agent.py --trainset data/10/adversarial_agents/train.csv --valset data/10/adversarial_agents/val.csv --config_detector runs/lstm/10/run_0/config.json --seed 57


In [95]:
### Test adversarial agent
# Leave `run_agent_to_check` as None to use the run number reported by get_last_run_number
# (presumably the most recent agent run; confirm in utils.path_utils), or set it to a run number.
run_agent_to_check = None
if run_agent_to_check is None:
    # The lookup goes through "../" while the printed command uses the path without it.
    last_run = get_last_run_number(os.path.join(f"../{run_to_check}", "adversarial_agent"))
    run_agent_to_check = last_run
# From here on `run_agent_to_check` holds the agent run folder inside the detector run.
run_agent_to_check = os.path.join(run_to_check, "adversarial_agent", f"run_{run_agent_to_check}")

# Assemble the agent-test command one argument per line, then print it on a single line.
command = "\n".join([
    f"python {test_adversarial_agent}",
    f"--testset {os.path.join(data_folder, vocab_folder, adv_folder, 'test.csv')}",
    f"--config_detector {os.path.join(run_to_check, 'config.json')}",
    f"--checkpoint {os.path.join(run_agent_to_check, 'best_model.zip')}",
    f"--seed {seed}",
]).strip()
print(*command.splitlines())


python src/test_adversarial_agent.py --testset data/10/adversarial_agents/test.csv --config_detector runs/lstm/10/run_0/config.json --checkpoint runs/lstm/10/run_0/adversarial_agent/run_2/best_model.zip --seed 57


In [96]:
### Test validity of mutated dataset
# Points the validity script at empirical_study_set.csv inside the selected agent run folder.
command = "\n".join([
    f"python {test_validity_mutated_dataset}",
    f"--dataset {os.path.join(run_agent_to_check, 'empirical_study_set.csv')}",
    f"--vocab {os.path.join(data_folder, vocab_folder, vocab_file_name)}",
    f"--seed {seed}",
]).strip()
print(*command.splitlines())

python src/test_validity_mutated_dataset.py --dataset runs/lstm/10/run_0/adversarial_agent/run_2/empirical_study_set.csv --vocab data/10/vocabulary.csv --seed 57


In [97]:
### Test detectors against the mutated dataset
# Same detector-test script as before, but fed successes_test_set.csv from the agent run
# and given a separate results file name via --test_file_name.
command = "\n".join([
    f"python {test_detector}",
    f"--testset {os.path.join(run_agent_to_check, 'successes_test_set.csv')}",
    f"--vocab_file {os.path.join(data_folder, vocab_folder, vocab_file_name)}",
    f"--model {model}",
    f"--checkpoint_folder {run_to_check}",
    "--test_file_name mutated_test_results.json",
    f"--seed {seed}",
]).strip()
print(*command.splitlines())

python src/test_detector.py --testset runs/lstm/10/run_0/adversarial_agent/run_2/successes_test_set.csv --vocab_file data/10/vocabulary.csv --model lstm --checkpoint_folder runs/lstm/10/run_0 --test_file_name mutated_test_results.json --seed 57


In [98]:
### Analyze validity
# Points the analysis script at validity.csv inside the selected agent run folder.
command = "\n".join([
    f"python {analyze_validity}",
    f"--dataset {os.path.join(run_agent_to_check, 'validity.csv')}",
    f"--seed {seed}",
]).strip()
print(*command.splitlines())

python src/analyze_validity.py --dataset runs/lstm/10/run_0/adversarial_agent/run_2/validity.csv --seed 57
