## Create script for missing experiments

This notebook reads `./results/non_targeted_check_result.csv` and `./results/targeted_check_result.csv`
and creates Slurm scripts for the missing experiments.

In [1]:
import os
from pathlib import Path
from glob import glob

import pandas as pd

from nvita.utils import create_dir

In [2]:
# Names of the check-result CSV files produced by the result-checking step.
FILE_TARGET = 'targeted_check_result.csv'
FILE_UNTARGET = 'non_targeted_check_result.csv'

# The project root is the parent of the current working directory.
PATH_ROOT = Path(os.getcwd()).parent.absolute()

# Result locations, all derived from the project root.
PATH_RESULT = os.path.join(PATH_ROOT, 'results')
PATH_TARGET = os.path.join(PATH_RESULT, 'targeted_results')
PATH_UNTARGET = os.path.join(PATH_RESULT, 'non_targeted_results')

# Directory that receives the generated Slurm scripts.
PATH_OUTPUT = os.path.join(PATH_ROOT, 'slurm', 'missing')
create_dir(PATH_OUTPUT)

print(PATH_ROOT)

/home/lukec/workspace/nvita


In [3]:
# Delete every file left under the output directory by an earlier run
# (sub-directories themselves are kept).
for dirpath, _dirnames, filenames in os.walk(PATH_OUTPUT):
    for filename in filenames:
        stale_file = os.path.join(dirpath, filename)
        os.remove(stale_file)

In [4]:
# Load the untargeted check result and keep only the experiments that
# never occurred (occurrence == 0).
path_untarget_check = os.path.join(PATH_RESULT, FILE_UNTARGET)
df_untarget = pd.read_csv(path_untarget_check)
is_untarget_missing = df_untarget['occurrence'] == 0
df_untarget_missing = df_untarget.loc[is_untarget_missing]

df_untarget_missing

Unnamed: 0,df_name,seed,eps,model,attack,occurrence,full_name,Unnamed: 7
1763,CNYExch,58361,0.20,RF,Non_Targeted_BR5V,0,df_CNYExch_seed_58361_model_RF_epsilon_0.2_att...,
1765,CNYExch,58361,0.20,RF,Non_Targeted_3VITA,0,df_CNYExch_seed_58361_model_RF_epsilon_0.2_att...,
1766,CNYExch,58361,0.20,RF,Non_Targeted_5VITA,0,df_CNYExch_seed_58361_model_RF_epsilon_0.2_att...,
2128,Oil,2210,0.15,GRU,Non_Targeted_BR5V,0,df_Oil_seed_2210_model_GRU_epsilon_0.15_attack...,
2133,Oil,2210,0.15,GRU,Non_Targeted_5VITA,0,df_Oil_seed_2210_model_GRU_epsilon_0.15_attack...,
...,...,...,...,...,...,...,...,...
2715,Oil,1111111,0.20,RF,Non_Targeted_BR5V,0,df_Oil_seed_1111111_model_RF_epsilon_0.2_attac...,
2716,Oil,1111111,0.20,RF,Non_Targeted_1VITA,0,df_Oil_seed_1111111_model_RF_epsilon_0.2_attac...,
2717,Oil,1111111,0.20,RF,Non_Targeted_3VITA,0,df_Oil_seed_1111111_model_RF_epsilon_0.2_attac...,
2718,Oil,1111111,0.20,RF,Non_Targeted_5VITA,0,df_Oil_seed_1111111_model_RF_epsilon_0.2_attac...,


In [5]:
# Load the targeted check result and keep only the experiments that
# never occurred (occurrence == 0).
path_target_check = os.path.join(PATH_RESULT, FILE_TARGET)
df_target = pd.read_csv(path_target_check)
is_target_missing = df_target['occurrence'] == 0
df_target_missing = df_target.loc[is_target_missing]

df_target_missing

Unnamed: 0,df_name,seed,eps,model,attack,target_direction,occurrence,full_name,Unnamed: 8
1791,NZTemp,9999,0.15,GRU,Targeted_1VITA,Positive,0,df_NZTemp_seed_9999_model_GRU_epsilon_0.15_att...,


In [6]:
def parse_attack_name(attack: str):
    """Return a valid attack name and the value of n.

    The check-result files label attacks as e.g. ``Non_Targeted_3VITA`` or
    ``Targeted_1VITA``. This strips the ``Non_Targeted_``/``Targeted_`` prefix
    and converts the label into an ``(attack, n)`` pair:

    * ``<n>VITA`` (any name starting with digits) -> ``('NVITA', n)``
    * ``BR<n>V`` (e.g. ``BR5V``)                  -> ``('BRNV', n)``
    * anything else                               -> ``(name, 1)``

    Parameters
    ----------
    attack : str
        Attack label, with or without the targeted/non-targeted prefix.

    Returns
    -------
    tuple
        ``(attack_name, n)`` where ``n`` is an ``int``.

    Raises
    ------
    ValueError
        If nothing is left of the name after the prefix is removed.
    """
    import re  # local import so this cell does not depend on the import cell

    prefix1 = 'Non_Targeted_'
    prefix2 = 'Targeted_'
    if attack.startswith((prefix1, prefix2)):
        attack = attack.split('_')[-1]

    if not attack:
        raise ValueError('Attack name is empty after removing the prefix.')

    # Read ALL leading digits so that multi-digit values such as "10VITA"
    # are parsed as n=10 rather than n=1.
    leading_digits = re.match(r'[0-9]+', attack)
    if leading_digits:
        return 'NVITA', int(leading_digits.group())

    # "BR5V" is the n=5 instance of the BRNV attack; parse n instead of
    # hard-coding the single value 5.
    brnv = re.fullmatch(r'BR([0-9]+)V', attack)
    if brnv:
        return 'BRNV', int(brnv.group(1))

    # 1 is the default value of n for every other attack.
    return attack, 1

In [7]:
# Inspect the column names of the untargeted missing-experiment table.
df_untarget_missing.columns

Index(['df_name', 'seed', 'eps', 'model', 'attack', 'occurrence', 'full_name',
       'Unnamed: 7'],
      dtype='object')

In [8]:
# Empty accumulator for the missing experiments; the following cells append
# the untargeted and the targeted rows to it.
missing_columns = [
    'Target',
    'Seed',
    'Dataset',
    'Model',
    'Attack',
    'Epsilon',
    'N',
    'Direction',
]
df = pd.DataFrame(columns=missing_columns)

In [9]:
# Loop through untargeted missing files
# Collect one record per missing experiment and append them with a single
# concat; concatenating row by row copies the whole frame on every iteration.
untarget_records = []
for _, row in df_untarget_missing.iterrows():
    attackname, n = parse_attack_name(row['attack'])
    untarget_records.append({
        'Target': 'untargeted',
        'Seed': row['seed'],
        'Dataset': row['df_name'],
        'Model': row['model'],
        'Attack': attackname,
        'Epsilon': row['eps'],
        'N': n,
        'Direction': None,  # untargeted attacks have no target direction
    })

# Skip the concat when nothing is missing so `df` is left untouched.
if untarget_records:
    df = pd.concat([df, pd.DataFrame(untarget_records)], ignore_index=True)

In [10]:
# Loop through targeted missing files
# Collect one record per missing experiment and append them with a single
# concat; concatenating row by row copies the whole frame on every iteration.
target_records = []
for _, row in df_target_missing.iterrows():
    attackname, n = parse_attack_name(row['attack'])
    target_records.append({
        'Target': 'targeted',
        'Seed': row['seed'],
        'Dataset': row['df_name'],
        'Model': row['model'],
        'Attack': attackname,
        'Epsilon': row['eps'],
        'N': n,
        'Direction': row['target_direction'],
    })

# Skip the concat when nothing is missing so `df` is left untouched.
if target_records:
    df = pd.concat([df, pd.DataFrame(target_records)], ignore_index=True)

In [11]:
# Order the missing experiments deterministically and save them as a CSV
# next to the other results.
sort_keys = ['Target', 'Seed', 'Dataset', 'Model', 'Direction', 'Epsilon', 'Attack', 'N']
path_output = os.path.join(PATH_RESULT, 'missing_exp.csv')
df = df.sort_values(by=sort_keys)
df.to_csv(path_output, index=False)

In [12]:
# Experiment script (under `experiments/`) to run for each kind of attack.
TARGET_SCRIPT = {
    'untargeted': 'step4_attack_non_target.py',
    'targeted': 'step5_attack_target.py',
}

# Header prepended to every generated Slurm script.
# Every directive must start with exactly `#SBATCH`; a misspelled prefix is
# treated as an ordinary comment and the option is silently ignored.
# NOTE(review): the log files go to `log/`, relative to the submission
# directory - confirm that directory exists before submitting.
SLURM_HEADER = """#!/bin/bash
#SBATCH --job-name=nvita_missing
#SBATCH --output=log/log_%x_%j_%a.out
#SBATCH --error=log/log_%x_%j_%a.err
#SBATCH --time=72:00:00
#SBATCH --mem=8G
#SBATCH --cpus-per-task=6

module load Python/3.9.9-gimkl-2020a
source /nesi/project/uoa03620/nvita/venv/bin/activate
"""

In [13]:
def script_builder(target, dataset, model, attack, seed, eps, n, direction=None):
    """Build the one-line shell command that runs a single experiment.

    `target` selects the experiment script through `TARGET_SCRIPT`; the
    remaining arguments are passed to it as command-line options. The
    `-t <direction>` option is appended only when `target` is 'targeted'.
    The returned command ends with a newline.
    """
    command = f'python experiments/{TARGET_SCRIPT[target]} -d {dataset} -m {model} -a {attack} -s {seed} -e {eps} -n {n}'
    direction_option = f' -t {direction}' if target == 'targeted' else ''
    return command + direction_option + '\n'

In [14]:
# Turn every row of the missing-experiment table into one command line.
slurm_scripts = [
    script_builder(
        target=row['Target'],
        dataset=row['Dataset'],
        model=row['Model'],
        attack=row['Attack'],
        seed=row['Seed'],
        eps=row['Epsilon'],
        n=row['N'],
        direction=row['Direction'],
    )
    for _, row in df.iterrows()
]

# Each command already ends with a newline, so join without a separator;
# print(*items) would put a space in front of every line after the first.
print(''.join(slurm_scripts[:5]))
print('# of lines:', len(slurm_scripts))

python experiments/step5_attack_target.py -d NZTemp -m GRU -a NVITA -s 9999 -e 0.15 -n 1 -t Positive
 python experiments/step4_attack_non_target.py -d Oil -m CNN -a BIM -s 2210 -e 0.2 -n 1
 python experiments/step4_attack_non_target.py -d Oil -m CNN -a BRNV -s 2210 -e 0.2 -n 5
 python experiments/step4_attack_non_target.py -d Oil -m CNN -a BRS -s 2210 -e 0.2 -n 1
 python experiments/step4_attack_non_target.py -d Oil -m CNN -a FGSM -s 2210 -e 0.2 -n 1

# of lines: 222


In [15]:
def save_script(lines, output_dir, idx):
    """Write one Slurm script named `part_<idx>.sh`.

    The file contains `SLURM_HEADER`, a blank line, the given command lines
    joined as-is (each is expected to carry its own newline) and a final
    newline.

    Parameters
    ----------
    lines : list of str
        Command lines to place after the header.
    output_dir : str
        Directory for the script, joined onto `PATH_ROOT` (an absolute
        `output_dir` therefore replaces `PATH_ROOT`).
    idx : int
        Number used in the file name.
    """
    my_script = SLURM_HEADER + '\n' + ''.join(lines) + '\n'
    output_name = f'part_{idx}.sh'
    with open(os.path.join(PATH_ROOT, output_dir, output_name), 'w') as file:
        # my_script is one string: write() emits it in a single call, whereas
        # writelines() would iterate over it character by character.
        file.write(my_script)

In [16]:
# Create files
# Split the commands into chunks of LINES_PER_FILE and save one script per chunk
LINES_PER_FILE = 5

file_count = 0
for start in range(0, len(slurm_scripts), LINES_PER_FILE):
    file_count += 1
    # Slicing past the end of a list is safe, so the last (possibly shorter)
    # chunk needs no special end-index handling.
    lines = slurm_scripts[start:start + LINES_PER_FILE]
    save_script(lines, PATH_OUTPUT, file_count)

print(f'Generated {file_count} files')

# Sanity check: the number of scripts on disk must match what was written.
actual_count = len(glob(os.path.join(PATH_OUTPUT, 'part_*.sh')))
assert actual_count == file_count, \
    f'Unmatched file count. Found: {actual_count} Received: {file_count}'

Generated 45 files


In [17]:
# Create the script to submit all files

def get_content(n):
    """Return a bash script that submits `part_1.sh` .. `part_<n>.sh`.

    The script calls `sbatch` on each file under `./slurm/missing/` and then
    lists the user's queue with `squeue --me`. The paths are relative, so the
    script is presumably meant to be run from the project root.

    A C-style loop is used instead of brace expansion because bash expands
    `{1..0}` to "1 0" (it counts down); with `n == 0` the brace form would
    still try to submit files that do not exist.
    """
    content = '''#!/bin/bash

for ((I = 1; I <= {}; I++)); do
    sbatch ./slurm/missing/part_$I.sh
done
        
squeue --me
'''.format(n)
    return content


# Save the submit-all script next to the generated part files.
with open(os.path.join(PATH_OUTPUT, 'run_missing.sh'), 'w') as file:
    # get_content() returns one string: write() emits it in a single call,
    # whereas writelines() would iterate over it character by character.
    file.write(get_content(file_count))