In [1]:
# SLURM preambles that are prepended to the generated job scripts.
# They are plain (non f-) strings: nothing is interpolated, which also lets
# shell syntax such as ${VAR} appear verbatim.

# Preamble for the 8-GPU training jobs.
slurm_prefix = """#!/bin/bash
#SBATCH --job-name=Training
#SBATCH --output=slurm_outputs/gemma/training_%j.txt
#SBATCH --ntasks=1
#SBATCH --cpus-per-task 10
#SBATCH --time=72:00:00
#SBATCH --mem=1024G
###SBATCH --mail-user=...
###SBATCH --mail-type=ALL
#SBATCH --no-requeue
#SBATCH --gres=gpu:8
#SBATCH --partition=gpu100
###SBATCH --nodelist=gpu266
###SBATCH --reservation=chlgrp_146
###SBATCH --nodelist=gpu271
###SBATCH --reservation=chlgrp_156
#SBATCH --export=NONE
#SBATCH --account=chlgrp
#unset SLURM_EXPORT_ENV
module load python/3.12.8
module load cuda/12.4
module load tmux
source ~/.bashrc
source /nfs/scistore23/chlgrp/ezverev/envs/noexec_emb/bin/activate
export TRANSFORMERS_CACHE='./transformer_cache'
export WANDB_MODE=disabled
"""

# Preamble for the single-GPU evaluation jobs.
# SECURITY: a Hugging Face access token used to be hard-coded in this string.
# Treat that token as leaked and revoke it. The script now resolves the token
# when the job runs: it keeps HF_TOKEN if the environment already provides it
# (e.g. exported from the ~/.bashrc sourced above) and otherwise falls back to
# the token file written by `huggingface-cli login`. The explicit fallback is
# needed because HF_HOME is redirected to ./transformer_cache further down.
slurm_prefix_evals = """#!/bin/bash
#SBATCH --job-name=TrainingTinyLlama
#SBATCH --output=slurm_outputs/training_%j.txt
#SBATCH --ntasks=1
#SBATCH --cpus-per-task 10
#SBATCH --time=72:00:00
#SBATCH --mem=196G
###SBATCH --mail-user=...
###SBATCH --mail-type=ALL
#SBATCH --no-requeue
#SBATCH --gres=gpu:1
#SBATCH --partition=gpu100
###SBATCH --nodelist=gpu266
###SBATCH --reservation=chlgrp_146
###SBATCH --nodelist=gpu271
###SBATCH --reservation=chlgrp_156
#SBATCH --account=chlgrp

#SBATCH --export=NONE
#unset SLURM_EXPORT_ENV
module load python/3.12.8
module load cuda/12.4
module load tmux
source ~/.bashrc
source /nfs/scistore23/chlgrp/ezverev/envs/noexec_emb/bin/activate
source ./side-env/bin/activate
module load python/3.12.8

# Never commit a token here: it is taken from the environment or the login token file.
export HF_TOKEN="${HF_TOKEN:-$(cat "$HOME/.cache/huggingface/token" 2>/dev/null)}"

export HF_HOME='./transformer_cache'
export WANDB_MODE=disabled
export TORCH_CUDA_ARCH_LIST="9.0"
"""
# get_slurm_prefix("gpu266")

In [2]:
import itertools
import random

# Launcher prefix shared by every generated training command.
base_command = "srun --export=ALL deepspeed --num_gpus=8 --master_port=29509 fine-tune.py"

# Sweep grid: every key maps to the list of values to combine.
#   * "batch_and_accum" values are complete flag strings, appended verbatim.
#   * "--run_number" only reserves a position; its value is replaced by the
#     running index of the generated command.
params = {
    "--model_family": ["gemma_3_12b"],
    "--train_version": ["SFTv70"],  # SFTv111
    "--emb_type": ["forward_rot", "single_emb", "ise"],
    "--model_ix": ["1"],
    "--run_number": [None],
    "--train_type": ["full"],
    "--num_train_epochs": ["3"],
    "batch_and_accum": [
        "--per_device_train_batch_size 8 --gradient_accumulation_steps 8",
        "--per_device_train_batch_size 4 --gradient_accumulation_steps 8",
    ],
    "--learning_rate": ["1e-6", "5e-6", "1e-5", "2e-5"],
    "--lr_scheduler_type": ["cosine"],
    "--warmup_ratio": ["0", "0.1"],
    "--logging_steps": ["10"],
    "--evaluation_strategy": ["epoch"],
    "--save_strategy": ["epoch"],
    "--eval_steps": ["1"],
    "--save_steps": ["1"],
    "--save_total_limit": ["1"],
    "--load_best_model_at_end": ["True"],
    "--prediction_loss_only": ["True"],
    "--bf16": ["True"],
    "--embedding_init": ["rot_isoclinic"],
    "--rotation_alpha": ["1.57079633"],
    "--learned_rotation": ["False"],
    "--add_linear_shift": ["False"],
    "--rotation_direction": ["right"],
    "--gradual_rotation": ["False"],
}

# Flag names and their value lists, in matching order, for the Cartesian product.
keys = list(params.keys())
values = list(params.values())

# One command per combination; the combination's index doubles as the run number.
commands = []
for run_index, combo in enumerate(itertools.product(*values)):
    pieces = [base_command]
    for flag, choice in zip(keys, combo):
        if flag == "batch_and_accum":
            pieces.append(choice)
        elif flag == "--run_number":
            pieces.append(f"{flag} {run_index}")
        else:
            pieces.append(f"{flag} {choice}")
    commands.append(" ".join(pieces) + "\n")

# Split the sweep into two job scripts, each starting with the SLURM preamble.
# Every command already ends with a newline, so joining with "\n" leaves one
# blank line between consecutive commands.
half = len(commands) // 2
for script_name, chunk in (
    ("gemma-3-12b_training_1.sh", commands[:half]),
    ("gemma-3-12b_training_2.sh", commands[half:]),
):
    with open(script_name, "w") as script:
        script.write("\n".join([slurm_prefix, *chunk]))

print(f"{len(commands)} commands have been written")



48 commands have been written


In [60]:
import itertools
import random

# Launcher prefix shared by every generated training command.
base_command = "srun --export=ALL deepspeed --num_gpus=8 --master_port=29509 fine-tune.py"

# Sweep grid: every key maps to the list of values to combine.
#   * "batch_and_accum" values are complete flag strings, appended verbatim.
#   * "--run_number" only reserves a position; its value is replaced by the
#     running index of the generated command.
params = {
    "--model_family": ["llama_2_13b"],
    "--train_version": ["SFTv110"],  # SFTv111
    "--emb_type": ["forward_rot", "single_emb", "ise"],
    "--model_ix": ["1"],
    "--run_number": [None],
    "--train_type": ["full"],
    "--num_train_epochs": ["3"],
    "batch_and_accum": [
        "--per_device_train_batch_size 2 --gradient_accumulation_steps 4",
        "--per_device_train_batch_size 2 --gradient_accumulation_steps 8",
    ],
    "--learning_rate": ["1e-6", "5e-6", "1e-5", "2e-5"],
    "--lr_scheduler_type": ["cosine"],
    "--warmup_ratio": ["0", "0.1"],
    "--logging_steps": ["10"],
    "--evaluation_strategy": ["epoch"],
    "--save_strategy": ["epoch"],
    "--eval_steps": ["1"],
    "--save_steps": ["1"],
    "--save_total_limit": ["1"],
    "--load_best_model_at_end": ["True"],
    "--prediction_loss_only": ["True"],
    "--bf16": ["True"],
    "--embedding_init": ["rot_isoclinic"],
    "--rotation_alpha": ["1.57079633"],
    "--learned_rotation": ["False"],
    "--add_linear_shift": ["False"],
    "--rotation_direction": ["right"],
    "--gradual_rotation": ["False"],
}

# Flag names and their value lists, in matching order, for the Cartesian product.
keys = list(params.keys())
values = list(params.values())

# One command per combination; the combination's index doubles as the run number.
commands = []
for run_index, combo in enumerate(itertools.product(*values)):
    rendered = []
    for flag, choice in zip(keys, combo):
        if flag == "batch_and_accum":
            rendered.append(choice)
        elif flag == "--run_number":
            rendered.append(f"{flag} {run_index}")
        else:
            rendered.append(f"{flag} {choice}")
    commands.append(" ".join([base_command, *rendered]) + "\n")

# Write each half of the sweep to its own job script, one per node name.
# NOTE(review): get_slurm_prefix is not defined in any visible cell of this
# notebook, so this cell raises NameError on a fresh kernel -- confirm where the
# helper lives (it is called with a node name and its result is used as the
# script preamble).
half = len(commands) // 2
for script_name, node, chunk in (
    ("llama_2_13b_training_1.sh", "gpu266", commands[:half]),
    ("llama_2_13b_training_2.sh", "gpu271", commands[half:]),
):
    with open(script_name, "w") as script:
        script.write("\n".join([get_slurm_prefix(node), *chunk]))

print(f"{len(commands)} commands have been written")



48 commands have been written


In [5]:
def generate_commands(mapping, model_name, sft, base_port=29700):
    """
    Build the evaluation command lines for a set of fine-tuned checkpoints.

    One command per entry of ``mapping`` is produced in each of four groups,
    emitted group by group in this order:

    1. ``get_model_outputs.py``      (torchrun)
    2. ``get_alpaca_outputs.py``     (torchrun, launched with ``--chdir=evals``)
    3. ``test_on_struq.py``          (torchrun, launched with ``--chdir=struq``)
    4. ``alpaca_eval`` on the JSON file named after the checkpoint

    mapping: dict of form {
        embedding_type_1: (model_type_1, run_number_1),
        embedding_type_2: (model_type_2, run_number_2),
        ...
    }
    model_name: str (e.g. "llama_3.1_8b")
    sft: str (e.g. "SFTv110")
    base_port: int. The torchrun commands receive the consecutive ports
        base_port + 1, base_port + 2, ... in emission order, so no two
        commands of one call share a master port.

    Returns a list of commands (strings); empty if ``mapping`` is empty.
    """
    entries = list(mapping.items())
    commands = []

    # Single running counter shared by all torchrun groups. The previous
    # "port + i + 1" update accumulated triangular gaps (29701, 29703, 29706,
    # ...) instead of the consecutive ports the code was meant to hand out.
    port = base_port

    def checkpoint_dir(actual_model_type, run_number):
        # Checkpoint location relative to the evals/ and struq/ working directories.
        return (
            f"../models/{model_name}/{actual_model_type}/train_checkpoints/"
            f"{sft}/from_base_run_{run_number}/last/"
        )

    # ------------------------------------------------------------------
    # 1) get_model_outputs.py
    #    <embedding_type> <model_name> 1 <sft> <actual_model_type> <run_number>
    # ------------------------------------------------------------------
    for embedding_type, (actual_model_type, run_number) in entries:
        port += 1
        commands.append(
            f"srun --export=ALL torchrun --nproc_per_node=1 --master_port={port} "
            f"get_model_outputs.py {embedding_type} {model_name} 1 {sft} {actual_model_type} {run_number}"
        )

    # ------------------------------------------------------------------
    # 2) get_alpaca_outputs.py
    #    "single_emb" reads the alpaca_eval file and omits --use-input;
    #    every other embedding type reads the alpaca_farm file with
    #    --use-input True.
    # ------------------------------------------------------------------
    for embedding_type, (actual_model_type, run_number) in entries:
        port += 1
        if embedding_type == "single_emb":
            data_path = "../data/tatsu-lab/alpaca_eval/alpaca_eval_eval.json"
            input_flag = ""
        else:
            data_path = "../data/tatsu-lab/alpaca_farm/alpaca_farm_eval.json"
            input_flag = "--use-input True "
        commands.append(
            f"srun --chdir=evals --export=ALL torchrun --nproc_per_node=1 --master_port={port} "
            f"get_alpaca_outputs.py --data-path {data_path} {input_flag}"
            f"--model {checkpoint_dir(actual_model_type, run_number)} "
            f"--embedding-type {embedding_type} --batch-size 32"
        )

    # ------------------------------------------------------------------
    # 3) test_on_struq.py (all domains, all attacks)
    # ------------------------------------------------------------------
    for embedding_type, (actual_model_type, run_number) in entries:
        port += 1
        commands.append(
            f"srun --chdir=struq --export=ALL torchrun --nproc_per_node=1 --master_port={port} "
            f"test_on_struq.py --domain all --attack all "
            f"--model {checkpoint_dir(actual_model_type, run_number)} "
            f"--embedding_type {embedding_type} --batch_size 32"
        )

    # ------------------------------------------------------------------
    # 4) alpaca_eval scoring
    #    The JSON file is looked up under alpaca_eval/ for "single_emb" and
    #    under alpaca_farm/ otherwise, matching the data file chosen in step 2.
    # ------------------------------------------------------------------
    for embedding_type, (actual_model_type, run_number) in entries:
        directory = "alpaca_eval" if embedding_type == "single_emb" else "alpaca_farm"
        json_path = (
            f"./data/tatsu-lab/{directory}/"
            f"{model_name}_{actual_model_type}_train_checkpoints_{sft}_from_base_run_{run_number}_last__l-1_s42.json"
        )
        commands.append(f"IS_ALPACA_EVAL_2=False alpaca_eval --model_outputs {json_path}")

    return commands



# Checkpoints to evaluate: embedding type -> (model sub-directory, run number).
mapping = dict(
    single_emb=("single_emb", "27"),
    ise=("ise", "34"),
    forward_rot=("forward_rot", "4"),
)
model_name = "gemma-3-4b-pt"
sft = "SFTv70"

all_commands = generate_commands(mapping, model_name, sft)

# The job script is the evaluation SLURM preamble followed by one command per line.
script_text = "\n".join([slurm_prefix_evals, *all_commands])
with open("gemma-3-4b_evals.sh", "w") as script:
    script.write(script_text)

print(f"{len(all_commands)} commands have been written")


12 commands have been written


In [30]:
# get_model_outputs.py launch lines for runs 0..15 of one embedding variant.
master_port = 29700     # run k uses port master_port + k
emb_type = "forward_rot"  # passed both as the embedding type and as the model type
sft = "SFTv111"

commands = [
    (
        f"srun --export=ALL torchrun --nproc_per_node=1 --master_port={master_port + run_number} "
        f"get_model_outputs.py {emb_type} llama_3.1_8b 1 {sft} {emb_type} {run_number}"
    )
    for run_number in range(16)
]

# Show the commands, one per line.
print(*commands, sep="\n")

srun --export=ALL torchrun --nproc_per_node=1 --master_port=29700 get_model_outputs.py single_emb llama_3.1_8b 1 SFTv111 single_emb 0
srun --export=ALL torchrun --nproc_per_node=1 --master_port=29701 get_model_outputs.py single_emb llama_3.1_8b 1 SFTv111 single_emb 1
srun --export=ALL torchrun --nproc_per_node=1 --master_port=29702 get_model_outputs.py single_emb llama_3.1_8b 1 SFTv111 single_emb 2
srun --export=ALL torchrun --nproc_per_node=1 --master_port=29703 get_model_outputs.py single_emb llama_3.1_8b 1 SFTv111 single_emb 3
srun --export=ALL torchrun --nproc_per_node=1 --master_port=29704 get_model_outputs.py single_emb llama_3.1_8b 1 SFTv111 single_emb 4
srun --export=ALL torchrun --nproc_per_node=1 --master_port=29705 get_model_outputs.py single_emb llama_3.1_8b 1 SFTv111 single_emb 5
srun --export=ALL torchrun --nproc_per_node=1 --master_port=29706 get_model_outputs.py single_emb llama_3.1_8b 1 SFTv111 single_emb 6
srun --export=ALL torchrun --nproc_per_node=1 --master_port=29

In [35]:
# get_alpaca_outputs.py launch lines for runs 0..15 of every
# (SFT version, embedding variant) pair.
#
# This replaces four copy-pasted loops. Those loops passed
# "--embedding-type forward_rot" even when the checkpoint directory was
# "single_emb"; the embedding type now follows the checkpoint being loaded,
# consistent with generate_commands elsewhere in this notebook.
# The group order (SFTv111 before SFTv110, forward_rot before single_emb) and
# the per-run ports (master_port + run index, repeated in every group) are
# unchanged.
commands = []
master_port = 29600

for sft in ["SFTv111", "SFTv110"]:
    for emb_type in ["forward_rot", "single_emb"]:
        for i in range(16):
            commands.append(
                f"srun --export=ALL torchrun --nproc_per_node=1 --master_port={master_port + i} "
                f"get_alpaca_outputs.py --data-path data/tatsu-lab/alpaca_farm/eval.json --use-input True "
                f"--model ../models/llama_3.1_8b/{emb_type}/train_checkpoints/{sft}/from_base_run_{i}/last/ "
                f"--embedding-type {emb_type} --batch-size 32"
            )

# One command per line.
with open("mar31_alpaca.txt", "w") as file:
    file.write("\n".join(commands))
print(f"{len(commands)} commands have been written")



64 commands have been written


In [44]:
# alpaca_eval scoring commands for runs 0..15 of every
# (embedding variant, SFT version) pair.
commands = []

for emb_type in ["forward_rot", "pretrained_vanilla"]:
    # forward_rot outputs are read from alpaca_farm/, the others from alpaca_eval/.
    alpaca_folder = "alpaca_farm" if emb_type == "forward_rot" else "alpaca_eval"
    for sft in ["SFTv110", "SFTv111"]:
        for i in range(16):
            commands.append(
                f"IS_ALPACA_EVAL_2=False alpaca_eval --model_outputs ./data/tatsu-lab/{alpaca_folder}/llama_3.1_8b_{emb_type}_train_checkpoints_{sft}_from_base_run_{i}_last__l-1_s42.json"
            )

# Full script with every command.
with open("get_alpaca_scores_mar31.sh", "w") as file:
    file.write("\n".join(["#!/usr/bin/env bash"] + commands))
print(f"{len(commands)} commands have been written")


def chunk_commands(items, chunk_size):
    """
    Split ``items`` into consecutive slices of at most ``chunk_size`` elements.

    Returns a list of ``(start, end, chunk)`` tuples where ``chunk`` equals
    ``items[start:end]``. The last chunk may be shorter than ``chunk_size``;
    it is kept rather than dropped. An empty ``items`` yields an empty list.
    """
    return [
        (start, min(start + chunk_size, len(items)), items[start:start + chunk_size])
        for start in range(0, len(items), chunk_size)
    ]


# Per-file scripts. num_of_nodes is used as the number of commands per file.
# The previous "range(len(commands) // num_of_nodes)" loop silently skipped a
# trailing partial chunk whenever the command count was not a multiple of it.
num_of_nodes = 8
for i, (start, end, cur_commands) in enumerate(chunk_commands(commands, num_of_nodes)):
    with open(f"get_alpaca_scores_mar31_{i}.sh", "w") as file:
        file.write("\n".join(["#!/usr/bin/env bash"] + cur_commands))
    print(f"commands [{start}: {end}] have been written")




64 commands have been written
commands [0: 8] have been written
commands [8: 16] have been written
commands [16: 24] have been written
commands [24: 32] have been written
commands [32: 40] have been written
commands [40: 48] have been written
commands [48: 56] have been written
commands [56: 64] have been written
