## Training

In [None]:
import os
import subprocess
import sys

# --- Training configuration (edit before running) ---------------------------
CUDA_DEVICES = "1,2,3,4"  # Comma-separated GPU indices; modify as needed
OUTPUT_DIR = "path"  # Placeholder; forwarded to finetune.py as --output_dir
NUM_EPOCHS = 5
LR_POLICY = "stepLR"
OPTION = "custom"

# Set CUDA devices. This changes the kernel's own environment, so it applies to
# the process launched below and to anything started later from this kernel.
os.environ["CUDA_VISIBLE_DEVICES"] = CUDA_DEVICES

# One worker process per listed device. Blank entries (e.g. from a trailing
# comma) are ignored so the process count cannot exceed the device count.
gpu_ids = [gpu.strip() for gpu in CUDA_DEVICES.split(",") if gpu.strip()]
if not gpu_ids:
    raise ValueError("CUDA_DEVICES must list at least one GPU index")

# The command is an argument list executed without a shell: every value reaches
# finetune.py as exactly one argument, whatever characters it contains.
# sys.executable is the interpreter running this kernel, so the launcher uses
# the same environment as the notebook rather than whichever `python` is first
# on PATH.
# NOTE(review): torch.distributed.launch is deprecated upstream in favour of
# torchrun. Switching changes how the local rank is handed to the script, so
# confirm finetune.py's argument handling before migrating.
command = [
    sys.executable, "-m", "torch.distributed.launch",
    f"--nproc_per_node={len(gpu_ids)}",
    "finetune.py",  # relative path: resolved against the current working directory
    "--base_model", "meta-llama/Llama-2-7b-hf",
    "--data_path", "tatsu-lab/alpaca",
    "--output_dir", OUTPUT_DIR,
    "--batch_size", "128",
    "--micro_batch_size", "4",
    "--num_epochs", str(NUM_EPOCHS),
    "--learning_rate", "1e-3",
    "--learning_rate_policy", LR_POLICY,
    "--option", OPTION,
    "--cutoff_len", "512",
    "--val_set_size", "2000",
    "--lora_r", "8",
    "--lora_alpha", "16",
    "--lora_dropout", "0.1",
]

# check=True raises subprocess.CalledProcessError if training exits non-zero,
# which stops a "Run All" instead of continuing to later cells.
subprocess.run(command, check=True)

## Evaluation

In [None]:
import subprocess
import sys

# Tasks handed to lm_eval as a single comma-separated --tasks value.
tasks = ["piqa", "openbookqa", "social_iqa", "commonsense_qa", "mnli", "truthfulqa", "hellaswag", "ai2_arc"]
task_str = ",".join(tasks)

num_fewshot = 0
model_args = "meta-llama/Llama-2-7b-hf"
# `checkpoint` is appended to BASE_DIR with a "/" separator below; a leading
# slash (as in this placeholder) therefore yields "//" in the joined paths.
checkpoint = "/path/to/checkpoint"
BASE_DIR = "/path/to/project"
PEFT_CHECKPOINT = f"{BASE_DIR}/{checkpoint}"
# Despite its name this is the path of a JSON file, and running this cell
# rebinds the OUTPUT_DIR that the training cell defines.
OUTPUT_DIR = f"{BASE_DIR}/{checkpoint}_results/output.json"

# The command is an argument list executed without a shell, so paths containing
# spaces or shell metacharacters are passed through as single arguments.
# sys.executable is the interpreter running this kernel, so lm_eval is looked
# up in the notebook's environment rather than via `python` on PATH.
# The child process inherits this kernel's environment, including any
# CUDA_VISIBLE_DEVICES value set earlier in the session.
command = [
    sys.executable, "-m", "lm_eval",
    "--model", "hf",
    "--model_args", f"pretrained={model_args},peft={PEFT_CHECKPOINT}",
    "--tasks", task_str,
    "--device", "cuda",
    "--batch_size", "auto:4",
    "--num_fewshot", str(num_fewshot),
    "--output_path", OUTPUT_DIR,
    "--log_samples",
]

# check=True raises subprocess.CalledProcessError if the evaluation exits
# with a non-zero status.
subprocess.run(command, check=True)