In [None]:
# This code was run on Colab Pro with an A100 GPU (a V100 also works)
# Change paths accordingly

!git clone https://github.com/hiyouga/LLaMA-Efficient-Tuning.git
!pip install --upgrade pip
!pip install bitsandbytes>=0.39.0
!pip install -r LLaMA-Efficient-Tuning/requirements.txt
!pip install trl==0.7.2

In [None]:
import os

# Directory created by the `git clone` in the install cell; its
# data/dataset_info.json is the registry that add_json_file() edits.
efficient_finetuning_folder = "LLaMA-Efficient-Tuning"

# Dataset file names. Each one is registered under its name without ".json",
# which is the identifier later passed as --dataset on the command line.
# Names containing "reverse" are paired with the `test_r` set by test_llama().

# Training sets tagged "gpt-4".
train_gpt_4_balanced = "train_gpt-4_balanced.json"
train_gpt_4_balanced_reversed = "train_gpt-4_balanced_reversed.json"
train_gpt_4_unbalanced = "train_gpt-4_unbalanced.json"
train_gpt_4_unbalanced_reversed = "train_gpt-4_unbalanced_reversed.json"

# Training sets tagged "gpt-3_5".
train_gpt_3_5_balanced = "train_gpt-3_5_balanced.json"
train_gpt_3_5_balanced_reversed = "train_gpt-3_5_balanced_reversed.json"
train_gpt_3_5_unbalanced = "train_gpt-3_5_unbalanced.json"
train_gpt_3_5_unbalanced_reversed = "train_gpt-3_5_unbalanced_reversed.json"

# Training sets tagged "llama_2_70b".
train_llama_balanced = "train_llama_2_70b_balanced.json"
train_llama_balanced_reversed = "train_llama_2_70b_balanced_reversed.json"
train_llama_unbalanced = "train_llama_2_70b_unbalanced.json"
train_llama_unbalanced_reversed = "train_llama_2_70b_unbalanced_reversed.json"

# Registered below, but no run cell in this part of the notebook trains on it.
train_gpt_4_balanced_boosted = "train_gpt-4_balanced_boosted.json"

# Evaluation sets: `test` for regular models, `test_r` for "reversed" ones.
test = "test.json"
test_r = "test_r.json"

In [None]:
import json
def add_json_file(efficient_finetuning_folder, json_file_name):
    """Register a dataset file in the tuning repo's ``data/dataset_info.json``.

    The registry key is ``json_file_name`` without its trailing ``.json``
    extension, and the stored entry is ``{"file_name": json_file_name}``.
    An existing entry with the same key is overwritten, so calling this
    repeatedly with the same file name is harmless.

    Args:
        efficient_finetuning_folder: Path to the cloned repository; the
            registry is read from ``<folder>/data/dataset_info.json``.
        json_file_name: File name of the dataset to register.

    Returns:
        None. The registry file is rewritten in place and a confirmation
        line is printed.

    Raises:
        FileNotFoundError: If the registry file does not exist.
        json.JSONDecodeError: If the registry file is not valid JSON.
    """
    data_info_file = f"{efficient_finetuning_folder}/data/dataset_info.json"

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(data_info_file, 'r', encoding='utf-8') as f:
        data_info = json.load(f)

    # Strip only the trailing extension. str.replace() would also delete any
    # ".json" occurring in the middle of the name and corrupt the key.
    extension = '.json'
    if json_file_name.endswith(extension):
        new_key = json_file_name[:-len(extension)]
    else:
        new_key = json_file_name

    data_info[new_key] = {
        'file_name': json_file_name
    }

    with open(data_info_file, 'w', encoding='utf-8') as f:
        json.dump(data_info, f, indent=4)

    # Report the file that was actually modified.
    print(f'Added {new_key} to {data_info_file}')

# Register every training and evaluation file with the toolkit, in the same
# order as the constants are declared above.
for dataset_file in (
    train_gpt_4_balanced,
    train_gpt_4_balanced_reversed,
    train_gpt_4_unbalanced,
    train_gpt_4_unbalanced_reversed,
    train_gpt_3_5_balanced,
    train_gpt_3_5_balanced_reversed,
    train_gpt_3_5_unbalanced,
    train_gpt_3_5_unbalanced_reversed,
    train_llama_balanced,
    train_llama_balanced_reversed,
    train_llama_unbalanced,
    train_llama_unbalanced_reversed,
    train_gpt_4_balanced_boosted,
    test,
    test_r,
):
    add_json_file(efficient_finetuning_folder, dataset_file)

In [None]:
os.environ["TRANSFORMERS_CACHE"] = ".cache/huggingface/"
!huggingface-cli login --token ### HUGGINGFACE API KEY ###

# FACT-GPT

In [None]:
def train_valid_llama(model_size, train_data):
    """Print the notebook shell command for one LoRA fine-tuning run.

    The command invokes ``LLaMA-Efficient-Tuning/src/train_bash.py`` with
    ``--stage sft --do_train`` on ``meta-llama/Llama-2-{model_size}-chat-hf``
    and writes to ``data/train_valid_{model_size}_{train_data}``. Nothing is
    executed here; the printed line is meant to be run in a separate cell.

    Args:
        model_size: Size tag inserted into the model name, e.g. ``'13b'``.
        train_data: Registered dataset name passed as ``--dataset``.

    Returns:
        None.
    """
    # Arguments are joined by nine spaces so the printed line stays a single
    # shell line with a stable, reproducible layout.
    gap = " " * 9
    arguments = [
        "!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py",
        "--stage sft",
        f'--model_name_or_path "meta-llama/Llama-2-{model_size}-chat-hf"',
        "--do_train",
        f'--dataset "{train_data}"',
        '--template "default"',
        '--finetuning_type "lora"',
        '--lora_target "q_proj,v_proj"',
        f'--output_dir "data/train_valid_{model_size}_{train_data}"',
        "--overwrite_cache",
        "--per_device_train_batch_size 4",
        "--gradient_accumulation_steps 4",
        '--lr_scheduler_type "cosine"',
        "--logging_steps 1",
        "--save_steps 61",
        "--val_size 0.2",
        "--evaluation_strategy steps",
        "--eval_steps 61",
        '--learning_rate "5e-5"',
        "--num_train_epochs 3.0",
        "--plot_loss True",
        "--fp16",
    ]

    print(gap.join(arguments))

In [None]:
def test_llama(model_size, train_data):

    if 'reverse' in train_data:
        suffix='_r'
    else:
        suffix=''

    command = f"""!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py \
    --stage sft \
    --model_name_or_path 'meta-llama/Llama-2-{model_size}-chat-hf' \
    --do_predict \
    --dataset 'test{suffix}' \
    --template 'default' \
    --finetuning_type 'lora' \
    --checkpoint_dir 'data/train_valid_{model_size}_{train_data}' \
    --output_dir 'data/train_valid_{model_size}_{train_data}/test-endpoint' \
    --per_device_eval_batch_size 8 \
    --max_samples 10000 \
    --temperature 0.01 \
    --top_p 0.01 \
    --predict_with_generate"""

    print(command)

In [None]:
def test_llama_checkpoint(model_size, train_data, checkpoint):

    if 'reverse' in train_data:
        suffix='_r'
    else:
        suffix=''

    command = f"""!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py \
    --stage sft \
    --model_name_or_path 'meta-llama/Llama-2-{model_size}-chat-hf' \
    --do_predict \
    --dataset 'test{suffix}' \
    --template 'default' \
    --finetuning_type 'lora' \
    --checkpoint_dir 'data/train_valid_{model_size}_{train_data}/checkpoint-{checkpoint}' \
    --output_dir 'data/train_valid_{model_size}_{train_data}/test-{checkpoint}' \
    --per_device_eval_batch_size 8 \
    --max_samples 10000 \
    --temperature 0.01 \
    --top_p 0.01 \
    --predict_with_generate"""

    print(command)

### Train-test 13b

In [None]:
# Print one fine-tuning command per training set for the 13b model.
for train_set in (
    'train_gpt-4_balanced',
    'train_gpt-4_balanced_reversed',
    'train_gpt-4_unbalanced',
    'train_gpt-4_unbalanced_reversed',
    'train_gpt-3_5_balanced',
    'train_gpt-3_5_balanced_reversed',
    'train_gpt-3_5_unbalanced',
    'train_gpt-3_5_unbalanced_reversed',
    'train_llama_2_70b_unbalanced',
    'train_llama_2_70b_unbalanced_reversed',
    'train_llama_2_70b_balanced',
    'train_llama_2_70b_balanced_reversed',
):
    train_valid_llama('13b', train_set)

In [None]:
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-13b-chat-hf"         --do_train         --dataset "train_gpt-4_balanced"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_13b_train_gpt-4_balanced"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-13b-chat-hf"         --do_train         --dataset "train_gpt-4_balanced_reversed"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_13b_train_gpt-4_balanced_reversed"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-13b-chat-hf"         --do_train         --dataset "train_gpt-4_unbalanced"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_13b_train_gpt-4_unbalanced"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-13b-chat-hf"         --do_train         --dataset "train_gpt-4_unbalanced_reversed"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_13b_train_gpt-4_unbalanced_reversed"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-13b-chat-hf"         --do_train         --dataset "train_gpt-3_5_balanced"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_13b_train_gpt-3_5_balanced"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-13b-chat-hf"         --do_train         --dataset "train_gpt-3_5_balanced_reversed"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_13b_train_gpt-3_5_balanced_reversed"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-13b-chat-hf"         --do_train         --dataset "train_gpt-3_5_unbalanced"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_13b_train_gpt-3_5_unbalanced"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-13b-chat-hf"         --do_train         --dataset "train_gpt-3_5_unbalanced_reversed"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_13b_train_gpt-3_5_unbalanced_reversed"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-13b-chat-hf"         --do_train         --dataset "train_llama_2_70b_unbalanced"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_13b_train_llama_2_70b_unbalanced"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-13b-chat-hf"         --do_train         --dataset "train_llama_2_70b_unbalanced_reversed"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_13b_train_llama_2_70b_unbalanced_reversed"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-13b-chat-hf"         --do_train         --dataset "train_llama_2_70b_balanced"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_13b_train_llama_2_70b_balanced"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-13b-chat-hf"         --do_train         --dataset "train_llama_2_70b_balanced_reversed"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_13b_train_llama_2_70b_balanced_reversed"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16

In [None]:
# Print one prediction command per trained 13b run.
for train_set in (
    'train_gpt-4_balanced',
    'train_gpt-4_balanced_reversed',
    'train_gpt-4_unbalanced',
    'train_gpt-4_unbalanced_reversed',
    'train_gpt-3_5_balanced',
    'train_gpt-3_5_balanced_reversed',
    'train_gpt-3_5_unbalanced',
    'train_gpt-3_5_unbalanced_reversed',
    'train_llama_2_70b_unbalanced',
    'train_llama_2_70b_unbalanced_reversed',
    'train_llama_2_70b_balanced',
    'train_llama_2_70b_balanced_reversed',
):
    test_llama('13b', train_set)

In [None]:
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_balanced'     --output_dir 'data/train_valid_13b_train_gpt-4_balanced/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_balanced_reversed'     --output_dir 'data/train_valid_13b_train_gpt-4_balanced_reversed/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_unbalanced'     --output_dir 'data/train_valid_13b_train_gpt-4_unbalanced/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_unbalanced_reversed'     --output_dir 'data/train_valid_13b_train_gpt-4_unbalanced_reversed/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_balanced'     --output_dir 'data/train_valid_13b_train_gpt-3_5_balanced/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_balanced_reversed'     --output_dir 'data/train_valid_13b_train_gpt-3_5_balanced_reversed/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced'     --output_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced_reversed'     --output_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced_reversed/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced'     --output_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced_reversed'     --output_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced_reversed/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_balanced'     --output_dir 'data/train_valid_13b_train_llama_2_70b_balanced/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_balanced_reversed'     --output_dir 'data/train_valid_13b_train_llama_2_70b_balanced_reversed/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate

In [None]:
# Print one fine-tuning command per training set for the 7b model.
for train_set in (
    'train_gpt-4_balanced',
    'train_gpt-4_balanced_reversed',
    'train_gpt-4_unbalanced',
    'train_gpt-4_unbalanced_reversed',
    'train_gpt-3_5_balanced',
    'train_gpt-3_5_balanced_reversed',
    'train_gpt-3_5_unbalanced',
    'train_gpt-3_5_unbalanced_reversed',
    'train_llama_2_70b_unbalanced',
    'train_llama_2_70b_unbalanced_reversed',
    'train_llama_2_70b_balanced',
    'train_llama_2_70b_balanced_reversed',
):
    train_valid_llama('7b', train_set)

In [None]:
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-7b-chat-hf"         --do_train         --dataset "train_gpt-4_balanced"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_7b_train_gpt-4_balanced"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-7b-chat-hf"         --do_train         --dataset "train_gpt-4_balanced_reversed"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_7b_train_gpt-4_balanced_reversed"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-7b-chat-hf"         --do_train         --dataset "train_gpt-4_unbalanced"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_7b_train_gpt-4_unbalanced"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-7b-chat-hf"         --do_train         --dataset "train_gpt-4_unbalanced_reversed"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_7b_train_gpt-4_unbalanced_reversed"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-7b-chat-hf"         --do_train         --dataset "train_gpt-3_5_balanced"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_7b_train_gpt-3_5_balanced"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-7b-chat-hf"         --do_train         --dataset "train_gpt-3_5_balanced_reversed"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_7b_train_gpt-3_5_balanced_reversed"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-7b-chat-hf"         --do_train         --dataset "train_gpt-3_5_unbalanced"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_7b_train_gpt-3_5_unbalanced"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-7b-chat-hf"         --do_train         --dataset "train_gpt-3_5_unbalanced_reversed"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_7b_train_gpt-3_5_unbalanced_reversed"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-7b-chat-hf"         --do_train         --dataset "train_llama_2_70b_unbalanced"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_7b_train_llama_2_70b_unbalanced"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-7b-chat-hf"         --do_train         --dataset "train_llama_2_70b_unbalanced_reversed"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_7b_train_llama_2_70b_unbalanced_reversed"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-7b-chat-hf"         --do_train         --dataset "train_llama_2_70b_balanced"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_7b_train_llama_2_70b_balanced"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py         --stage sft         --model_name_or_path "meta-llama/Llama-2-7b-chat-hf"         --do_train         --dataset "train_llama_2_70b_balanced_reversed"         --template "default"         --finetuning_type "lora"         --lora_target "q_proj,v_proj"         --output_dir "data/train_valid_7b_train_llama_2_70b_balanced_reversed"         --overwrite_cache         --per_device_train_batch_size 4         --gradient_accumulation_steps 4         --lr_scheduler_type "cosine"         --logging_steps 1         --save_steps 61         --val_size 0.2         --evaluation_strategy steps         --eval_steps 61         --learning_rate "5e-5"         --num_train_epochs 3.0         --plot_loss True         --fp16

In [None]:
# Invoke test_llama for every 7B run, one call per training-set name.
# Names are listed by family (gpt-4, gpt-3_5, llama_2_70b) in the exact order
# the calls were originally written; note the llama_2_70b family lists its
# "unbalanced" variants before the "balanced" ones.
_runs_7b = (
    'train_gpt-4_balanced',
    'train_gpt-4_balanced_reversed',
    'train_gpt-4_unbalanced',
    'train_gpt-4_unbalanced_reversed',

    'train_gpt-3_5_balanced',
    'train_gpt-3_5_balanced_reversed',
    'train_gpt-3_5_unbalanced',
    'train_gpt-3_5_unbalanced_reversed',

    'train_llama_2_70b_unbalanced',
    'train_llama_2_70b_unbalanced_reversed',
    'train_llama_2_70b_balanced',
    'train_llama_2_70b_balanced_reversed',
)

for _run_name in _runs_7b:
    test_llama('7b', _run_name)

In [None]:
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_balanced'     --output_dir 'data/train_valid_7b_train_gpt-4_balanced/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_balanced_reversed'     --output_dir 'data/train_valid_7b_train_gpt-4_balanced_reversed/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_unbalanced'     --output_dir 'data/train_valid_7b_train_gpt-4_unbalanced/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_unbalanced_reversed'     --output_dir 'data/train_valid_7b_train_gpt-4_unbalanced_reversed/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_balanced'     --output_dir 'data/train_valid_7b_train_gpt-3_5_balanced/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_balanced_reversed'     --output_dir 'data/train_valid_7b_train_gpt-3_5_balanced_reversed/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced'     --output_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced_reversed'     --output_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced_reversed/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced'     --output_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced_reversed'     --output_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced_reversed/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_balanced'     --output_dir 'data/train_valid_7b_train_llama_2_70b_balanced/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_balanced_reversed'     --output_dir 'data/train_valid_7b_train_llama_2_70b_balanced_reversed/test-endpoint'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate

### Test-checkpoint

In [None]:
# Call test_llama_checkpoint for every (model size, run, checkpoint step).
# Steps come from range(61, 550, 61), i.e. 61, 122, ..., 549.
# Iteration order matches the original six loops: all 13b families first, then
# all 7b families; within a family every checkpoint step is visited in turn,
# and at each step the family's four runs are called in the listed order.
_run_families = (
    (
        'train_gpt-4_balanced',
        'train_gpt-4_balanced_reversed',
        'train_gpt-4_unbalanced',
        'train_gpt-4_unbalanced_reversed',
    ),
    (
        'train_gpt-3_5_balanced',
        'train_gpt-3_5_balanced_reversed',
        'train_gpt-3_5_unbalanced',
        'train_gpt-3_5_unbalanced_reversed',
    ),
    (
        # This family lists its "unbalanced" runs before the "balanced" ones.
        'train_llama_2_70b_unbalanced',
        'train_llama_2_70b_unbalanced_reversed',
        'train_llama_2_70b_balanced',
        'train_llama_2_70b_balanced_reversed',
    ),
)

for _model_size in ('13b', '7b'):
    for _family in _run_families:
        for checkpoint in range(61, 550, 61):
            for _run_name in _family:
                test_llama_checkpoint(_model_size, _run_name, checkpoint)

In [None]:
# Per-checkpoint prediction commands. Each line runs train_bash.py with
# --do_predict, passing <run dir>/checkpoint-<step> as --checkpoint_dir and
# <run dir>/test-<step> as --output_dir. Runs whose name ends in "_reversed"
# use the 'test_r' dataset; the others use 'test'.
# NOTE(review): these look like the expanded form of the test_llama_checkpoint
# loops in the previous cell — confirm, and regenerate rather than hand-edit.
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_balanced/checkpoint-61'     --output_dir 'data/train_valid_13b_train_gpt-4_balanced/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_balanced_reversed/checkpoint-61'     --output_dir 'data/train_valid_13b_train_gpt-4_balanced_reversed/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_unbalanced/checkpoint-61'     --output_dir 'data/train_valid_13b_train_gpt-4_unbalanced/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_unbalanced_reversed/checkpoint-61'     --output_dir 'data/train_valid_13b_train_gpt-4_unbalanced_reversed/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_balanced/checkpoint-122'     --output_dir 'data/train_valid_13b_train_gpt-4_balanced/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_balanced_reversed/checkpoint-122'     --output_dir 'data/train_valid_13b_train_gpt-4_balanced_reversed/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_unbalanced/checkpoint-122'     --output_dir 'data/train_valid_13b_train_gpt-4_unbalanced/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_unbalanced_reversed/checkpoint-122'     --output_dir 'data/train_valid_13b_train_gpt-4_unbalanced_reversed/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_balanced/checkpoint-183'     --output_dir 'data/train_valid_13b_train_gpt-4_balanced/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_balanced_reversed/checkpoint-183'     --output_dir 'data/train_valid_13b_train_gpt-4_balanced_reversed/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_unbalanced/checkpoint-183'     --output_dir 'data/train_valid_13b_train_gpt-4_unbalanced/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_unbalanced_reversed/checkpoint-183'     --output_dir 'data/train_valid_13b_train_gpt-4_unbalanced_reversed/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_balanced/checkpoint-244'     --output_dir 'data/train_valid_13b_train_gpt-4_balanced/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_balanced_reversed/checkpoint-244'     --output_dir 'data/train_valid_13b_train_gpt-4_balanced_reversed/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_unbalanced/checkpoint-244'     --output_dir 'data/train_valid_13b_train_gpt-4_unbalanced/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_unbalanced_reversed/checkpoint-244'     --output_dir 'data/train_valid_13b_train_gpt-4_unbalanced_reversed/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_balanced/checkpoint-305'     --output_dir 'data/train_valid_13b_train_gpt-4_balanced/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_balanced_reversed/checkpoint-305'     --output_dir 'data/train_valid_13b_train_gpt-4_balanced_reversed/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_unbalanced/checkpoint-305'     --output_dir 'data/train_valid_13b_train_gpt-4_unbalanced/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_unbalanced_reversed/checkpoint-305'     --output_dir 'data/train_valid_13b_train_gpt-4_unbalanced_reversed/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_balanced/checkpoint-366'     --output_dir 'data/train_valid_13b_train_gpt-4_balanced/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_balanced_reversed/checkpoint-366'     --output_dir 'data/train_valid_13b_train_gpt-4_balanced_reversed/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_unbalanced/checkpoint-366'     --output_dir 'data/train_valid_13b_train_gpt-4_unbalanced/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_unbalanced_reversed/checkpoint-366'     --output_dir 'data/train_valid_13b_train_gpt-4_unbalanced_reversed/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_balanced/checkpoint-427'     --output_dir 'data/train_valid_13b_train_gpt-4_balanced/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_balanced_reversed/checkpoint-427'     --output_dir 'data/train_valid_13b_train_gpt-4_balanced_reversed/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_unbalanced/checkpoint-427'     --output_dir 'data/train_valid_13b_train_gpt-4_unbalanced/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_unbalanced_reversed/checkpoint-427'     --output_dir 'data/train_valid_13b_train_gpt-4_unbalanced_reversed/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_balanced/checkpoint-488'     --output_dir 'data/train_valid_13b_train_gpt-4_balanced/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_balanced_reversed/checkpoint-488'     --output_dir 'data/train_valid_13b_train_gpt-4_balanced_reversed/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_unbalanced/checkpoint-488'     --output_dir 'data/train_valid_13b_train_gpt-4_unbalanced/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_unbalanced_reversed/checkpoint-488'     --output_dir 'data/train_valid_13b_train_gpt-4_unbalanced_reversed/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_balanced/checkpoint-549'     --output_dir 'data/train_valid_13b_train_gpt-4_balanced/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_balanced_reversed/checkpoint-549'     --output_dir 'data/train_valid_13b_train_gpt-4_balanced_reversed/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_unbalanced/checkpoint-549'     --output_dir 'data/train_valid_13b_train_gpt-4_unbalanced/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-4_unbalanced_reversed/checkpoint-549'     --output_dir 'data/train_valid_13b_train_gpt-4_unbalanced_reversed/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_balanced/checkpoint-61'     --output_dir 'data/train_valid_13b_train_gpt-3_5_balanced/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_balanced_reversed/checkpoint-61'     --output_dir 'data/train_valid_13b_train_gpt-3_5_balanced_reversed/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced/checkpoint-61'     --output_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced_reversed/checkpoint-61'     --output_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced_reversed/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_balanced/checkpoint-122'     --output_dir 'data/train_valid_13b_train_gpt-3_5_balanced/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_balanced_reversed/checkpoint-122'     --output_dir 'data/train_valid_13b_train_gpt-3_5_balanced_reversed/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced/checkpoint-122'     --output_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced_reversed/checkpoint-122'     --output_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced_reversed/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_balanced/checkpoint-183'     --output_dir 'data/train_valid_13b_train_gpt-3_5_balanced/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_balanced_reversed/checkpoint-183'     --output_dir 'data/train_valid_13b_train_gpt-3_5_balanced_reversed/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced/checkpoint-183'     --output_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced_reversed/checkpoint-183'     --output_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced_reversed/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_balanced/checkpoint-244'     --output_dir 'data/train_valid_13b_train_gpt-3_5_balanced/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_balanced_reversed/checkpoint-244'     --output_dir 'data/train_valid_13b_train_gpt-3_5_balanced_reversed/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced/checkpoint-244'     --output_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced_reversed/checkpoint-244'     --output_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced_reversed/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_balanced/checkpoint-305'     --output_dir 'data/train_valid_13b_train_gpt-3_5_balanced/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_balanced_reversed/checkpoint-305'     --output_dir 'data/train_valid_13b_train_gpt-3_5_balanced_reversed/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced/checkpoint-305'     --output_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced_reversed/checkpoint-305'     --output_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced_reversed/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_balanced/checkpoint-366'     --output_dir 'data/train_valid_13b_train_gpt-3_5_balanced/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_balanced_reversed/checkpoint-366'     --output_dir 'data/train_valid_13b_train_gpt-3_5_balanced_reversed/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced/checkpoint-366'     --output_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced_reversed/checkpoint-366'     --output_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced_reversed/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_balanced/checkpoint-427'     --output_dir 'data/train_valid_13b_train_gpt-3_5_balanced/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_balanced_reversed/checkpoint-427'     --output_dir 'data/train_valid_13b_train_gpt-3_5_balanced_reversed/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced/checkpoint-427'     --output_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced_reversed/checkpoint-427'     --output_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced_reversed/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_balanced/checkpoint-488'     --output_dir 'data/train_valid_13b_train_gpt-3_5_balanced/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_balanced_reversed/checkpoint-488'     --output_dir 'data/train_valid_13b_train_gpt-3_5_balanced_reversed/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced/checkpoint-488'     --output_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced_reversed/checkpoint-488'     --output_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced_reversed/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_balanced/checkpoint-549'     --output_dir 'data/train_valid_13b_train_gpt-3_5_balanced/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_balanced_reversed/checkpoint-549'     --output_dir 'data/train_valid_13b_train_gpt-3_5_balanced_reversed/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced/checkpoint-549'     --output_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced_reversed/checkpoint-549'     --output_dir 'data/train_valid_13b_train_gpt-3_5_unbalanced_reversed/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced/checkpoint-61'     --output_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced_reversed/checkpoint-61'     --output_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced_reversed/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_balanced/checkpoint-61'     --output_dir 'data/train_valid_13b_train_llama_2_70b_balanced/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_balanced_reversed/checkpoint-61'     --output_dir 'data/train_valid_13b_train_llama_2_70b_balanced_reversed/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced/checkpoint-122'     --output_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced_reversed/checkpoint-122'     --output_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced_reversed/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_balanced/checkpoint-122'     --output_dir 'data/train_valid_13b_train_llama_2_70b_balanced/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_balanced_reversed/checkpoint-122'     --output_dir 'data/train_valid_13b_train_llama_2_70b_balanced_reversed/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced/checkpoint-183'     --output_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced_reversed/checkpoint-183'     --output_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced_reversed/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_balanced/checkpoint-183'     --output_dir 'data/train_valid_13b_train_llama_2_70b_balanced/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_balanced_reversed/checkpoint-183'     --output_dir 'data/train_valid_13b_train_llama_2_70b_balanced_reversed/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced/checkpoint-244'     --output_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced_reversed/checkpoint-244'     --output_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced_reversed/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_balanced/checkpoint-244'     --output_dir 'data/train_valid_13b_train_llama_2_70b_balanced/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_balanced_reversed/checkpoint-244'     --output_dir 'data/train_valid_13b_train_llama_2_70b_balanced_reversed/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced/checkpoint-305'     --output_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced_reversed/checkpoint-305'     --output_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced_reversed/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_balanced/checkpoint-305'     --output_dir 'data/train_valid_13b_train_llama_2_70b_balanced/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_balanced_reversed/checkpoint-305'     --output_dir 'data/train_valid_13b_train_llama_2_70b_balanced_reversed/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced/checkpoint-366'     --output_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced_reversed/checkpoint-366'     --output_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced_reversed/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_balanced/checkpoint-366'     --output_dir 'data/train_valid_13b_train_llama_2_70b_balanced/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_balanced_reversed/checkpoint-366'     --output_dir 'data/train_valid_13b_train_llama_2_70b_balanced_reversed/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced/checkpoint-427'     --output_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced_reversed/checkpoint-427'     --output_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced_reversed/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_balanced/checkpoint-427'     --output_dir 'data/train_valid_13b_train_llama_2_70b_balanced/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_balanced_reversed/checkpoint-427'     --output_dir 'data/train_valid_13b_train_llama_2_70b_balanced_reversed/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced/checkpoint-488'     --output_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced_reversed/checkpoint-488'     --output_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced_reversed/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_balanced/checkpoint-488'     --output_dir 'data/train_valid_13b_train_llama_2_70b_balanced/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_balanced_reversed/checkpoint-488'     --output_dir 'data/train_valid_13b_train_llama_2_70b_balanced_reversed/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced/checkpoint-549'     --output_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced_reversed/checkpoint-549'     --output_dir 'data/train_valid_13b_train_llama_2_70b_unbalanced_reversed/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_balanced/checkpoint-549'     --output_dir 'data/train_valid_13b_train_llama_2_70b_balanced/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-13b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_13b_train_llama_2_70b_balanced_reversed/checkpoint-549'     --output_dir 'data/train_valid_13b_train_llama_2_70b_balanced_reversed/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_balanced/checkpoint-61'     --output_dir 'data/train_valid_7b_train_gpt-4_balanced/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_balanced_reversed/checkpoint-61'     --output_dir 'data/train_valid_7b_train_gpt-4_balanced_reversed/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_unbalanced/checkpoint-61'     --output_dir 'data/train_valid_7b_train_gpt-4_unbalanced/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_unbalanced_reversed/checkpoint-61'     --output_dir 'data/train_valid_7b_train_gpt-4_unbalanced_reversed/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_balanced/checkpoint-122'     --output_dir 'data/train_valid_7b_train_gpt-4_balanced/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_balanced_reversed/checkpoint-122'     --output_dir 'data/train_valid_7b_train_gpt-4_balanced_reversed/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_unbalanced/checkpoint-122'     --output_dir 'data/train_valid_7b_train_gpt-4_unbalanced/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_unbalanced_reversed/checkpoint-122'     --output_dir 'data/train_valid_7b_train_gpt-4_unbalanced_reversed/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_balanced/checkpoint-183'     --output_dir 'data/train_valid_7b_train_gpt-4_balanced/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_balanced_reversed/checkpoint-183'     --output_dir 'data/train_valid_7b_train_gpt-4_balanced_reversed/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_unbalanced/checkpoint-183'     --output_dir 'data/train_valid_7b_train_gpt-4_unbalanced/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_unbalanced_reversed/checkpoint-183'     --output_dir 'data/train_valid_7b_train_gpt-4_unbalanced_reversed/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_balanced/checkpoint-244'     --output_dir 'data/train_valid_7b_train_gpt-4_balanced/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_balanced_reversed/checkpoint-244'     --output_dir 'data/train_valid_7b_train_gpt-4_balanced_reversed/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_unbalanced/checkpoint-244'     --output_dir 'data/train_valid_7b_train_gpt-4_unbalanced/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_unbalanced_reversed/checkpoint-244'     --output_dir 'data/train_valid_7b_train_gpt-4_unbalanced_reversed/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_balanced/checkpoint-305'     --output_dir 'data/train_valid_7b_train_gpt-4_balanced/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_balanced_reversed/checkpoint-305'     --output_dir 'data/train_valid_7b_train_gpt-4_balanced_reversed/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_unbalanced/checkpoint-305'     --output_dir 'data/train_valid_7b_train_gpt-4_unbalanced/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_unbalanced_reversed/checkpoint-305'     --output_dir 'data/train_valid_7b_train_gpt-4_unbalanced_reversed/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_balanced/checkpoint-366'     --output_dir 'data/train_valid_7b_train_gpt-4_balanced/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_balanced_reversed/checkpoint-366'     --output_dir 'data/train_valid_7b_train_gpt-4_balanced_reversed/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_unbalanced/checkpoint-366'     --output_dir 'data/train_valid_7b_train_gpt-4_unbalanced/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_unbalanced_reversed/checkpoint-366'     --output_dir 'data/train_valid_7b_train_gpt-4_unbalanced_reversed/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_balanced/checkpoint-427'     --output_dir 'data/train_valid_7b_train_gpt-4_balanced/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_balanced_reversed/checkpoint-427'     --output_dir 'data/train_valid_7b_train_gpt-4_balanced_reversed/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_unbalanced/checkpoint-427'     --output_dir 'data/train_valid_7b_train_gpt-4_unbalanced/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_unbalanced_reversed/checkpoint-427'     --output_dir 'data/train_valid_7b_train_gpt-4_unbalanced_reversed/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_balanced/checkpoint-488'     --output_dir 'data/train_valid_7b_train_gpt-4_balanced/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_balanced_reversed/checkpoint-488'     --output_dir 'data/train_valid_7b_train_gpt-4_balanced_reversed/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_unbalanced/checkpoint-488'     --output_dir 'data/train_valid_7b_train_gpt-4_unbalanced/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_unbalanced_reversed/checkpoint-488'     --output_dir 'data/train_valid_7b_train_gpt-4_unbalanced_reversed/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_balanced/checkpoint-549'     --output_dir 'data/train_valid_7b_train_gpt-4_balanced/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_balanced_reversed/checkpoint-549'     --output_dir 'data/train_valid_7b_train_gpt-4_balanced_reversed/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_unbalanced/checkpoint-549'     --output_dir 'data/train_valid_7b_train_gpt-4_unbalanced/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-4_unbalanced_reversed/checkpoint-549'     --output_dir 'data/train_valid_7b_train_gpt-4_unbalanced_reversed/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_balanced/checkpoint-61'     --output_dir 'data/train_valid_7b_train_gpt-3_5_balanced/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_balanced_reversed/checkpoint-61'     --output_dir 'data/train_valid_7b_train_gpt-3_5_balanced_reversed/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced/checkpoint-61'     --output_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced_reversed/checkpoint-61'     --output_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced_reversed/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_balanced/checkpoint-122'     --output_dir 'data/train_valid_7b_train_gpt-3_5_balanced/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_balanced_reversed/checkpoint-122'     --output_dir 'data/train_valid_7b_train_gpt-3_5_balanced_reversed/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced/checkpoint-122'     --output_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced_reversed/checkpoint-122'     --output_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced_reversed/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_balanced/checkpoint-183'     --output_dir 'data/train_valid_7b_train_gpt-3_5_balanced/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_balanced_reversed/checkpoint-183'     --output_dir 'data/train_valid_7b_train_gpt-3_5_balanced_reversed/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced/checkpoint-183'     --output_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced_reversed/checkpoint-183'     --output_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced_reversed/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_balanced/checkpoint-244'     --output_dir 'data/train_valid_7b_train_gpt-3_5_balanced/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_balanced_reversed/checkpoint-244'     --output_dir 'data/train_valid_7b_train_gpt-3_5_balanced_reversed/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced/checkpoint-244'     --output_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced_reversed/checkpoint-244'     --output_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced_reversed/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_balanced/checkpoint-305'     --output_dir 'data/train_valid_7b_train_gpt-3_5_balanced/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_balanced_reversed/checkpoint-305'     --output_dir 'data/train_valid_7b_train_gpt-3_5_balanced_reversed/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced/checkpoint-305'     --output_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced_reversed/checkpoint-305'     --output_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced_reversed/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_balanced/checkpoint-366'     --output_dir 'data/train_valid_7b_train_gpt-3_5_balanced/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_balanced_reversed/checkpoint-366'     --output_dir 'data/train_valid_7b_train_gpt-3_5_balanced_reversed/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced/checkpoint-366'     --output_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced_reversed/checkpoint-366'     --output_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced_reversed/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_balanced/checkpoint-427'     --output_dir 'data/train_valid_7b_train_gpt-3_5_balanced/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_balanced_reversed/checkpoint-427'     --output_dir 'data/train_valid_7b_train_gpt-3_5_balanced_reversed/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced/checkpoint-427'     --output_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced_reversed/checkpoint-427'     --output_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced_reversed/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_balanced/checkpoint-488'     --output_dir 'data/train_valid_7b_train_gpt-3_5_balanced/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_balanced_reversed/checkpoint-488'     --output_dir 'data/train_valid_7b_train_gpt-3_5_balanced_reversed/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced/checkpoint-488'     --output_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced_reversed/checkpoint-488'     --output_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced_reversed/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_balanced/checkpoint-549'     --output_dir 'data/train_valid_7b_train_gpt-3_5_balanced/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_balanced_reversed/checkpoint-549'     --output_dir 'data/train_valid_7b_train_gpt-3_5_balanced_reversed/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced/checkpoint-549'     --output_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced_reversed/checkpoint-549'     --output_dir 'data/train_valid_7b_train_gpt-3_5_unbalanced_reversed/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced/checkpoint-61'     --output_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced_reversed/checkpoint-61'     --output_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced_reversed/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_balanced/checkpoint-61'     --output_dir 'data/train_valid_7b_train_llama_2_70b_balanced/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_balanced_reversed/checkpoint-61'     --output_dir 'data/train_valid_7b_train_llama_2_70b_balanced_reversed/test-61'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced/checkpoint-122'     --output_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced_reversed/checkpoint-122'     --output_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced_reversed/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_balanced/checkpoint-122'     --output_dir 'data/train_valid_7b_train_llama_2_70b_balanced/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_balanced_reversed/checkpoint-122'     --output_dir 'data/train_valid_7b_train_llama_2_70b_balanced_reversed/test-122'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced/checkpoint-183'     --output_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced_reversed/checkpoint-183'     --output_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced_reversed/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_balanced/checkpoint-183'     --output_dir 'data/train_valid_7b_train_llama_2_70b_balanced/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_balanced_reversed/checkpoint-183'     --output_dir 'data/train_valid_7b_train_llama_2_70b_balanced_reversed/test-183'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced/checkpoint-244'     --output_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced_reversed/checkpoint-244'     --output_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced_reversed/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_balanced/checkpoint-244'     --output_dir 'data/train_valid_7b_train_llama_2_70b_balanced/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_balanced_reversed/checkpoint-244'     --output_dir 'data/train_valid_7b_train_llama_2_70b_balanced_reversed/test-244'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced/checkpoint-305'     --output_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced_reversed/checkpoint-305'     --output_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced_reversed/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_balanced/checkpoint-305'     --output_dir 'data/train_valid_7b_train_llama_2_70b_balanced/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_balanced_reversed/checkpoint-305'     --output_dir 'data/train_valid_7b_train_llama_2_70b_balanced_reversed/test-305'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced/checkpoint-366'     --output_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced_reversed/checkpoint-366'     --output_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced_reversed/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_balanced/checkpoint-366'     --output_dir 'data/train_valid_7b_train_llama_2_70b_balanced/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_balanced_reversed/checkpoint-366'     --output_dir 'data/train_valid_7b_train_llama_2_70b_balanced_reversed/test-366'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced/checkpoint-427'     --output_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced_reversed/checkpoint-427'     --output_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced_reversed/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_balanced/checkpoint-427'     --output_dir 'data/train_valid_7b_train_llama_2_70b_balanced/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_balanced_reversed/checkpoint-427'     --output_dir 'data/train_valid_7b_train_llama_2_70b_balanced_reversed/test-427'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced/checkpoint-488'     --output_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced_reversed/checkpoint-488'     --output_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced_reversed/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_balanced/checkpoint-488'     --output_dir 'data/train_valid_7b_train_llama_2_70b_balanced/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_balanced_reversed/checkpoint-488'     --output_dir 'data/train_valid_7b_train_llama_2_70b_balanced_reversed/test-488'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced/checkpoint-549'     --output_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced_reversed/checkpoint-549'     --output_dir 'data/train_valid_7b_train_llama_2_70b_unbalanced_reversed/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_balanced/checkpoint-549'     --output_dir 'data/train_valid_7b_train_llama_2_70b_balanced/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate
!CUDA_VISIBLE_DEVICES=0 python LLaMA-Efficient-Tuning/src/train_bash.py     --stage sft     --model_name_or_path 'meta-llama/Llama-2-7b-chat-hf'     --do_predict     --dataset 'test_r'     --template 'default'     --finetuning_type 'lora'     --checkpoint_dir 'data/train_valid_7b_train_llama_2_70b_balanced_reversed/checkpoint-549'     --output_dir 'data/train_valid_7b_train_llama_2_70b_balanced_reversed/test-549'     --per_device_eval_batch_size 8     --max_samples 10000     --temperature 0.01     --top_p 0.01     --predict_with_generate