In [1]:
from google.colab import drive

# Mount Google Drive at /content/drive so the project files referenced below
# (everything under /content/drive/MyDrive/...) are reachable from this runtime.
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import sys
import os

# Project folder on Google Drive; later cells build model, requirements and
# metric paths from it. Update this path if your Drive layout differs.
drive_path = "/content/drive/MyDrive/NumEval"  # Update this path
# Make modules stored in that folder importable. The membership check keeps the
# cell idempotent: re-running it does not append duplicate entries to sys.path.
if drive_path not in sys.path:
    sys.path.append(drive_path)

In [3]:
# Install the pinned gcsfs release plus the project's Colab requirements.
# `%pip` (instead of `!pip`) installs into the environment of the running
# kernel rather than whatever `pip` happens to be first on the shell PATH.
%pip install gcsfs==2024.9.0
%pip install -r "/content/drive/MyDrive/NumEval/colab_requirements.txt"

Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu118


In [4]:
%%capture
# Step 1: install the Unsloth release from PyPI (this also installs its dependencies).
!pip install unsloth
# Also get the latest nightly Unsloth!
# Step 2: remove that release and install the package straight from the GitHub
# repository instead; --no-deps leaves the dependencies installed in step 1 untouched.
# NOTE(review): the git install is unpinned, so a later re-run may pick up a
# different Unsloth version than the one that produced the outputs in this notebook.
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [5]:
# Configure PyTorch's CUDA allocator via its environment variable. This is set
# before the cell that runs `import torch`.
# NOTE(review): presumably meant to reduce memory fragmentation across the
# back-to-back training runs below — confirm against the PyTorch CUDA memory docs.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

In [8]:
import torch
import gc
from llama_qnli_stress import run
from argparse import Namespace


def run_experiment(data_train_pth, data_dev_pth, data_test_pth, has_demonstrations, is_digit_base):
    """Run one `llama_qnli_stress.run` job and release memory afterwards.

    Builds an `argparse.Namespace` (hyperparameters are fixed here; only the
    data paths and the two flags vary), passes it to `run`, and then always
    performs garbage collection and CUDA cache cleanup, even if `run` raises.

    Args:
        data_train_pth: Path of the training file. Its base name (the text
            before the first ".") is also used to name the output model
            folder and the results file.
        data_dev_pth: Path of the dev file, forwarded to `run`.
        data_test_pth: Path of the test file, forwarded to `run`.
        has_demonstrations: Flag forwarded to `run` and embedded in the
            output names.
        is_digit_base: Flag forwarded to `run` and embedded in the output
            names.

    Raises:
        Any exception raised by `run`; it propagates after the cleanup.
    """
    # Derive the per-run tag once; it names both the model folder and the
    # results file so the outputs of different configurations never collide.
    train_stem = data_train_pth.split('/')[-1].split('.')[0]
    run_tag = f"{train_stem}_demonstrations_{has_demonstrations}_digit_{is_digit_base}"
    output_model_path = f"{drive_path}/models/llama/{run_tag}"
    output_file_name = f"results_{run_tag}.json"

    # Create args namespace directly instead of using argparse
    args = Namespace(
        data_train_pth=data_train_pth,
        data_dev_pth=data_dev_pth,
        data_test_pth=data_test_pth,
        num_splits=10,
        select_split_idx=2,
        is_digit_base=is_digit_base,
        has_demonstrations=has_demonstrations,
        model_name="unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
        seed=42,
        model_checkpoint="",
        task="train",
        evaluation_strategy="no",
        save_strategy="no",
        per_device_train_batch_size=2,
        per_device_eval_batch_size=2,
        lr=3e-4,
        # NOTE(review): spelled "radio" here; presumably `run` reads this
        # exact attribute name, so it is left unchanged — verify in
        # llama_qnli_stress before renaming.
        warm_up_radio=0.1,
        gradient_accumulation_steps=2,
        num_train_epochs=3,
        output_model_path=output_model_path,
        weight_decay=0.01,
        output_file_name=output_file_name,
        # NOTE(review): drive_path already ends in "/NumEval", so this
        # resolves to ".../NumEval/NumEval/save_res/llama" — confirm that
        # the doubled folder is intended.
        output_dir=f"{drive_path}/NumEval/save_res/llama",
        max_seq_length=2048,
        dtype=None,
        load_in_4bit=True,
        dataset_num_proc=2,
        f1_metric_pth=f"{drive_path}/f1.py",
        rank=8,
        lora_alpha=16,
        has_dev=True
    )
    try:
        run(args)
    finally:
        # Ensure cleanup in any case.
        del args  # Drop this frame's reference to the namespace

        # Collect garbage FIRST: objects that are only kept alive by reference
        # cycles still own their GPU tensors until the collector frees them,
        # and empty_cache() can only release blocks that are no longer in use.
        gc.collect()

        if torch.cuda.is_available():
            torch.cuda.synchronize()  # Wait for all pending GPU work to finish
            torch.cuda.empty_cache()  # Return the now-unused cached blocks

        # Logging to confirm cleanup
        print("Memory cleanup completed after iteration.")


def automate_experiments(demonstrations_options=(True, False), digit_base_options=(True, False)):
    """Run `run_experiment` for every combination of the two option grids.

    The QNLI-Stress Test train/dev/test files are read from a fixed folder on
    Google Drive. Experiments run sequentially, with `has_demonstrations` as
    the outer loop and `is_digit_base` as the inner loop.

    Args:
        demonstrations_options: Values to try for `has_demonstrations`.
            Defaults to both True and False. Pass e.g. `(True,)` to run a
            subset instead of editing this function.
        digit_base_options: Values to try for `is_digit_base`. Defaults to
            both True and False.
    """
    data_folder_pth = "/content/drive/MyDrive/NumEval/Quantitative 101/QNLI/QNLI-Stress Test"
    data_train_pth = f"{data_folder_pth}/QNLI-Stress Test_train.json"
    data_dev_pth = f"{data_folder_pth}/QNLI-Stress Test_dev.json"
    data_test_pth = f"{data_folder_pth}/QNLI-Stress Test_test.json"

    # Materialise the grids so one-shot iterables (e.g. generators) are not
    # exhausted after the first pass of the outer loop.
    demonstrations_options = tuple(demonstrations_options)
    digit_base_options = tuple(digit_base_options)

    for has_demonstrations in demonstrations_options:
        for is_digit_base in digit_base_options:
            print(
                f"Running experiment with {data_train_pth}, has_demonstrations={has_demonstrations}, is_digit_base={is_digit_base}"
            )
            run_experiment(
                data_train_pth,
                data_dev_pth,
                data_test_pth,
                has_demonstrations,
                is_digit_base,
            )

In [9]:
# Launch the experiment sweep; each run's training log is printed below.
automate_experiments()

Running experiment with /content/drive/MyDrive/NumEval/Quantitative 101/QNLI/QNLI-Stress Test/QNLI-Stress Test_train.json, has_demonstrations=True, is_digit_base=True


Map:   0%|          | 0/6475 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!




Map (num_proc=2):   0%|          | 0/6475 [00:00<?, ? examples/s]

Map:   0%|          | 0/6475 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 6,475 | Num Epochs = 3
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 2
\        /    Total batch size = 4 | Total steps = 4,857
 "-____-"     Number of trainable parameters = 5,636,096


Step,Training Loss
500,0.5006
1000,0.132
1500,0.0956
2000,0.0715
2500,0.0652
3000,0.0552
3500,0.0505
4000,0.037
4500,0.0343


Trainging end..
Memory cleanup completed after iteration.
Running experiment with /content/drive/MyDrive/NumEval/Quantitative 101/QNLI/QNLI-Stress Test/QNLI-Stress Test_train.json, has_demonstrations=True, is_digit_base=False


Map:   0%|          | 0/6475 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!




Map (num_proc=2):   0%|          | 0/6475 [00:00<?, ? examples/s]

Map:   0%|          | 0/6475 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 6,475 | Num Epochs = 3
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 2
\        /    Total batch size = 4 | Total steps = 4,857
 "-____-"     Number of trainable parameters = 5,636,096


Step,Training Loss
500,0.4999
1000,0.1575
1500,0.1417
2000,0.0837
2500,0.0562
3000,0.0637
3500,0.0545
4000,0.036
4500,0.0335


Trainging end..
Memory cleanup completed after iteration.
Running experiment with /content/drive/MyDrive/NumEval/Quantitative 101/QNLI/QNLI-Stress Test/QNLI-Stress Test_train.json, has_demonstrations=False, is_digit_base=True


Map:   0%|          | 0/6475 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!




Map (num_proc=2):   0%|          | 0/6475 [00:00<?, ? examples/s]

Map:   0%|          | 0/6475 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 6,475 | Num Epochs = 3
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 2
\        /    Total batch size = 4 | Total steps = 4,857
 "-____-"     Number of trainable parameters = 5,636,096


Step,Training Loss
500,0.4463
1000,0.1373
1500,0.086
2000,0.0736
2500,0.0535
3000,0.0537
3500,0.0512
4000,0.0349
4500,0.0318


Trainging end..
Memory cleanup completed after iteration.
Running experiment with /content/drive/MyDrive/NumEval/Quantitative 101/QNLI/QNLI-Stress Test/QNLI-Stress Test_train.json, has_demonstrations=False, is_digit_base=False


Map:   0%|          | 0/6475 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!




Map (num_proc=2):   0%|          | 0/6475 [00:00<?, ? examples/s]

Map:   0%|          | 0/6475 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 6,475 | Num Epochs = 3
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 2
\        /    Total batch size = 4 | Total steps = 4,857
 "-____-"     Number of trainable parameters = 5,636,096


Step,Training Loss
500,0.4454
1000,0.1103
1500,0.0947
2000,0.0696
2500,0.0545
3000,0.051
3500,0.0428
4000,0.032
4500,0.0305


Trainging end..
Memory cleanup completed after iteration.
