In [None]:
# from google.colab import drive

# drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import sys
import os

# Root directory from which every other path in this notebook is built.
# When working from a mounted Google Drive, point it at that folder instead:
# drive_path = "/content/drive/MyDrive/NumEval"
drive_path = "/content"

# Make modules stored under drive_path importable. The membership check keeps
# sys.path free of duplicate entries when this cell is executed more than once.
if drive_path not in sys.path:
    sys.path.append(drive_path)

In [None]:
!pip install gcsfs==2024.9.0
requirement_path = f"{drive_path}/colab_requirements.txt"
!pip install -r {requirement_path}

Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu118


In [None]:
!pip install unsloth

!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [None]:
!pip install --upgrade xformers

In [5]:
# Allocator setting for PyTorch's CUDA caching allocator. This cell runs before
# the cell that imports torch; presumably the value has to be in the
# environment before CUDA is first initialised - confirm against PyTorch docs.
os.environ.update({"PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True"})

In [None]:
import torch
import gc
from llama_qp import run
from argparse import Namespace


def run_experiment(
    data_train_pth,
    data_dev_pth,
    data_test_pth,
    has_demonstrations,
    is_digit_base,
    dataset_type,
    is_text_base,
):
    """Run a single `llama_qp.run` job for one configuration, then free memory.

    Builds the argument namespace passed to `run` (fixed hyperparameters plus
    the per-experiment values given here) and calls `run(args)`. The cleanup
    lives in a `finally` clause, so it also executes when `run` raises; the
    exception still propagates to the caller afterwards.

    Args:
        data_train_pth: Path of the training file. The part of its file name
            before the first "." is embedded in the output model path and in
            the result file name.
        data_dev_pth: Path of the dev file, forwarded as `args.data_dev_pth`.
        data_test_pth: Path of the test file, forwarded as `args.data_test_pth`.
        has_demonstrations: Forwarded as `args.has_demonstrations`; also part
            of the output names.
        is_digit_base: Forwarded as `args.is_digit_base`; also part of the
            output names.
        dataset_type: Forwarded as `args.dataset_type`.
        is_text_base: Forwarded as `args.is_text_base`; also part of the
            output names.

    Returns:
        None. Whatever `run` produces is not returned to the caller.
    """
    # Bound up front so the cleanup below can drop the reference without
    # having to probe locals() for it.
    args = None
    try:
        # File name of the training set up to its first ".", shared by the
        # model directory and the results file so the two can be matched.
        train_stem = data_train_pth.split("/")[-1].split(".")[0]
        run_tag = (
            f"{train_stem}_demonstrations_{has_demonstrations}"
            f"_digit_{is_digit_base}_text_{is_text_base}"
        )
        output_model_path = f"{drive_path}/models/llama/{run_tag}"
        output_file_name = f"results_{run_tag}.json"

        # Build the namespace directly instead of going through argparse.
        args = Namespace(
            data_train_pth=data_train_pth,
            data_dev_pth=data_dev_pth,
            data_test_pth=data_test_pth,
            num_splits=10,
            select_split_idx=2,
            is_digit_base=is_digit_base,
            has_demonstrations=has_demonstrations,
            model_name="unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
            seed=42,
            model_checkpoint="",
            task="train",
            evaluation_strategy="no",
            save_strategy="no",
            per_device_train_batch_size=5,
            per_device_eval_batch_size=5,
            lr=3e-4,
            # NOTE(review): spelling kept as-is; presumably llama_qp reads this
            # exact attribute name - verify before renaming to "ratio".
            warm_up_radio=0.1,
            gradient_accumulation_steps=2,
            num_train_epochs=1,
            output_model_path=output_model_path,
            weight_decay=0.01,
            output_file_name=output_file_name,
            output_dir=f"{drive_path}/NumEval/save_res/llama",
            max_seq_length=2048,
            dtype=None,
            load_in_4bit=True,
            dataset_num_proc=2,
            f1_metric_pth=f"{drive_path}/f1.py",
            rank=8,
            lora_alpha=16,
            has_dev=True,
            dataset_type=dataset_type,
            is_text_base=is_text_base,
        )
        run(args)
    finally:
        # Drop our reference to the namespace before collecting.
        args = None

        # Collect garbage first: objects kept alive only by reference cycles
        # still own their GPU tensors until the collector runs, and
        # empty_cache() can only release blocks that no tensor is using.
        gc.collect()

        if torch.cuda.is_available():
            torch.cuda.synchronize()  # Wait for all queued GPU work to finish
            torch.cuda.empty_cache()  # Release the now-unused cached GPU memory

        # Logging to confirm cleanup
        print("Memory cleanup completed after iteration.")


def automate_experiments():
    """Call `run_experiment` once per dataset / option combination.

    Datasets are read from `{drive_path}/converted_jsons`. For each dataset the
    demonstration options are iterated in the outer position and the digit-base
    options in the inner position; `is_text_base` is fixed to True. A line
    describing the configuration is printed before each run.
    """
    json_dir = f"{drive_path}/converted_jsons"

    # One row per dataset: (train file, dev file, test file, dataset type).
    datasets = [
        (
            "comment_subsampled_train_converted.json",
            "comment_subsampled_dev_converted.json",
            "Numeracy600K_comment_test_converted.json",
            "comment",
        ),
        (
            "headline_subsampled_train_converted.json",
            "headline_subsampled_dev_converted.json",
            "Numeracy600K_headline_test_converted.json",
            "headline",
        ),
    ]

    # Option values to sweep; edit these lists to widen or narrow the sweep.
    demonstrations_options = [True, False]
    digit_base_options = [False]
    is_text_base = True

    # Flatten the sweep into one ordered list: dataset, then demonstrations,
    # then digit base (the same order as three nested loops).
    grid = [
        (dataset, demo_flag, digit_flag)
        for dataset in datasets
        for demo_flag in demonstrations_options
        for digit_flag in digit_base_options
    ]

    for dataset, has_demonstrations, is_digit_base in grid:
        train_name, dev_name, test_name, dataset_type = dataset
        data_train_pth = f"{json_dir}/{train_name}"
        data_dev_pth = f"{json_dir}/{dev_name}"
        data_test_pth = f"{json_dir}/{test_name}"

        print(
            f"Running experiment with {data_train_pth}, has_demonstrations={has_demonstrations}, is_digit_base={is_digit_base}, is_text_base={is_text_base}"
        )
        run_experiment(
            data_train_pth=data_train_pth,
            data_dev_pth=data_dev_pth,
            data_test_pth=data_test_pth,
            has_demonstrations=has_demonstrations,
            is_digit_base=is_digit_base,
            dataset_type=dataset_type,
            is_text_base=is_text_base,
        )

In [None]:
# Launch every experiment configured in automate_experiments(), one after another.
automate_experiments()

Running experiment with /content/drive/MyDrive/NumEval/comment_subsampled_train.json, has_demonstrations=True, is_digit_base=True


Map:   0%|          | 0/19772 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.10: Fast Llama patching. Transformers:4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2024.11.10 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.


Map (num_proc=2):   0%|          | 0/19772 [00:00<?, ? examples/s]

Map:   0%|          | 0/19772 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 19,772 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 5 | Gradient Accumulation steps = 2
\        /    Total batch size = 10 | Total steps = 1,977
 "-____-"     Number of trainable parameters = 5,636,096
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33ma55570869[0m ([33ma55570869-northeastern-university[0m). Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
500,0.5922
1000,0.2632
1500,0.2349


Trainging end..
Memory cleanup completed after iteration.
Running experiment with /content/drive/MyDrive/NumEval/comment_subsampled_train.json, has_demonstrations=True, is_digit_base=False


Map:   0%|          | 0/19772 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.10: Fast Llama patching. Transformers:4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!




Map (num_proc=2):   0%|          | 0/19772 [00:00<?, ? examples/s]

Map:   0%|          | 0/19772 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 19,772 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 5 | Gradient Accumulation steps = 2
\        /    Total batch size = 10 | Total steps = 1,977
 "-____-"     Number of trainable parameters = 5,636,096


Step,Training Loss
500,0.5831
1000,0.2433
1500,0.2148


Trainging end..
Memory cleanup completed after iteration.
Running experiment with /content/drive/MyDrive/NumEval/comment_subsampled_train.json, has_demonstrations=False, is_digit_base=True


Map:   0%|          | 0/19772 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.10: Fast Llama patching. Transformers:4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!




Map (num_proc=2):   0%|          | 0/19772 [00:00<?, ? examples/s]

Map:   0%|          | 0/19772 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 19,772 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 5 | Gradient Accumulation steps = 2
\        /    Total batch size = 10 | Total steps = 1,977
 "-____-"     Number of trainable parameters = 5,636,096


Step,Training Loss
500,0.6198
1000,0.2612
1500,0.2306


Trainging end..
Memory cleanup completed after iteration.
Running experiment with /content/drive/MyDrive/NumEval/comment_subsampled_train.json, has_demonstrations=False, is_digit_base=False


Map:   0%|          | 0/19772 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.10: Fast Llama patching. Transformers:4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!




Map (num_proc=2):   0%|          | 0/19772 [00:00<?, ? examples/s]

Map:   0%|          | 0/19772 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 19,772 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 5 | Gradient Accumulation steps = 2
\        /    Total batch size = 10 | Total steps = 1,977
 "-____-"     Number of trainable parameters = 5,636,096


Step,Training Loss
