In [1]:
from google.colab import drive

drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import sys
import os

# Add your Google Drive path to Python path
drive_path = "/content/drive/MyDrive/NumEval"  # Update this path
sys.path.append(drive_path)

In [3]:
!pip install gcsfs==2024.9.0
!pip install -r "/content/drive/MyDrive/NumEval/colab_requirements.txt"

Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu118


In [5]:
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

In [None]:
import pandas as pd
from instruction_tuning_qp import run
from argparse import Namespace
import gc
import torch


def run_experiment(
    data_train_pth,
    data_dev_pth,
    data_test_pth,
    has_demonstrations,
    is_digit_base,
    is_text_base,
    dataset_type,
):
    try:
        output_model_path = f"{drive_path}/models/{data_train_pth.split('/')[-1].split('.')[0]}_demonstrations_{has_demonstrations}_digit_{is_digit_base}"
        output_file_name = f"results_{data_train_pth.split('/')[-1].split('.')[0]}_demonstrations_{has_demonstrations}_digit_{is_digit_base}.json"
        # Create args namespace directly instead of using argparse
        args = Namespace(
            data_train_pth=data_train_pth,
            data_dev_pth=data_dev_pth,
            data_test_pth=data_test_pth,
            num_splits=10,
            select_split_idx=2,
            is_digit_base=is_digit_base,
            has_demonstrations=has_demonstrations,
            model_name="google/flan-t5-base",
            seed=42,
            model_checkpoint="",
            task="test",
            evaluation_strategy="epoch",
            save_strategy="no",
            per_device_train_batch_size=10,
            per_device_eval_batch_size=10,
            lr=5e-7,
            warm_up_radio=0.1,
            gradient_accumulation_steps=1,
            num_train_epochs=30,
            output_model_path=output_model_path,
            weight_decay=0.01,
            output_file_name=output_file_name,
            output_dir=f"{drive_path}/save_res",
            is_text_base=is_text_base,
            has_dev=True,
            dataset_type=dataset_type,
        )
        micro_f1, macro_f1 = run(args)
        return micro_f1, macro_f1
    finally:
        # Ensure cleanup in any case
        if "args" in locals():
            del args  # Remove args variable

        # Manually delete any additional objects in run() (like model if instantiated)
        # Assuming run loads models, you can check their namespace or module content
        if torch.cuda.is_available():
            torch.cuda.empty_cache()  # Clear unused GPU memory
            torch.cuda.synchronize()  # Wait for all operations to finish

        gc.collect()  # Force garbage collection

        # Logging to confirm cleanup
        print("Memory cleanup completed after iteration.")

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
def automate_experiments():
    data_folder_pth = f"{drive_path}"
    training_files = [
        {
            "dataset_type": "comment",
            "train": f"{data_folder_pth}/comment_subsampled_train.json",
            "dev": f"{data_folder_pth}/comment_subsampled_dev_train.json",
            "test": f"{data_folder_pth}/Quantitative 101/QP/Numeracy600K_comment_test.json",
        },
        {
            "dataset_type": "headline",
            "train": f"{data_folder_pth}/headline_subsampled_train.json",
            "dev": f"{data_folder_pth}/headline_subsampled_dev.json",
            "test": f"{data_folder_pth}/Quantitative 101/QP/Numeracy600K_headline_test.json",
        },
    ]

    dataset_names = ["comment", "headline"]
    metrics = ["micro_f1", "macro_f1"]
    columns = pd.MultiIndex.from_product([dataset_names, metrics])

    # Initialize results DataFrame
    results = pd.DataFrame(
        index=["icl_org", "inst_org", "icl_digit", "inst_digit"], columns=columns
    )
    # Run all combinations and store both metrics
    for data_train_pth, data_dev_pth, data_test_pth, dataset_type in training_files:
        data_train_pth = f"{data_folder_pth}/{data_train_pth}"
        data_dev_pth = f"{data_folder_pth}/{data_dev_pth}"
        data_test_pth = f"{data_folder_pth}/{data_test_pth}"
        # Run all combinations and store both metrics
        for setting, (demo, digit, text) in {
            "icl_org": (True, False, False),
            "inst_org": (False, False, False),
            "icl_digit": (True, True, False),
            "inst_digit": (False, True, False),
        }.items():
            micro, macro = run_experiment(
                data_train_pth,
                data_dev_pth,
                data_test_pth,
                demo,
                digit,
                text,
                dataset_type,
            )
            results.loc[setting, ("micro_f1")] = micro
            results.loc[setting, ("macro_f1")] = macro

        # Save results
        results.to_csv(f"{drive_path}/save_res/qp_text_experiment_results.csv")
        print("\nFinal Results:")
        print(results)

In [8]:
automate_experiments()

Map:   0%|          | 0/162 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2024.11.9 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.


Model loaded to:  cuda


100%|██████████| 7/7 [00:20<00:00,  2.95s/it]


micro_f1: 56.7901
macro_f1: 56.7308
save predict res to: /content/drive/MyDrive/NumEval/save_res/llama/results_QQA_train_demonstrations_True_digit_False.json
Memory cleanup completed after iteration.


Map:   0%|          | 0/162 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded to:  cuda


100%|██████████| 7/7 [00:10<00:00,  1.53s/it]


micro_f1: 54.9383
macro_f1: 54.8953
save predict res to: /content/drive/MyDrive/NumEval/save_res/llama/results_QQA_train_demonstrations_False_digit_False.json
Memory cleanup completed after iteration.


Map:   0%|          | 0/162 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded to:  cuda


100%|██████████| 7/7 [00:15<00:00,  2.27s/it]


micro_f1: 54.321
macro_f1: 54.314
save predict res to: /content/drive/MyDrive/NumEval/save_res/llama/results_QQA_train_demonstrations_True_digit_True.json
Memory cleanup completed after iteration.


Map:   0%|          | 0/162 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded to:  cuda


100%|██████████| 7/7 [00:09<00:00,  1.38s/it]


micro_f1: 54.321
macro_f1: 54.1463
save predict res to: /content/drive/MyDrive/NumEval/save_res/llama/results_QQA_train_demonstrations_False_digit_True.json
Memory cleanup completed after iteration.

Final Results:
           micro_f1 macro_f1
icl_org     56.7901  56.7308
inst_org    54.9383  54.8953
icl_digit    54.321   54.314
inst_digit   54.321  54.1463
