In [2]:
from google.colab import drive

# Mount Google Drive at /content/drive so the project folder stored there is reachable from this runtime.
drive.mount("/content/drive")

Mounted at /content/drive


In [3]:
import sys
import os

# Add your Google Drive path to Python path
drive_path = "/content/drive/MyDrive/NumEval"  # Update this path
# Only append when missing, so re-running this cell does not pile up duplicate sys.path entries.
if drive_path not in sys.path:
    sys.path.append(drive_path)

In [4]:
!pip install gcsfs==2024.9.0
!pip install -r "/content/drive/MyDrive/NumEval/colab_requirements.txt"

Collecting gcsfs==2024.9.0
  Downloading gcsfs-2024.9.0-py2.py3-none-any.whl.metadata (1.6 kB)
Collecting fsspec==2024.6.1 (from gcsfs==2024.9.0)
  Downloading fsspec-2024.6.1-py3-none-any.whl.metadata (11 kB)
Reason for being yanked: requirements incorrect[0m[33m
[0mDownloading gcsfs-2024.9.0-py2.py3-none-any.whl (34 kB)
Downloading fsspec-2024.6.1-py3-none-any.whl (177 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m177.6/177.6 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fsspec, gcsfs
  Attempting uninstall: fsspec
    Found existing installation: fsspec 2024.10.0
    Uninstalling fsspec-2024.10.0:
      Successfully uninstalled fsspec-2024.10.0
  Attempting uninstall: gcsfs
    Found existing installation: gcsfs 2024.10.0
    Uninstalling gcsfs-2024.10.0:
      Successfully uninstalled gcsfs-2024.10.0
Successfully installed fsspec-2024.6.1 gcsfs-2024.9.0
Looking in indexes: https://pypi.org/simple, https://download.pytorch.

In [5]:
%%capture
# The %%capture cell magic above hides everything this cell prints (the pip logs).
!pip install unsloth
# Also get the latest nightly Unsloth!
# NOTE(review): the command below swaps the PyPI build for an unpinned install from the
# GitHub repository, skipping dependency resolution (--no-deps); presumably the first
# install is kept only to pull in the dependencies. Results may therefore change between
# runs of this notebook -- consider pinning a commit or tag.
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [6]:
# Configure PyTorch's CUDA allocator through its environment variable; this cell runs
# ahead of the cell that imports torch.
os.environ.update({"PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True"})

In [None]:
import subprocess
import pandas as pd
from llama_qnli import run
from argparse import Namespace
import gc
import torch


def run_experiment(data_train_pth, has_demonstrations, is_digit_base):
    """Run one QNLI experiment through ``llama_qnli.run`` and return its F1 scores.

    Output locations are derived from the dataset file name (the part of the last
    path component before its first ".") and the two flags:

    * model path:   ``{drive_path}/models/llama/<tag>``
    * results file: ``results_<tag>.json`` (inside ``{drive_path}/save_res/llama``)

    where ``<tag>`` is ``<dataset>_demonstrations_<has_demonstrations>_digit_<is_digit_base>``.
    Every other setting passed to ``run`` is hard-coded in the ``Namespace`` below.

    Args:
        data_train_pth: Path to the dataset JSON file, forwarded as ``args.data_train_pth``.
        has_demonstrations: Forwarded as ``args.has_demonstrations``.
        is_digit_base: Forwarded as ``args.is_digit_base``.

    Returns:
        The ``(micro_f1, macro_f1)`` pair returned by ``run``.

    Raises:
        Whatever ``run`` raises; the memory cleanup below still executes in that case.
    """
    # Compute the shared naming tag once instead of repeating the split expression.
    dataset_name = data_train_pth.split("/")[-1].split(".")[0]
    run_tag = f"{dataset_name}_demonstrations_{has_demonstrations}_digit_{is_digit_base}"

    # Create args namespace directly instead of using argparse
    args = Namespace(
        data_train_pth=data_train_pth,
        num_splits=10,
        select_split_idx=2,
        is_digit_base=is_digit_base,
        has_demonstrations=has_demonstrations,
        model_name="unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
        seed=42,
        model_checkpoint="",
        task="test",
        evaluation_strategy="no",
        save_strategy="no",
        per_device_train_batch_size=2,
        per_device_eval_batch_size=2,
        lr=2e-4,
        warm_up_radio=0.1,  # spelling kept as-is: this is the attribute name handed to run()
        gradient_accumulation_steps=4,
        num_train_epochs=30,
        output_model_path=f"{drive_path}/models/llama/{run_tag}",
        weight_decay=0.01,
        output_file_name=f"results_{run_tag}.json",
        output_dir=f"{drive_path}/save_res/llama",
        max_seq_length=2048,
        dtype=None,
        load_in_4bit=True,
        dataset_num_proc=2,
        f1_metric_pth=f"{drive_path}/f1.py",
        rank=8,
        lora_alpha=16,
    )
    try:
        micro_f1, macro_f1 = run(args)
        return micro_f1, macro_f1
    finally:
        # Ensure cleanup in any case (success or exception).
        del args

        # Collect garbage *before* emptying the CUDA cache: empty_cache() can only
        # release cached blocks that are no longer occupied by live tensors, so
        # objects kept alive solely by reference cycles must be destroyed first.
        gc.collect()

        if torch.cuda.is_available():
            torch.cuda.synchronize()  # Wait for all pending GPU work to finish
            torch.cuda.empty_cache()  # Release unused cached GPU memory

        # Logging to confirm cleanup
        print("Memory cleanup completed after iteration.")

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [8]:
def automate_experiments():
    """Run every dataset x setting combination and tabulate the resulting F1 scores.

    For each of the four QNLI dataset files and each of the four settings, calls
    ``run_experiment`` and stores the returned micro/macro F1 in a DataFrame whose
    rows are the settings and whose columns are a (dataset, metric) MultiIndex.

    The table is written to ``{drive_path}/save_res/llama/qnli_experiment_results.csv``
    after every single run, so scores already obtained survive a crash or a runtime
    disconnect part-way through the sweep. The final table is also printed.
    """
    datasets = [
        f"{drive_path}/Quantitative 101/QNLI/AWPNLI.json",
        f"{drive_path}/Quantitative 101/QNLI/NewsNLI.json",
        f"{drive_path}/Quantitative 101/QNLI/RedditNLI.json",
        f"{drive_path}/Quantitative 101/QNLI/RTE_Quant.json",
    ]
    # Row label -> (has_demonstrations, is_digit_base) passed to run_experiment.
    settings = {
        "icl_org": (True, False),
        "inst_org": (False, False),
        "icl_digit": (True, True),
        "inst_digit": (False, True),
    }
    metrics = ["micro_f1", "macro_f1"]

    # Dataset name = file name up to its first ".", derived once and reused in the loop.
    dataset_names = [d.split("/")[-1].split(".")[0] for d in datasets]

    # Initialize results DataFrame with multi-level (dataset, metric) columns
    results = pd.DataFrame(
        index=list(settings),
        columns=pd.MultiIndex.from_product([dataset_names, metrics]),
    )

    results_dir = f"{drive_path}/save_res/llama"
    results_csv = f"{results_dir}/qnli_experiment_results.csv"
    # Make sure the target directory exists before the first checkpoint is written.
    os.makedirs(results_dir, exist_ok=True)

    for dataset_name, data_train_pth in zip(dataset_names, datasets):
        # Run all combinations and store both metrics
        for setting, (demo, digit) in settings.items():
            micro, macro = run_experiment(data_train_pth, demo, digit)
            results.loc[setting, (dataset_name, "micro_f1")] = micro
            results.loc[setting, (dataset_name, "macro_f1")] = macro

            # Checkpoint after each run; the last write holds the complete table.
            results.to_csv(results_csv)

    print("\nFinal Results:")
    print(results)

In [9]:
# Launch the full sweep: every dataset/setting combination is run and the summary CSV is written.
automate_experiments()

tokenizer_config.json:   0%|          | 0.00/54.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

Map:   0%|          | 0/73 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/1.03G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

Unsloth 2024.11.9 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.


micro_f1: 41.0959
macro_f1: 40.6953
save predict res to: /content/drive/MyDrive/NumEval/save_res/llama/results_AWPNLI_demonstrations_True_digit_False.json
Memory cleanup completed after iteration.


Map:   0%|          | 0/73 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
micro_f1: 49.3151
macro_f1: 48.699
save predict res to: /content/drive/MyDrive/NumEval/save_res/llama/results_AWPNLI_demonstrations_False_digit_False.json
Memory cleanup completed after iteration.


Map:   0%|          | 0/73 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
micro_f1: 38.3562
macro_f1: 38.3562
save predict res to: /content/drive/MyDrive/NumEval/save_res/llama/results_AWPNLI_demonstrations_True_digit_True.json
Memory cleanup completed after iteration.


Map:   0%|          | 0/73 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
micro_f1: 47.9452
macro_f1: 47.9354
save predict res to: /content/drive/MyDrive/NumEval/save_res/llama/results_AWPNLI_demonstrations_False_digit_True.json
Memory cleanup completed after iteration.


Map:   0%|          | 0/97 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
micro_f1: 76.2887
macro_f1: 76.0339
save predict res to: /content/drive/MyDrive/NumEval/save_res/llama/results_NewsNLI_demonstrations_True_digit_False.json
Memory cleanup completed after iteration.


Map:   0%|          | 0/97 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
micro_f1: 71.134
macro_f1: 70.4268
save predict res to: /content/drive/MyDrive/NumEval/save_res/llama/results_NewsNLI_demonstrations_False_digit_False.json
Memory cleanup completed after iteration.


Map:   0%|          | 0/97 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
micro_f1: 71.134
macro_f1: 70.6061
save predict res to: /content/drive/MyDrive/NumEval/save_res/llama/results_NewsNLI_demonstrations_True_digit_True.json
Memory cleanup completed after iteration.


Map:   0%|          | 0/97 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
micro_f1: 71.134
macro_f1: 70.6061
save predict res to: /content/drive/MyDrive/NumEval/save_res/llama/results_NewsNLI_demonstrations_False_digit_True.json
Memory cleanup completed after iteration.


Map:   0%|          | 0/25 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
micro_f1: 48.0
macro_f1: 22.2222
save predict res to: /content/drive/MyDrive/NumEval/save_res/llama/results_RedditNLI_demonstrations_True_digit_False.json
Memory cleanup completed after iteration.


Map:   0%|          | 0/25 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
micro_f1: 68.0
macro_f1: 45.2874
save predict res to: /content/drive/MyDrive/NumEval/save_res/llama/results_RedditNLI_demonstrations_False_digit_False.json
Memory cleanup completed after iteration.


Map:   0%|          | 0/25 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
micro_f1: 60.0
macro_f1: 39.6552
save predict res to: /content/drive/MyDrive/NumEval/save_res/llama/results_RedditNLI_demonstrations_True_digit_True.json
Memory cleanup completed after iteration.


Map:   0%|          | 0/25 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
micro_f1: 24.0
macro_f1: 17.004
save predict res to: /content/drive/MyDrive/NumEval/save_res/llama/results_RedditNLI_demonstrations_False_digit_True.json
Memory cleanup completed after iteration.


Map:   0%|          | 0/17 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
micro_f1: 58.8235
macro_f1: 56.4103
save predict res to: /content/drive/MyDrive/NumEval/save_res/llama/results_RTE_Quant_demonstrations_True_digit_False.json
Memory cleanup completed after iteration.


Map:   0%|          | 0/17 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
micro_f1: 47.0588
macro_f1: 43.956
save predict res to: /content/drive/MyDrive/NumEval/save_res/llama/results_RTE_Quant_demonstrations_False_digit_False.json
Memory cleanup completed after iteration.


Map:   0%|          | 0/17 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
micro_f1: 52.9412
macro_f1: 52.7778
save predict res to: /content/drive/MyDrive/NumEval/save_res/llama/results_RTE_Quant_demonstrations_True_digit_True.json
Memory cleanup completed after iteration.


Map:   0%|          | 0/17 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
micro_f1: 47.0588
macro_f1: 46.3158
save predict res to: /content/drive/MyDrive/NumEval/save_res/llama/results_RTE_Quant_demonstrations_False_digit_True.json
Memory cleanup completed after iteration.

Final Results:
             AWPNLI           NewsNLI          RedditNLI          RTE_Quant  \
           micro_f1 macro_f1 micro_f1 macro_f1  micro_f1 macro_f1  micro_f1   
icl_org     41.0959  40.6953  76.2887  76.0339      48.0  22.2222   58.8235   
inst_org    49.3151   48.699   71.134  70.4268      68.0  45.2874   47.0588   
icl_digit   38.3562  