In [1]:
from google.colab import drive

# Mount Google Drive at /content/drive; later cells read the project code and
# datasets from, and write models/results to, paths under this mount point.
drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
import sys
import os

# Project root on Google Drive. Later cells import `llama_qnli` from here and
# build dataset, model and result paths relative to it.
drive_path = "/content/drive/MyDrive/NumEval"  # Update this path
# Make the project's modules importable. The membership check keeps the cell
# idempotent: re-running it no longer piles duplicate entries onto sys.path.
if drive_path not in sys.path:
    sys.path.append(drive_path)

In [3]:
# Pin gcsfs to 2024.9.0. Per the recorded output this replaces the preinstalled
# gcsfs/fsspec 2024.10.0 and pulls in fsspec 2024.6.1 as its dependency.
!pip install gcsfs==2024.9.0
# Install the project's Colab requirements from the file stored on Google Drive.
!pip install -r "/content/drive/MyDrive/NumEval/colab_requirements.txt"

Collecting gcsfs==2024.9.0
  Downloading gcsfs-2024.9.0-py2.py3-none-any.whl.metadata (1.6 kB)
Collecting fsspec==2024.6.1 (from gcsfs==2024.9.0)
  Downloading fsspec-2024.6.1-py3-none-any.whl.metadata (11 kB)
Reason for being yanked: requirements incorrect[0m[33m
[0mDownloading gcsfs-2024.9.0-py2.py3-none-any.whl (34 kB)
Downloading fsspec-2024.6.1-py3-none-any.whl (177 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m177.6/177.6 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fsspec, gcsfs
  Attempting uninstall: fsspec
    Found existing installation: fsspec 2024.10.0
    Uninstalling fsspec-2024.10.0:
      Successfully uninstalled fsspec-2024.10.0
  Attempting uninstall: gcsfs
    Found existing installation: gcsfs 2024.10.0
    Uninstalling gcsfs-2024.10.0:
      Successfully uninstalled gcsfs-2024.10.0
Successfully installed fsspec-2024.6.1 gcsfs-2024.9.0
Looking in indexes: https://pypi.org/simple, https://download.pytorch.

In [4]:
%%capture
# %%capture (must stay the first line of the cell) hides the installer output.
# Install the released unsloth package first, together with its dependencies.
!pip install unsloth
# Also get the latest nightly Unsloth!
# The released package is removed and reinstalled straight from GitHub;
# --no-deps leaves the already installed dependencies untouched and
# --no-cache-dir makes pip bypass its cache.
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [5]:
# Configure PyTorch's CUDA caching allocator to use expandable segments.
# Set here, before the next cell imports torch, so the setting is in place
# when the allocator is initialised.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

In [None]:
import torch
import gc
from llama_qnli import run
from argparse import Namespace


def run_experiment(data_train_pth, has_demonstrations, is_digit_base):
    """Run one training job for a single dataset/prompt configuration.

    Builds the argument namespace passed to ``llama_qnli.run`` (with
    ``task="train"``) and calls it. Whether or not the run succeeds, Python
    and GPU memory are released afterwards so the next experiment in the same
    kernel does not inherit this run's allocations.

    Args:
        data_train_pth: Path to the training dataset JSON file. The file name
            up to its first dot labels the saved model directory and the
            results file.
        has_demonstrations: Forwarded to ``run`` as ``has_demonstrations`` and
            embedded in the output names.
        is_digit_base: Forwarded to ``run`` as ``is_digit_base`` and embedded
            in the output names.

    Raises:
        Any exception raised by ``run``; the cleanup below still executes.
    """
    # Bound up front so the cleanup can drop it unconditionally, without
    # having to inspect locals().
    args = None
    try:
        # One label shared by the model directory and the results file.
        dataset_stem = data_train_pth.split("/")[-1].split(".")[0]
        run_tag = f"{dataset_stem}_demonstrations_{has_demonstrations}_digit_{is_digit_base}"
        output_model_path = f"{drive_path}/models/llama/{run_tag}"
        output_file_name = f"results_{run_tag}.json"

        # Create args namespace directly instead of using argparse
        args = Namespace(
            data_train_pth=data_train_pth,
            num_splits=10,
            select_split_idx=2,
            is_digit_base=is_digit_base,
            has_demonstrations=has_demonstrations,
            model_name="unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
            seed=42,
            model_checkpoint="",
            task="train",
            evaluation_strategy="no",
            save_strategy="no",
            per_device_train_batch_size=2,
            per_device_eval_batch_size=2,
            lr=3e-4,
            # NOTE(review): spelled "radio", not "ratio"; presumably this is
            # the attribute name llama_qnli.run reads, so it is left as is.
            warm_up_radio=0.1,
            gradient_accumulation_steps=2,
            num_train_epochs=30,
            output_model_path=output_model_path,
            weight_decay=0.01,
            output_file_name=output_file_name,
            # NOTE(review): drive_path already ends in "NumEval", so this
            # resolves to ".../NumEval/NumEval/save_res/llama" - confirm that
            # the doubled directory is intended.
            output_dir=f"{drive_path}/NumEval/save_res/llama",
            max_seq_length=2048,
            dtype=None,
            load_in_4bit=True,
            dataset_num_proc=2,
            f1_metric_pth=f"{drive_path}/f1.py",
            rank=8,
            lora_alpha=16,
        )
        run(args)
    finally:
        # Ensure cleanup in any case.
        # Drop our reference to the namespace before collecting.
        del args

        # Collect unreachable Python objects FIRST: the CUDA caching allocator
        # can only hand back blocks whose tensors have already been freed, so
        # emptying the cache before collecting leaves that memory reserved.
        gc.collect()

        if torch.cuda.is_available():
            torch.cuda.synchronize()  # Wait for all queued GPU work to finish
            torch.cuda.empty_cache()  # Return unused cached blocks to the driver

        # Logging to confirm cleanup
        print("Memory cleanup completed after iteration.")


def automate_experiments():
    """Run ``run_experiment`` for every dataset / flag combination.

    The sweep varies, from slowest to fastest changing: the dataset, the
    ``has_demonstrations`` flag, and the ``is_digit_base`` flag. Each
    configuration is announced on stdout right before it is started.
    """
    qnli_dir = f"{drive_path}/Quantitative 101/QNLI"

    # RedditNLI.json and RTE_Quant.json are not part of this sweep; add them
    # to the tuple below to include them.
    dataset_files = ("NewsNLI.json", "AWPNLI.json")

    # Both flags are swept over True and then False; shrink a tuple to a
    # single value to pin that flag.
    demonstrations_options = (True, False)
    digit_base_options = (True, False)

    # Materialise the full grid first, preserving the nested-loop order.
    sweep = [
        (f"{qnli_dir}/{file_name}", with_demos, digit_base)
        for file_name in dataset_files
        for with_demos in demonstrations_options
        for digit_base in digit_base_options
    ]

    for data_train_pth, has_demonstrations, is_digit_base in sweep:
        print(
            f"Running experiment with {data_train_pth}, has_demonstrations={has_demonstrations}, is_digit_base={is_digit_base}"
        )
        run_experiment(data_train_pth, has_demonstrations, is_digit_base)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [7]:
# Launch the full sweep: 2 datasets x 2 demonstration settings x 2 digit-base
# settings = 8 training runs, executed one after another in this kernel.
automate_experiments()

Running experiment with /content/drive/MyDrive/NumEval/Quantitative 101/QNLI/NewsNLI.json, has_demonstrations=True, is_digit_base=True


tokenizer_config.json:   0%|          | 0.00/54.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/1.03G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

Unsloth 2024.11.9 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.


Map (num_proc=2):   0%|          | 0/871 [00:00<?, ? examples/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 871 | Num Epochs = 30
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 2
\        /    Total batch size = 4 | Total steps = 6,540
 "-____-"     Number of trainable parameters = 5,636,096
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss
500,0.447
1000,0.3132
1500,0.0927
2000,0.0187
2500,0.0101
3000,0.0113
3500,0.0032
4000,0.0042
4500,0.0049
5000,-0.0002


Trainging end..
Memory cleanup completed after iteration.
Running experiment with /content/drive/MyDrive/NumEval/Quantitative 101/QNLI/NewsNLI.json, has_demonstrations=True, is_digit_base=False


Map:   0%|          | 0/871 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!




Map (num_proc=2):   0%|          | 0/871 [00:00<?, ? examples/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 871 | Num Epochs = 30
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 2
\        /    Total batch size = 4 | Total steps = 6,540
 "-____-"     Number of trainable parameters = 5,636,096


Step,Training Loss
500,0.4304
1000,0.1624
1500,0.0662
2000,0.0193
2500,0.0065
3000,0.0038
3500,0.0009
4000,-0.0002
4500,-0.0002
5000,-0.0002


Trainging end..
Memory cleanup completed after iteration.
Running experiment with /content/drive/MyDrive/NumEval/Quantitative 101/QNLI/NewsNLI.json, has_demonstrations=False, is_digit_base=True


Map:   0%|          | 0/871 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!




Map (num_proc=2):   0%|          | 0/871 [00:00<?, ? examples/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 871 | Num Epochs = 30
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 2
\        /    Total batch size = 4 | Total steps = 6,540
 "-____-"     Number of trainable parameters = 5,636,096


Step,Training Loss
500,0.3996
1000,0.1194
1500,0.0502
2000,0.0203
2500,0.0137
3000,0.0077
3500,0.008
4000,0.013
4500,0.0007
5000,-0.0002


Trainging end..
Memory cleanup completed after iteration.
Running experiment with /content/drive/MyDrive/NumEval/Quantitative 101/QNLI/NewsNLI.json, has_demonstrations=False, is_digit_base=False


Map:   0%|          | 0/871 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!




Map (num_proc=2):   0%|          | 0/871 [00:00<?, ? examples/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 871 | Num Epochs = 30
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 2
\        /    Total batch size = 4 | Total steps = 6,540
 "-____-"     Number of trainable parameters = 5,636,096


Step,Training Loss
500,0.4013
1000,0.1347


Step,Training Loss
500,0.4013
1000,0.1347
1500,0.0474
2000,0.0097
2500,0.005
3000,0.005
3500,0.0134
4000,0.0082
4500,0.0041
5000,0.0005


Trainging end..
Memory cleanup completed after iteration.
Running experiment with /content/drive/MyDrive/NumEval/Quantitative 101/QNLI/AWPNLI.json, has_demonstrations=True, is_digit_base=True


Map:   0%|          | 0/649 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!




Map (num_proc=2):   0%|          | 0/649 [00:00<?, ? examples/s]

Map:   0%|          | 0/649 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 649 | Num Epochs = 30
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 2
\        /    Total batch size = 4 | Total steps = 4,860
 "-____-"     Number of trainable parameters = 5,636,096


Step,Training Loss
500,0.3423
1000,0.1769
1500,0.1489
2000,0.1472
2500,0.1444
3000,0.147
3500,0.1415
4000,0.1363
4500,0.1172


Trainging end..
Memory cleanup completed after iteration.
Running experiment with /content/drive/MyDrive/NumEval/Quantitative 101/QNLI/AWPNLI.json, has_demonstrations=True, is_digit_base=False


Map:   0%|          | 0/649 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!




Map (num_proc=2):   0%|          | 0/649 [00:00<?, ? examples/s]

Map:   0%|          | 0/649 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 649 | Num Epochs = 30
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 2
\        /    Total batch size = 4 | Total steps = 4,860
 "-____-"     Number of trainable parameters = 5,636,096


Step,Training Loss
500,0.3381
1000,0.1759
1500,0.1485
2000,0.1606
2500,0.1432
3000,0.1652
3500,0.0972
4000,0.0584
4500,0.0149


Trainging end..
Memory cleanup completed after iteration.
Running experiment with /content/drive/MyDrive/NumEval/Quantitative 101/QNLI/AWPNLI.json, has_demonstrations=False, is_digit_base=True


Map:   0%|          | 0/649 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!




Map (num_proc=2):   0%|          | 0/649 [00:00<?, ? examples/s]

Map:   0%|          | 0/649 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 649 | Num Epochs = 30
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 2
\        /    Total batch size = 4 | Total steps = 4,860
 "-____-"     Number of trainable parameters = 5,636,096


Step,Training Loss
500,0.3344
1000,0.1769
1500,0.149
2000,0.1518
2500,0.1628
3000,0.1456
3500,0.1439
4000,0.1413
4500,0.1398


Trainging end..
Memory cleanup completed after iteration.
Running experiment with /content/drive/MyDrive/NumEval/Quantitative 101/QNLI/AWPNLI.json, has_demonstrations=False, is_digit_base=False


Map:   0%|          | 0/649 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!




Map (num_proc=2):   0%|          | 0/649 [00:00<?, ? examples/s]

Map:   0%|          | 0/649 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 649 | Num Epochs = 30
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 2
\        /    Total batch size = 4 | Total steps = 4,860
 "-____-"     Number of trainable parameters = 5,636,096


Step,Training Loss
500,0.3239
1000,0.1818
1500,0.145
2000,0.1914
2500,0.1132
3000,0.1099
3500,0.0376
4000,0.0084
4500,0.0004


Trainging end..
Memory cleanup completed after iteration.
