In [1]:
from google.colab import drive

# Mount Google Drive at /content/drive so the NumEval project folder under
# MyDrive (used by the following cells) is reachable from this runtime.
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import sys
import os

# Add your Google Drive path to Python path
drive_path = "/content/drive/MyDrive/NumEval"  # Update this path
# Guard the append so re-running this cell does not stack duplicate entries
# on sys.path.
if drive_path not in sys.path:
    sys.path.append(drive_path)

In [3]:
!pip install gcsfs==2024.9.0
!pip install -r "/content/drive/MyDrive/NumEval/colab_requirements.txt"

Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu118


In [None]:
import torch
import gc
from instruction_tuning_qp import run
from argparse import Namespace


def run_experiment(
    data_train_pth,
    data_dev_pth,
    data_test_pth,
    has_demonstrations,
    is_digit_base,
    dataset_type,
):
    """Run one experiment configuration through ``run`` and then free memory.

    Builds the ``argparse.Namespace`` handed to ``instruction_tuning_qp.run``
    with fixed hyperparameters (``google/flan-t5-base``, seed 42, 30 epochs,
    batch size 10) and the caller-supplied data paths and switches. Cleanup
    runs whether ``run`` returns or raises.

    Args:
        data_train_pth: Path to the training file. The file name up to its
            first "." is used to name the model directory and results file.
        data_dev_pth: Path to the dev file; forwarded to ``run`` unchanged.
        data_test_pth: Path to the test file; forwarded to ``run`` unchanged.
        has_demonstrations: Forwarded to ``run``; also embedded in the output
            names.
        is_digit_base: Forwarded to ``run``; also embedded in the output names.
        dataset_type: Forwarded to ``run`` unchanged.

    Raises:
        Any exception raised by ``run`` propagates after cleanup has executed.
    """
    args = None
    try:
        # Tag shared by the model directory and the results file: training
        # file name up to its first ".", plus the two experiment switches.
        train_stem = data_train_pth.split("/")[-1].split(".")[0]
        run_tag = (
            f"{train_stem}_demonstrations_{has_demonstrations}"
            f"_digit_{is_digit_base}"
        )

        # Update paths for Colab
        output_model_path = f"/content/drive/MyDrive/NumEval/models/{run_tag}"
        output_file_name = f"results_{run_tag}.json"

        # Create args namespace
        args = Namespace(
            data_train_pth=data_train_pth,
            data_dev_pth=data_dev_pth,
            data_test_pth=data_test_pth,
            num_splits=10,
            select_split_idx=2,
            is_digit_base=is_digit_base,
            has_demonstrations=has_demonstrations,
            model_name="google/flan-t5-base",
            seed=42,
            model_checkpoint="",
            task="train",
            evaluation_strategy="epoch",
            save_strategy="no",
            per_device_train_batch_size=10,
            per_device_eval_batch_size=10,
            # NOTE(review): very small learning rate -- confirm it is intended.
            lr=5e-7,
            # Spelling kept as-is: presumably run() reads this exact attribute
            # name -- verify before renaming.
            warm_up_radio=0.1,
            gradient_accumulation_steps=1,
            num_train_epochs=30,
            output_model_path=output_model_path,
            weight_decay=0.01,
            output_file_name=output_file_name,
            output_dir="/content/drive/MyDrive/NumEval/save_res",
            has_dev=True,
            dataset_type=dataset_type,
        )

        run(args)  # Execute the model training/testing pipeline

    finally:
        # Drop our reference to the namespace before collecting.
        args = None

        # Collect garbage FIRST: objects kept alive only by reference cycles
        # still hold their GPU allocations until the collector frees them, so
        # emptying the cache before this point cannot return that memory.
        gc.collect()

        if torch.cuda.is_available():
            torch.cuda.synchronize()  # Wait for all operations to finish
            torch.cuda.empty_cache()  # Release now-unused cached GPU memory

        # Logging to confirm cleanup
        print("Memory cleanup completed after iteration.")


def automate_experiments():
    """Run every dataset / demonstrations / digit-base combination in turn.

    Reads the module-level ``drive_path`` to locate the Numeracy600K files and
    calls ``run_experiment`` once per combination, announcing each run first.
    Datasets are the outer loop, then the demonstrations switch, then the
    digit-base switch.
    """
    qp_dir = f"{drive_path}/Quantitative 101/QP"

    def _dataset_entry(kind):
        # One dataset description: its type plus the three split file paths.
        stem = f"{qp_dir}/Numeracy600K_{kind}"
        return {
            "dataset_type": kind,
            "train": f"{stem}_train.json",
            "dev": f"{stem}_dev.json",
            "test": f"{stem}_test.json",
        }

    corpora = [_dataset_entry(kind) for kind in ("comment", "headline")]

    # Both switches are swept over True then False; trim either list to run a
    # smaller sweep.
    demo_choices = [True, False]
    digit_choices = [True, False]

    grid = [
        (corpus, use_demos, digit_base)
        for corpus in corpora
        for use_demos in demo_choices
        for digit_base in digit_choices
    ]

    for corpus, use_demos, digit_base in grid:
        train_file = corpus["train"]
        print(
            f"Running experiment with {train_file}, has_demonstrations={use_demos}, is_digit_base={digit_base}"
        )
        run_experiment(
            train_file,
            corpus["dev"],
            corpus["test"],
            use_demos,
            digit_base,
            corpus["dataset_type"],
        )

In [None]:
# Launch the full sweep: one run_experiment call per dataset/option combination.
automate_experiments()

In [None]:
# Colab helper module for managing the current runtime.
from google.colab import runtime

# Disconnect and delete the runtime. Intended to run after the experiment
# sweep has finished; presumably anything not already written to Drive is
# lost at this point -- verify before relying on local files.
runtime.unassign()