In [None]:
# Run the NVIDIA command-line tool to see which GPU (if any) this runtime has.
!nvidia-smi

In [None]:
# Fetch the dataset and script archives from the sid-unizar/LRC repository and
# unpack them into the current working directory.
#   curl -L  : follow HTTP redirects
#   curl -O  : save under the remote file name (datasets.zip / scripts.zip)
#   unzip -o : overwrite existing files without prompting, so the cell can be re-run
# NOTE(review): presumably these archives unpack to ./datasets/ and ./scripts/,
# which the experiment cell expects under /content/ -- confirm the archive layout.
!curl -L -O 'https://raw.github.com/sid-unizar/LRC/main/datasets/datasets.zip'
!unzip -o datasets.zip
!curl -L -O 'https://raw.github.com/sid-unizar/LRC/main/scripts/scripts.zip'
!unzip -o scripts.zip

In [None]:
# Install third-party packages into the runtime. None of them is imported by
# this notebook directly; presumably they are required by the downloaded
# training script -- confirm against its imports.
!pip install transformers
!pip install datasets
!pip install accelerate

In [None]:
import subprocess
from datetime import datetime

# Sweep driver: for every dataset split, selected model and template pair,
# launch the training/evaluation script as a child process and stream its
# output into the notebook.

# All the models used in the paper.
models = ["roberta-base", "roberta-large", "bert-large-uncased-whole-word-masking", "bert-base-uncased"]

# Subset of `models` that is actually run here: just roberta-base, because of
# time and GPU constraints. Add more entries from `models` to widen the sweep.
# (Previously the loop iterated over all of `models` while always passing
# "roberta-base" to the script, which repeated the identical run four times.)
models_to_run = ["roberta-base"]

# Training templates T1-T4. <W1>, <W2>, <SEP> and <LABEL> are placeholders that
# are passed through verbatim to the training script.
train_templates = [
    "' <W1> ' <SEP> ' <W2> '",  # T1
    " <W1> <SEP> <W2> ",  # T2
    "Today, I finally discovered the relation between <W1> and <W2>.",  # T3
    "Today, I finally discovered the relation between <W1> and <W2>: <W1> is the <LABEL> of <W2>.",  # T4
]

# Test templates, paired position-by-position with train_templates. T4 differs
# from its training counterpart: the "<LABEL>" clause is left out at test time.
test_templates = [
    "' <W1> ' <SEP> ' <W2> '",  # T1
    " <W1> <SEP> <W2> ",  # T2
    "Today, I finally discovered the relation between <W1> and <W2>.",  # T3
    "Today, I finally discovered the relation between <W1> and <W2>.",  # T4
]

# The two lists are consumed in lockstep; a length mismatch would otherwise
# silently drop templates (or fail mid-sweep), so reject it up front.
if len(train_templates) != len(test_templates):
    raise ValueError(
        "train_templates and test_templates must have the same length, got "
        f"{len(train_templates)} and {len(test_templates)}"
    )

# Sub-directories of dataset_base_path, each holding train/test/val TSV files.
datasets = ["hyperlex/lexical", "hyperlex/random"]

# Directory paths
script_path = "/content/scripts/gradedLE_train_evaluate.py"
dataset_base_path = "/content/datasets/"
output_dir = "/content/res/"

# Iterate over each dataset, model, and template pair
for dataset in datasets:
    train_file = f"{dataset_base_path}{dataset}/train.tsv"
    test_file = f"{dataset_base_path}{dataset}/test.tsv"
    val_file = f"{dataset_base_path}{dataset}/val.tsv"

    for model in models_to_run:
        for train_template, test_template in zip(train_templates, test_templates):
            # Construct the command as an argument list (no shell involved, so
            # the quotes and spaces inside the templates need no escaping).
            command = [
                "python", script_path,
                "--train_templates", train_template,
                "--test_templates", test_template,
                "--model", model,
                "--nepochs", "10",
                "--dir_output_results", output_dir,
                "--batch_size", "32",
                "--warm_up", "0.1",
                "--nrepetitions", "1",
                # NOTE(review): fixed to "hyperlex" for both splits; presumably
                # a dataset-family label for the script -- confirm.
                "--dataset", "hyperlex",
                "--date", datetime.now().strftime("%D-%H:%M:%S"),
                "--train_file", train_file,
                "--test_file", test_file,
                "--val_file", val_file,  # Omit or modify this line if there's no validation dataset
            ]

            # Execute the command, merging stderr into stdout and echoing the
            # output line by line (bufsize=1 = line-buffered in text mode).
            with subprocess.Popen(
                command,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                bufsize=1,
            ) as process:
                for line in process.stdout:
                    print(line, end='')

            # Leaving the `with` block waits for the child, so returncode is
            # set here. Report failures instead of ignoring them, but keep
            # going so one bad configuration does not abort the whole sweep.
            if process.returncode != 0:
                print(
                    f"WARNING: run failed with exit code {process.returncode} "
                    f"(dataset={dataset!r}, model={model!r}, "
                    f"train_template={train_template!r})"
                )