In [1]:
# Enable IPython's autoreload extension so that edits to external Python files
# are picked up automatically, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# If you are working in Colab, then consider mounting your assignment folder to your drive
from google.colab import drive
drive.mount('/content/drive')

# Direct to your assignment folder.
%cd /content/drive/MyDrive/OML---mini-project/src

In [6]:
import numpy as np
import torch
from torch import optim

from transformers import (
    TrainingArguments, 
    Trainer, 
)

from itertools import product

from sa import SATask
from nli import MNLITask
from ner import NERTask
from qa import SQuADTask

In [3]:
# Seed PyTorch's and NumPy's global random number generators with one shared,
# fixed value for reproducibility.
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

In [4]:
# Report whether a CUDA device is visible to PyTorch.
print(
  "Good to go!"
  if torch.cuda.is_available()
  else "Please connect to a GPU to run this notebook."
)

Please connect to a GPU to run this notebook.


In [12]:
def retrieve_model(fine_tunning_task, model_name="roberta-base"):
    """Instantiate the task wrapper that corresponds to a short task code.

    Args:
        fine_tunning_task: One of "sa" (SATask), "nli" (MNLITask),
            "ner" (NERTask) or "qa" (SQuADTask).
        model_name: Forwarded unchanged to the task class constructor.

    Returns:
        The newly constructed task object.

    Raises:
        ValueError: If ``fine_tunning_task`` is not one of the codes above.
    """
    # Guard clauses: return as soon as the code matches a known task.
    if fine_tunning_task == "sa":
        return SATask(model_name)
    if fine_tunning_task == "nli":
        return MNLITask(model_name)
    if fine_tunning_task == "ner":
        return NERTask(model_name)
    if fine_tunning_task == "qa":
        return SQuADTask(model_name)

    raise ValueError("Invalid task")

In [13]:
def trainer(task, scheduler):
  """Build a ``Trainer`` for ``task`` that uses AdamW and a custom LR scheduler.

  Args:
    task: Task wrapper. This function reads its ``model``, ``model_name``,
      ``fine_tunning_task``, ``encoded_dataset`` (indexed with "train" and
      "validation"), ``tokenizer``, ``data_collator`` and ``compute_metrics``
      attributes.
    scheduler: ``(name, factory)`` pair. ``name`` labels the output and
      TensorBoard directories; ``factory`` is called with the optimizer and
      must return the learning-rate scheduler to train with.

  Returns:
    A configured ``Trainer``; training is not started here.
  """
  scheduler_name, scheduler_func = scheduler

  # One identifier shared by the checkpoint and TensorBoard directories.
  run_name = "{}-finetuned-{}-lr_scheduler-{}".format(task.model_name, task.fine_tunning_task, scheduler_name)

  training_args = TrainingArguments(
    output_dir=run_name,
    learning_rate=2e-5,
    num_train_epochs=1,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    # weight_decay=0.01,
    evaluation_strategy="steps", # "no" to avoid evaluation
    eval_steps=250, 
    save_strategy="epoch", # "no" to avoid saving
    logging_strategy="steps",
    logging_steps=100,
    report_to="tensorboard",
    logging_dir="./tensorboard/{}".format(run_name),
  )

  # AdamW is the optimizer actually used for training. The SGD instance that
  # used to be created first was overwritten before use, so it is gone.
  optimizer = optim.AdamW(task.model.parameters(), lr=training_args.learning_rate)
  # Kept under its own name so the `scheduler` argument is not rebound.
  lr_scheduler = scheduler_func(optimizer)

  return Trainer(
      task.model,
      args=training_args,
      # Explicit (optimizer, scheduler) pair handed to the Trainer.
      optimizers=(optimizer, lr_scheduler),
      train_dataset=task.encoded_dataset["train"],
      eval_dataset=task.encoded_dataset["validation"],
      tokenizer=task.tokenizer,
      data_collator=task.data_collator,
      compute_metrics=task.compute_metrics
  )

In [10]:
# Experiment grid: one base checkpoint, every task code, every named scheduler.
model_name = "roberta-base"
fine_tunning_tasks = ["sa", "nli", "ner", "qa"]
learning_rate_schedulers = [
  (
    "ExponentialLR",
    lambda optimizer: optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95),
  ),
]

# Cartesian product of tasks and schedulers, in task-major order.
tasks_and_schedulers = [
  (task_code, named_scheduler)
  for task_code in fine_tunning_tasks
  for named_scheduler in learning_rate_schedulers
]

# Fine-tune one freshly built model per (task, scheduler) pair.
for fine_tunning_task, scheduler in tasks_and_schedulers:
  task = retrieve_model(fine_tunning_task, model_name)
  train = trainer(task, scheduler)
  train.train()
  # Disabled: explicit model export. The scheduler label lives in scheduler[0].
  # train.save_model("models/{}-finetuned-{}-lr_scheduler-{}".format(task.model_name, task.fine_tunning_task, scheduler[0]))
  