**Install jiant and torch**


In [None]:
# Print the version of the `python` executable found on the runtime's shell PATH.
!python --version

In [None]:
# Clone the jiant repository into ./jiant (relative to the current working directory).
# NOTE(review): no branch, tag or commit is given, so the default branch at clone time is used.
!git clone https://github.com/nyu-mll/jiant.git

In [None]:
!pip install torch==1.9.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html

In [None]:
# Rewrite jiant's requirements file without any line that starts with "numpy",
# so the following install step does not touch numpy.
# NOTE(review): presumably this keeps the numpy version preinstalled on the runtime — confirm.
requirements_path = "jiant/requirements-no-torch.txt"

with open(requirements_path, "r") as req_file:
    kept_lines = [
        entry
        for entry in req_file.readlines()
        if not entry.strip("\n").startswith("numpy")
    ]

# The file is overwritten in place with the filtered lines, in their original order.
with open(requirements_path, "w") as req_file:
    req_file.writelines(kept_lines)

In [None]:
!pip install -r jiant/requirements-no-torch.txt

In [None]:
import sys
# Put the cloned repository first on the module search path so `import jiant` can resolve to it.
# NOTE(review): hard-coded absolute path; assumes the repository was cloned into /content — confirm.
sys.path.insert(0, "/content/jiant")
# NOTE(review): IntProgress is not used in the cells visible here; presumably imported to
# check that ipywidgets is available for progress bars — confirm.
from ipywidgets import IntProgress

In [None]:
import os

import jiant.utils.python.io as py_io
import jiant.proj.simple.runscript as simple_run
import jiant.scripts.download_data.runscript as downloader


**Add Google Drive and Google Sheets credentials**

In [None]:
from google.colab import drive
# Mount Google Drive at /gdrive; other cells read from and write to /gdrive/MyDrive.
drive.mount('/gdrive')

In [None]:
from google.colab import auth
# Authenticate the Colab user so that Google credentials are available below.
auth.authenticate_user()

import gspread
from google.auth import default
# Fetch the default Google credentials (the second return value is discarded)
# and build the gspread client used by the results cell.
# NOTE(review): the name `gc` is the same as the standard-library `gc` module; importing
# that module later in this notebook would replace the client.
creds, _ = default()
gc = gspread.authorize(creds)

#### Define the parameters

In [None]:
# --- Task and model ---
TASK_NAME = "boolq"
HF_PRETRAINED_MODEL_NAME = "microsoft/mdeberta-v3-base"
# Last path component of the Hugging Face model id (the part after the final "/").
MODEL_NAME = HF_PRETRAINED_MODEL_NAME.rsplit("/", 1)[-1]

# --- Run identity and locations ---
# RUN_NAME must be an integer string: the results cell uses int(RUN_NAME) + 1 as the spreadsheet row.
RUN_NAME = "14"
# Relative paths; the shell cells address the same tree as /content/content/exp/...
# NOTE(review): this presumes the working directory is /content — confirm.
EXP_DIR = "content/exp"
DATA_DIR = "content/exp/tasks"

# --- Training schedule ---
EPOCHS = 10
TRAIN_BATCH_SIZE = 16
# NOTE(review): EVAL_BATCH_SIZE is not referenced by any other cell visible here.
EVAL_BATCH_SIZE = 16
# Interval (in steps) reused for evaluation, saving and checkpointing in the run configuration.
EVAL_STEPS = 2000

# --- Optimiser ---
LEARNING_RATE = 1e-5
ADAM_EPSILON = 1e-8
MAX_GRAD_NORM = 1.2

**Download the task data and replace it with the Macedonian data**

In [None]:
# Use jiant's downloader (it makes the creation of config files easier).
# Downloads the data for TASK_NAME into DATA_DIR.
downloader.download_data([TASK_NAME], DATA_DIR)

In [None]:
# Run this cell the first time after connecting to a Colab runtime.
# It deletes the downloaded original (EN) BoolQ files; the Macedonian (MK) data is copied in by a separate cell.
# NOTE(review): the path is hard-coded rather than derived from DATA_DIR / TASK_NAME.
!rm -r /content/content/exp/tasks/data/boolq/*

In [None]:
# Run this when you start another training session (to free some disk space on Colab)
# !rm -r /content/content/exp/runs/*
# !rm -r /content/content/exp/run_configs/*

In [None]:
# Copy the Macedonian data from the mounted Google Drive folder into the BoolQ task data directory.
# NOTE(review): requires Google Drive to be mounted at /gdrive; paths are hard-coded.
!cp -r /gdrive/MyDrive/BoolQ/* /content/content/exp/tasks/data/boolq

**Set the training parameters**

In [None]:
# Build the configuration object for jiant's simple runner from the notebook parameters.
# NOTE(review): EVAL_BATCH_SIZE is defined in the parameters cell but is not passed here —
# confirm whether RunConfiguration accepts an evaluation batch size.
args = simple_run.RunConfiguration(
    # Run identity and locations
    run_name=RUN_NAME,
    exp_dir=EXP_DIR,
    data_dir=DATA_DIR,
    # Model and task
    hf_pretrained_model_name_or_path=HF_PRETRAINED_MODEL_NAME,
    tasks=TASK_NAME,
    # Optional, currently disabled:
    #   model_weights_path=MODEL_WEIGHTS_PATH,
    #   model_cache_path=MODEL_CACHE_PATH,
    # Optimisation hyperparameters
    train_batch_size=TRAIN_BATCH_SIZE,
    num_train_epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    adam_epsilon=ADAM_EPSILON,
    max_grad_norm=MAX_GRAD_NORM,
    # Evaluation, saving and checkpointing all use the same step interval
    eval_every_steps=EVAL_STEPS,
    save_every_steps=EVAL_STEPS,
    save_checkpoint_every_steps=EVAL_STEPS,
    # Saving flags
    do_save=True,
    do_save_last=True,
    do_save_best=True,
    keep_checkpoint_when_done=True,
    # Prediction outputs for the validation and test splits
    write_val_preds=True,
    write_test_preds=True,
)

**Run the model**

In [None]:
# Launch the jiant simple-API run with the configuration held in `args`.
# NOTE(review): presumably performs training, evaluation and saving as configured, so expect a long-running cell.
simple_run.run_simple(args)

**Write the results to Google Sheets**

In [None]:
# Worksheet indices in the 'QA_results' spreadsheet:
#   0 -> BoolQ                   4 -> Cross-lingual BoolQ
#   1 -> COPA                    5 -> Cross-lingual COPA
#   2 -> MultiRC                 6 -> Cross-lingual MultiRC
#   3 -> Multi-task
worksheet = gc.open('QA_results').get_worksheet(0)

# The target row is int(RUN_NAME) + 1, so RUN_NAME must change between runs;
# otherwise the row written by a previous run is overwritten.
target_row = int(RUN_NAME) + 1
row_cells = worksheet.range(f'A{target_row}:L{target_row}')

# One value per column A..L (12 in total): run parameters followed by four result numbers.
# NOTE(review): the four trailing numbers are hard-coded — presumably pasted by hand from the
# run's evaluation output; update them before writing. The original comment labelled this list
# "MultiRC data" although worksheet 0 is listed as BoolQ — confirm.
row_values = [
    RUN_NAME,
    HF_PRETRAINED_MODEL_NAME,
    TRAIN_BATCH_SIZE,
    EPOCHS,
    LEARNING_RATE,
    "AdamW",
    ADAM_EPSILON,
    MAX_GRAD_NORM,
    1.234834410204977,
    0.7623853211009174,
    0.7623853211009174,
    0.7623853211009174,
]

# Fill the fetched cells locally, then push them all to the sheet in one update call.
for cell, value in zip(row_cells, row_values):
    cell.value = value
worksheet.update_cells(row_cells)

**Copy the best models to Google Drive**

In [None]:
# Recursively copy everything under the experiment's runs directory to Google Drive.
# NOTE(review): this copies all contents of runs/, not only best-model files — confirm this is intended.
!cp /content/content/exp/runs/* -r /gdrive/MyDrive/BestBoolQ/