**Install jiant and torch**

In [None]:
# Fetch the jiant sources into ./jiant; later cells read jiant/requirements-no-torch.txt
# and add /content/jiant to the import path.
!git clone https://github.com/nyu-mll/jiant.git

In [None]:
# Print the version of the `python` executable found on the shell PATH.
!python --version

In [None]:
!pip install torch==1.9.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html

In [None]:
# Rewrite jiant's requirements file in place, dropping every line that starts
# with "numpy", before the requirements are installed.
requirements_path = "jiant/requirements-no-torch.txt"

with open(requirements_path, "r") as src:
    kept_lines = [
        entry
        for entry in src.readlines()
        if not entry.strip("\n").startswith("numpy")
    ]

with open(requirements_path, "w") as dst:
    dst.writelines(kept_lines)

In [None]:
!pip install -r jiant/requirements-no-torch.txt

In [None]:
import sys
# Put /content/jiant first on the import path so the `jiant` package can be imported
# (assumes the repository was cloned to /content/jiant — TODO confirm the working directory).
sys.path.insert(0, "/content/jiant")
# NOTE(review): IntProgress is not referenced in the visible cells — presumably imported
# only to confirm that ipywidgets is available; verify before removing.
from ipywidgets import IntProgress

In [None]:
import jiant.proj.main.tokenize_and_cache as tokenize_and_cache
import jiant.proj.main.export_model as export_model
import jiant.proj.main.scripts.configurator as configurator
import jiant.proj.main.runscript as main_runscript
import jiant.shared.caching as caching
import jiant.utils.python.io as py_io
import jiant.utils.display as display
import jiant.scripts.download_data.runscript as downloader
import os

#### Add Google Drive and Google Sheets credentials

In [None]:
# Mount Google Drive at /gdrive; later cells copy task data from, and run outputs to,
# /gdrive/MyDrive.
from google.colab import drive
drive.mount('/gdrive')

In [None]:
# Authenticate the Colab user, then build a gspread client from the default
# Google credentials.
from google.colab import auth
auth.authenticate_user()

import gspread
from google.auth import default
creds, _ = default()
# `gc` is the gspread client used later to open the results spreadsheet.
gc = gspread.authorize(creds)


#### Define the parameters

In [None]:
# --- Tasks and data location ---
TASK_NAMES = [
    "copa",
    "boolq",
    "multirc",
]
DATA_DIR = "content/tasks"  # relative path handed to the jiant data downloader

# --- Pretrained model ---
MODEL_NAME = "bert-base-multilingual-cased"

# --- Batching and schedule ---
TRAIN_BATCH_SIZE = 16
EVAL_BATCH_SIZE = 16
EVAL_STEPS = 3000  # used as both the evaluation and the save interval
EPOCHS = 10

# --- Optimisation ---
LEARNING_RATE = 1e-5
ADAM_EPSILON = 1e-8  # written to the results sheet
MAX_GRAD_NORM = 1.2  # written to the results sheet

# Numeric run id: names the output directory under ./runs and, minus 2,
# selects the row of the results sheet that gets written.
RUN_NAME = 7

In [None]:
# Run this when you start another training session (to free some memory from colab)
# !rm -r "/content/cache"
# !rm -r "/content/models"
# !rm -r "/content/runs"

#### Download the model and copy the Macedonian data

In [None]:
# Export the pretrained model named by MODEL_NAME into ./models/<MODEL_NAME>;
# the run configuration later reads model/model.p and model/config.json from there.
model_export_dir = f"./models/{MODEL_NAME}"
export_model.export_model(
    hf_pretrained_model_name_or_path=MODEL_NAME,
    output_base_path=model_export_dir,
)

In [None]:
# Download the tasks in TASK_NAMES into DATA_DIR; later cells expect the data under
# <DATA_DIR>/data/<task> and the task configs under <DATA_DIR>/configs.
downloader.download_data(TASK_NAMES, DATA_DIR)

In [None]:
# Empty the downloaded data folder of each task (the folders themselves are kept)
# so the next cell can fill them with the copies stored on Google Drive.
!rm -r /content/content/tasks/data/multirc/*
!rm -r /content/content/tasks/data/copa/*
!rm -r /content/content/tasks/data/boolq/*

In [None]:
# Copy the task data stored on Google Drive into the matching task data folders.
# Requires Drive to be mounted at /gdrive.
!cp -r /gdrive/MyDrive/COPA/* /content/content/tasks/data/copa
!cp -r /gdrive/MyDrive/MultiRC/* /content/content/tasks/data/multirc
!cp -r /gdrive/MyDrive/BoolQ/* /content/content/tasks/data/boolq

In [None]:
# !cp -r /content/tasks /content/content/tasks

#### Tokenize and cache the data

In [None]:
# Tokenize the train/val/test phases of every task and cache the result
# under ./cache/<task>.
for task_name in TASK_NAMES:
    tokenization_config = tokenize_and_cache.RunConfiguration(
        task_config_path=f"./content/tasks/configs/{task_name}_config.json",
        hf_pretrained_model_name_or_path=MODEL_NAME,
        output_dir=f"./cache/{task_name}",
        phases=["train", "val", "test"],
        # 200 for BERT-based models; 160 was the alternative setting.
        max_seq_length=200,
    )
    tokenize_and_cache.main(tokenization_config)


In [None]:
# Sanity check: print the tokens of the first cached training example of each task.
# The COPA row is read through `tokens_list`, the BoolQ and MultiRC rows through `tokens`.
row1 = caching.ChunkedFilesDataCache("./cache/copa/train").load_chunk(0)[0]["data_row"]
# Fixed: this previously printed `row.tokens_list`, but no variable `row` is defined.
print(row1.tokens_list)
row2 = caching.ChunkedFilesDataCache("./cache/boolq/train").load_chunk(0)[0]["data_row"]
print(row2.tokens)
row3 = caching.ChunkedFilesDataCache("./cache/multirc/train").load_chunk(0)[0]["data_row"]
print(row3.tokens)

In [None]:
# Copy the task config files to ./tasks/configs, the directory the run
# configurator reads them from.
!mkdir -p tasks/configs && cp -r /content/content/tasks/configs/* tasks/configs/

#### Set the parameters

In [None]:
# Build the multi-task run configuration: every task in TASK_NAMES is used for
# both training and validation.
multitask_configurator = configurator.SimpleAPIMultiTaskConfigurator(
    task_config_base_path="./tasks/configs",
    task_cache_base_path="./cache",
    train_task_name_list=TASK_NAMES,
    val_task_name_list=TASK_NAMES,
    train_batch_size=TRAIN_BATCH_SIZE,
    eval_batch_size=EVAL_BATCH_SIZE,
    epochs=EPOCHS,
)
jiant_run_config = multitask_configurator.create_config()

# Persist the configuration where the run script expects it, then display it.
run_config_dir = "./run_configs/"
os.makedirs(run_config_dir, exist_ok=True)
py_io.write_json(jiant_run_config, "./run_configs/jiant_run_config.json")
display.show_json(jiant_run_config)

In [None]:
# Arguments for the training/evaluation run. Outputs go to ./runs/<RUN_NAME>;
# the model weights and config come from the export under ./models/<MODEL_NAME>.
run_args = main_runscript.RunConfiguration(
    jiant_task_container_config_path="./run_configs/jiant_run_config.json",
    output_dir=f"./runs/{RUN_NAME}",
    hf_pretrained_model_name_or_path=MODEL_NAME,
    model_path=f"./models/{MODEL_NAME}/model/model.p",
    model_config_path=f"./models/{MODEL_NAME}/model/config.json",
    learning_rate=LEARNING_RATE,
    # Pass the optimiser settings explicitly: they are written to the results
    # sheet, so the run must actually use them instead of the library defaults.
    adam_epsilon=ADAM_EPSILON,
    max_grad_norm=MAX_GRAD_NORM,
    eval_every_steps=EVAL_STEPS,
    do_train=True,
    do_val=True,
    do_save=True,
    do_save_last=True,
    do_save_best=True,
    keep_checkpoint_when_done=True,
    write_val_preds=True,
    write_test_preds=True,
    # Save on the same schedule as evaluation.
    save_every_steps=EVAL_STEPS,
    force_overwrite=True,
)

#### Run the model

In [None]:
# Start the run described by run_args (training and validation are both enabled there).
main_runscript.run_loop(run_args)

**Write the results to Google Sheets**

In [None]:
# Worksheets of the 'QA_results' spreadsheet (zero-based index):
#   0: BoolQ                  1: COPA                   2: MultiRC
#   3: Multi-task             4: Cross-lingual BoolQ    5: Cross-lingual COPA
#   6: Cross-lingual MultiRC
worksheet = gc.open('QA_results').get_worksheet(3)

# The target row is derived from the run id, so RUN_NAME must be advanced before
# logging a new run or the previous row is overwritten.
sheet_row = RUN_NAME - 2
cell_list = worksheet.range(f'A{sheet_row}:S{sheet_row}')

# Columns A-H: run index and hyperparameters.
hyperparameters = [
    sheet_row, MODEL_NAME, TRAIN_BATCH_SIZE, EPOCHS, LEARNING_RATE, "AdamW",
    ADAM_EPSILON, MAX_GRAD_NORM,
]
# Columns I-S: hard-coded multi-task metric values for this run.
metrics = [
    0.42109550504710164,
    0.6631170180432663, 0.6217125382262997, 0.6217125382262997,
    0.6928955133144672, 0.64, 0.64,
    0.6830855436844401, 0.0015739769150052466, 0.0031479538300104933, 0.0,
]
values = hyperparameters + metrics

# Fill the row cell by cell, then push all changes in one update.
for position, target_cell in enumerate(cell_list):
    target_cell.value = values[position]
worksheet.update_cells(cell_list)

#### Copy the best models to Google Drive

In [None]:
!cp /content/runs/* -r /gdrive/MyDrive/BestMultiTask/