# Welcome to `jiant`
This notebook contains an example of fine-tuning a `bert-base-uncased` model on the MultiRC task using the simple `jiant` API.

# Install dependencies

In [1]:
%%capture
# `%%capture` has to stay on the first line of the cell; it hides the output of the commands below.
# Clone the jiant repository into the current working directory (creates ./jiant).
!git clone https://github.com/nyu-mll/jiant.git
# This Colab notebook already has its CUDA-runtime compatible versions of torch and torchvision installed
# so install jiant's requirements from the file that leaves torch out.
!pip install -r jiant/requirements-no-torch.txt

In [None]:
# Mount Google Drive; the last cell of this notebook saves the converted model
# underneath this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


# Imports

In [2]:
import sys

# Put the cloned repository at the front of the module search path so that it is
# searched first by the `jiant` imports in the next cell.
JIANT_REPO_DIR = "/content/jiant"
sys.path.insert(0, JIANT_REPO_DIR)

In [3]:
import os

import jiant.utils.python.io as py_io
import jiant.proj.simple.runscript as simple_run
import jiant.scripts.download_data.runscript as downloader

# Define task and model

In [4]:
# Name of the task; it is passed to the data downloader and to the run configuration below.
# See https://github.com/nyu-mll/jiant/blob/master/guides/tasks/supported_tasks.md for supported tasks
TASK_NAME = "multirc"

# Hugging Face model identifier; it is passed to the run configuration as
# `hf_pretrained_model_name_or_path`.
# See https://huggingface.co/models for supported models
HF_PRETRAINED_MODEL_NAME = "bert-base-uncased"

# Create directories for task data and experiment

In [5]:
# Keep only the text after the last "/" of the model name so that RUN_NAME
# contains no path separator and can be used as a path component.
MODEL_NAME = HF_PRETRAINED_MODEL_NAME.rsplit("/", 1)[-1]
RUN_NAME = f"simple_{TASK_NAME}_{MODEL_NAME}"

# Experiment directory, and the task-data directory nested inside it.
EXP_DIR = "/content/exp"
DATA_DIR = "/content/exp/tasks"

# exist_ok=True keeps this cell safe to re-run once the directories exist.
for directory in (DATA_DIR, EXP_DIR):
    os.makedirs(directory, exist_ok=True)

# Download data (uses `nlp` or direct download depending on task)

In [6]:
# Download the data for TASK_NAME into DATA_DIR; the recorded output of this cell
# reports that task configs are generated as part of this step.
downloader.download_data([TASK_NAME], DATA_DIR)

Downloaded and generated configs for 'multirc' (1/1)


# Run simple `jiant` pipeline (train and evaluate on MultiRC)

In [None]:
# Hyperparameters of this run, named so they are easy to find and tune.
TRAIN_BATCH_SIZE = 16
NUM_TRAIN_EPOCHS = 1
SEED = 42

# Describe the run: where data and experiment outputs live, which pretrained
# model and task to use, and the hyperparameters above.
# NOTE(review): `do_save=True` presumably makes the run write model checkpoints;
# confirm which files it produces before relying on them in the cells below.
args = simple_run.RunConfiguration(
    run_name=RUN_NAME,
    exp_dir=EXP_DIR,
    data_dir=DATA_DIR,
    hf_pretrained_model_name_or_path=HF_PRETRAINED_MODEL_NAME,
    tasks=TASK_NAME,
    train_batch_size=TRAIN_BATCH_SIZE,
    num_train_epochs=NUM_TRAIN_EPOCHS,
    seed=SEED,
    do_save=True,
)

# Execute the simple jiant pipeline with that configuration.
simple_run.run_simple(args)

# Convert to Hugging Face Transformers (PyTorch) format

In [8]:
import torch

def remove_prefix(s, prefix):
    """Return ``s`` with the leading ``prefix`` removed.

    Args:
        s (str): String that is expected to begin with ``prefix``.
        prefix (str): Leading text to drop from ``s``.
    Returns:
        The part of ``s`` that follows ``prefix``.
    Raises:
        AssertionError: If ``s`` does not start with ``prefix``.
    """
    assert s.startswith(prefix)
    tail_start = len(prefix)
    return s[tail_start:]

def load_encoder_from_transformers_weights(
    encoder, weights_dict, return_remainder=False, encoder_prefix="bert."
):
    """Find encoder weights in weights dict, load them into encoder, optionally return the rest.

    Entries of ``weights_dict`` whose key starts with ``encoder_prefix`` are treated as encoder
    weights: the prefix is stripped from the key and the resulting dict is passed to
    ``encoder.load_state_dict(..., strict=False)``. All other entries are set aside as the
    remainder.

    TODO: clarify how we know the encoder weights will be prefixed by transformer name.

    Note: an earlier fallback branch for "deberta-v2"-style names was dropped. With the
    prefix ``"bert."`` it tested the same condition as the main branch and could never run.
    Pass the prefix actually used by the weight names via ``encoder_prefix`` instead.

    Args:
        encoder (PreTrainedModel): Transformer w/o heads (embedding layer + self-attention layer).
            Its ``load_state_dict`` receives keys with ``encoder_prefix`` already removed.
        weights_dict (Dict): model weights, keyed by weight name.
        return_remainder (bool): If True, return any leftover weights. Defaults to False,
            in which case the function returns None as it did before.
        encoder_prefix (str): Key prefix that marks encoder weights. Defaults to ``"bert."``,
            the value that was previously hard-coded.
    Returns:
        Dict containing any leftover weights if ``return_remainder`` is True, otherwise None.
    """
    import warnings

    load_weights_dict = {}
    remainder_weights_dict = {}
    prefix_length = len(encoder_prefix)
    for name, weight in weights_dict.items():
        if name.startswith(encoder_prefix):
            load_weights_dict[name[prefix_length:]] = weight
        else:
            remainder_weights_dict[name] = weight

    if not load_weights_dict:
        # strict=False would otherwise turn a wrong prefix into a silent no-op load.
        warnings.warn(
            f"No weights with prefix {encoder_prefix!r} were found; nothing will be loaded "
            "into the encoder."
        )
    encoder.load_state_dict(load_weights_dict, strict=False)

    if return_remainder:
        return remainder_weights_dict
    return None

In [None]:
from transformers import AutoModelForSequenceClassification

# Build the Hugging Face model that will receive the fine-tuned encoder weights.
# Use the notebook-level HF_PRETRAINED_MODEL_NAME rather than repeating the literal,
# so this model cannot drift from the one that was fine-tuned above.
model = AutoModelForSequenceClassification.from_pretrained(HF_PRETRAINED_MODEL_NAME)

### Change the path below to point to your saved `last_model.p` checkpoint

In [None]:
# Load the saved weights dict. map_location="cpu" lets a checkpoint written from GPU
# tensors be read on a runtime without CUDA; the values are copied into the model later.
# NOTE: torch.load unpickles the file, so only load checkpoints you produced or trust.
og_format = torch.load('last_model.p', map_location="cpu")

In [None]:
# The helper strips the encoder prefix from the weight names before loading, so the
# target must be the bare encoder (`model.base_model`). The classification wrapper's
# own parameter names still carry that prefix, so loading into `model` with
# strict=False would match nothing and silently leave the weights unchanged.
load_encoder_from_transformers_weights(model.base_model, og_format)

In [None]:
# Write the model with `save_pretrained` to a folder on the mounted Google Drive
# (requires the Drive mount cell near the top of the notebook to have been run).
CONVERTED_MODEL_DIR = '/content/drive/MyDrive/models/multirc/'
model.save_pretrained(CONVERTED_MODEL_DIR)