### Check Environment

In [2]:
# Show the GPU attached to this runtime (model, driver/CUDA version, memory use).
!/opt/bin/nvidia-smi

Wed Apr  5 09:18:47 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P8     9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
# Report the version of the `python` executable on PATH; the training cells
# further down launch run_qa.py through this same `python` command.
!python -V

Python 3.9.16


### Import GitHub Project
https://github.com/huggingface/transformers

In [4]:
# Mount Google Drive at /content/drive (force_remount=True is passed).
# The git checkout and the training output directories used below all live
# under '/content/drive/My Drive'.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [5]:
%%capture

import os
from shutil import rmtree

# All git checkouts are kept in this Drive folder; the clone below is created
# relative to it.
os.chdir('/content/drive/My Drive/git')

# Location of the transformers checkout (the folder `git clone` creates).
GIT_PATH='/content/drive/My Drive/git/transformers'

# Set to True to throw away the existing checkout and clone it again.
FORCE_UPDATE=False
# Guard the delete: rmtree raises FileNotFoundError on a missing path, which
# would stop the cell before the clone when there is no checkout yet.
if FORCE_UPDATE and os.path.exists(GIT_PATH):
  rmtree(GIT_PATH)

# Clone the v4.27.0 tag only when no checkout is present.
if not os.path.exists(GIT_PATH):
  !git clone https://github.com/huggingface/transformers.git -b v4.27.0

### Install Dependencies

In [6]:
# Work from the transformers checkout so the relative paths used by the
# following cells (requirements file, example script) resolve.
os.chdir(GIT_PATH)

In [7]:
%%capture

# Install transformers at 4.27.0, the same version as the v4.27.0 tag that was
# cloned, then the packages listed for the question-answering example.
# The requirements path is relative, so the working directory must be the
# transformers checkout. %%capture suppresses pip's output for this cell.
!pip install transformers==4.27.0
!pip install -r examples/pytorch/question-answering/requirements.txt

### Training

In [8]:
# Make sure the working directory is the transformers checkout before any
# training cell runs: the script path and --output_dir values are relative.
os.chdir(GIT_PATH)

### Baseline BERT
distilbert-base-uncased


In [None]:
# Fine-tune and evaluate distilbert-base-uncased on squad_v2 with the
# question-answering example script. Output goes to DISTILBERT_OUTPUT_DIR,
# relative to the current working directory; --overwrite_output_dir is passed,
# and --save_steps is set to 3000.
# The command must end on its last argument: no trailing line continuation.
!python examples/pytorch/question-answering/run_qa.py \
            --model_name_or_path distilbert-base-uncased \
            --dataset_name squad_v2 \
            --do_train \
            --do_eval \
            --version_2_with_negative \
            --learning_rate 3e-5 \
            --num_train_epochs 2 \
            --max_seq_length 384 \
            --doc_stride 128 \
            --per_device_eval_batch_size 16 \
            --per_device_train_batch_size 16 \
            --output_dir DISTILBERT_OUTPUT_DIR \
            --overwrite_output_dir \
            --save_steps 3000

04/03/2023 11:44:03 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=True,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=None,
evaluation_strategy=no,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_config={'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
greater_is_better=None,
group_by_length=False,
half_precision_backend=auto,
hub_model_id=None,
hub_priv

### Improved Type 1 ALBERT
albert-base-v2

In [None]:
# Absolute location of the directory the training cell refers to by the
# relative name ALBERT_OUTPUT_DIR.
OUTPUT_PATH = '/content/drive/My Drive/git/transformers/ALBERT_OUTPUT_DIR'

# 'TRAIN'  -> delete OUTPUT_PATH and start a new run
# 'RESUME' -> leave OUTPUT_PATH in place
TRAIN_OR_RESUME = 'TRAIN' # change to RESUME if you want to resume from checkpoint
if TRAIN_OR_RESUME == 'TRAIN':
  # Delete only when the directory is there: rmtree raises FileNotFoundError
  # on a missing path, which would abort the very first training run.
  if os.path.exists(OUTPUT_PATH):
    rmtree(OUTPUT_PATH)

In [None]:
# Fine-tune and evaluate albert-base-v2 on squad_v2, writing to ALBERT_OUTPUT_DIR
# (relative to the current working directory).
# TRAIN_OR_RESUME is read from the setup cell directly above. The SpanBERT
# section assigns the same variable name, so re-run the ALBERT setup cell
# before this one if that section was executed in between.
# TRAIN branch: passes --overwrite_output_dir.
if TRAIN_OR_RESUME == 'TRAIN':
  !python examples/pytorch/question-answering/run_qa.py \
              --model_name_or_path albert-base-v2 \
              --dataset_name squad_v2 \
              --do_train \
              --do_eval \
              --version_2_with_negative \
              --learning_rate 3e-5 \
              --num_train_epochs 2 \
              --max_seq_length 384 \
              --doc_stride 128 \
              --per_device_eval_batch_size 8  \
              --per_device_train_batch_size 8   \
              --output_dir ALBERT_OUTPUT_DIR \
              --overwrite_output_dir \
              --save_strategy "steps" \
              --save_steps 3000
# RESUME branch: identical arguments except that --overwrite_output_dir is
# omitted, presumably so run_qa.py continues from the latest checkpoint found
# in ALBERT_OUTPUT_DIR (TODO confirm against the script).
elif TRAIN_OR_RESUME == 'RESUME':
  !python examples/pytorch/question-answering/run_qa.py \
              --model_name_or_path albert-base-v2 \
              --dataset_name squad_v2 \
              --do_train \
              --do_eval \
              --version_2_with_negative \
              --learning_rate 3e-5 \
              --num_train_epochs 2 \
              --max_seq_length 384 \
              --doc_stride 128 \
              --per_device_eval_batch_size 8  \
              --per_device_train_batch_size 8   \
              --output_dir ALBERT_OUTPUT_DIR \
              --save_strategy "steps" \
              --save_steps 3000

### Improved Type 1 SpanBERT
SpanBERT/spanbert-large-cased

In [1]:
# Absolute location of the directory the training cell refers to by the
# relative name SPANBERT_OUTPUT_DIR.
OUTPUT_PATH = '/content/drive/My Drive/git/transformers/SPANBERT_OUTPUT_DIR'

# Run mode: 'TRAIN' clears OUTPUT_PATH for a fresh run; 'RESUME' leaves it
# untouched so an interrupted run can continue from its checkpoint.
TRAIN_OR_RESUME = 'RESUME'
# NOTE(review): os and rmtree come from the clone cell earlier in the notebook;
# that cell must have been run before switching this to 'TRAIN'.
if TRAIN_OR_RESUME == 'TRAIN' and os.path.exists(OUTPUT_PATH):
  rmtree(OUTPUT_PATH)

In [None]:
# Fine-tune and evaluate SpanBERT/spanbert-large-cased on squad_v2, writing to
# SPANBERT_OUTPUT_DIR (relative to the current working directory).
# TRAIN_OR_RESUME is read from the setup cell directly above. The ALBERT
# section assigns the same variable name, so re-run the SpanBERT setup cell
# before this one if that section was executed in between.
# TRAIN branch: passes --overwrite_output_dir.
if TRAIN_OR_RESUME == 'TRAIN':
  !python examples/pytorch/question-answering/run_qa.py \
              --model_name_or_path SpanBERT/spanbert-large-cased \
              --dataset_name squad_v2 \
              --do_train \
              --do_eval \
              --version_2_with_negative \
              --learning_rate 3e-5 \
              --num_train_epochs 2 \
              --max_seq_length 384 \
              --doc_stride 128 \
              --per_device_eval_batch_size 8  \
              --per_device_train_batch_size 8   \
              --output_dir SPANBERT_OUTPUT_DIR \
              --overwrite_output_dir \
              --save_strategy "steps" \
              --save_steps 3000
# RESUME branch: identical arguments except that --overwrite_output_dir is
# omitted, presumably so run_qa.py continues from the latest checkpoint found
# in SPANBERT_OUTPUT_DIR (TODO confirm against the script).
elif TRAIN_OR_RESUME == 'RESUME':
  !python examples/pytorch/question-answering/run_qa.py \
              --model_name_or_path SpanBERT/spanbert-large-cased \
              --dataset_name squad_v2 \
              --do_train \
              --do_eval \
              --version_2_with_negative \
              --learning_rate 3e-5 \
              --num_train_epochs 2 \
              --max_seq_length 384 \
              --doc_stride 128 \
              --per_device_eval_batch_size 8  \
              --per_device_train_batch_size 8   \
              --output_dir SPANBERT_OUTPUT_DIR \
              --save_strategy "steps" \
              --save_steps 3000