## **HYPERPARAMETERS**

In [1]:
# Training arguments reference:
# https://huggingface.co/docs/transformers/v4.17.0/en/main_classes/trainer#transformers.TrainingArguments
#
# Hyperparameters shared by the cells below.


base_model = 'bert-base-chinese'  # checkpoint name used for both the tokenizer and the model
batchsize = 6  # per-device batch size for training and evaluation
prompt_len = n_tokens = 12  # `prompt_len` goes into the model config; `n_tokens` is logged to W&B
lr = 5e-4  # learning rate
epochs = 20  # number of training epochs
scheduler_type = "linear"  # learning-rate scheduler type
wd = 0.005  # weight decay
warmup_ratio = 0.1  # warm-up ratio passed to TrainingArguments
myseed = 1126  # seed passed to TrainingArguments

nclass = 19  # passed to the model config as 'n_class'
numchoices = 2  # passed to the model config as 'numchoices' (candidates per example)
TESTSIZE = 0.2  # NOTE(review): not referenced in the visible cells; presumably the split ratio used when the dataset was built

## Loading dataset & Imports

In [2]:
!pip -q install transformers
!pip -q install datasets

In [25]:
# Server paths (absolute locations on the training machine; change `maindir` when running elsewhere).
# Expected dataset location: {maindir}/CWNdata/Sean_PT2_encoded_dataset
maindir = "/mnt/md0/data/avo727/PromptTuning"
# NOTE(review): "CWN_data" here vs "CWNdata" in `datasetdir` below — confirm both directories are intended;
# `datadir` is not referenced in the visible cells.
datadir = f"{maindir}/CWN_data"
preddir = f"{maindir}/model_predictions"  # inference logits/predictions/labels are saved here
###########################
datasetdir= f"{maindir}/CWNdata/Sean_PT2_encoded_dataset"  # pre-encoded dataset read with load_from_disk

In [26]:
import pickle
from datasets import Dataset, load_metric
import datasets
from transformers import AutoModelForMultipleChoice, BertTokenizerFast
import random
import torch
import numpy as np
# Load the already-tokenized dataset (train/test splits) that was saved to disk.
encoded_dataset = datasets.load_from_disk(datasetdir)

In [5]:
# Show the splits, columns and row counts of the loaded dataset.
encoded_dataset

DatasetDict({
    train: Dataset({
        features: ['type_class', 'eng word', 'word', 'pos', 'instance', 'src', 'dot_type_2', 'dot_type_1', 'label', 'zh_type_class', 'zh_dot_type', 'zh_dot_type_2', 'zh_dot_gloss', 'is_2choice', 'is_one_ans', 'class_selector', '__index_level_0__', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 455
    })
    test: Dataset({
        features: ['type_class', 'eng word', 'word', 'pos', 'instance', 'src', 'dot_type_2', 'dot_type_1', 'label', 'zh_type_class', 'zh_dot_type', 'zh_dot_type_2', 'zh_dot_gloss', 'is_2choice', 'is_one_ans', 'class_selector', '__index_level_0__', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 114
    })
})

## Seeding (skipped, seed in 🤗 trainer)

In [6]:
# Manual seeding is intentionally disabled: the code below is an inert string literal kept for
# reference. The seed is supplied to the Trainer through TrainingArguments(seed=myseed).
'''
def seeding(myseed):
  torch.manual_seed(myseed)
  torch.cuda.manual_seed(myseed)
  torch.cuda.manual_seed_all(myseed)
  torch.backends.cudnn.benchmark = False
  torch.backends.cudnn.deterministic = True
  random.seed(myseed)
  np.random.seed(myseed)
'''

'\ndef seeding(myseed):\n  torch.manual_seed(myseed)\n  torch.cuda.manual_seed(myseed)\n  torch.cuda.manual_seed_all(myseed)\n  torch.backends.cudnn.benchmark = False\n  torch.backends.cudnn.deterministic = True\n  random.seed(myseed)\n  np.random.seed(myseed)\n'

## Data collator 

In [7]:
# Tokenizer for the base checkpoint; the collator uses it for padding and it is passed to the Trainer.
tokenizer = BertTokenizerFast.from_pretrained(base_model)

In [29]:
from dataclasses import dataclass
from typing import Any, Optional, Union


@dataclass
class DataCollatorForMultipleChoice:
    """
    Data collator that dynamically pads multiple-choice inputs.

    Each feature is a mapping whose ``input_ids`` / ``attention_mask`` /
    ``token_type_ids`` entries hold one sequence per choice, together with a
    scalar ``label`` (or ``labels``) entry and a ``class_selector`` entry.
    Any other keys are ignored.

    Attributes:
        tokenizer: Object exposing a Hugging Face style ``pad`` method. When left
            as ``None`` the notebook-level ``tokenizer`` global is used, so
            ``DataCollatorForMultipleChoice()`` keeps working without arguments.
        padding: Padding strategy forwarded to ``tokenizer.pad``. The default
            ``True`` pads to the longest sequence in the batch.
        trunc: Kept for backward compatibility; ``__call__`` does not use it.
        max_length: Forwarded to ``tokenizer.pad``.
        pad_to_multiple_of: Forwarded to ``tokenizer.pad``.
    """

    # Annotated so that @dataclass really turns these into constructor fields.
    tokenizer: Any = None
    padding: Union[bool, str] = True
    trunc: bool = True
    max_length: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None

    def __post_init__(self):
        # Resolve the default lazily so the class can be defined (and tested)
        # before the notebook-level tokenizer exists.
        if self.tokenizer is None:
            self.tokenizer = tokenizer  # noqa: F821 - notebook global

    def __call__(self, features):
        """
        Collate a list of feature mappings into a batch of tensors.

        Args:
            features: Non-empty list of mappings as described on the class.
                The mappings are only read, never modified.

        Returns:
            dict with the padded per-choice tensors reshaped to
            ``(batch_size, num_choices, -1)``, plus ``class_selector`` and
            ``labels`` (int64) tensors with one entry per example.
        """
        # Keys that carry one sequence per choice and therefore get padded.
        per_choice_keys = ("input_ids", "attention_mask", "token_type_ids")

        first = features[0]
        label_name = "label" if "label" in first else "labels"

        # Read instead of pop: the caller's feature dicts stay untouched.
        labels = [feature[label_name] for feature in features]
        seq_classes = [feature["class_selector"] for feature in features]

        batch_size = len(features)
        num_choices = len(first["input_ids"])

        # One flat entry per (example, choice) pair, example-major order.
        flattened_features = [
            {k: v[i] for k, v in feature.items() if k in per_choice_keys}
            for feature in features
            for i in range(num_choices)
        ]
        batch = self.tokenizer.pad(
            flattened_features,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors="pt",
        )

        # Un-flatten back to (batch_size, num_choices, seq_len).
        batch = {
            k: v.view(batch_size, num_choices, -1)
            for k, v in batch.items()
            if k in per_choice_keys
        }

        # Prompt selectors and gold labels: one value per example.
        batch["class_selector"] = torch.tensor(seq_classes)
        batch["labels"] = torch.tensor(labels, dtype=torch.int64)

        return batch

In [9]:
# Smoke test: run the collator once over the entire training split.
RANGE = len(encoded_dataset['train'])
# Shallow-copy each example so the collator receives its own dict per row.
features = [dict(encoded_dataset["train"][i]) for i in range(RANGE)]
batch = DataCollatorForMultipleChoice()(features)

In [10]:
# Disabled debugging loop (inert string literal): it would print the per-choice token counts
# and decode the collated `batch` for every training example.
'''
for i in range(RANGE):
    print([len(encoded_dataset["train"][i]['input_ids'][j]) for j in range(numchoices)])
    a_set_of_inputs = [tokenizer.decode(batch["input_ids"][i][j].tolist()) for j in range(numchoices)]
    # batch["input_ids"].shape: (455, 2, 300), (batch_size, numchoice, max_length)
    # print([len(x) for x in a_set_of_inputs]) # not 300 because of extra spaces, and [PAD] is considered 5 words instead of one token
    print('--------')
'''

'\nfor i in range(RANGE):\n    print([len(encoded_dataset["train"][i][\'input_ids\'][j]) for j in range(numchoices)])\n    a_set_of_inputs = [tokenizer.decode(batch["input_ids"][i][j].tolist()) for j in range(numchoices)]\n    # batch["input_ids"].shape: (455, 2, 300), (batch_size, numchoice, max_length)\n    # print([len(x) for x in a_set_of_inputs]) # not 300 because of extra spaces, and [PAD] is considered 5 words instead of one token\n    print(\'--------\')\n'

## model

In [11]:
import torch
import torch.nn
import torch.nn.functional as F
from torch.nn import CrossEntropyLoss
from transformers import BertModel, BertPreTrainedModel
from transformers.modeling_outputs import MultipleChoiceModelOutput

from PromptTuningBERT import BertPromptForMultipleChoice

In [12]:
# Reload imported modules automatically before running code, so edits to local modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## logging

In [13]:
!pip install wandb
import wandb
# SECURITY: an API key that had been pasted into a comment here was removed. Treat that key as
# leaked and rotate it. Supply credentials through the WANDB_API_KEY environment variable or the
# interactive login prompt instead of storing them in the notebook.
wandb.login()



Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mnana2929[0m (use `wandb login --relogin` to force relogin)


True

In [14]:
from datetime import datetime
# The run name embeds the launch time (month, day, hour, minute). It is also passed to
# TrainingArguments as the output directory, so checkpoints land under "{runname}/checkpoint-*".
# NOTE: re-running this cell produces a new `runname`/`timeprefix`, which the inference cell relies on.
now = datetime.now()
timeprefix = now.strftime("%m%d-%H%M")
runname = f'{timeprefix}_RPBert'
print('Name of the run:', runname)
# Start the W&B run and record the prompt length alongside it.
wandb.init(project="prompt_tuning_rp_v2", 
           name = runname,
           tags=["prompt-tuning", "regular-polysemy"],
           group="bert")
wandb.config.update({'n_tokens':n_tokens})
# https://docs.wandb.ai/guides/integrations/huggingface
# https://docs.wandb.ai/guides/integrations/huggingface

Name of the run: 0312-1300_RPBert


## Frozen BERT

In [15]:
# Extra settings consumed by BertPromptForMultipleChoice (defined in the PromptTuningBERT module,
# which is not part of this notebook).
config = {
    'n_tokens':prompt_len ,
    'n_class':nclass,
    'numchoices': numchoices,
    'train_bert': False  # presumably keeps the BERT backbone frozen (the log prints "train bert? False") — confirm in PromptTuningBERT
}
# NOTE(review): the dict is passed positionally to from_pretrained; how it is interpreted
# depends on the custom class.
model = BertPromptForMultipleChoice.from_pretrained(base_model, config)

Some weights of the model checkpoint at bert-base-chinese were not used when initializing BertPromptForMultipleChoice: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertPromptForMultipleChoice from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertPromptForMultipleChoice from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertPromptForMultipleChoice were not initialized from the model checkpoint at bert-ba

** total param is 175873
** train bert? False


In [16]:
# Choose the device at runtime instead of assuming a GPU is present; on a CUDA machine
# this is identical to model.to('cuda').
model.to('cuda' if torch.cuda.is_available() else 'cpu')

BertPromptForMultipleChoice(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(21128, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwi

In [17]:
from transformers import TrainingArguments, Trainer
# Training configuration; the values come from the hyperparameter cell at the top of the notebook.
args = TrainingArguments(
    runname,  # output directory: checkpoints are written under "{runname}/checkpoint-STEP"
    learning_rate=lr,
    per_device_train_batch_size=batchsize,
    per_device_eval_batch_size=batchsize,
    num_train_epochs=epochs,
    gradient_accumulation_steps=1, 
    weight_decay=wd,
    warmup_ratio = warmup_ratio,
    lr_scheduler_type=scheduler_type,
    # Log, evaluate and save once per epoch.
    logging_strategy="epoch",
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    report_to="wandb",
    # The best checkpoint is not reloaded automatically; the inference cell picks one by name.
    load_best_model_at_end = False, # True: use eval loss
    seed = myseed
)

In [18]:
def compute_metrics(eval_predictions):
    """
    Compute classification accuracy from a ``(predictions, label_ids)`` pair.

    The predicted choice for each row is the arg-max over axis 1 of
    ``predictions``. Returns ``{'accuracy': value}`` where ``value`` is a
    Python float obtained from a float32 mean of the per-row hits.
    """
    scores, gold = eval_predictions
    chosen = np.argmax(scores, axis=1)
    hits = np.equal(chosen, gold).astype(np.float32)
    accuracy = hits.mean().item()
    return {'accuracy': accuracy}

In [19]:
# Bundle the model, training arguments, both dataset splits, the collator and the
# metric function into a Trainer.
trainer = Trainer(
    model=model,
    args=args,
    data_collator=DataCollatorForMultipleChoice(),
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["test"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [20]:
# Frozen-BERT run: config['train_bert'] is False, so presumably only the prompt-related
# parameters are updated — confirm in PromptTuningBERT.
trainer.train()

The following columns in the training set  don't have a corresponding argument in `BertPromptForMultipleChoice.forward` and have been ignored: zh_dot_gloss, instance, zh_dot_type_2, eng word, dot_type_2, src, type_class, dot_type_1, is_one_ans, word, __index_level_0__, pos, zh_type_class, zh_dot_type, is_2choice.
***** Running training *****
  Num examples = 455
  Num Epochs = 20
  Instantaneous batch size per device = 6
  Total train batch size (w. parallel, distributed & accumulation) = 6
  Gradient Accumulation steps = 1
  Total optimization steps = 1520
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


Epoch,Training Loss,Validation Loss,Accuracy
1,0.7019,0.720685,0.438596
2,0.7025,0.69796,0.508772
3,0.6854,0.715204,0.54386
4,0.6691,0.830253,0.473684
5,0.6524,0.704666,0.587719
6,0.6415,0.694773,0.631579
7,0.5895,0.721976,0.614035
8,0.5561,0.812229,0.640351
9,0.5746,0.762866,0.631579
10,0.5279,0.794231,0.605263


The following columns in the evaluation set  don't have a corresponding argument in `BertPromptForMultipleChoice.forward` and have been ignored: zh_dot_gloss, instance, zh_dot_type_2, eng word, dot_type_2, src, type_class, dot_type_1, is_one_ans, word, __index_level_0__, pos, zh_type_class, zh_dot_type, is_2choice.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 6
Saving model checkpoint to 0312-1300_RPBert/checkpoint-76
Configuration saved in 0312-1300_RPBert/checkpoint-76/config.json
Model weights saved in 0312-1300_RPBert/checkpoint-76/pytorch_model.bin
tokenizer config file saved in 0312-1300_RPBert/checkpoint-76/tokenizer_config.json
Special tokens file saved in 0312-1300_RPBert/checkpoint-76/special_tokens_map.json
The following columns in the evaluation set  don't have a corresponding argument in `BertPromptForMultipleChoice.forward` and have been ignored: zh_dot_gloss, instance, zh_dot_type_2, eng word, dot_type_2, src, type_class, dot_type_1, is_one_ans, wor

Special tokens file saved in 0312-1300_RPBert/checkpoint-836/special_tokens_map.json
The following columns in the evaluation set  don't have a corresponding argument in `BertPromptForMultipleChoice.forward` and have been ignored: zh_dot_gloss, instance, zh_dot_type_2, eng word, dot_type_2, src, type_class, dot_type_1, is_one_ans, word, __index_level_0__, pos, zh_type_class, zh_dot_type, is_2choice.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 6
Saving model checkpoint to 0312-1300_RPBert/checkpoint-912
Configuration saved in 0312-1300_RPBert/checkpoint-912/config.json
Model weights saved in 0312-1300_RPBert/checkpoint-912/pytorch_model.bin
tokenizer config file saved in 0312-1300_RPBert/checkpoint-912/tokenizer_config.json
Special tokens file saved in 0312-1300_RPBert/checkpoint-912/special_tokens_map.json
The following columns in the evaluation set  don't have a corresponding argument in `BertPromptForMultipleChoice.forward` and have been ignored: zh_dot_gloss, i

TrainOutput(global_step=1520, training_loss=0.5190150612278989, metrics={'train_runtime': 61.4679, 'train_samples_per_second': 148.045, 'train_steps_per_second': 24.728, 'total_flos': 615881950804344.0, 'train_loss': 0.5190150612278989, 'epoch': 20.0})

## Inference

In [31]:
print("Evaluating...")
import os
import numpy as np

# Checkpoint to evaluate, chosen by hand from the per-epoch checkpoints.
# `runname` and `timeprefix` come from the logging cell and are derived from the time that
# cell was run, so in a fresh session they must be set to the original run's values.
# timeprefix = 0312-1300
best_ckpt = 'checkpoint-1064'
best_ckpt_path = f'{runname}/{best_ckpt}'

# Fail early with a clear message instead of an opaque model-loading error.
if not os.path.isdir(best_ckpt_path):
    raise FileNotFoundError(
        f"Checkpoint directory not found: {best_ckpt_path!r}. "
        "`runname` is derived from the current time; set it to the name of the run "
        "that produced the checkpoint before evaluating."
    )
model = BertPromptForMultipleChoice.from_pretrained(best_ckpt_path, config)

# A Trainer without TrainingArguments is enough for prediction.
trainer = Trainer(
    model,
    tokenizer=tokenizer,
    data_collator=DataCollatorForMultipleChoice(),
    compute_metrics=compute_metrics,)

PredOutput = trainer.predict(
    test_dataset = encoded_dataset["test"])

labels = PredOutput.label_ids
print('acc:', compute_metrics((PredOutput.predictions, labels)))

# np.save does not create missing directories, so make sure the target exists.
os.makedirs(preddir, exist_ok=True)

logits_path = f"{preddir}/{timeprefix}_{best_ckpt}_logits"
preds_path = f"{preddir}/{timeprefix}_{best_ckpt}_predictions"
labels_path = f"{preddir}/{timeprefix}_{best_ckpt}_labels" # save because the dataset is shuffled
preds = np.argmax(PredOutput.predictions, axis=-1)

np.save(logits_path, PredOutput.predictions)
np.save(preds_path, preds)
np.save(labels_path,labels)

loading configuration file 0312-1300_RPBert/checkpoint-1064/config.json
Model config BertConfig {
  "_name_or_path": "bert-base-chinese",
  "architectures": [
    "BertPromptForMultipleChoice"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.16.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 21128
}

loading weights file 0312-1300_RPBert/checkpoint-1064/pytorch_model.bin

Evaluating...


All model checkpoint weights were used when initializing BertPromptForMultipleChoice.

All the weights of BertPromptForMultipleChoice were initialized from the model checkpoint at 0312-1300_RPBert/checkpoint-1064.
If your task is similar to the task the model of the checkpoint was trained on, you can already use BertPromptForMultipleChoice for predictions without further training.
No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set  don't have a corresponding argument in `BertPromptForMultipleChoice.forward` and have been ignored: zh_dot_gloss, instance, zh_dot_type_2, eng word, dot_type_2, src, type_class, dot_type_1, is_one_ans, word, __index_le

** total param is 175873
** train bert? False


acc: {'accuracy': 0.7105262875556946}


In [22]:
# Close the W&B run; the run history and summary are printed in the cell output.
wandb.finish()




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/accuracy,▁▃▄▂▅▆▆▆▆▅▇▆▇█▆▇▇▇▇▇
eval/loss,▂▁▁▄▁▁▂▄▂▃▄▄▄▄▅▇▇█▆▆
eval/runtime,▃▁▁▃▁▂▃▂▃▃▁▃▅▆▅▇▇▇▆█
eval/samples_per_second,▆██▆█▇▆▇▆▆█▆▄▃▄▂▂▂▃▁
eval/steps_per_second,▆██▆█▇▆▇▆▆█▆▄▃▄▂▂▂▃▁
train/epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
train/learning_rate,▅██▇▇▆▆▆▅▅▅▄▄▃▃▃▂▂▁▁
train/loss,███▇▇▇▆▅▆▅▅▃▃▂▃▂▂▁▁▁
train/total_flos,▁

0,1
eval/accuracy,0.66667
eval/loss,0.94496
eval/runtime,0.2117
eval/samples_per_second,538.526
eval/steps_per_second,89.754
train/epoch,20.0
train/global_step,1520.0
train/learning_rate,0.0
train/loss,0.3553
train/total_flos,615881950804344.0
