# T5 Triplet Augmentation 
使用 MCQ dataset訓練<br>
直接使用 trainer 訓練 <br>
Triplet 只找跟 Answer 有關的<br>

>

### GPU

In [1]:
!nvidia-smi

Tue Aug 29 07:23:49 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 530.30.02              Driver Version: 530.30.02    CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                  Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA TITAN RTX                On | 00000000:01:00.0 Off |                  N/A |
| 40%   38C    P8               20W / 280W|      3MiB / 24576MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA TITAN RTX                On | 00000000:02:00.0 Off |  

### Weight and Bias (Assisting Metrics, Optional)

In [2]:
!pip install wandb
!wandb login
project_name = "test on MCQ with T5 Triplet Augmentation"
import os

os.environ["WANDB_PROJECT"] = project_name

[34m[1mwandb[0m: Currently logged in as: [33mhankystyle[0m. Use [1m`wandb login --relogin`[0m to force relogin


### import

In [3]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch

2023-08-29 07:23:57.940744: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-08-29 07:23:58.047401: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-08-29 07:23:58.521234: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvrtc.so.11.0: cannot open shared object file: No such file or directory
2023-08-29 07:23:58.521349: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvrtc.so.11.0: cannot open shared object file: No such file or direc

### Loading the dataset

In [4]:
import json
import os, sys
import fnmatch

In [7]:
def read_data(item):
    path = '../../../../../data/mcq/{}.kag.sentence.transformer.json'.format(item)
    with open(path) as f:
        data = json.load(f)
    return data

In [8]:
train = read_data('train')
test = read_data('test')

In [9]:
len(train), len(test)

(2321, 259)

In [10]:
train[0]

{'sentence': '**blank** causes rocks to roll downhill',
 'distractors': ['friction', 'erosion', 'magnetism'],
 'answer': 'gravity',
 'triplets': [['isa', 'gravity', 'force', 1.4813329875469208],
  ['relatedto', 'force', 'gravity', 1.4813329875469208],
  ['relatedto', 'gravity', 'force', 1.4813329875469208],
  ['relatedto', 'erosion', 'friction', 1.4012206196784973],
  ['relatedto', 'erosion', 'rock', 0.4012206196784973],
  ['antonym', 'sand', 'rock', 0.3557915687561035],
  ['antonym', 'rock', 'sand', 0.3557915687561035],
  ['relatedto', 'sand', 'rock', 0.3557915687561035],
  ['hassubevent', 'roll', 'rock', 0.3557915687561035],
  ['antonym', 'rock', 'water', 0.3557915687561035],
  ['antonym', 'roll', 'rock', 0.3557915687561035],
  ['relatedto', 'rocks', 'rock', 0.3557915687561035],
  ['atlocation', 'rock', 'water', 0.3557915687561035],
  ['relatedto', 'rock', 'roll', 0.3557915687561035],
  ['relatedto', 'rock', 'water', 0.3557915687561035],
  ['relatedto', 'roll', 'rock', 0.355791568756

In [11]:
train[0].keys()

dict_keys(['sentence', 'distractors', 'answer', 'triplets'])

Select Triplet with Ans Entity

In [13]:
def filter_triplet(data):
    for i, d in enumerate(data):
        ans = d['answer']
        triplets_set = d['triplets']
        filter_triplet = []
        for each_triplet in triplets_set:
            rel, source, target, score = each_triplet
            if source == ans or target == ans:
                filter_triplet.append(each_triplet)

        d['triplets'] = filter_triplet
    return data

In [14]:
train = filter_triplet(train)
test = filter_triplet(test)

In [15]:
train[0]['triplets']

[['isa', 'gravity', 'force', 1.4813329875469208],
 ['relatedto', 'force', 'gravity', 1.4813329875469208],
 ['relatedto', 'gravity', 'force', 1.4813329875469208]]

In [16]:
len(train), len(test)

(2321, 259)

In [17]:
train[0]

{'sentence': '**blank** causes rocks to roll downhill',
 'distractors': ['friction', 'erosion', 'magnetism'],
 'answer': 'gravity',
 'triplets': [['isa', 'gravity', 'force', 1.4813329875469208],
  ['relatedto', 'force', 'gravity', 1.4813329875469208],
  ['relatedto', 'gravity', 'force', 1.4813329875469208]]}

### Prepare data

In [18]:
from sklearn.model_selection import train_test_split

train, valid = train_test_split(train, random_state=777, train_size=0.9)
len(train), len(valid)

(2088, 233)

In [19]:
def processData(data, task_prefix):
    
    sentences = []
    labels = []
    answers = []
    for d in data:
        sentence = d['sentence']
        distractors = d['distractors']
        triplets = d['triplets']
        answer = d['answer']

        triplet_set = []
        for each_triplet in triplets:
            rel, source, target, weight = each_triplet
            triplet_set.append('{} {} {}'.format(source,rel,target))

        

        sentence = sentence.replace('**blank**', '_')
        # 避免dataset的label有空白
        distractors = [dis.strip() for dis in distractors]
        
        sentences.append(sentence + '</s>' + answer + '</s>' + ', '.join(triplet_set)+ '</s>')

        labels.append('_ of distractors are ' + ', '.join(distractors) + '</s>')
        answers.append(answer)
        
    return sentences, answers, labels

In [20]:
task_prefix = 'distractor generation with triplet: '
train_sent, train_answer, train_label = processData(train,  task_prefix)
valid_sent, valid_answer, valid_label = processData(valid, task_prefix)
test_sent, test_answer, test_label = processData(test, task_prefix)

In [21]:
def get_blank(lst):
    for k in range(len(lst)):
        if '**blank**' in lst[k]:
            return k
    return -1

In [22]:
for idx in range(2):
    print(train_sent[idx])
    print(train_answer[idx])
    print(train_label[idx])
    print()

In _ reinforcement, the reinforcer follows every correct response.</s>continuous</s>continuous relatedto sustained, progressive relatedto continuous, sustained relatedto continuous</s>
continuous
_ of distractors are intermittent, partial, negative</s>

_ of glucose units is found in plants and serves a structural purpose</s>cellulose</s>cellulose relatedto plant, plant relatedto cellulose, cellulose relatedto glucose, cellulose partof plants, pectin isa cellulose, cellulose relatedto unit</s>
cellulose
_ of distractors are frucose, carbonate, sucrose</s>



In [23]:
len(train_sent), len(train_answer), len(train_label)

(2088, 2088, 2088)

### Tokenization

In [24]:
tokenizer = T5Tokenizer.from_pretrained('t5-base')

For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


In [25]:
train_encodings = tokenizer(train_sent, truncation=True, padding=True)
valid_encodings = tokenizer(valid_sent, truncation=True, padding=True)
test_encodings = tokenizer(test_sent, truncation=True, padding=True)



In [26]:
train_encodings.keys()

dict_keys(['input_ids', 'attention_mask'])

In [27]:
print(train_encodings.input_ids[0])

[86, 3, 834, 28050, 6, 8, 19452, 52, 6963, 334, 2024, 1773, 5, 1, 7558, 1, 7558, 1341, 235, 14399, 6, 9018, 1341, 235, 7558, 6, 14399, 1341, 235, 7558, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [28]:
len(train_encodings.input_ids)

2088

In [29]:
def add_labels(encodings, distractors):
    
    distractors_encodings = tokenizer(distractors, padding=True)
    labels = []
    for i in range(len(distractors_encodings.input_ids)):
        labels.append(distractors_encodings.input_ids[i])
    
    encodings["labels"] = labels
    return encodings

In [30]:
train_encodings = add_labels(train_encodings, train_label)
valid_encodings = add_labels(valid_encodings, valid_label)
test_encodings = add_labels(test_encodings, test_label)

In [31]:
train_encodings.keys()

dict_keys(['input_ids', 'attention_mask', 'labels'])

In [32]:
tokenizer.decode(train_encodings['labels'][0])

'_ of distractors are intermittent, partial, negative</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>'

### Dataset

In [33]:
class MCQDataset(torch.utils.data.Dataset):
    def __init__(self, encodings):
        self.encodings = encodings

    def __getitem__(self, idx):
        return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}

    def __len__(self):
        return len(self.encodings.input_ids)

train_dataset = MCQDataset(train_encodings)
valid_dataset = MCQDataset(valid_encodings)
test_dataset = MCQDataset(test_encodings)

In [34]:
len(train_dataset), len(valid_dataset), len(test_dataset)

(2088, 233, 259)

### Fine-tuning

In [35]:
from transformers import T5ForConditionalGeneration, Seq2SeqTrainingArguments, Seq2SeqTrainer
import torch

model = T5ForConditionalGeneration.from_pretrained("t5-base")

In [36]:
batch_size = 32
args = Seq2SeqTrainingArguments(
    output_dir = "./results",
    save_strategy = "epoch",
    evaluation_strategy = "epoch",
    learning_rate=1e-4,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=40,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="P@1",
    weight_decay=0.01,
    predict_with_generate=True,
    gradient_accumulation_steps=1,
    report_to="wandb" if os.getenv("WANDB_PROJECT") else "none"
)

In [37]:
from transformers import DataCollatorForSeq2Seq

data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

In [38]:
import numpy as np
def compute_metrics(p):
    predictions, labels = p
    
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    
    # Replace -100 in the labels as we can't decode them.
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    
    # store all sentences
    predicted = []
    true_label = []
    
    for k in range(len(decoded_labels)):
        pred = decoded_preds[k]
        label = decoded_labels[k]

        pred_list = pred.split(', ')
        label_list = label.split(', ')
        
        pred_list[0] = pred_list[0].split(' ')[-1]
        label_list[0] = label_list[0].split(' ')[-1]

        predicted.append(pred_list)
        true_label.append(label_list)

    # evaluation metrics
    p1 = 0
    p3 = 0
    r3 = 0
    f3 = 0
    for idx in range(len(true_label)):
        distractors = predicted[idx]
        labels = true_label[idx]

        act_set = set(labels)
        pred1_set = set(distractors[:1])
        pred3_set = set(distractors[:3])

        p_1 = len(act_set & pred1_set) / float(1)
        p_3 = len(act_set & pred3_set) / float(3)
        r_3 = len(act_set & pred3_set) / float(len(act_set))

        if p_3 == 0 and r_3 == 0:
            f1_3 = 0
        else:
            f1_3 = 2 * (p_3 * r_3 / (p_3 + r_3))

        p1+=p_1
        p3+=p_3
        r3+=r_3
        f3+=f1_3

    avg_p1 = p1 / len(true_label)
    avg_p3 = p3 / len(true_label)
    avg_r3 = r3 / len(true_label)
    avg_f3 = f3 / len(true_label)

    result = {'P@1': avg_p1,
              'P@3': avg_p3,
              'R@3': avg_r3,
              'F1@3': avg_f3}
    
    return result

In [39]:
trainer = Seq2SeqTrainer(
    model,
    args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

In [40]:
from transformers.trainer_callback import PrinterCallback
trainer.remove_callback(PrinterCallback)

In [None]:
trainer.train()

In [43]:
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 233
  Batch size = 64
Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}



Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}



{'eval_loss': 0.5637132525444031,
 'eval_P@1': 0.26180257510729615,
 'eval_P@3': 0.15736766809728178,
 'eval_R@3': 0.1494992846924177,
 'eval_F1@3': 0.15287144900878802,
 'eval_runtime': 7.6336,
 'eval_samples_per_second': 30.523,
 'eval_steps_per_second': 0.524,
 'epoch': 49.97}

### Test Result

In [44]:
# ,encoder_no_repeat_ngram_size =2,no_repeat_ngram_size = 2,diversity_penalty = 0.5,num_beams = 6,num_beam_groups =2
predictions, labels, metrics = trainer.predict(test_dataset,num_beams = 6)
print('test: ')
metrics

***** Running Prediction *****
  Num examples = 259
  Batch size = 64
  Num examples = 259
  Batch size = 64
Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config

test: 


{'test_loss': 0.8859273195266724,
 'test_P@1': 0.2084942084942085,
 'test_P@3': 0.12998712998712994,
 'test_R@3': 0.12998712998712994,
 'test_F1@3': 0.12998712998712994,
 'test_runtime': 11.6295,
 'test_samples_per_second': 22.271,
 'test_steps_per_second': 0.43}

In [45]:
predictions, labels, metrics = trainer.predict(test_dataset)
print('test: ')
metrics

***** Running Prediction *****
  Num examples = 259
  Batch size = 64
Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}



Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_

test: 


{'test_loss': 0.8859273195266724,
 'test_P@1': 0.2084942084942085,
 'test_P@3': 0.12355212355212346,
 'test_R@3': 0.12355212355212346,
 'test_F1@3': 0.12355212355212346,
 'test_runtime': 6.5116,
 'test_samples_per_second': 39.775,
 'test_steps_per_second': 0.768}

In [46]:
trainer.save_model('../../../../../saved_models/KAG/mcq/t5/t5-triplet-augmentation-ans-only-on-mcq')

Saving model checkpoint to ../../../../saved_models/t5/t5-triplet-augmentation-ans-only-on-mcq
Configuration saved in ../../../../saved_models/t5/t5-triplet-augmentation-ans-only-on-mcq/config.json
Configuration saved in ../../../../saved_models/t5/t5-triplet-augmentation-ans-only-on-mcq/generation_config.json
Model weights saved in ../../../../saved_models/t5/t5-triplet-augmentation-ans-only-on-mcq/pytorch_model.bin
tokenizer config file saved in ../../../../saved_models/t5/t5-triplet-augmentation-ans-only-on-mcq/tokenizer_config.json
Special tokens file saved in ../../../../saved_models/t5/t5-triplet-augmentation-ans-only-on-mcq/special_tokens_map.json


### Save Distractor Data

In [49]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, Seq2SeqTrainingArguments, Seq2SeqTrainer
import torch

#tokenizer = T5Tokenizer.from_pretrained("../../../../saved_models/t5/t5-triplet-augmentation-ans-only-on-mcq")
#model = T5ForConditionalGeneration.from_pretrained("../../../../saved_models/t5/t5-triplet-augmentation-ans-only-on-mcq")
model = T5ForConditionalGeneration.from_pretrained("./results/checkpoint-800/")

loading configuration file ./results/checkpoint-800/config.json
Model config T5Config {
  "_name_or_path": "t5-base",
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "d_ff": 3072,
  "d_kv": 64,
  "d_model": 768,
  "decoder_start_token_id": 0,
  "dense_act_fn": "relu",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "relu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": false,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_decoder_layers": 12,
  "num_heads": 12,
  "num_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "task_specific_params": {
    "summarization": {
      "early_stopping": true,
      "length_penalty": 2.0,
      "max_length": 200,
      "min_length": 30,
      "no_repeat_ngram_size": 3,
      "num_beams": 4,
      "prefix": "summarize: "
    },
    "translation_en_to_de": {
      "ear

In [50]:
trainer = Seq2SeqTrainer(
    model,
    args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

### Last Epoch Result

In [51]:
test_predictions, test_labels, test_metrics = trainer.predict(test_dataset)
test_metrics

***** Running Prediction *****
  Num examples = 259
  Batch size = 64
Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}



Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_

{'test_loss': 0.9369543194770813,
 'test_P@1': 0.20077220077220076,
 'test_P@3': 0.13513513513513511,
 'test_R@3': 0.13513513513513511,
 'test_F1@3': 0.13513513513513511,
 'test_runtime': 6.4489,
 'test_samples_per_second': 40.162,
 'test_steps_per_second': 0.775}

In [53]:
# ,encoder_no_repeat_ngram_size =2,no_repeat_ngram_size = 2,diversity_penalty = 0.5,num_beams = 6,num_beam_groups =2
predictions, labels, metrics = trainer.predict(test_dataset,no_repeat_ngram_size = 2,num_beams = 6)
test_metrics

***** Running Prediction *****
  Num examples = 259
  Batch size = 64
Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}



Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_

{'test_loss': 0.9369543194770813,
 'test_P@1': 0.20077220077220076,
 'test_P@3': 0.13513513513513511,
 'test_R@3': 0.13513513513513511,
 'test_F1@3': 0.13513513513513511,
 'test_runtime': 6.4489,
 'test_samples_per_second': 40.162,
 'test_steps_per_second': 0.775}

In [None]:
# epoch = 50 F1@3
# {'test_loss': 0.9411434531211853,
#  'test_P@1': 0.18146718146718147,
#  'test_P@3': 0.1415701415701416,
#  'test_R@3': 0.1415701415701416,
#  'test_F1@3': 0.1415701415701416,
#  'test_runtime': 13.3961,
#  'test_samples_per_second': 19.334,
#  'test_steps_per_second': 0.373}

In [None]:
import json
def write_json(data, path):
    
    jsonString = json.dumps(data)
    jsonFile = open(path, "w")
    jsonFile.write(jsonString)
    jsonFile.close()

In [None]:
def save_data(data, predictions, labels, file_name):

    

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    
    # Replace -100 in the labels as we can't decode them.
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    
    # store all sentences
    predicted = []
    true_label = []
    
    for k in range(len(decoded_labels)):
        pred = decoded_preds[k]
        label = decoded_labels[k]

        pred_list = pred.split(', ')
        label_list = label.split(', ')
        
        pred_list[0] = pred_list[0].split(' ')[-1]
        label_list[0] = label_list[0].split(' ')[-1]

        predicted.append(pred_list)
        true_label.append(label_list)
    
    # evaluation metrics
    for idx in range(len(true_label)):
        
        distractors = predicted[idx]
        labels = true_label[idx]

        act_set = set(labels)
        pred1_set = set(distractors[:1])
        pred3_set = set(distractors[:3])

        p1 = len(act_set & pred1_set) / float(1)
        p3 = len(act_set & pred3_set) / float(3)
        r3 = len(act_set & pred3_set) / float(len(act_set))

        if p3 == 0 and r3 == 0:
            f1_3 = 0
        else:
            f1_3 = 2 * (p3 * r3 / (p3 + r3))
        
        data[idx]['pred_distractors'] = distractors
        data[idx]['metric'] = {'P@1': p1, 'P@3': p3, 'R@3': r3, 'F1@3': f1_3}
        if 'triplets' in data[idx]:

            del data[idx]['triplets']
    
    write_json(data, file_name)
    print(file_name + ' is saved :)')

In [None]:
save_data(test, predictions, labels, '../../../../../predictions/t5/mcq/t5-triplet-augmentation-ans-only-on-mcq.json')

../../../../predictions/t5/mcq/t5-triplet-augmentation-ans-only-on-mcq.json is saved :)


### 統計 Prediction 初先 Repeat Distractor 的次數

In [None]:
decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
repeat_count = 0
for k in range(len(decoded_preds)):
    pred = decoded_preds[k]
    pred_list = pred.split(', ')
    pred_list[0] = pred_list[0].split(' ')[-1]
    
    duplicates = set([x for x in pred_list if pred_list.count(x) > 1])
    if duplicates:
        repeat_count += 1
print('重複 distractor 筆數 = ', repeat_count)

重複 distractor 筆數 =  40


### 統計 Prediction 初先 Distractor 與 答案相同的次數

In [None]:
same_count = 0
for i in range(len(test_answer)):
    pred = decoded_preds[k]
    pred_list = pred.split(', ')
    pred_list[0] = pred_list[0].split(' ')[-1]

    if test_answer[i].lower() in pred:
        same_count += 1
print('答案與 distractor 相同的筆數 = ', same_count)

答案與 distractor 相同的筆數 =  1
