In [2]:
# Shell escape: list the GPUs visible on this machine (driver/CUDA version, memory use, utilisation).
!nvidia-smi

Mon Aug 21 17:33:23 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.86.10              Driver Version: 535.86.10    CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 4090        On  | 00000000:01:00.0  On |                  Off |
| 45%   50C    P8              25W / 450W |      3MiB / 24564MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce RTX 4090        On  | 00000000:04:00.0 Off |  

### 測試 deberta 的填空能力

In [25]:
from transformers import AutoTokenizer, DebertaForMaskedLM
import torch

# Sanity check: can this checkpoint fill in a single [MASK] token?
# The checkpoint name is spelled once so tokenizer and weights cannot drift apart.
CHECKPOINT = "lsanochkin/deberta-large-feedback"

tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
model = DebertaForMaskedLM.from_pretrained(CHECKPOINT)

inputs = tokenizer("The capital of France is [MASK].", return_tensors="pt")

# Inference only: no gradients are needed for the prediction.
with torch.no_grad():
    logits = model(**inputs).logits

# retrieve index of [MASK]
mask_token_index = (inputs.input_ids == tokenizer.mask_token_id)[0].nonzero(as_tuple=True)[0]

predicted_token_id = logits[0, mask_token_index].argmax(axis=-1)
print(tokenizer.decode(predicted_token_id))

labels = tokenizer("The capital of France is Paris.", return_tensors="pt")["input_ids"]
# torch.where below pairs label and input positions one-to-one, so both
# encodings must have the same length (i.e. the gold word must be one token).
assert labels.shape == inputs.input_ids.shape, (
    "gold sentence and masked sentence tokenize to different lengths"
)
# mask labels of non-[MASK] tokens (-100 is the label value the loss skips)
labels = torch.where(inputs.input_ids == tokenizer.mask_token_id, labels, -100)

# Only the scalar loss is read, so this pass does not need an autograd graph either.
with torch.no_grad():
    outputs = model(**inputs, labels=labels)
round(outputs.loss.item(), 2)

 Paris


0.54

In [26]:
import json
def read_clothf_data(path='../../data/CLOTH-F/clean_cloth-f_dataset.json'):
    """Load the CLOTH-F dataset from a JSON file.

    Args:
        path: Location of the JSON file. Defaults to the cleaned CLOTH-F
            dataset path used by this notebook.

    Returns:
        The parsed JSON content, exactly as produced by ``json.load``.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding (the notebook also writes JSON as UTF-8).
    with open(path, encoding="UTF-8") as f:
        data = json.load(f)
    return data

# Load the dataset once; the cells below read and extend `clothf_data`.
clothf_data = read_clothf_data()

In [27]:
from pprint import pprint
# Peek at the first record of the "test" split to see which fields an item carries.
pprint(clothf_data["test"][0])

{'answer': 'eating',
 'distractors': ['working', 'preparing', 'thinking'],
 'index': 0,
 'sentence': 'I met Kurt Kampmeir of Success Motivation Incorporation for '
             'breakfast. While we were _ ,Kurt askedme, " John, what is your '
             'plan for personal growth? Never at a loss for words, I tried to '
             'find things in my life that might qualify for growth.I toldhim '
             'about the many activities in which I was involved . '}


In [34]:
TOP_K = 15      # number of masked-LM candidates requested for each blank
MAX_RANKED = 6  # length cap of the stored "ranked_distractors" list


def merge_distractors(answer, distractors, candidates, limit=MAX_RANKED):
    """Append new candidate words to the existing distractors.

    Args:
        answer: The correct word for the blank; never added as a distractor.
        distractors: The item's original distractors, kept first and in order.
        candidates: Decoded model predictions, best first. Surrounding
            whitespace is stripped before comparison.
        limit: Maximum length of the returned list.

    Returns:
        A new list: the original distractors followed by the candidates that
        are non-empty and are not the answer, an existing distractor, or a
        repeat of an earlier candidate (compared case-insensitively),
        truncated to ``limit`` entries. ``distractors`` itself is not modified.
    """
    seen = {answer.strip().lower()}
    seen.update(word.strip().lower() for word in distractors)

    merged = list(distractors)
    for candidate in candidates:
        word = candidate.strip()
        key = word.lower()
        # Skip blanks, the answer, known distractors and repeated predictions.
        if not word or key in seen:
            continue
        seen.add(key)
        merged.append(word)
    return merged[:limit]


# Inference mode only has to be selected once, not once per sentence.
model.eval()

for cate in clothf_data:
    for s in clothf_data[cate]:
        # Each item marks its blank with "_"; swap in the tokenizer's own mask
        # token so it matches the `mask_token_id` lookup below.
        resent = s["sentence"].replace('_', tokenizer.mask_token)

        inputs = tokenizer(resent, return_tensors="pt")
        with torch.no_grad():
            logits = model(**inputs).logits

        # retrieve index of the mask token(s)
        mask_token_index = (inputs.input_ids == tokenizer.mask_token_id)[0].nonzero(as_tuple=True)[0]

        if mask_token_index.numel() == 0:
            # No blank in this sentence: nothing to predict, keep the originals.
            word_list = []
        else:
            # Use the first blank only, so the ids stay 1-D and every id is
            # decoded as its own word. Top-k on raw logits gives the same
            # ranking as top-k on softmax probabilities.
            top_ids = torch.topk(logits[0, mask_token_index[0]], TOP_K).indices
            word_list = tokenizer.batch_decode(top_ids)

        # Stored on the item itself; later cells read s["ranked_distractors"].
        s["ranked_distractors"] = merge_distractors(s["answer"], s["distractors"], word_list)

In [35]:
# Re-inspect the first "test" record; it now also carries a "ranked_distractors" list.
pprint(clothf_data["test"][0])

{'answer': 'eating',
 'distractors': ['working', 'preparing', 'thinking'],
 'index': 0,
 'ranked_distractors': ['working',
                        'preparing',
                        'thinking',
                        'chatting',
                        'talking',
                        'dining'],
 'sentence': 'I met Kurt Kampmeir of Success Motivation Incorporation for '
             'breakfast. While we were _ ,Kurt askedme, " John, what is your '
             'plan for personal growth? Never at a loss for words, I tried to '
             'find things in my life that might qualify for growth.I toldhim '
             'about the many activities in which I was involved . '}


In [37]:
# Spot-check a second "test" record.
pprint(clothf_data["test"][1])

{'answer': 'plan',
 'distractors': ['suggestion', 'demand', 'request'],
 'index': 1,
 'ranked_distractors': ['suggestion',
                        'demand',
                        'request',
                        'idea',
                        'ideal',
                        'vision'],
 'sentence': 'I met Kurt Kampmeir of Success Motivation Incorporation for '
             'breakfast. While we were eating ,Kurt askedme, " John, what is '
             'your _ for personal growth? Never at a loss for words, I tried '
             'to find things in my life that might qualify for growth.I '
             'toldhim about the many activities in which I was involved . '}


In [36]:
def save_json(test_sample, path="../../data/deberta_negative/clothf_negative.json"):
    """Write ``test_sample`` to a UTF-8 JSON file.

    Args:
        test_sample: Any JSON-serialisable object.
        path: Destination file. Defaults to the notebook's output location.
            Missing parent directories are created.

    Raises:
        TypeError: If ``test_sample`` contains values ``json`` cannot serialise.
        OSError: If the directory or file cannot be created or written.
    """
    import os  # local import: keeps this cell runnable without touching the import cell

    parent = os.path.dirname(path)
    if parent:
        # A fresh checkout may not have the output folder yet.
        os.makedirs(parent, exist_ok=True)

    # ensure_ascii=False keeps non-ASCII text readable in the file instead of \u escapes.
    with open(path, "w", encoding="UTF-8") as afile:
        json.dump(test_sample, afile, ensure_ascii=False, indent=4)