In [1]:
# Initialize the environment
# We recommend you to use anaconda3 to config the virtual python environment
# !conda install pytorch torchvision torchaudio -c pytorch
# !pip install transformers==4.24.0 scipy==1.5.0 datasets==2.7.0 promptsource==0.2.3

In [1]:
import torch
from datetime import datetime
from tqdm import tqdm
import torch.nn.functional as F
import warnings

import numpy as np

from datasets import load_dataset
from promptsource.templates import DatasetTemplates

from scipy.linalg import block_diag
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from typing import List

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoConfig, get_linear_schedule_with_warmup
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score

import logging
import sys

from pdb import set_trace as st

warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Download the model and the tokenizer
# DEFAULT: glm-2b, which is able for good zero-shot short blanking infilling ([MASK]) and long left-to-right generation ([gMASK])
# If you want to do fine-tuning on language understanding or generation,
# try smaller glm-roberta-large (335M, not for zero-shot)
model_type = 'BAAI/glm-roberta-large'#"BAAI/glm-2b"
tokenizer = AutoTokenizer.from_pretrained(model_type, trust_remote_code=True, revision='main')
model = AutoModelForSeq2SeqLM.from_pretrained(model_type, trust_remote_code=True, revision='main').half().float().cuda()
print(f"Model {model_type} loaded.")

Downloading: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 438/438 [00:00<00:00, 223kB/s]
Downloading: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 16.0k/16.0k [00:00<00:00, 108kB/s]
Downloading: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 899k/899k [00:01<00:00, 609kB/s]
Downloading: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 456k/456k [00:01<00:00, 289kB/s]
Downloading: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 77.0/77.0 [00:00<00:00, 73.5kB/s]
Downloading: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 963/963 [00:00<00:00, 1.01MB/s]
Downloading: 100%|██████████████████████████████████████████████

Model BAAI/glm-roberta-large loaded.


In [4]:
torch.save(model, 'glm_roberta_large_original.pt')

## Classification

In [None]:
# For the classification task, in a Seq2Seq model like GLM, we need to calculate the conditional probability of choices for the given context.
# Remember to refer to code example (https://github.com/THUDM/GLM#classification) in GLM's repo.

# The `cond_log_prob` could be used for both multiple-choice problem (i.e., classification) or text generation (i.e., summurization).
def cond_log_prob_single_sample(model, context, choices):
    """
    Compute conditonal probability for one or more continuation/infilling options, single-sample only.
    General solution to all classification/multiple-choice tasks.
    :param context: prompted inputs. For example, "One plus one equals two, is it correct? Answer: [MASK]"
    :param choices: classification labels or choices. For example, ["No", "Yes"]
    """
    context_id = tokenizer(context)['input_ids']
    probs = []
    for choice in choices:
        choice_id = tokenizer(' ' + choice)['input_ids'][1:-1]  # Feature of SentencePiece tokenizer
        input_ids = torch.tensor(context_id + [tokenizer.sop_token_id] + choice_id[:-1], dtype=torch.long)
        attention_mask = torch.tril(torch.ones(len(input_ids), len(input_ids), dtype=torch.long))
        attention_mask[:len(context_id), :len(context_id)] = 1
        mask_position = context_id.index(tokenizer.mask_token_id)
        position_id = torch.cat([torch.arange(len(context_id)), torch.ones(len(choice_id)) * mask_position])
        block_position_id = torch.cat([torch.zeros(len(context_id)), torch.arange(1, 1 + len(choice_id))])
        position_id = torch.stack((position_id, block_position_id), dim=0).long()
        logits = model.forward(input_ids=input_ids.view(1, -1).cuda(),
                            attention_mask=attention_mask.unsqueeze(0).unsqueeze(0).cuda(),
                            position_ids=position_id.view(1, 2, -1).cuda())['logits']
        logits = F.log_softmax(logits, dim=-1)
        probs.append(logits[0, range(len(context_id), len(context_id) + len(choice_id)), choice_id].sum())
    return torch.stack(probs)

# Forward results by single sample is slow. The following codes organize a batch of inputs to speed up training.
def build_multiple_choice_sample(context, choices):
    context_id = tokenizer(context)['input_ids']

    division = len(context_id)
    mask_position = context_id.index(tokenizer.mask_token_id)

    token = np.array(context_id, dtype=np.int64)
    attention_mask = [np.ones((division, division), dtype=np.int64)]
    position_id = np.arange(division, dtype=np.int64)
    block_position_id = np.zeros(division, dtype=np.int64)

    choice_target_id = []
    choice_id = []

    for choice_str in choices:
        choice = np.array(tokenizer(choice_str)['input_ids'][1:-1], dtype=np.int64)

        choice_id.append(choice)
        choice_target_id.append(np.arange(len(token), len(token) + len(choice), dtype=np.int64))
        attention_mask.append(np.tril(np.ones((len(choice), len(choice)), dtype=np.int64)))

        token = np.concatenate((token, [tokenizer.sop_token_id], choice[:-1]))
        position_id = np.concatenate((position_id, [mask_position] * len(choice)))
        block_position_id = np.concatenate((block_position_id, np.arange(1, 1 + len(choice), dtype=np.int64)))

    attention_mask = block_diag(*attention_mask)
    attention_mask[division:, :division] = 1

    return {
        "token": token,
        "position_id": np.stack((position_id, block_position_id)),
        "attention_mask": attention_mask,
        "choices": choice_id,
        "choice_target_ids": choice_target_id
    }


def pad_batch(tokens, position_ids, attention_mask, max_seq_length):
    pad_length = max_seq_length - len(tokens)
    attention_mask = np.pad(
        attention_mask,
        pad_width=((0, pad_length),),
        mode="constant",
        constant_values=0,
    )
    tokens = np.concatenate((tokens, np.zeros(pad_length, dtype=np.int64)))
    position_ids = np.concatenate((position_ids, position_ids[..., -1:].repeat(pad_length, -1)), axis=-1)
    return tokens, position_ids, attention_mask


def collate_fn(samples):
    TILE = 16
    length_to_pad = (max(map(lambda spl: len(spl["token"]), samples)) + TILE - 1) // TILE * TILE

    token_batch, position_id_batch, attention_mask_batch = [], [], []
    choices_batch, choice_target_ids_batch = [], []

    for sample in samples:
        token, position_id, attention_mask = pad_batch(
            sample["token"], sample["position_id"], sample["attention_mask"], length_to_pad
        )
        token_batch.append(token)
        position_id_batch.append(position_id)
        attention_mask_batch.append(attention_mask)
        choices_batch.append(sample["choices"])
        choice_target_ids_batch.append(sample["choice_target_ids"])

    return {
        "tokens": torch.tensor(np.array(token_batch), dtype=torch.int64),
        "position_ids": torch.tensor(np.array(position_id_batch), dtype=torch.int64),
        "attention_mask": torch.tensor(np.array(attention_mask_batch), dtype=torch.int64),
        "choices": choices_batch,
        "choice_target_ids": choice_target_ids_batch,
    }

def cond_log_prob(model, context: List[str], choices: List[List[str]]) -> List[List[float]]:
    """
    Compute conditonal probability for one or more continuation/infilling options.
    :return The log probablity of each option.
    """
    if not isinstance(context, list):
        context = [context]
        choices = [choices]
    choices = [[(' ' + choice) for choice in choice_pair] for choice_pair in choices]  # Feature of SentencePiece tokenizer

    samples = [build_multiple_choice_sample(ctx, ch) for ctx, ch in zip(context, choices)]

    batch = collate_fn(samples)

    logits = model.forward(input_ids=batch['tokens'].cuda(),
                        attention_mask=batch['attention_mask'].cuda().unsqueeze(1),
                        position_ids=batch['position_ids'].cuda())['logits']

    log_probs = []

    for output, choices, choice_target_ids in zip(F.log_softmax(logits, dim=-1), batch['choices'], batch['choice_target_ids']):
        log_probs_single = []
        for choice, choice_target_id in zip(choices, choice_target_ids):
            tmp = output[choice_target_id, choice]
            log_probs_single.append(tmp.sum())
        log_probs.append(torch.stack(log_probs_single))

    return torch.stack(log_probs)

# print("Single sample:", cond_log_prob_single_sample(model, "One plus one equals two, is it correct? Answer: [MASK]", ["No", "Yes"]))
# print("Batch samples:", cond_log_prob(model, ["Tsinghua University is located in [MASK] .",
#                                        "One minus one equals zero, is it correct? Answer: [MASK]"],
#                                       [["Beijing", "Shanghai"],
#                                        ["No", "Yes"]]))


'\nprint("Single sample:", cond_log_prob_single_sample(model, "One plus one equals two, is it correct? Answer: [MASK]", ["No", "Yes"]))\nprint("Batch samples:", cond_log_prob(model, ["Tsinghua University is located in [MASK] .",\n                                       "One minus one equals zero, is it correct? Answer: [MASK]"],\n                                      [["Beijing", "Shanghai"],\n                                       ["No", "Yes"]]))\n'

In [6]:
# # Example: evaluating glm's zero-shot perfomance on glue/sst2 using prompt from promptsource
# dataset = load_dataset("hans", split="validation")
# glue_sst2_prompts = DatasetTemplates('hans')  # WARNING: glue/super_glue/twitter_eval datasets are not allowed in your submission. This is only an example implementation.
# print("Prompt names:", [prompt.get_name() for prompt in glue_sst2_prompts.templates.values()])
# # Remember to choose those prompts annotated as `original_task: true`; they are standard prompts.
# prompt = glue_sst2_prompts["GPT-3 style"]
# choices = prompt.answer_choices.split(' ||| ')
# print("Choices:", choices)

# correct = 0
# print(len(dataset))
# for i, sample in enumerate(dataset):
#     if i % 1000 == 0:
#         print(i, datetime.now())
#     result = prompt.apply(sample)
#     context = result[0] + "Answer: [MASK]"
#     probs = cond_log_prob(model, context, choices)
#     pred = torch.argmax(probs).item()
#     correct += pred == sample['label']

# print(correct, correct / len(dataset))




In [7]:
class MultipleChoiceDataset(torch.utils.data.Dataset):
    def __init__(self, dataset_name, split, prompt_name, tokenizer):
        super(MultipleChoiceDataset, self).__init__()
        self.dataset_name = dataset_name
        self.split = split
        self.prompt = DatasetTemplates(self.dataset_name)[prompt_name]
        self.tokenizer = tokenizer

        # Ensure that the dataset split is valid.
        self.data = []
        if '/' in dataset_name:
            iters = load_dataset(dataset_name.split('/')[0], dataset_name.split('/')[1], split=split)
        else:
            iters = load_dataset(dataset_name, split=split)
        for sample in tqdm(iters):
            self.data.append(dict(zip(
                ['inputs_pretokenized', 'choices_pretokenized', 'label'],
                self.prompting_single_sample(sample)
            )))

    def get_choices(self, sample):
        """
        Default solution for text classification.
        TODO: not applicable to multiple-choice problem. Please customize choices from `sample`.
        """
        return self.prompt.answer_choices.split(' ||| ')

    def prompting_single_sample(self, sample):
        """
        Format a sample into a prompted sample.
        :return inputs_pretokenized, choices_pretokenized
        """
        inputs_pretokenized, groundtruth_choice = tuple(self.prompt.apply(sample))
        choices_pretokenized = self.get_choices(sample)

        # TODO: Use default label. Please customize according to your dataset.
        label = sample['label']
        return inputs_pretokenized + ' [MASK]', choices_pretokenized, label

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index]


In [8]:
# Remember to set model.float() and model.train() before fine-tuning, since fp16 training is instable without deepspeed.

def init_logger():
    logger = logging.getLogger("default")
    cmd_handler = logging.StreamHandler(sys.stdout)
    cmd_handler.setLevel(logging.DEBUG)
    cmd_handler.setFormatter(logging.Formatter(r"[%(asctime)s][%(levelname)s][%(filename)s:%(lineno)s] %(message)s"))
    logger.addHandler(cmd_handler)
    logger.setLevel(logging.INFO)
    return logger


def flatten_labels(compacted_labels):
    batch_size = len(compacted_labels[0])
    num_of_classes = len(compacted_labels)
    return [[compacted_labels[i][idx] for i in range(num_of_classes)] for idx in range(batch_size)]


class MultipleChoiceTrainer:
    def __init__(self, model, epochs, lr, train_bsz, dataset_name: str, prompt_name: str):
        self.train_bsz = train_bsz
        self.eval_bsz = 8
        self.epoch = epochs
        self.lr = lr
        # Load tokenizer & logger
        self.tokenizer = tokenizer  # use tokenizer from 3rd cell
        self.logger = init_logger()

        # Load dataset
        print(f'loading data')
        self.train_dataset = MultipleChoiceDataset(dataset_name, 'train', prompt_name, self.tokenizer)
        self.valid_dataset = MultipleChoiceDataset(dataset_name, 'validation', prompt_name, self.tokenizer)
        print(f'train dataset length: {len(self.train_dataset)}, val dataset length: {len(self.valid_dataset)}')
        #self.test_dataset = MultipleChoiceDataset(dataset_name, 'test', prompt_name, self.tokenizer)

        self.train_loader = DataLoader(self.train_dataset, batch_size=self.train_bsz, shuffle=True, drop_last=True)
        self.valid_loader = DataLoader(self.valid_dataset, batch_size=self.eval_bsz, shuffle=False)
        #self.test_loader = DataLoader(self.test_dataset, batch_size=self.eval_bsz, shuffle=False)

        # Configure training model, optimizer, and scheduler
        self.model = model  # use model from 3rd cell
        # there should be a model.float() here. but i shifted it up (my code calls model.float() immediately after initialization)
        self.model.train()
        num_training_steps = self.epoch * (len(self.train_dataset) // self.train_bsz)
        self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr)
        self.scheduler = get_linear_schedule_with_warmup(self.optimizer,
                                                         num_warmup_steps=int(num_training_steps * 0.06),
                                                         num_training_steps=num_training_steps)

    def evaluate(self, e, data_loader):
        valid_loss = 0.0
        valid_labels = []
        valid_preds = []
        self.model.eval()
        with torch.no_grad():
            for i, sample in tqdm(enumerate(data_loader, start=1), desc="valid", total=len(data_loader)):
                logits = cond_log_prob(self.model, sample["inputs_pretokenized"], flatten_labels(sample['choices_pretokenized']))
                labels = sample["label"].cuda()
                loss = F.nll_loss(logits, labels)
                valid_loss += loss.item()
                valid_preds.extend(torch.argmax(logits, dim=-1).cpu().numpy().tolist())
                valid_labels.extend(np.array(sample["label"]).tolist())
        valid_loss = valid_loss / len(data_loader)
        valid_acc = accuracy_score(valid_preds, valid_labels)
        self.logger.info(f"[VALID] epoch {e}: loss={valid_loss}, acc={valid_acc}")

    def train(self):
        for e in range(1, self.epoch + 1):
            self.logger.info(f"Epoch {e}")
            # train
            tqdm_vars = {"lr": np.nan, "loss": np.nan}
            tbar = tqdm(enumerate(self.train_loader, start=1), desc="train", total=len(self.train_loader),
                        postfix=tqdm_vars)
            train_loss_value = 0.0
            self.model.train()
            print('entering training loop')
            for i, sample in tbar:
                logits = cond_log_prob(self.model, sample["inputs_pretokenized"], flatten_labels(sample['choices_pretokenized']))
                labels = sample["label"].cuda()
                loss = F.nll_loss(logits, labels)
                train_loss_value += loss.item()
                loss.backward()
                self.optimizer.step()
                self.scheduler.step()
                self.optimizer.zero_grad()
                tqdm_vars["lr"] = self.optimizer.state_dict()["param_groups"][0]["lr"]
                tqdm_vars["loss"] = train_loss_value
                tbar.set_postfix(tqdm_vars)
                train_loss_value = 0.0
            # valid
            self.evaluate(e, self.valid_loader)
        # TODO: If there is a test dataset, please select the best-performed checkpoints on valid dataset to evaluate.
        # TODO: the example `glue/rte` has no public test set.

In [9]:
trainer = MultipleChoiceTrainer(
    model=model,
    epochs=10,
    lr=1e-5,
    train_bsz=8,
    dataset_name="hans",
    prompt_name="GPT-3 style")  # Choose a `original_task: true` prompt!

loading data


Found cached dataset hans (C:/Users/thisi/.cache/huggingface/datasets/hans/plain_text/1.0.0/452e93cf5383f5ae39088254215b517d0da98ccaaf0af8f7ab04d8f23f67dbd9)
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30000/30000 [01:18<00:00, 382.66it/s]
Found cached dataset hans (C:/Users/thisi/.cache/huggingface/datasets/hans/plain_text/1.0.0/452e93cf5383f5ae39088254215b517d0da98ccaaf0af8f7ab04d8f23f67dbd9)
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30000/30000 [01:18<00:00, 380.81it/s]

train dataset length: 30000, val dataset length: 30000





In [10]:
# trainer.evaluate(0, trainer.valid_loader)
# output: [2023-01-02 01:50:32,302][INFO][2370397064.py:68] [VALID] epoch 0: loss=5.620091435877482, acc=0.4997666666666667

valid: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3750/3750 [20:58<00:00,  2.98it/s]

[2023-01-02 01:50:32,302][INFO][2370397064.py:68] [VALID] epoch 0: loss=5.620091435877482, acc=0.4997666666666667





In [11]:
trainer.train()

[2023-01-02 01:50:32,380][INFO][2370397064.py:72] Epoch 1


train:   0%|                                                                                                     | 0/3750 [00:00<?, ?it/s, loss=nan, lr=nan]

entering training loop
1


train:   0%|                                                                                                     | 0/3750 [00:01<?, ?it/s, loss=nan, lr=nan]


OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 2.00 GiB total capacity; 1.71 GiB already allocated; 0 bytes free; 1.73 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

u can del model and tokenizer later if u need.
btw maybe btr to just ask the ta for gpu now or ask yvon for help maybe.

## Generation

In [26]:
# For the generation task, we need to do conditional generation
# Remember to refer to code example (https://github.com/THUDM/GLM#generation) in GLM's repo to find code for loss implementation!!!
def generate_text(model, text, max_length=512):
    inputs = tokenizer(text, return_tensors="pt")
    inputs = tokenizer.build_inputs_for_generation(inputs, max_gen_length=max_length)
    inputs = {key: value.cuda() for key, value in inputs.items()}
    # greedy decode strategy (topk = 1)
    outputs = model.generate(**inputs, max_length=max_length, eos_token_id=tokenizer.eop_token_id, top_k=1)[0].tolist()
    sop_id = tokenizer.sop_token_id
    eop_id = tokenizer.eop_token_id
    end_idx = outputs.index(eop_id) if eop_id in outputs else len(outputs)
    return tokenizer.decode(outputs[outputs.index(sop_id) + 1: end_idx]).strip()


In [32]:
generate_text(model, ['i used to [MASK]', 'he used to [MASK]'])

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50266 for open-end generation.


'" "'

In [14]:
#print(generate_text(model, "Ng is an adjunct professor at [MASK] (formerly associate professor and Director of its Stanford AI Lab or SAIL ). Also a pioneer in online education, Ng co-founded Coursera and deeplearning.ai."))
#print(generate_text(model, 'I used to rule the world. Seas would rise when I gave the word. Now, in the morning, [MASK]'))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50266 for open-end generation.


I will give the word


In [4]:
dataset = load_dataset("adversarial_qa", 'adversarialQA', split="validation")
glue_sst2_prompts = DatasetTemplates('adversarial_qa/adversarialQA')
print("Prompt names:", [prompt.get_name() for prompt in glue_sst2_prompts.templates.values()])
prompt = glue_sst2_prompts["answer_the_following_q"]


Found cached dataset adversarial_qa (C:/Users/thisi/.cache/huggingface/datasets/adversarial_qa/adversarialQA/1.0.0/92356be07b087c5c6a543138757828b8d61ca34de8a87807d40bbc0e6c68f04b)


Prompt names: ['generate_question', 'tell_what_it_is', 'question_context_answer', 'based_on', 'answer_the_following_q']


In [5]:
# sample = dataset[0]
# result = prompt.apply(sample)
# context = result[0] + " Answer: [MASK]"
# print(context)
# generated_text = generate_text(model, context).strip()
# generated_text

# # refer to hw3-tutrorial.pdf for guide on how to do loss. 

Given the following passage

"Another green space in Newcastle is the Town Moor, lying immediately north of the city centre. It is larger than London's famous Hyde Park and Hampstead Heath put together and the freemen of the city have the right to graze cattle on it. The right incidentally extends to the pitch of St. James' Park, Newcastle United Football Club's ground, though this is not exercised, although the Freemen do collect rent for the loss of privilege. Honorary freemen include Bob Geldof, King Harald V of Norway, Bobby Robson, Alan Shearer, the late Nelson Mandela and the Royal Shakespeare Company. The Hoppings funfair, said to be the largest travelling funfair in Europe, is held here annually in June.",

answer the following question. Note that the answer is present within the text.

Question: Where is the Hoppings funfair held? Answer: [MASK]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50266 for open-end generation.


'Newcastle, England.\n\nSource: Wikipedia'

In [6]:
# def calculate_generation_loss(model, texts, targets, max_length=512):
#     inputs = tokenizer(texts, return_tensors="pt")
#     inputs = tokenizer.build_inputs_for_generation(inputs, max_gen_length=max_length)
#     inputs = {key: value.cuda() for key, value in inputs.items()}
#     # greedy decode strategy (topk = 1)
#     outputs = model.generate(**inputs, max_length=max_length, eos_token_id=tokenizer.eop_token_id, top_k=1)[0].tolist()
#     sop_id = tokenizer.sop_token_id
#     eop_id = tokenizer.eop_token_id
#     end_idx = outputs.index(eop_id) if eop_id in outputs else len(outputs)
#     return tokenizer.decode(outputs[outputs.index(sop_id) + 1: end_idx]).strip()


In [14]:
# # currently tryna compute loss. if this dun work try using the other method in the tutorial pdf file.
# # top 3 lines r ur func args for calculate_generation_loss. just pass those into the fnc later.
# texts = [context]
# targets = [sample['answers']['text'][0]]
# max_length=512

# # now tryna make this thing work so we can calc loss. then shld be easy to train alr.
# # u have the targets var above; this needs to be passed into the build_inputs_for_generation. then u can get the loss later.
# inputs = tokenizer(texts, return_tensors="pt", padding=True)
# inputs = tokenizer.build_inputs_for_generation(inputs, max_gen_length=max_length).to('cuda')
# outputs = model(**inputs)
# outputs


RuntimeError: The size of tensor a (192) must match the size of tensor b (703) at non-singleton dimension 1

In [13]:
# parallel attempt to calc loss. for now might only work for one input at a time.
# have no idea if this'll work. but hope it does.
def calculate_generation_loss2(model, text, target, max_length=512):
    gt = generate_text(model, text)
    clp = cond_log_prob(model, [text], [[target, gt]])
    loss = F.cross_entropy(clp, torch.Tensor([0]).long().to('cuda')) # from pytorch docs: This criterion computes the cross entropy loss between input logits and target.
    return loss
calculate_generation_loss2(model, 'I used to rule the [MASK]', 'world') # rmb to pass in the prompt first.

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50266 for open-end generation.


NameError: name 'cond_log_prob' is not defined

In [11]:
def generate_validate(model, dataset, prompt):
    correct = 0
    print(len(dataset))
    for i, sample in enumerate(dataset):
        if i % 1000 == 0:
            print(i, datetime.now())
        result = prompt.apply(sample)
        context = result[0].strip() + " Answer: [MASK]"
        generated_text = generate_text(model, context)
        correct += generated_text == sample['answers']['text'][0]

    print(correct, correct / len(dataset))

In [14]:
generate_validate(model, dataset, prompt)

3000
0 2023-01-03 00:08:33.190440


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50266 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50266 for open-end generation.

KeyboardInterrupt



In [None]:


class ConditionalGenerationDataset(torch.utils.data.Dataset):
    """
    TODO: implement your generation task dataset.
    """
