In [66]:
from transformers import AutoTokenizer, RobertaForQuestionAnswering
import torch
import json


In [67]:
# Single source of truth for the checkpoint, so the tokenizer and the model
# are always loaded from the same place if the checkpoint is ever swapped.
MODEL_CHECKPOINT = "deepset/roberta-base-squad2"

tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)
model = RobertaForQuestionAnswering.from_pretrained(MODEL_CHECKPOINT)

In [68]:
def print_number_of_trainable_model_parameters(model):
    """Summarise how many of a model's parameters are trainable.

    Despite the name, nothing is printed here: the summary is returned as a
    string and the caller decides how to display it.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and a ``requires_grad`` attribute.

    Returns:
        A three-line string with the trainable parameter count, the total
        parameter count and the trainable share as a percentage with two
        decimals. A model without any parameters reports ``0.00%``.
    """
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_model_params += count
        if param.requires_grad:
            trainable_model_params += count

    # Guard the ratio: a parameter-free model would otherwise raise
    # ZeroDivisionError.
    if all_model_params:
        percentage = 100 * trainable_model_params / all_model_params
    else:
        percentage = 0.0

    return (
        f"trainable model parameters: {trainable_model_params}\n"
        f"all model parameters: {all_model_params}\n"
        f"percentage of trainable model parameters: {percentage:.2f}%"
    )

# The helper only builds the summary string, so print it explicitly to get the
# three lines rendered one below the other.
print(print_number_of_trainable_model_parameters(model))

trainable model parameters: 124056578
all model parameters: 124056578
percentage of trainable model parameters: 100.00%


In [69]:
dataset = "Dados/dataset_SEDS.json"

# `dataset` is only the file path, so len(dataset) would count the characters
# of the path (23) instead of the examples. Parse the file and count its
# top-level entries.
with open(dataset, 'r', encoding='utf-8') as f:
    data = json.load(f)

len(data)

In [83]:
# Location of the JSON file holding the question/context examples.
dataset = "Dados/dataset_SEDS.json"

# Parse the whole file; the handle is closed as soon as the block exits.
with open(dataset, mode='r', encoding='utf-8') as dataset_file:
    data = json.load(dataset_file)

# Pick one example and pull out the two fields used for inference.
index = 8
record = data[index]
Question, Context = record['question'], record['context']

In [84]:
# Human-readable rendering of the context. It is only printed below; the model
# itself receives the raw (question, context) pair through the tokenizer.
prompt = f"""
Context of the Question.

{Context}

"""

# Encode question and context as one sequence pair. Truncating only the
# context keeps the question intact and stops an over-long context from
# exceeding the model's maximum sequence length.
inputs = tokenizer(Question, Context, truncation="only_second", return_tensors="pt")
with torch.no_grad():  # inference only, no gradients needed
    outputs = model(**inputs)

# Most likely start and end token positions of the answer span.
answer_start_index = outputs.start_logits.argmax()
answer_end_index = outputs.end_logits.argmax()

# Slice the predicted span (end position inclusive) and decode it back to text.
predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
resposta = tokenizer.decode(predict_answer_tokens, skip_special_tokens=True)

# 99 dashes: the same separator that joining 100 empty strings with '-' gives,
# written directly.
dash_line = '-' * 99
print(dash_line)
print(f'INPUT PROMPT:\n{prompt}')
print(f'Question:\n{Question}\n')
print(dash_line)
print(f'MODEL ANSWER - ZERO SHOT:\n{resposta}')

---------------------------------------------------------------------------------------------------
INPUT PROMPT:

Context of the Question.

O Dignidade beneficia com R$ 300 mensais pessoas que tenham entre 60 anos e 64 anos 11 meses e 29 dias em situação de pobreza ou de extrema pobreza.


Question:
O que é o Programa Dignidade?

---------------------------------------------------------------------------------------------------
MODEL ANSWER - ZERO SHOT:
 R$ 300 mensais pessoas


In [85]:
# Turn the start/end logits into probability distributions over the token
# positions (softmax along dim=1).
start_probs = outputs.start_logits.softmax(dim=1)
end_probs = outputs.end_logits.softmax(dim=1)

# One-hot encode the predicted start/end positions so they can act as targets.
# NOTE(review): these targets are the model's own argmax predictions, not
# reference answers, so the value printed below reflects how peaked the
# predictions are rather than an error against ground truth -- confirm that
# this is what is intended.
start_one_hot = torch.nn.functional.one_hot(
    answer_start_index.reshape(1), num_classes=start_probs.shape[1]
)
end_one_hot = torch.nn.functional.one_hot(
    answer_end_index.reshape(1), num_classes=end_probs.shape[1]
)

# Binary cross-entropy between each probability vector and its one-hot target
# (averaged over all positions by the default reduction).
start_loss = torch.nn.functional.binary_cross_entropy(
    input=start_probs, target=start_one_hot.float()
)
end_loss = torch.nn.functional.binary_cross_entropy(
    input=end_probs, target=end_one_hot.float()
)

# Total is the sum of the start and end terms; .item() converts the scalar
# tensor into a plain Python float.
total_loss = start_loss + end_loss
total_loss_value = total_loss.item()

# Round to two decimal places for display.
rounded_loss = round(total_loss_value, 2)

print(f'Loss: {rounded_loss}')


Loss: 0.04


In [88]:
def tokenize_function(example):
    """Tokenize a batch of examples into ``input_ids`` and ``labels``.

    Uses the module-level ``tokenizer``.

    Args:
        example: Mapping of column name to a list of values (one batch). The
            ``"question"`` and ``"context"`` columns are read.

    Returns:
        The same mapping, with ``input_ids`` (the tokenized prompts built
        from the questions) and ``labels`` (the tokenized contexts) added.
    """
    start_prompt = 'Answer the question.\n\n'
    end_prompt = '\n\nAnswer: '
    # The loop variable has to be the name used in the expression; iterating
    # with a differently named variable left `question` undefined (NameError).
    prompt = [start_prompt + question + end_prompt for question in example["question"]]
    example['input_ids'] = tokenizer(prompt, padding="max_length", truncation=True, return_tensors="pt").input_ids
    # NOTE(review): token ids of the context are sequence-to-sequence style
    # labels; an extractive QA head is normally trained with start/end
    # positions instead -- confirm before using these labels for training.
    example['labels'] = tokenizer(example["context"], padding="max_length", truncation=True, return_tensors="pt").input_ids

    return example

# `dataset` is only the path string, so it cannot be called. Build a columnar
# batch from the parsed records in `data` and tokenize it directly.
batch = {
    'question': [record['question'] for record in data],
    'context': [record['context'] for record in data],
}
tokenized_datasets = tokenize_function(batch)

# Keep only the model-facing columns. This stands in for the earlier
# remove_columns call, whose column names ('id', 'topic', 'dialogue',
# 'summary') are not fields this notebook reads from the data.
tokenized_datasets = {key: tokenized_datasets[key] for key in ('input_ids', 'labels')}