In [2]:
import json
from transformers import BertTokenizer, BertForSequenceClassification
import torch
from torch.nn.functional import softmax

# Load the pretrained Portuguese BERT tokenizer and sequence-classification model.
# NOTE(review): the output captured for this cell warns that 'classifier.bias' and
# 'classifier.weight' were newly initialized (not present in the checkpoint), so the
# classification head is untrained here; fine-tune before trusting its predictions.
MODEL_NAME = "neuralmind/bert-base-portuguese-cased"
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
model = BertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=5)  # Grades 0 to 4

# Grade a student answer with the BERT sequence classifier
def avaliar_resposta(resposta_aluno, respostas_referencia):
    """Predict a grade class for a student answer paired with reference text.

    The two texts are encoded together as a sequence pair, truncated to at
    most 512 tokens, and passed through the module-level ``model`` with
    gradient tracking disabled. The logits are converted to probabilities
    with a softmax and the position of the largest probability is returned.

    Args:
        resposta_aluno: Text of the student's answer.
        respostas_referencia: Reference answer text to pair with it.

    Returns:
        The index of the most probable class (the first one on ties). The
        module-level model is created with ``num_labels=5``, i.e. 0 to 4.

    NOTE(review): the run output stored with this cell reports the
    classifier weights as newly initialized, so the returned grade is not
    meaningful until the model has been fine-tuned.
    """
    limite_tokens = 512  # maximum number of tokens passed to the tokenizer
    codificado = tokenizer(
        resposta_aluno,
        respostas_referencia,
        padding=True,
        truncation=True,
        max_length=limite_tokens,
        return_tensors="pt",
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**codificado).logits

    probabilidades = softmax(logits, dim=1).squeeze().tolist()
    # Position of the first entry holding the highest probability.
    return max(range(len(probabilidades)), key=probabilidades.__getitem__)

# Load the questions, reference answers and graded student answers
with open("ptbrData.json", "r", encoding="utf-8") as f:
    data = json.load(f)


def _corrigir_questao(questao):
    """Yield one result record per student answer of a single question entry.

    All reference responses of the question are joined with single spaces
    and used as the comparison text for every student answer.
    """
    numero = questao["number_question"]
    referencia = " ".join(
        ref["reference_response"] for ref in questao["reference_responses"]
    )
    for entrada in questao["responses_students"]:
        texto = entrada["answer_question"]
        nota_original = entrada["grade"]
        yield {
            "number_question": numero,
            "answer_question": texto,
            "original_grade": nota_original,
            "bert_grade": avaliar_resposta(texto, referencia),
        }


# Grade every student answer of every question, keeping the input order
output_data = []
for questao in data:
    output_data.extend(_corrigir_questao(questao))

# Write the results to a JSON file
with open("correcao_bert.json", "w", encoding="utf-8") as f:
    json.dump(output_data, f, ensure_ascii=False, indent=4)

print("Correção concluída. Resultados salvos em 'correcao_bert.json'")




Some weights of BertForSequenceClassification were not initialized from the model checkpoint at neuralmind/bert-base-portuguese-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware,

Correção concluída. Resultados salvos em 'correcao_bert.json'
Execute o seguinte comando no terminal para instalar as dependências:
pip install torch transformers
