In [None]:
import torch
import torch.nn as nn
import requests
import io
from transformers import AutoTokenizer, AutoModel

# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
class QAModel(nn.Module):
    """Extractive question-answering heads on top of a pretrained transformer encoder.

    Three linear heads read the encoder's last hidden states:

    * ``start_vector`` scores every position as the start of the answer span,
    * ``end_vector`` scores every position as the end of the answer span,
    * ``classifier`` scores, from the first position only, whether the input
      contains an answer at all.

    The attribute names are part of the checkpoint format (they become the
    ``state_dict`` keys), so they must not be renamed.
    """

    def __init__(self, transformer_model_name="DeepPavlov/rubert-base-cased"):
        """Load the encoder named ``transformer_model_name`` and create the three heads."""
        super().__init__()

        self.transformer = AutoModel.from_pretrained(transformer_model_name)

        hidden_size = self.transformer.config.hidden_size
        self.start_vector = nn.Linear(hidden_size, 1)
        self.end_vector = nn.Linear(hidden_size, 1)

        self.classifier = nn.Linear(hidden_size, 1)

    def forward(self, input_ids, attention_mask):
        """Score answer spans for a batch of tokenized inputs.

        Args:
            input_ids: token ids, passed to the encoder as ``input_ids``.
            attention_mask: passed to the encoder as ``attention_mask``.

        Returns:
            A tuple ``(start_logits, end_logits, has_answer)``:
            ``start_logits`` and ``end_logits`` have shape ``(batch, seq)``;
            ``has_answer`` has shape ``(batch,)`` and lies in ``[0, 1]``
            (it is a sigmoid output).
        """
        transformer_output = self.transformer(input_ids=input_ids, attention_mask=attention_mask)
        hidden_states = transformer_output.last_hidden_state

        # Use the start-of-sequence token (position 0) for the has-answer classification.
        has_answer = torch.sigmoid(self.classifier(hidden_states[:, 0, :])).squeeze(-1)

        start_logits = self.start_vector(hidden_states).squeeze(-1)

        start_pred = torch.argmax(start_logits, dim=-1)

        # As a precaution, zero out the hidden states of every position before the
        # predicted start, so those positions' end logits reduce to the bias of
        # ``end_vector``. The position index is created on the same device as the
        # hidden states, so the module does not depend on any global device variable.
        positions = torch.arange(hidden_states.size(1), device=hidden_states.device)
        mask = positions[None, :] >= start_pred[:, None]
        end_hidden_states = hidden_states * mask[:, :, None]

        end_logits = self.end_vector(end_hidden_states).squeeze(-1)

        return start_logits, end_logits, has_answer

# Build the QA network with its default encoder checkpoint, then place it on the selected device.
model = QAModel()
model = model.to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/714M [00:00<?, ?B/s]

Some weights of the model checkpoint at DeepPavlov/rubert-base-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# Fine-tuned QA weights hosted on the Hugging Face Hub.
weights_url = "https://huggingface.co/AlBeBack/qa-model_with_context/resolve/main/qa-model.pth"

# A timeout makes a stalled connection fail instead of hanging the notebook forever.
response = requests.get(weights_url, timeout=60)
response.raise_for_status()

weights_io = io.BytesIO(response.content)
# The checkpoint comes from the network, so treat it as untrusted input:
# weights_only=True restricts unpickling to tensors and plain containers
# instead of allowing arbitrary pickled objects.
state_dict = torch.load(weights_io, map_location=device, weights_only=True)

model.load_state_dict(state_dict)
# The notebook only runs inference, so put every submodule in evaluation mode explicitly.
model.eval()

tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/rubert-base-cased")

  state_dict = torch.load(weights_io, map_location=device)


tokenizer_config.json:   0%|          | 0.00/24.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/1.65M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



In [None]:
def _extract_span(input_ids, attention_mask, threshold):
    """Run the QA model on one sequence and decode the answer span it predicts.

    Returns ``(answer, score)``. ``score`` is the model's has-answer output;
    ``answer`` is the decoded text, or ``None`` when the score does not reach
    ``threshold`` or the predicted span is empty.
    """
    # Inference only: no need to record the autograd graph.
    with torch.no_grad():
        start_logits, end_logits, has_answer = model(input_ids=input_ids, attention_mask=attention_mask)

    score = has_answer.item()
    # Written as "not >=" so that a NaN score is treated as "no answer".
    if not score >= threshold:
        return None, score

    start_index = torch.argmax(start_logits, dim=-1).item()
    end_index = torch.argmax(end_logits, dim=-1).item()
    if end_index < start_index:
        # Inverted span: there is nothing to decode.
        return None, score

    answer = tokenizer.decode(input_ids[0][start_index:end_index + 1], skip_special_tokens=True)
    if not answer:
        # The span held nothing but special tokens.
        return None, score
    return answer, score


def get_answer(new_data, max_length=512, threshold=0.5):
    """Extract the answer to a question from a context using the global ``model``.

    Relies on the module-level ``model`` and ``tokenizer``.

    Args:
        new_data: mapping with ``'input_ids'``, ``'token_type_ids'`` and
            ``'attention_mask'`` tensors for a single (question, context)
            pair, i.e. batch size 1. Positions whose token type id is 0 are
            treated as the question, all others as the context.
        max_length: maximum number of tokens fed to the model at once.
        threshold: minimum has-answer score for a span to be accepted.

    Returns:
        The decoded answer text, or a fixed "no answer found" message.
        When question + context exceed ``max_length``, the context is scanned
        with half-overlapping windows and the answer with the highest
        has-answer score wins (the earliest window wins ties).

    Raises:
        ValueError: if the question alone leaves no room for context tokens
            and the context therefore cannot be windowed.
    """
    no_answer = 'Ответа на этот вопрос в тексте не нашлось'

    input_ids = new_data['input_ids']
    is_context = new_data['token_type_ids'].bool().squeeze(0)
    context_tokens = input_ids[:, is_context]
    question_tokens = input_ids[:, ~is_context]

    # Tokenizer output lives on the CPU; the model may not. Send inputs to wherever the model is.
    first_param = next(model.parameters(), None)
    model_device = input_ids.device if first_param is None else first_param.device

    context_len = context_tokens.size(1)
    window = max_length - question_tokens.size(1)  # context tokens that fit next to the question

    if context_len <= window:
        answer, _ = _extract_span(
            input_ids.to(model_device),
            new_data['attention_mask'].to(model_device),
            threshold,
        )
        return no_answer if answer is None else answer

    if window < 1:
        raise ValueError(
            f"question has {question_tokens.size(1)} tokens, which leaves no room "
            f"for context within max_length={max_length}"
        )

    # Slide over the context with half-overlapping windows so an answer that
    # straddles a window boundary is still fully contained in some window.
    stride = max(1, window // 2)
    best_answer, best_score = no_answer, 0
    for start in range(0, context_len, stride):
        chunk = context_tokens[:, start:start + window]
        query = torch.cat((question_tokens, chunk), dim=1).to(model_device)

        answer, score = _extract_span(query, torch.ones_like(query), threshold)
        if answer is not None and score > best_score:
            best_answer, best_score = answer, score

        if start + window >= context_len:
            # This window already reached the end of the context.
            break

    return best_answer

In [None]:
# Read a context passage and a question from the user, then print the extracted answer.
new_context = input('Введите контекст: ')
new_question = input('Введите вопрос: ')
# The question is the first segment and the context the second one.
new_data = tokenizer(text=new_question, text_pair=new_context, return_tensors="pt")
print(get_answer(new_data))

In [None]:
# Ask a different question against the already-entered `new_context`.
new_question = input('Введите вопрос: ')
new_data = tokenizer(text=new_question, text_pair=new_context, return_tensors="pt")
print(get_answer(new_data))