In [1]:
!nvidia-smi

Wed May 29 06:28:29 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   45C    P8              10W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

# Импорт библиотек

In [2]:
from transformers import AutoModelForQuestionAnswering
from transformers import AutoTokenizer
from transformers import pipeline
from datasets import load_dataset
import torch
from google.colab import drive
import string, re
import numpy as np

In [3]:
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
DEVICE

'cuda'

## Подключение Google Drive

In [4]:
drive.mount('/content/drive')

Mounted at /content/drive


# Загрузка моделей для сравнения

In [5]:
%%time

# кортежи (название модели, реализована на TensorFlow)
model_names = [
    ('/content/drive/MyDrive/PBOA_ruBERT_QA_1_epoch',    False),
    ('/content/drive/MyDrive/PBOA_ruBERT_QA_2_epoch',    False),
    ('/content/drive/MyDrive/PBOA_ruELECTRA_QA_1_epoch', False),
    ('/content/drive/MyDrive/PBOA_ruELECTRA_QA_2_epoch', False),
    ('/content/drive/MyDrive/ruBERT_QA_1_epoch',         False),
    ('/content/drive/MyDrive/ruBERT_QA_2_epoch',         False),
    ('/content/drive/MyDrive/ruBERT_QA_3_epoch',         False),
    ('/content/drive/MyDrive/ruT5_QA_1_epoch',           False),
    ('/content/drive/MyDrive/ruT5_QA_2_epoch',           False),
    ('/content/drive/MyDrive/ruELECTRA_QA_1_epoch',      False),
    ('/content/drive/MyDrive/ruELECTRA_QA_2_epoch',      False),
    ('/content/drive/MyDrive/ruELECTRA_QA_3_epoch',      False),
    ('/content/drive/MyDrive/ruELECTRA_QA_4_epoch',      False),
    ('/content/drive/MyDrive/ruELECTRA_QA_5_epoch',      False),
    ('AndrewChar/model-QA-5-epoch-RU',                   True ),
    ('KirrAno93/rubert-base-cased-finetuned-squad',      False),
    ('IooHooI/my_awesome_qa_model',                      False),
    ('Silxxor/qa_model',                                 False)
]

tokenizers = []
models = []
pipelines = []

# Загрузка всех моделей из списка
for model_name, from_tf in model_names:
    print(model_name)
    tokenizers.append(AutoTokenizer.from_pretrained(model_name))
    models.append(AutoModelForQuestionAnswering.from_pretrained(model_name, from_tf=from_tf).to(DEVICE))
    pipelines.append(pipeline('question-answering', model=models[-1], tokenizer=tokenizers[-1]))

CPU times: user 1.1 s, sys: 1.94 s, total: 3.04 s
Wall time: 19.2 s


# Загрузка набора данных и метрик

In [6]:
sberquad = load_dataset('sberquad')
sberquad

Downloading builder script:   0%|          | 0.00/4.22k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/4.96k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/5.84M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.93M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.73M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/45328 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/5036 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/23936 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 45328
    })
    validation: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 5036
    })
    test: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 23936
    })
})

In [7]:
# Нормализация текста: убрать лишние пробелы и знаки пунктуации
def normalize_text(s):
    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        for exclude in string.punctuation:
            text = text.replace(exclude, ' ')
        return text

    return white_space_fix(remove_punc(s.lower()))

# Метрика EM для текста
def compute_exact_match(prediction, truth):
    return int(normalize_text(prediction) == normalize_text(truth))

# Метрика F1 для текста
def compute_f1(prediction, truth):
    pred_tokens = normalize_text(prediction).split()
    truth_tokens = normalize_text(truth).split()

    if len(pred_tokens) == 0 or len(truth_tokens) == 0:
        return int(pred_tokens == truth_tokens)

    common_tokens = set(pred_tokens) & set(truth_tokens)

    if len(common_tokens) == 0:
        return 0

    precision = len(common_tokens) / len(pred_tokens)
    recall = len(common_tokens) / len(truth_tokens)

    return 2 * (precision * recall) / (precision + recall)

In [8]:
# Данные из SberQuAD для валидации
validation_questions = sberquad['validation']['question']
validation_contexts = sberquad['validation']['context']
validation_answers = [ans['text'][0] for ans
                      in sberquad['validation']['answers']]

# Сравнение моделей

In [9]:
%%time

# Предсказания моделей
predicted_answers = [qa_pipeline(question=validation_questions,
                                 context=validation_contexts)
                     for qa_pipeline in pipelines]

CPU times: user 2min 30s, sys: 360 ms, total: 2min 30s
Wall time: 2min 33s


In [10]:
# Значения метрики EM у всех моделей
em_results = [[compute_exact_match(pred['answer'], truth)
               for pred, truth in zip(predicted, validation_answers)]
              for predicted in predicted_answers]

# Значения метрики F1 у всех моделей
f1_results = [[compute_f1(pred['answer'], truth)
               for pred, truth in zip(predicted, validation_answers)]
              for predicted in predicted_answers]

In [11]:
for (name, _), em, f1 in zip(model_names, em_results, f1_results):
    only_name = name.split('/')[-1]
    r_em = np.mean(em)
    r_f1 = np.mean(f1)
    print(f'{only_name:<35} | {r_em:<5} | {r_f1:<5}')

PBOA_ruBERT_QA_1_epoch              | 0.6340349483717236 | 0.8279802219062723
PBOA_ruBERT_QA_2_epoch              | 0.6221207307386815 | 0.8192004605987167
PBOA_ruELECTRA_QA_1_epoch           | 0.0017871326449563 | 0.06057979002981966
PBOA_ruELECTRA_QA_2_epoch           | 0.0023828435266084 | 0.07666543694697722
ruBERT_QA_1_epoch                   | 0.5992851469420175 | 0.7966346399312619
ruBERT_QA_2_epoch                   | 0.5806195393169182 | 0.7876980827327227
ruBERT_QA_3_epoch                   | 0.5773709293089754 | 0.7866356772708364
ruT5_QA_1_epoch                     | 0.5716838760921367 | 0.7837050514319427
ruT5_QA_2_epoch                     | 0.6084193804606831 | 0.8138671724431853
ruELECTRA_QA_1_epoch                | 0.2960683081810961 | 0.5101738295607546
ruELECTRA_QA_2_epoch                | 0.3234710087370929 | 0.541623642076742
ruELECTRA_QA_3_epoch                | 0.3262509928514694 | 0.5470222396760291
ruELECTRA_QA_4_epoch                | 0.3210881652104845 | 0.54