## Import

In [1]:
%matplotlib inline
import matplotlib
import random
import wget
import os
import pathlib
import torch
import datetime
import time
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from IPython.display import HTML
from pathlib import Path
from enum import Enum
from tqdm import tqdm
from functools import partial
from shutil import rmtree
from razdel import tokenize
from datasets import (
    load_metric,
    Dataset,
    DatasetDict
)
from sklearn.metrics import (
    f1_score,
    matthews_corrcoef
)
from torch.optim import AdamW
from torch.utils.data import Dataset as TorchDataset
from torch.utils.data import (
    random_split,
    TensorDataset,
    DataLoader,
    RandomSampler,
    SequentialSampler
)
from transformers import (
    AutoTokenizer,
    BertTokenizer,
    T5Tokenizer,
    AutoModelForMaskedLM,
    BertForSequenceClassification,
    T5ForConditionalGeneration,
    AutoModel,
    AutoModelForCausalLM,
    get_linear_schedule_with_warmup,
    DataCollatorForSeq2Seq,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer
)

tqdm.pandas()

print('Import is done.')

  from .autonotebook import tqdm as notebook_tqdm


Import is done.


## Configuration

In [2]:
class Configurator:
    def __init__(self) -> None:
        self._params = {
            'random': {'seed': 42},
            
            'pretrained': {
                'path': {
                    'bert': 'ai-forever/ruBert-base',
                    'gpt': 'ai-forever/rugpt3large_based_on_gpt2',
                    't5': 'ai-forever/ruT5-base',
                }
            },
            
            'optimizer': {
                'lr': {
                    'bert': 2e-5,
                    't5': 2e-5
                },
                'eps': {
                    'bert': 1e-8,
                    't5': 1e-8
                }
            },
            
            'dataset': {
                'url': {
                    'train': 'https://github.com/RussianNLP/RuCoLA/blob/main/data/in_domain_train.csv?raw=true',
                    'test': 'https://github.com/RussianNLP/RuCoLA/blob/main/data/in_domain_dev.csv?raw=true'
                },
                'path': {
                    'train': './train_dataset.csv',
                    'test': './test_dataset.csv'
                },
                'name': {
                    'train': 'TRAIN',
                    'test': 'TEST'
                },
                'usecols': [1, 2]
            },
            
            'train': {
                'size': 0.9,
                'epochs': {
                    'bert': 1,
                    't5': 2
                }
            },
            
            'batch-size': 32
        }
    
    def __call__(self, *args, **kwargs):
        param_name = args[0]
        path = param_name.split('.')
        return self._get_next(path, self._params, param_name)
        
    def _get_next(self, path, params, param_name):
        result = None
        if isinstance(params, dict) and len(path) > 0:
            key = path.pop(0)
            if len(path) == 0:
                if key in params:
                    result = params[key]
            else:
                return self._get_next(path, params[key], param_name)
        if result is None:
            print(f'Bad param name: {param_name}')
        return result
        
conf = Configurator()

print('Configuration block is done.')

Configuration block is done.


## Set random seed

In [3]:
random.seed(conf('random.seed'))
np.random.seed(conf('random.seed'))
torch.manual_seed(conf('random.seed'))
torch.cuda.manual_seed(conf('random.seed'))

print('Random seed is set.')

Random seed is set.


## Initialize result

In [4]:
total_result = {
    'bert': None,
    'gpt-zero-shot': None,
    'gpt-few-shot-3': None,
    'gpt-few-shot-5': None,
    'gpt-few-shot-10': None,
    't5': None
}

print('Total result is inizialized.')

Total result is inizialized.


## Define device

In [5]:
if torch.cuda.is_available():
    print(f'There are {torch.cuda.device_count()} GPU(s) available.')
    print(f'We will use the GPU: {torch.cuda.get_device_name(0)}')
    device = torch.device('cuda')
else:
    print('No GPU available, using the GPU instead.')
    device = torch.device('cpu')
    
print('Device is defined.')

There are 1 GPU(s) available.
We will use the GPU: NVIDIA GeForce RTX 3080 Laptop GPU
Device is defined.


## Models & tokenizers & optimizers creation

In [6]:
bert_tokenizer = BertTokenizer.from_pretrained(conf('pretrained.path.bert'))
bert_model = BertForSequenceClassification.from_pretrained(
    conf('pretrained.path.bert'),
    num_labels=2,
    output_attentions=False,
    output_hidden_states=False
)
bert_optimizer = AdamW(bert_model.parameters(), lr=conf('optimizer.lr.bert'), eps=conf('optimizer.eps.bert'))

gpt_tokenizer = AutoTokenizer.from_pretrained(conf('pretrained.path.gpt'))
gpt_model = AutoModelForCausalLM.from_pretrained(conf('pretrained.path.gpt'))

t5_tokenizer = T5Tokenizer.from_pretrained(conf('pretrained.path.t5'), use_fast=False)
t5_model = T5ForConditionalGeneration.from_pretrained(conf('pretrained.path.t5'))
t5_optimizer = AdamW(t5_model.parameters(), lr=conf('optimizer.lr.t5'), eps=conf('optimizer.eps.t5'))

print('Models, tokenizers, optimizers are created.')

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ai-forever/ruBert-base and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
You are using the legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This means that tokens that come after special tokens will not be properly handled. We recommend you to read the related pull request available at https://github.com/huggingface/transformers/pull/24565


Models, tokenizers, optimizers are created.


## Downloading datasets on disk

In [7]:

def load_dataset(url: str, path: str, name: str):
    if os.path.exists(path):
        print('Dataset "' + name + '" is already downloaded.')
    else:
        wget.download(url, path)
        print(' Dataset "' + name + '" is downloaded.')
        
load_dataset(conf('dataset.url.train'), conf('dataset.path.train'), conf('dataset.name.train'))
load_dataset(conf('dataset.url.test'), conf('dataset.path.test'), conf('dataset.name.test'))

print('Datasets are downloaded.')

Dataset "TRAIN" is already downloaded.
Dataset "TEST" is already downloaded.
Datasets are downloaded.


## Loading dataframes from disk

In [8]:
train_eval_dataframe = pd.read_csv(conf('dataset.path.train'), usecols=conf('dataset.usecols'))
test_dataframe = pd.read_csv(conf('dataset.path.test'), usecols=conf('dataset.usecols'))

random_index = train_eval_dataframe.sample(frac=conf('train.size'), random_state=conf('random.seed')).index
eval_dataframe = train_eval_dataframe[~train_eval_dataframe.index.isin(random_index)]
train_dataframe = train_eval_dataframe[train_eval_dataframe.index.isin(random_index)]

print('Dataframes are loaded.')
print(f'Train dataframe size: {len(train_dataframe)}')
print(f'Evaluate dataframe size: {len(eval_dataframe)}')
print(f'Test dataframe size: {len(test_dataframe)}')

Dataframes are loaded.
Train dataframe size: 7082
Evaluate dataframe size: 787
Test dataframe size: 983


## Define max_length

In [9]:

def define_raw_max_length_by_bert(sentences, raw_max_length):
    for sentence in sentences:
        input_ids = bert_tokenizer.encode(sentence, add_special_tokens=True)
        raw_max_length = max(raw_max_length, len(input_ids))    
    return raw_max_length

def define_max_length(raw_max_length, threshold):
    return threshold if threshold >= raw_max_length else define_max_length(raw_max_length, threshold * 2)

raw_max_length = define_raw_max_length_by_bert(train_dataframe.sentence.values, 0)
raw_max_length = define_raw_max_length_by_bert(eval_dataframe.sentence.values, raw_max_length)
raw_max_length = define_raw_max_length_by_bert(test_dataframe.sentence.values, raw_max_length)

max_length = define_max_length(raw_max_length, 1)

print('Maximum length of sentences is defined.')
print(f'Raw max_length is {raw_max_length}')
print(f'Binary-based max_length is {max_length}')

Maximum length of sentences is defined.
Raw max_length is 45
Binary-based max_length is 64


## Datasets & dataloaders creation

In [10]:

class T5TestDataset(TorchDataset):
    def __init__(self, text, tokenizer, length, device):
        self._text = text.reset_index(drop=True)
        self._tokenizer = tokenizer
        self._length = length
        self._device = device

    def __len__(self):
        return self._text.shape[0]

    def __getitem__(self, item):
        output = self._tokenize(self._text[item])
        return {k: v.reshape(-1).to(self._device) for k, v in output.items()}

    def _tokenize(self, text):
        return self._tokenizer(text,
                               return_tensors='pt',
                               padding='max_length',
                               truncation=True,
                               max_length=self._length)


class T5TrainDataset(TorchDataset):
    POS_LABEL = 'верно'
    NEG_LABEL = 'неверно'

    def __init__(self, text, label, tokenizer, length, device):
        self._text = text.reset_index(drop=True)
        self._label = label.reset_index(drop=True)
        self._tokenizer = tokenizer
        self._length = length
        self._device = device

    def __len__(self):
        return self._label.shape[0]

    def __getitem__(self, item):
        output = self._tokenize(self._text[item], self._length)
        output = {k: v.reshape(-1).to(self._device) for k, v in output.items()}

        label = self.POS_LABEL if self._label[item] == 1 else self.NEG_LABEL
        label = self._tokenize(label, length=2).input_ids.reshape(-1).to(self._device)

        output.update({'labels': label})
        return output

    def _tokenize(self, text, length):
        return self._tokenizer(text,
                               return_tensors='pt',
                               padding='max_length',
                               truncation=True,
                               max_length=length)

    
def create_bert_dataset(sentences, acceptables, max_length):
    input_ids = []
    attention_masks = []
    for sentence in sentences:
        encoded_dict = bert_tokenizer.encode_plus(
            sentence,
            add_special_tokens=True,
            max_length=max_length,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt'
        )
        input_ids.append(encoded_dict['input_ids'])
        attention_masks.append(encoded_dict['attention_mask'])
    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)
    acceptables = torch.tensor(acceptables)

    return TensorDataset(input_ids, attention_masks, acceptables)

test_acceptable = test_dataframe.acceptable
print(f'Positive samples: {test_acceptable.sum()} of {len(test_acceptable)} ({100.0*test_acceptable.sum()/len(test_acceptable):.2f}%)')

train_sentences = train_dataframe.sentence.values
train_acceptables = train_dataframe.acceptable.values
eval_sentences = eval_dataframe.sentence.values
eval_acceptables = eval_dataframe.acceptable.values
test_sentences = test_dataframe.sentence.values
test_acceptables = test_dataframe.acceptable.values

bert_train_dataset = create_bert_dataset(train_sentences, train_acceptables, max_length)
bert_eval_dataset = create_bert_dataset(eval_sentences, eval_acceptables, max_length)
bert_test_dataset = create_bert_dataset(test_sentences, test_acceptables, max_length)

t5_train_dataset = T5TrainDataset(train_dataframe['sentence'], train_dataframe['acceptable'], t5_tokenizer, max_length, device)
t5_eval_dataset = T5TrainDataset(eval_dataframe['sentence'], eval_dataframe['acceptable'], t5_tokenizer, max_length, device)
t5_test_dataset = T5TestDataset(test_dataframe['sentence'], t5_tokenizer, max_length, device)

bert_train_dataloader = DataLoader(
    bert_train_dataset,
    sampler=RandomSampler(bert_train_dataset),
    batch_size=conf('batch-size')
)
bert_eval_dataloader = DataLoader(
    bert_eval_dataset,
    sampler=SequentialSampler(bert_eval_dataset),
    batch_size=conf('batch-size')
)
bert_test_dataloader = DataLoader(
    bert_test_dataset,
    sampler=SequentialSampler(bert_test_dataset),
    batch_size=conf('batch-size')
)

t5_train_dataloader = DataLoader(t5_train_dataset, batch_size=conf('batch-size'), shuffle=True)
t5_eval_dataloader = DataLoader(t5_eval_dataset, batch_size=conf('batch-size'))
t5_test_dataloader = DataLoader(t5_test_dataset, batch_size=conf('batch-size'))

print('BERT & T5 dataloaders are created.')

Positive samples: 733 of 983 (74.57%)
BERT & T5 dataloaders are created.


## Schedulers creation

In [11]:

bert_scheduler = get_linear_schedule_with_warmup(
    bert_optimizer,
    num_warmup_steps=0,
    num_training_steps=len(bert_train_dataloader) * conf('train.epochs.bert')
)

t5_scheduler = get_linear_schedule_with_warmup(
    t5_optimizer,
    num_warmup_steps=0,
    num_training_steps=len(t5_train_dataloader) * conf('train.epochs.t5')
)

print('BERT & T5 schedulers are created')

BERT & T5 schedulers are created


## BERT training

In [12]:
def flat_accuracy_bert(predictions, labels):
    predictions_flat = np.argmax(predictions, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(predictions_flat == labels_flat) / len(labels_flat)

def format_time_bert(elapsed):
    return str(datetime.timedelta(seconds=int(round(elapsed))))

def log_step(step: int, offset: int, length: int, t0):
    if ((step + 1) % offset == 0 and not step == 0) or (step == length - 1):
        elapsed = format_time_bert(time.time() - t0)
        print(f'\tBatch {step+1} of {len(bert_train_dataloader)}, elapsed: {elapsed}')    

def extract_from_batch(batch, device) -> tuple:
    b_input_ids = batch[0].to(device)
    b_input_mask = batch[1].to(device)
    b_labels = batch[2].to(device)
    return b_input_ids, b_input_mask, b_labels

bert_model.cuda()

training_stats = []
total_t0 = time.time()
train_dataloader_length = len(bert_train_dataloader)
val_dataloader_length = len(bert_eval_dataloader)

epochs = conf('train.epochs.bert')
for epoch_i in range(0, epochs):
    print(f'\n======= Epoch {epoch_i + 1} / {epochs} =======\n')
    print('Training...')
    
    t0 = time.time()
    total_train_loss = 0
    bert_model.train()
        
    for step, batch in enumerate(bert_train_dataloader):
        b_input_ids, b_input_mask, b_labels = extract_from_batch(batch, device)
        bert_model.zero_grad()
        
        res = bert_model(
            b_input_ids,
            token_type_ids=None,
            attention_mask=b_input_mask,
            labels=b_labels
        )
        loss = res['loss']

        total_train_loss += loss.item()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(bert_model.parameters(), 1.0)

        bert_optimizer.step()
        bert_scheduler.step()
        
        log_step(step, 40, train_dataloader_length, t0)
    
    avg_train_loss = total_train_loss / train_dataloader_length
    train_time = format_time_bert(time.time() - t0)
    print(f'\n\tAverage training loss: {avg_train_loss}\n\tTraining epcoh took: {train_time}')

    print('\nRunning validation...')
    t0 = time.time()
    total_eval_accuracy = 0
    total_eval_loss = 0
    
    bert_model.eval()
    
    for batch in bert_eval_dataloader:
        b_input_ids, b_input_mask, b_labels = extract_from_batch(batch, device)
        
        with torch.no_grad():
            res = bert_model(
                b_input_ids,
                token_type_ids=None,
                attention_mask=b_input_mask,
                labels=b_labels
            )
        loss = res['loss']
        logits = res['logits']
        total_eval_loss += loss.item()

        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        total_eval_accuracy += flat_accuracy_bert(logits, label_ids)
        
    avg_val_accuracy = total_eval_accuracy / val_dataloader_length
    avg_val_loss = total_eval_loss / val_dataloader_length
    val_time = format_time_bert(time.time() - t0)
    print(f'\n\tAccuracy: {avg_val_accuracy}')
    print(f'\tValidation loss: {avg_val_loss}')
    print(f'\tValidation took: {val_time}')

print('\nTrainig complete!')
print(f'Total trainig took: {format_time_bert(time.time() - total_t0)}')



Training...
	Batch 40 of 222, elapsed: 0:00:10
	Batch 80 of 222, elapsed: 0:00:18
	Batch 120 of 222, elapsed: 0:00:26
	Batch 160 of 222, elapsed: 0:00:34
	Batch 200 of 222, elapsed: 0:00:43
	Batch 222 of 222, elapsed: 0:00:47

	Average training loss: 0.5359252442916235
	Training epcoh took: 0:00:47

Running validation...

	Accuracy: 0.76875
	Validation loss: 0.5093031179904938
	Validation took: 0:00:01

Trainig complete!
Total trainig took: 0:00:49


## BERT  testing

In [13]:

bert_model.eval()
predictions, true_labels = [], []

for batch in bert_test_dataloader:
    b_input_ids, b_input_mask, b_labels = extract_from_batch(batch, device)

    with torch.no_grad():
        outputs = bert_model(
            b_input_ids,
            token_type_ids=None,
            attention_mask=b_input_mask
        )
    logits = outputs[0]
    logits = logits.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()

    predictions.append(logits)
    true_labels.append(label_ids)

flat_predictions = np.concatenate(predictions, axis=0)
flat_predictions = np.argmax(flat_predictions, axis=1).flatten()
flat_true_labels = np.concatenate(true_labels, axis=0)

f1 = f1_score(flat_true_labels, flat_predictions)
total_result['bert'] = f1
print(f'BERT is tested, F1-score: {f1:.3f}')

BERT is tested, F1-score: 0.867


## GPT preparation

In [14]:

def calc_gpt_loss(text):
    inputs = gpt_tokenizer.encode(text, return_tensors='pt').reshape(-1).to(device)
    with torch.no_grad():
        loss = gpt_model(input_ids=inputs, labels=inputs).loss.item()
    return loss

def shot_gpt(begin: str, text: str, positive_statement: str, negative_statement: str):
    positive_loss = calc_gpt_loss(' '.join([begin, text, positive_statement]))
    negative_loss = calc_gpt_loss(' '.join([begin, text, negative_statement]))

    return 1 if positive_loss > negative_loss else 0

gpt_model.to(device)

print('GPT preparation is done.')

GPT preparation is done.


## GPT zero shot

In [15]:

tasks = [
    {
        'begin': 'Если ли здесь ошибка?',
        'positive_statement': 'Нет.',
        'negative_statement': 'Есть.'
    },
    {
        'begin': 'Если ли здесь ошибка?',
        'positive_statement': 'Отсутствует.',
        'negative_statement': 'Присутствует.'
    },
    {
        'begin': 'Если ли здесь ошибка?',
        'positive_statement': 'Предложение правильное.',
        'negative_statement': 'Допущена ошибка.'
    },
    {
        'begin': 'Проверь на ошибки.',
        'positive_statement': 'Нет',
        'negative_statement': 'Есть.'
    },
    {
        'begin': 'Проверь на ошибки.',
        'positive_statement': 'Отсутствуют.',
        'negative_statement': 'Присутствуют.'
    }
]

best_f1 = 0.0
for task in tasks:
    print(f'Begin: {task["begin"]}, positive_statement: {task["positive_statement"]}, negative_statement: {task["negative_statement"]}')
    progress_function = lambda text: shot_gpt(task['begin'], text, task['positive_statement'], task['negative_statement'])
    y_pred = test_dataframe['sentence'].progress_apply(progress_function)
    f1 = f1_score(y_pred, test_dataframe["acceptable"])
    best_f1 = max(best_f1, f1)
    print(f'F1-score: {f1}\n')
    
total_result['gpt-zero-shot'] = best_f1
print(f'\nGPT zero-shot is done, best F1-score: {best_f1:.3f}')

Begin: Если ли здесь ошибка?, positive_statement: Нет., negative_statement: Есть.


100%|████████████████████████████████████████████████████████████████████████████████| 983/983 [00:45<00:00, 21.61it/s]


F1-score: 0.01349527665317139

Begin: Если ли здесь ошибка?, positive_statement: Отсутствует., negative_statement: Присутствует.


100%|████████████████████████████████████████████████████████████████████████████████| 983/983 [00:45<00:00, 21.60it/s]


F1-score: 0.13184079601990048

Begin: Если ли здесь ошибка?, positive_statement: Предложение правильное., negative_statement: Допущена ошибка.


100%|████████████████████████████████████████████████████████████████████████████████| 983/983 [00:45<00:00, 21.51it/s]


F1-score: 0.8543123543123543

Begin: Проверь на ошибки., positive_statement: Нет, negative_statement: Есть.


100%|████████████████████████████████████████████████████████████████████████████████| 983/983 [00:45<00:00, 21.65it/s]


F1-score: 0.14390243902439026

Begin: Проверь на ошибки., positive_statement: Отсутствуют., negative_statement: Присутствуют.


100%|████████████████████████████████████████████████████████████████████████████████| 983/983 [00:45<00:00, 21.43it/s]

F1-score: 0.19437939110070257


GPT zero-shot is done, best F1-score: 0.854





## GPT few shot (3)

In [16]:

promt = """Проверить корректность предложения:
Иван вчера не позвонил. => Верно
Лесные запахи набегали волнами; в них смешалось дыхание можжевельника, вереска, брусники. => Верно
У многих туристов, кто посещают Кемер весной, есть шанс застать снег на вершине горы Тахталы и даже сочетать пляжный отдых с горнолыжным. => Неверно
"""

y_pred = test_dataframe['sentence'].progress_apply(lambda text: shot_gpt(promt, text, ' => Верно', ' => Неверно'))
f1 = f1_score(y_pred, test_dataframe["acceptable"])
total_result['gpt-few-shot-3'] = f1
print(f'GPT few-shot(3) is done, F1-score: {f1:.3f}')


100%|████████████████████████████████████████████████████████████████████████████████| 983/983 [01:11<00:00, 13.84it/s]

GPT few-shot(3) is done, F1-score: 0.854





## GPT few shot (5)

In [17]:

promt = """Проверить корректность предложения:
Вчера президент имел неофициальную беседу с английским послом. => Верно
А ты ехай прямо к директору театров, князю Гагарину. => Неверно
Коллега так и не признал вину за катастрофу перед коллективом. => Верно
Малыш уже уверенно читает слова через мягкий знак. => Неверно
Я говорил с ним только ради Вас. => Верно
"""

y_pred = test_dataframe['sentence'].progress_apply(lambda text: shot_gpt(promt, text, ' => Верно', ' => Неверно'))
f1 = f1_score(y_pred, test_dataframe["acceptable"])
total_result['gpt-few-shot-5'] = f1
print(f'GPT few-shot(5) is done, F1-score: {f1:.3f}')

100%|████████████████████████████████████████████████████████████████████████████████| 983/983 [01:11<00:00, 13.80it/s]

GPT few-shot(5) is done, F1-score: 0.854





## GPT few shot (10)

In [18]:

promt = """Проверить корректность предложения:
Только бы он громко не закричал, когда найдет решение. => Верно
Но Коле не помог его иноверец. => Неверно
Гармоничные пропорции здания основаны на классических образцах. => Верно
Вчера мне нужно ехать на завод. => Неверно
Дело приняло дурной оборот. => Верно
Я стою поблизости от Центризбиркома и наблюдаю за тем, что происходит у входа. => Неверно
Вани не было в школе. => Верно
В качестве примеров были приведены случаи, о которых, кажется, что где-то я уже читал. => Неверно
Кожа у виска была желтой. => Верно
В эту минуту роман был прочитан. => Неверно
"""

y_pred = test_dataframe['sentence'].progress_apply(lambda text: shot_gpt(promt, text, ' => Верно', ' => Неверно'))
f1 = f1_score(y_pred, test_dataframe["acceptable"])
total_result['gpt-few-shot-10'] = f1
print(f'GPT few-shot(10) is done, F1-score: {f1:.3f}')


100%|████████████████████████████████████████████████████████████████████████████████| 983/983 [01:48<00:00,  9.05it/s]

GPT few-shot(10) is done, F1-score: 0.854





## T5 training

In [19]:

t5_model.to(device)
n_epochs = conf('train.epochs.t5')

dl_length = len(t5_train_dataloader)
for epoch in range(n_epochs):
    print(f'\n======= Epoch {epoch + 1} / {n_epochs} =======\n')

    print('Training...\n')
    t5_model.train()
    
    for batch_id, batch in enumerate(t5_train_dataloader):
        outputs = t5_model(**batch)
        loss = outputs.loss
        loss.backward()
        t5_optimizer.step()
        t5_scheduler.step()
        t5_optimizer.zero_grad()
        if (((batch_id + 1) % 10 == 0) and not batch_id == 0) or (batch_id == dl_length - 1):
            print(f'\t\tBatch {batch_id+1} of {dl_length}, loss : {loss.item():.3f}')
        
    print('Validation...')
    t5_model.eval()
    
    with torch.no_grad():
        eval_loss = [t5_model(**batch).loss.item() for batch in t5_eval_dataloader]
        
    print(f'\tValidation loss: {np.sum(eval_loss)/len(eval_loss)}')
    
print('Trainig complete!')



Training...

		Batch 10 of 222, loss : 5.728
		Batch 20 of 222, loss : 2.136
		Batch 30 of 222, loss : 1.363
		Batch 40 of 222, loss : 0.833
		Batch 50 of 222, loss : 0.582
		Batch 60 of 222, loss : 0.552
		Batch 70 of 222, loss : 0.529
		Batch 80 of 222, loss : 0.374
		Batch 90 of 222, loss : 0.644
		Batch 100 of 222, loss : 0.402
		Batch 110 of 222, loss : 0.395
		Batch 120 of 222, loss : 0.370
		Batch 130 of 222, loss : 0.294
		Batch 140 of 222, loss : 0.499
		Batch 150 of 222, loss : 0.300
		Batch 160 of 222, loss : 0.430
		Batch 170 of 222, loss : 0.437
		Batch 180 of 222, loss : 0.233
		Batch 190 of 222, loss : 0.542
		Batch 200 of 222, loss : 0.252
		Batch 210 of 222, loss : 0.401
		Batch 220 of 222, loss : 0.194
		Batch 222 of 222, loss : 0.191
Validation...
	Validation loss: 0.2917381364107132


Training...

		Batch 10 of 222, loss : 0.354
		Batch 20 of 222, loss : 0.500
		Batch 30 of 222, loss : 0.333
		Batch 40 of 222, loss : 0.327
		Batch 50 of 222, loss : 0.284
		Batch 6

## T5 testing

In [20]:

pos_label = t5_tokenizer(T5TrainDataset.POS_LABEL,
                         return_tensors='pt',
                         padding='max_length',
                         truncation=True,
                         max_length=2)['input_ids'][0][0].item()

t5_model.eval()

result = np.array([])
for batch in t5_test_dataloader:
    tokens = t5_model.generate(**batch)
    tokens = [1 if pos_label in token else 0 for token in tokens]
    result = np.hstack([result, tokens])

f1 = f1_score(result, test_dataframe["acceptable"])
total_result['t5'] = f1

print(f'T5 testing is done, F1-score: {f1:.3f}')



T5 testing is done, F1-score: 0.854


## Result

In [21]:
data = [[k, v] for k, v in total_result.items()]

df = pd.DataFrame(data, columns=['Type', 'F1'])
HTML(df.to_html(index=False))

Type,F1
bert,0.86731
gpt-zero-shot,0.854312
gpt-few-shot-3,0.854312
gpt-few-shot-5,0.854312
gpt-few-shot-10,0.854312
t5,0.854142
