In [4]:
import evaluate
import pandas as pd
import numpy as np

from datasets import Dataset
from sklearn.model_selection import train_test_split
from transformers import (AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, EarlyStoppingCallback)

DATASET_NAME = "./datasets/ru-plus.csv"
TEST_SIZE = 0.2
MODEL_NAME = "ai-forever/sbert_large_nlu_ru"
SAVE_DIRECTORY = "./models/sbert_plus_multi"
OUTPUT_LOG_NAME = "./output/sbert_plus_multi"

THRESHOLD = 0.9

df = pd.read_csv(DATASET_NAME, delimiter="|")
df.columns = ["text", "label"]
df_label_0 = df[df["label"] == 0]
df_label_1 = df[df["label"] == 1]
multi_class = []
for i in range(50):
    row_0 = df_label_0.sample(n=1, random_state=np.random.RandomState())
    row_1 = df_label_1.sample(n=1, random_state=np.random.RandomState())
    text = f"{row_0['text'].values[0]}, {row_1['text'].values[0].lower()}"
    multi_class.append({'text': text, 'label': 3})

df = pd.concat([df, pd.DataFrame(multi_class)], ignore_index=True)
df['label'] = df['label'].map({0: [1, 0, 0], 1: [0, 1, 0], 2: [0, 0, 1], 3: [1, 1, 0]})

In [5]:
train, test_valid = train_test_split(df, test_size=TEST_SIZE, shuffle=True)
train = Dataset.from_pandas(train)
test, valid = train_test_split(test_valid, test_size=0.5)
test_ds = Dataset.from_pandas(test)
valid = Dataset.from_pandas(valid)

In [6]:
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenized_train = train.map(tokenize_function)
tokenized_test = test_ds.map(tokenize_function)
tokenized_valid = valid.map(tokenize_function)

Map:   0%|          | 0/222 [00:00<?, ? examples/s]

Map:   0%|          | 0/28 [00:00<?, ? examples/s]

Map:   0%|          | 0/28 [00:00<?, ? examples/s]

In [7]:
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=3,
    problem_type="multi_label_classification",
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ai-forever/sbert_large_nlu_ru and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
training_args = TrainingArguments(
    output_dir=OUTPUT_LOG_NAME,
    learning_rate=1e-5,
    evaluation_strategy="epoch",
    logging_strategy="epoch",
    save_strategy='epoch',
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=32,
    load_best_model_at_end=True,
    report_to="none"
)

In [9]:
clf_metrics = evaluate.combine(["accuracy", "f1", "precision", "recall"])

def predict(val):
    return (val > THRESHOLD).astype(int).reshape(-1)

def compute_metrics(eval_pred):
   predictions, labels = eval_pred
   return clf_metrics.compute(predictions=predict(predictions), references=labels.astype(int).reshape(-1))

In [10]:
early_stopper = EarlyStoppingCallback(early_stopping_threshold=0.0001)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_valid,
    compute_metrics=compute_metrics,
    callbacks=[early_stopper]
)
trainer.train()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6117,0.520028,0.619048,0.0,0.0,0.0
2,0.4327,0.370569,0.797619,0.638298,1.0,0.46875
3,0.3036,0.269767,0.845238,0.745098,1.0,0.59375
4,0.21,0.192087,0.928571,0.9,0.964286,0.84375
5,0.1448,0.132778,0.97619,0.96875,0.96875,0.96875
6,0.0972,0.098587,0.988095,0.984127,1.0,0.96875
7,0.0695,0.087064,0.97619,0.96875,0.96875,0.96875
8,0.054,0.071353,0.97619,0.96875,0.96875,0.96875
9,0.044,0.065042,0.97619,0.96875,0.96875,0.96875
10,0.0375,0.065392,0.97619,0.96875,0.96875,0.96875


TrainOutput(global_step=140, training_loss=0.20051504501274653, metrics={'train_runtime': 87.856, 'train_samples_per_second': 80.86, 'train_steps_per_second': 5.099, 'total_flos': 517223654231040.0, 'train_loss': 0.20051504501274653, 'epoch': 10.0})

In [11]:
tokenizer.save_pretrained(SAVE_DIRECTORY)
model.save_pretrained(SAVE_DIRECTORY)

#### Проверка на тестовой выборке

In [24]:
from transformers import pipeline

MODEL_TASK = "sentiment-analysis"
classifier = pipeline(MODEL_TASK, model=model, tokenizer=tokenizer, top_k=3)
texts = test['text'].tolist()
labels = test['label'].tolist()

results = classifier(texts)

In [25]:
counter = 0
mapped_results = []

for text, label, result in zip(texts, labels, results):
    res = {el['label']: 1 if el['score'] > THRESHOLD else 0 for el in result}
    mapped = [res['LABEL_0'], res['LABEL_1'], res['LABEL_2']]
    mapped_results.append(mapped)
    if mapped != label:
        counter += 1
        print(f"Текст: {text}")
        print(f"Предсказано: {mapped}, Значение: {label}, Результат: {result}")
        print()
        
print(f"Всего ошибочно: {counter}")

Текст: Спасибо большое
Предсказано: [0, 0, 0], Значение: [0, 0, 1], Результат: [{'label': 'LABEL_0', 'score': 0.8931328058242798}, {'label': 'LABEL_2', 'score': 0.11061996221542358}, {'label': 'LABEL_1', 'score': 0.03522563353180885}]

Всего ошибочно: 1


In [26]:
clf_metrics.compute(predictions=np.array(mapped_results).reshape(-1), references=np.array(labels).reshape(-1))

{'accuracy': 0.9880952380952381,
 'f1': 0.9866666666666667,
 'precision': 1.0,
 'recall': 0.9736842105263158}