This notebook is a whole testing pipeline. At first, it trains all models and then test them keeping the results in `report` dictionary. The headline shows which tokenizer is used.

In [1]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, precision_score, recall_score
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
from transformers import AutoTokenizer, BertForNextSentencePrediction, Trainer



# Loading data

In [4]:
df_train = pd.read_csv("final_complete/train_data.csv", )
df_eval = pd.read_csv("final_complete/eval_data.csv", )
df_test = pd.read_csv("final_complete/test_data.csv",sep="|")

In [5]:
bert_tokenizer = AutoTokenizer.from_pretrained("models/ru_conversational_cased_L-12_H-768_A-12_pt_v1")

In [None]:
report = {
    "model_name": [],
    "step":[],
    "precision": [],
    "recall": [],
    "f1": []
}

# Logistic regression

## with bert tokenizer

In [6]:
df_train_tokenized = df_train.copy()
df_test_tokenized = df_test.copy()

In [61]:
df_train_tokenized.premise = df_train_tokenized.premise.apply(lambda x: ' '.join(bert_tokenizer.tokenize(x)))
df_train_tokenized.hypothesis = df_train_tokenized.hypothesis.apply(lambda x: ' '.join(bert_tokenizer.tokenize(x)))

df_test_tokenized.premise = df_test_tokenized.premise.apply(lambda x: ' '.join(bert_tokenizer.tokenize(x)))
df_test_tokenized.hypothesis = df_test_tokenized.hypothesis.apply(lambda x: ' '.join(bert_tokenizer.tokenize(x)))

In [62]:
train_texts = list(zip(df_train_tokenized.premise.to_list(), df_train_tokenized.hypothesis.to_list()))
train_texts = [' '.join(x) for x in train_texts]

In [63]:
test_texts = list(zip(df_test_tokenized.premise.to_list(), df_test_tokenized.hypothesis.to_list()))
test_texts = [' '.join(x) for x in test_texts]

In [65]:
vecs = CountVectorizer(max_features=100000, 
                       min_df= 5,
                       max_df=0.9
                      ).fit(train_texts)

In [66]:
x_tr = vecs.transform(train_texts)

In [67]:
x_tr

<797701x59747 sparse matrix of type '<class 'numpy.int64'>'
	with 13447809 stored elements in Compressed Sparse Row format>

In [68]:
clf = LogisticRegression(C=1, max_iter=1000 )

In [69]:
clf.fit(x_tr, df_train_tokenized.label, )

LogisticRegression(C=1, max_iter=1000)

In [72]:
report["model_name"].append("linear_regression-bert_tokenizer")
report["step"].append("1000")
report["precision"].append(precision_score(df_test_tokenized.label, clf.predict(vecs.transform(test_texts), )))
report["recall"].append(recall_score(df_test_tokenized.label, clf.predict(vecs.transform(test_texts), )))
report["f1"].append(f1_score(df_test_tokenized.label, clf.predict(vecs.transform(test_texts), )))

## with dict_tokenizer

In [74]:
train_texts = list(zip(df_train.premise.to_list(), df_train.hypothesis.to_list()))
train_texts = [' '.join(x) for x in train_texts]

test_texts = list(zip(df_test.premise.to_list(), df_test.hypothesis.to_list()))
test_texts = [' '.join(x) for x in test_texts]

In [75]:
vecs = CountVectorizer(max_features=100000, 
                       min_df= 5,
                       max_df=0.9
                      ).fit(train_texts)

In [76]:
x_tr = vecs.transform(train_texts)

In [78]:
x_tr

<797701x100000 sparse matrix of type '<class 'numpy.int64'>'
	with 11521611 stored elements in Compressed Sparse Row format>

In [77]:
clf = LogisticRegression(C=1, max_iter=1000 )

In [79]:
clf.fit(x_tr, df_train.label, )

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression(C=1, max_iter=1000)

In [80]:
report["model_name"].append("linear_regression-dict_tokenizer")
report["step"].append("1000")
report["precision"].append(precision_score(df_test_tokenized.label, clf.predict(vecs.transform(test_texts), )))
report["recall"].append(recall_score(df_test_tokenized.label, clf.predict(vecs.transform(test_texts), )))
report["f1"].append(f1_score(df_test_tokenized.label, clf.predict(vecs.transform(test_texts), )))

# RNN

In [8]:
train_data = list(zip(df_train.premise, df_train.hypothesis, df_train.label))
test_data = list(zip(df_test.premise, df_test.hypothesis, df_test.label))

## Conf

In [14]:
train_config = {
    "alighment_len":100,
    "lr": 1e-3,
    "device":"cuda",
    "epochs":3,
    "batch_sze":32,
}

model_config = {
    "num_classes": 2,
    "embed_dim":256,
    "hidden_dim":64,
    "n_layers":2,
    "is_bidirectional":True,
    "rnn_type": "LSTM",
    "intermidiate_dim":128
}

## With bert tokenizer

### RNN

In [15]:
model = get_rnn_model(vocab_size=bert_tokenizer.vocab_size, 
                      num_classes=model_config["num_classes"], 
                      embed_dim=model_config["embed_dim"], 
                      hidden_dim=model_config["hidden_dim"], 
                      n_layers=model_config["n_layers"], 
                      is_bidirectional=model_config["is_bidirectional"], 
                      rnn_type=model_config["rnn_type"], 
                      padding_idx=bert_tokenizer.pad_token_id)

Total param size: 26067202


In [19]:
train_model(data=train_data, model=model, tokenizer=bert_tokenizer, 
            alighment_len=train_config["alighment_len"],
            device=train_config["device"], 
            epochs=train_config["epochs"], 
            learning_rate=train_config["lr"], 
            batch_size=train_config["batch_sze"], 
            is_siames=False, 
            save_path="trained_models/rnn-bert_tokenizer", divider=3)

  0%|          | 0/24929 [00:00<?, ?batch/s]

  0%|          | 0/24929 [00:00<?, ?batch/s]

  0%|          | 0/24929 [00:00<?, ?batch/s]

In [15]:
#model.load_state_dict(torch.load("trained_models/rnn-bert_tokenizer/model_66477.cpkt"))
#model = model.to("cuda")
#model.eval()
#preds, corrs = inference_model(data=test_data, model=model, tokenizer=bert_tokenizer, alighment_len=100,
#          device="cuda", batch_size=16, is_siames=False)
#sum(preds == corrs) / len(preds)

RNN(
  (embedding): Embedding(100792, 256, padding_idx=0, max_norm=1)
  (encoder): Encoder(
    (rnn): LSTM(256, 64, num_layers=4, dropout=0.1, bidirectional=True)
  )
  (attention): Attention()
  (decoder): Linear(in_features=128, out_features=2, bias=True)
)

### Siames

In [20]:
model = get_siames_model(vocab_size=bert_tokenizer.vocab_size, 
                          num_classes=model_config["num_classes"], 
                          embed_dim=model_config["embed_dim"], 
                          hidden_dim=model_config["hidden_dim"],
                         intermidiate_dim=model_config["intermidiate_dim"],
                          n_layers=model_config["n_layers"], 
                          is_bidirectional=model_config["is_bidirectional"], 
                          rnn_type=model_config["rnn_type"], 
                          padding_idx=bert_tokenizer.pad_token_id)

Total param size: 26083970


In [21]:
train_model(data=train_data, 
            model=model, 
            tokenizer=bert_tokenizer, 
            alighment_len=train_config["alighment_len"],
            device=train_config["device"], 
            epochs=train_config["epochs"], 
            learning_rate=train_config["lr"], 
            batch_size=train_config["batch_sze"], 
            is_siames=True, 
            save_path="trained_models/siames_rnn_rnn-bert_tokenizer", divider=3)

  0%|          | 0/24929 [00:00<?, ?batch/s]

  0%|          | 0/24929 [00:00<?, ?batch/s]

  0%|          | 0/24929 [00:00<?, ?batch/s]

In [59]:
#model.load_state_dict(torch.load("trained_models/siames_rnn-bert_tokenizer/model_66477.cpkt"))
#model = model.to("cuda")
#model.eval()
#preds, corrs = inference_model(data=test_data, model=model, tokenizer=bert_tokenizer, alighment_len=100,
#          device="cuda", batch_size=16, is_siames=True)
#print(sum((preds == corrs)) / len(preds))

SiamesRNN(
  (embedding): Embedding(100792, 256, padding_idx=0, max_norm=1, scale_grad_by_freq=True)
  (encoder): Encoder(
    (rnn): LSTM(256, 64, num_layers=4, dropout=0.1, bidirectional=True)
  )
  (attention): Attention()
  (decoder): Linear(in_features=128, out_features=128, bias=True)
  (classifier_head): Linear(in_features=256, out_features=2, bias=True)
)

# With usual tokenizer

In [22]:
dict_tokenozer = DictTokenizer()

In [23]:
dict_tokenozer.train(df_train.premise.to_list() + df_train.hypothesis.to_list())


### RNN

In [24]:
model = get_rnn_model(vocab_size=dict_tokenozer.vocab_size, 
                      num_classes=model_config["num_classes"], 
                      embed_dim=model_config["embed_dim"], 
                      hidden_dim=model_config["hidden_dim"], 
                      n_layers=model_config["n_layers"], 
                      is_bidirectional=model_config["is_bidirectional"], 
                      rnn_type=model_config["rnn_type"], 
                      padding_idx=dict_tokenozer.padding_idx)

Total param size: 91892226


In [25]:
train_model(data=train_data, model=model, tokenizer=dict_tokenozer, 
            alighment_len=train_config["alighment_len"],
            device=train_config["device"], 
            epochs=train_config["epochs"], 
            learning_rate=train_config["lr"], 
            batch_size=train_config["batch_sze"], 
            is_siames=False, 
            save_path="trained_models/rnn-dict_tokenizer", divider=3)

  0%|          | 0/24929 [00:00<?, ?batch/s]

  0%|          | 0/24929 [00:00<?, ?batch/s]

  0%|          | 0/24929 [00:00<?, ?batch/s]

In [53]:
#model.load_state_dict(torch.load("trained_models/rnn-dict_tokenizer/model_83096.cpkt"))
#model = model.to("cuda")
#model.eval()
#preds, corrs = inference_model(data=test_data, model=model, tokenizer=dict_tokenozer, alighment_len=50,
#          device="cuda", batch_size=16, is_siames=False)
#print(sum(preds == corrs) / len(preds))

RNN(
  (embedding): Embedding(357921, 256, padding_idx=357919, max_norm=1)
  (encoder): Encoder(
    (rnn): LSTM(256, 64, num_layers=4, dropout=0.1, bidirectional=True)
  )
  (attention): Attention()
  (decoder): Linear(in_features=128, out_features=2, bias=True)
)

### Siames

In [26]:
model = get_siames_model(vocab_size=dict_tokenozer.vocab_size, 
                          num_classes=model_config["num_classes"], 
                          embed_dim=model_config["embed_dim"], 
                          hidden_dim=model_config["hidden_dim"],
                         intermidiate_dim=model_config["intermidiate_dim"],
                          n_layers=model_config["n_layers"], 
                          is_bidirectional=model_config["is_bidirectional"], 
                          rnn_type=model_config["rnn_type"], 
                          padding_idx=dict_tokenozer.padding_idx)

Total param size: 91908994


In [27]:
train_model(data=train_data, 
            model=model, 
            tokenizer=bert_tokenizer, 
            alighment_len=train_config["alighment_len"],
            device=train_config["device"], 
            epochs=train_config["epochs"], 
            learning_rate=train_config["lr"], 
            batch_size=train_config["batch_sze"], 
            is_siames=True, 
            save_path="trained_models/siames_rnn-dict_tokenizer", divider=3)

  0%|          | 0/24929 [00:00<?, ?batch/s]

  0%|          | 0/24929 [00:00<?, ?batch/s]

  0%|          | 0/24929 [00:00<?, ?batch/s]

In [None]:
#model.load_state_dict(torch.load("trained_models/siames_rnn-dict_tokenizer/model_49858.cpkt"))
#model = model.to("cuda")
#model.eval()
#preds, corrs = inference_model(data=test_data, model=model, tokenizer=dict_tokenozer, alighment_len=100,
#          device="cuda", batch_size=16, is_siames=True)

SiamesRNN(
  (embedding): Embedding(357921, 256, padding_idx=357919, max_norm=1, scale_grad_by_freq=True)
  (encoder): Encoder(
    (rnn): LSTM(256, 64, num_layers=4, dropout=0.1, bidirectional=True)
  )
  (attention): Attention()
  (decoder): Linear(in_features=128, out_features=256, bias=True)
  (classifier_head): Linear(in_features=512, out_features=2, bias=True)
)

In [29]:
sum(preds == corrs) / len(preds)

0.5373993095512083

# SentenceBERT

In [32]:
model_st = SentenceTransformer("models/sbert_output_softmax/")

In [33]:
model = SentenceBERTClassifier(model_st, 
                               sentence_embedding_dimension=256,
                               num_labels=2
                              )

In [41]:
train_dataset = PairedTextDataset(df_train.premise.to_list(), 
                                  df_train.hypothesis.to_list(), 
                                  df_train.label.to_list())

dataloader = DataLoader(train_dataset, batch_size=8)

In [42]:
for param in model.sent_bert_model.parameters():
    param.requires_grad = False

In [43]:
epochs = 3
lr = 10e-5
divider = 3
save_path = "trained_models/sbert-sent_embeddings_256-3_epochs_1"

In [44]:
loss_function = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=lr , )
model.train()
model = model.to("cuda")
loss_avg = []
save_counter = 0
already_saved = 0
if save_path is not None:
    if not isinstance(save_path, Path):
        save_path = Path(save_path)
    if not save_path.exists():
        os.mkdir(save_path)
    save_after = len(dataloader) // divider
for e in range(epochs):
    with tqdm(dataloader, unit="batch") as tepoch:
        for batch in tepoch:
            tepoch.set_description(f"Epoch {e}")
            batch["label"] = batch["label"].to("cuda")
            model.zero_grad()
            score = model(batch["text_pair"])
            loss = loss_function(score, batch["label"].squeeze())
            loss.backward()
            optimizer.step()
            loss_avg.append(loss.item())
            tepoch.set_postfix(loss=np.mean(loss_avg), )
            save_counter += 1
            if save_counter > save_after:
                torch.save(model.state_dict(), save_path / f"model_{save_counter + save_after*already_saved}.cpkt")
                save_counter = 0
                already_saved += 1
torch.save(model.state_dict(), save_path / f"model_final.cpkt")

  0%|          | 0/99713 [00:00<?, ?batch/s]

  0%|          | 0/99713 [00:00<?, ?batch/s]

  0%|          | 0/99713 [00:00<?, ?batch/s]

# Testing

In [104]:
base = Path("models/trained_models/")

all_corrects = []
all_predictions = {}
for model_dir in os.listdir(base):
    model_base = base/model_dir
    for model_checkpoint in os.listdir(model_base):
        if ("final" not in model_checkpoint) and ("15000" not in model_checkpoint):
            continue
        if model_checkpoint == ".ipynb_checkpoints":
            continue
        if "rnn" in model_dir:
            if "dict_tokenizer" in model_dir:
                tokenizer = dict_tokenozer
                padding_idx = dict_tokenozer.padding_idx
            else:
                tokenizer = bert_tokenizer
                padding_idx = bert_tokenizer.pad_token_id
            if "siames" in model_dir:
                model = get_siames_model(vocab_size=tokenizer.vocab_size, 
                                          num_classes=model_config["num_classes"], 
                                          embed_dim=model_config["embed_dim"], 
                                          hidden_dim=model_config["hidden_dim"],
                                         intermidiate_dim=model_config["intermidiate_dim"],
                                          n_layers=model_config["n_layers"], 
                                          is_bidirectional=model_config["is_bidirectional"], 
                                          rnn_type=model_config["rnn_type"], 
                                          padding_idx=padding_idx)
                is_siames = True
            else:
                model = get_rnn_model(vocab_size=tokenizer.vocab_size, 
                                      num_classes=model_config["num_classes"], 
                                      embed_dim=model_config["embed_dim"], 
                                      hidden_dim=model_config["hidden_dim"], 
                                      n_layers=model_config["n_layers"], 
                                      is_bidirectional=model_config["is_bidirectional"], 
                                      rnn_type=model_config["rnn_type"], 
                                      padding_idx=padding_idx)
                is_siames = False

            model.load_state_dict(torch.load(model_base/model_checkpoint))
            model = model.to("cuda")
            model.eval()

            predictions, corrects = inference_model(data=test_data, model=model, tokenizer=tokenizer, alighment_len=50,
                      device="cuda", batch_size=16, is_siames=is_siames)
        elif "sbert" in model_dir:
            model = SentenceBERTClassifier(model_st, 
                                           sentence_embedding_dimension=256,
                                           num_labels=2
                                          )
            model.load_state_dict(torch.load(model_base/model_checkpoint))
            model = model.to("cuda")
            model.eval()
            predictions = inference_sbert(df_test, model)
            corrects = df_test.label.to_list()
        else:
            model = BertForNextSentencePrediction.from_pretrained(model_base/model_checkpoint)
            tokenized_texts_ts = bert_tokenizer(df_test.premise.to_list(), 
                                                df_test.hypothesis.to_list(), return_tensors='pt',
                                truncation=True, max_length=512, padding = 'max_length',)
            test_dataset = TextsLabelsDataset(tokenized_texts_ts, df_test.label.to_list())
            trainer = Trainer(model=model,)
            test_predictions = trainer.predict(test_dataset)
            predictions = np.argmax(test_predictions[0], axis=1)
            corrects = df_test.label.to_list()
        if "fine_tuned" in model_dir:
            model_checkpoint = model_checkpoint.split("-")[1]
        else:
            model_checkpoint = model_checkpoint.split("_")[1].split(".")[0]
        report["model_name"].append(model_dir)
        report["step"].append(model_checkpoint)
        report["precision"].append(precision_score(corrects, predictions))
        report["recall"].append(recall_score(corrects, predictions))
        report["f1"].append(f1_score(corrects, predictions))
        all_corrects.append((model_dir, predictions))
        print(f"model:{report['model_name'][-1]}, pre:{report['precision'][-1]}, rec:{report['recall'][-1]}, f1:{report['f1'][-1]}")

model_8310.cpkt True
model_16619.cpkt True
model_24928.cpkt True
model_33237.cpkt True
model_41546.cpkt True
model_49855.cpkt True
model_58164.cpkt True
model_66473.cpkt True
model_final.cpkt True
Total param size: 26067202


  0%|          | 0/544 [00:00<?, ?batch/s]

model:rnn-bert_tokenizer, pre:0.5602196939976462, rec:0.7145359019264448, f1:0.6280373831775702
model_8310.cpkt True
model_16619.cpkt True
model_24928.cpkt True
model_33237.cpkt True
model_41546.cpkt True
model_49855.cpkt True
model_58164.cpkt True
model_66473.cpkt True
model_final.cpkt True
Total param size: 26083970


  0%|          | 0/544 [00:00<?, ?batch/s]

model:siames_rnn_rnn-bert_tokenizer, pre:0.6598812553011026, rec:0.38929196897673257, f1:0.48969315499606614
model_8310.cpkt True
model_16619.cpkt True
model_24928.cpkt True
model_33237.cpkt True
model_41546.cpkt True
model_49855.cpkt True
model_58164.cpkt True
model_66473.cpkt True
model_final.cpkt True
Total param size: 91892226


  0%|          | 0/544 [00:00<?, ?batch/s]

model:rnn-dict_tokenizer, pre:0.6025316455696202, rec:0.4763572679509632, f1:0.5320665083135392
model_8310.cpkt True
model_16619.cpkt True
model_24928.cpkt True
model_33237.cpkt True
model_41546.cpkt True
model_49855.cpkt True
model_58164.cpkt True
model_66473.cpkt True
model_final.cpkt True
Total param size: 91908994


  0%|          | 0/544 [00:00<?, ?batch/s]

model:siames_rnn-dict_tokenizer, pre:0.408, rec:0.06379784838628971, f1:0.11034184335785374
model_33238.cpkt True
model_66475.cpkt True
model_99712.cpkt True
model_132949.cpkt True
model_166186.cpkt True
model_199423.cpkt True
model_232660.cpkt True
model_265897.cpkt True
model_final.cpkt True


  0%|          | 0/272 [00:00<?, ?batch/s]

loading configuration file models/trained_models/results_fine_tuned_ru_conversational_bert/checkpoint-150000/config.json
Model config BertConfig {
  "_name_or_path": "ru_conversational_cased_L-12_H-768_A-12_pt_v1",
  "architectures": [
    "BertForNextSentencePrediction"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 1

model:sbert-sent_embeddings_256-3_epochs, pre:0.7869053457577244, rec:0.8028521391043283, f1:0.794798761609907
checkpoint-135000 True
checkpoint-150000 True


All model checkpoint weights were used when initializing BertForNextSentencePrediction.

All the weights of BertForNextSentencePrediction were initialized from the model checkpoint at models/trained_models/results_fine_tuned_ru_conversational_bert/checkpoint-150000.
If your task is similar to the task the model of the checkpoint was trained on, you can already use BertForNextSentencePrediction for predictions without further training.
No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running Prediction *****
  Num examples = 8690
  Batch size = 8


model:results_fine_tuned_ru_conversational_bert, pre:0.8220277169948942, rec:0.8458844133099825, f1:0.8337854500616523


In [83]:
df = pd.DataFrame(report)

In [114]:
for m_name, preds in all_corrects:
    df_test[m_name] = preds == df_test.label

In [120]:
model_names = [x[0] for x in all_corrects]

In [128]:
pd.options.display.float_format = '{:,.3f}'.format
print(df_test.groupby("chat")[model_names].agg(lambda x: sum(x) / len(x)).style.to_latex())

\begin{tabular}{lrrrrrr}
 & rnn-bert_tokenizer & siames_rnn_rnn-bert_tokenizer & rnn-dict_tokenizer & siames_rnn-dict_tokenizer & sbert-sent_embeddings_256-3_epochs & results_fine_tuned_ru_conversational_bert \\
chat &  &  &  &  &  &  \\
balichat_woman & 0.694686 & 0.687923 & 0.658937 & 0.579710 & 0.857005 & 0.883092 \\
borussia_chat & 0.584541 & 0.627053 & 0.604831 & 0.568116 & 0.798068 & 0.821256 \\
chat_suicidnikov & 0.538164 & 0.575845 & 0.578744 & 0.533333 & 0.783575 & 0.826087 \\
cotedazurchat & 0.574879 & 0.628019 & 0.588406 & 0.536232 & 0.762319 & 0.793237 \\
easypeasycodechat & 0.702213 & 0.519115 & 0.555332 & 0.277666 & 0.859155 & 0.885312 \\
openwrt_ru & 0.555907 & 0.620253 & 0.566456 & 0.527426 & 0.787975 & 0.835443 \\
orange_sosedi & 0.625121 & 0.647343 & 0.657971 & 0.533333 & 0.817391 & 0.849275 \\
sling38 & 0.685990 & 0.655072 & 0.667633 & 0.512077 & 0.841546 & 0.889855 \\
terrariaphone & 0.577778 & 0.624155 & 0.618357 & 0.543961 & 0.800966 & 0.840580 \\
\end{tabular}



In [130]:
df_result = df_test.groupby("chat")[model_names].agg(lambda x: sum(x) / len(x))

In [134]:
print(df_result.style.format( precision=3).to_latex())

\begin{tabular}{lrrrrrr}
 & rnn-bert_tokenizer & siames_rnn_rnn-bert_tokenizer & rnn-dict_tokenizer & siames_rnn-dict_tokenizer & sbert-sent_embeddings_256-3_epochs & results_fine_tuned_ru_conversational_bert \\
chat &  &  &  &  &  &  \\
balichat_woman & 0.695 & 0.688 & 0.659 & 0.580 & 0.857 & 0.883 \\
borussia_chat & 0.585 & 0.627 & 0.605 & 0.568 & 0.798 & 0.821 \\
chat_suicidnikov & 0.538 & 0.576 & 0.579 & 0.533 & 0.784 & 0.826 \\
cotedazurchat & 0.575 & 0.628 & 0.588 & 0.536 & 0.762 & 0.793 \\
easypeasycodechat & 0.702 & 0.519 & 0.555 & 0.278 & 0.859 & 0.885 \\
openwrt_ru & 0.556 & 0.620 & 0.566 & 0.527 & 0.788 & 0.835 \\
orange_sosedi & 0.625 & 0.647 & 0.658 & 0.533 & 0.817 & 0.849 \\
sling38 & 0.686 & 0.655 & 0.668 & 0.512 & 0.842 & 0.890 \\
terrariaphone & 0.578 & 0.624 & 0.618 & 0.544 & 0.801 & 0.841 \\
\end{tabular}



In [98]:
print(df.groupby(["model_name", ])[[ 'f1', "step"]].agg("max"))

                                                 f1    step
model_name                                                 
linear_regression-bert_tokenizer           0.501125    1000
linear_regression-dict_tokenizer           0.500118    1000
results_fine_tuned_ru_conversational_bert  0.833785  150000
rnn-bert_tokenizer                         0.628037   final
rnn-dict_tokenizer                         0.566009   final
sbert-sent_embeddings_256-3_epochs         0.797450   final
siames_rnn-dict_tokenizer                  0.505583   final
siames_rnn_rnn-bert_tokenizer              0.539064   final
