In [None]:
import os
# One-time environment bootstrap: everything below runs only while the data
# archive is absent from the current working directory, so re-running the cell
# after a successful setup does nothing.
if 'ozom671games.zip' not in os.listdir():
    from google.colab import drive
    # Mount Google Drive; the archive is copied from it below.
    drive.mount('/content/drive')
    # Install third-party packages (versions are not pinned).
    !pip install transformers
    !pip install sentencepiece
    !pip install bitsandbytes
    # Copy the archive from Drive into the working directory and unpack it there.
    !cp drive/MyDrive/ozon/ozom671games.zip ozom671games.zip
    !unzip ozom671games.zip

In [None]:
import pandas as pd
import json
from tqdm.notebook import tqdm
import numpy as np
from scipy.spatial.distance import cosine, euclidean
from sklearn.metrics import pairwise_distances
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
import torch

In [None]:
import random
def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Sets ``PYTHONHASHSEED`` in the environment, seeds Python's ``random``,
    NumPy and PyTorch (CPU and CUDA generators), and switches cuDNN to
    deterministic mode with autotuning (benchmark) disabled.

    Args:
        seed: Integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Each of these callables takes the seed as its only argument.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


seed_everything(228)

In [None]:
# Training inputs: `train` is later concatenated into the per-product table
# (columns such as `name`, `variantid`, `categories`, `color_parsed` are read
# from it), and `target` holds the pair rows (`variantid1`, `variantid2`,
# `target`) that everything else is merged onto.
train = pd.read_parquet('train_data.parquet')
target = pd.read_parquet('train_pairs.parquet')

In [None]:
# json.loads(data['categories_1'][0])['3']

In [None]:
# Number of training pair rows. Must be captured here, before the test pairs
# are appended to `target`: later cells use `ltr` to slice the first `ltr`
# rows as the training part and the remainder as the test part.
ltr = len(target)

In [None]:
# Test-side counterparts of `train` / `target`.
# NOTE(review): the file name suggests the test pairs carry no `target`
# column — confirm against the data.
test = pd.read_parquet('test_data.parquet')
test_target = pd.read_parquet('test_pairs_wo_target.parquet')

In [None]:
# Stack train pairs first and test pairs after them under a fresh 0..N-1 index,
# so rows [0, ltr) are train and rows [ltr, N) are test.
# NOTE: this rebinds `target`, so re-running the cell appends the test pairs again.
target = pd.concat([target, test_target]).reset_index(drop = True)

In [None]:
# Single product table: all train products plus only those test products whose
# `variantid` does not already occur in train (avoids repeating those ids).
data = pd.concat([train, test[~test.variantid.isin(train['variantid'].unique())] ])

In [None]:
# Attach product attributes to both sides of every pair. The first merge joins
# on `variantid1` (columns keep their plain names because `target` has none of
# them); the second joins on `variantid2`, and the `_1` / `_2` suffixes then
# disambiguate the two products' columns.
product_cols = ['name', 'variantid', 'categories', 'color_parsed']
data = (
    target
    .merge(data[product_cols], right_on = 'variantid', left_on = 'variantid1', how = 'left')
    .merge(data[product_cols], right_on = 'variantid', left_on = 'variantid2', how = 'left', suffixes=('_1', '_2'))
)

# Later cells slice `data` by position (first `ltr` rows = train pairs). A left
# merge keeps row count and order only when the right-hand keys are unique;
# duplicated `variantid` rows would silently multiply pairs and misalign labels
# with predictions, so fail loudly instead.
assert len(data) == len(target), (
    f"merge changed the number of pairs ({len(target)} -> {len(data)}); "
    "the product table contains duplicate variantid values"
)

In [None]:
from transformers import AdamW, AutoConfig, AutoModel, AutoTokenizer, get_cosine_schedule_with_warmup
import numpy as np
import torch
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

In [None]:
# Value stored under key '3' of the first product's JSON-encoded `categories`.
data['cat_3'] = [json.loads(x)['3'] for x in data['categories_1']]
# Stratification key: the target value (as a string) joined with that category.
data['tmp'] = data['target'].astype('str') + '_' + data['cat_3']

def standart_split(data, target, n_splits = 5):
    """Build shuffled, stratified K-fold index pairs over the training rows.

    Only the first `ltr` rows of `data` are split (`ltr` is read from module
    scope), stratified by the `tmp` column.

    NOTE: a later cell redefines `standart_split` with GroupKFold and
    overwrites `split_list`, so the result produced here is replaced.

    Args:
        data: Pair-level DataFrame containing a `tmp` column.
        target: Accepted for call compatibility; not used.
        n_splits: Number of folds.

    Returns:
        List of ``(train_index, test_index)`` tuples, one per fold.
    """
    folder = StratifiedKFold(n_splits = n_splits, shuffle = True, random_state = 228)
    labelled_rows = data.loc[:ltr-1,:]
    strat_key = data['tmp'][:ltr]
    return [
        (train_index, test_index)
        for train_index, test_index in folder.split(labelled_rows, strat_key)
    ]


split_list = standart_split(data, 'target')



In [None]:
import torch
import torch.nn as nn
from torch.nn import Parameter
import torch.nn.functional as F

from torch.optim import Adam, SGD, AdamW
from torch.utils.data import DataLoader, Dataset

In [None]:
class TrainDataset(Dataset):
    """Dataset of product-name pairs encoded as one two-segment tokenizer input.

    Args:
        df: DataFrame with columns `name_1`, `name_2`, `color_parsed_1`,
            `color_parsed_2` and `target`.
        tokenizer: Object exposing ``encode_plus(text, text_pair, ...)``.
        max_len: Maximum encoded length; longer inputs are truncated.
        shuffle: Order of the two names within the encoded pair:
            0 -> (name_1, name_2); 1 -> always swapped (name_2, name_1);
            2 -> swapped at random with probability 0.5 per access.
            Any other value behaves like 0.
    """

    def __init__(self, df, tokenizer, max_len, shuffle = 0):
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.texts_1 = df['name_1'].values
        self.texts_2 = df['name_2'].values
        # Parsed colours are stored but not used when building a sample.
        self.cats_1 = df['color_parsed_1'].values
        self.cats_2 = df['color_parsed_2'].values
        self.target = df['target'].values
        self.shuffle = shuffle

    def __len__(self):
        """Return the number of pairs."""
        return len(self.texts_1)

    def __getitem__(self, item):
        """Return ``(encoding, label)`` for the pair at position `item`.

        The encoding is whatever the tokenizer's ``encode_plus`` returns for the
        (possibly swapped) name pair; it is left unpadded so that the collate
        function can pad to the longest sequence in the batch.
        """
        # random.random() is only drawn in mode 2, as short-circuiting guarantees.
        swap = self.shuffle == 1 or (self.shuffle == 2 and random.random() > 0.5)
        if swap:
            text_1, text_2 = self.texts_2[item], self.texts_1[item]
        else:
            text_1, text_2 = self.texts_1[item], self.texts_2[item]

        # Use the tokenizer given to this dataset, not a module-level global:
        # the notebook loads a different tokenizer for every model.
        token_inputs = self.tokenizer.encode_plus(
                    text_1,
                    text_2,
                    add_special_tokens=True,
                    max_length=self.max_len,
                    padding="do_not_pad",
                    truncation=True,
                )

        label = self.target[item]
        return token_inputs, label

In [None]:
class CustomModel(nn.Module):
    """Transformer encoder followed by a single-logit linear head.

    The head is applied to up to five differently-dropped-out copies of the
    pooled feature; `forward` returns one logit tensor per configured dropout
    rate.

    Args:
        model: Model name or path understood by `AutoConfig` / `AutoModel`.
        fc_dropout: Dropout probabilities applied before the head. Only the
            first five entries are used, and at least one is required.
        nn_dp: Value written to the encoder config's `hidden_dropout_prob`.
        lns: Value written to the encoder config's `layer_norm_eps`.
        config_path: Optional path to a config object saved with `torch.save`;
            when given it replaces the config fetched for `model`.
        pretrained: If True, load pretrained encoder weights; otherwise build
            the encoder from the config alone with freshly initialised weights.
    """

    def __init__(self, model, fc_dropout = [0.3], nn_dp = 0.1, lns = 1e-07, config_path=None, pretrained=False):
        super().__init__()
        if config_path is None:
            self.config = AutoConfig.from_pretrained(model)
        else:
            # NOTE(review): torch.load unpickles the file and can execute
            # arbitrary code; only pass config paths from a trusted source.
            self.config = torch.load(config_path)

        self.config.update(
            {
                'hidden_dropout_prob': nn_dp,
                "output_hidden_states": True,
                'layer_norm_eps': lns,
                "num_labels": 2,
            }
        )

        if pretrained:
            self.model = AutoModel.from_pretrained(model, config=self.config)
        else:
            # Auto classes cannot be instantiated directly (their constructor
            # raises); building from a config goes through `from_config`.
            self.model = AutoModel.from_config(self.config)

        # Five fixed dropout slots keep the attribute names stable; slots
        # without a configured rate get p=0 and are skipped in `forward`.
        self.num_dropout = len(fc_dropout)
        self.fc_dropout0 = nn.Dropout(fc_dropout[0])
        self.fc_dropout1 = nn.Dropout(fc_dropout[1] if len(fc_dropout) > 1 else 0)
        self.fc_dropout2 = nn.Dropout(fc_dropout[2] if len(fc_dropout) > 2 else 0)
        self.fc_dropout3 = nn.Dropout(fc_dropout[3] if len(fc_dropout) > 3 else 0)
        self.fc_dropout4 = nn.Dropout(fc_dropout[4] if len(fc_dropout) > 4 else 0)

        self.fc = nn.Linear(self.config.hidden_size, 1)

    def _init_weights(self, module):
        """Re-initialise `module` in place using the config's `initializer_range`.

        Linear and Embedding weights are drawn from a zero-mean normal; Linear
        biases and the embedding padding row are zeroed; LayerNorm is reset to
        weight 1 / bias 0. Nothing in this class calls this method.
        """
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def feature_1(self, inputs):
        """Return the encoder's first output at sequence position 0."""
        outputs = self.model(**inputs)
        last_hidden_states = outputs[0][:,0,:].squeeze(1)
        return last_hidden_states

    def feature_2(self, inputs):
        """Return the attention-mask-weighted mean of the encoder's first output."""
        outputs = self.model(**inputs)
        last_hidden_states = outputs[0]
        attention_mask = inputs['attention_mask']
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_states.size()).float()
        sum_embeddings = torch.sum(last_hidden_states * input_mask_expanded, 1)
        # Clamp so a fully masked row cannot cause a division by zero.
        sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
        return sum_embeddings / sum_mask

    def forward(self, inputs, type_ = 1):
        """Compute one logit tensor per configured dropout rate.

        Args:
            inputs: Mapping of keyword arguments for the encoder; must contain
                `attention_mask` when `type_` is not 1.
            type_: 1 selects `feature_1`; any other value selects `feature_2`.

        Returns:
            List of at most five head outputs, one per entry of `fc_dropout`.
        """
        if type_ == 1:
            feature = self.feature_1(inputs)
        else:
            feature = self.feature_2(inputs)

        dropouts = (
            self.fc_dropout0,
            self.fc_dropout1,
            self.fc_dropout2,
            self.fc_dropout3,
            self.fc_dropout4,
        )
        # Only evaluate the head for slots that are actually returned.
        return [self.fc(dropout(feature)) for dropout in dropouts[:self.num_dropout]]

In [None]:
def make_predict(model, valid_dataloader, criterion, epoch, valid_target, type_):
    """Run `model` over a dataloader on the GPU and return its probabilities.

    The model is put in eval mode and evaluated without gradients under CUDA
    autocast. For each batch the per-head logits are averaged and passed
    through a sigmoid; the running mean loss is only shown on the progress bar.

    Args:
        model: Callable as ``model(inputs, type_)`` returning a list of logit
            tensors indexed as ``[:, 0]``.
        valid_dataloader: Yields ``(inputs, labels)`` where `inputs` is a dict
            of tensors.
        criterion: Loss called as ``criterion(logits[:, 0], labels)``.
        epoch: Value displayed on the progress bar.
        valid_target: Accepted for call compatibility; not used.
        type_: Forwarded to the model as its second argument.

    Returns:
        1-D NumPy array with column 0 of the concatenated batch probabilities.
    """
    model.eval()
    progress = tqdm(enumerate(valid_dataloader), total = len(valid_dataloader))
    running_loss = 0
    batch_probs = []

    with torch.no_grad():
        for step, (inputs, labels) in progress:
            inputs = {key: tensor.cuda() for key, tensor in inputs.items()}
            labels = labels.cuda()

            with torch.cuda.amp.autocast():
                logits_per_head = model(inputs, type_)
                head_losses = [criterion(logits[:, 0], labels) for logits in logits_per_head]
                loss = sum(head_losses) / len(head_losses)

            mean_logits = sum(logits_per_head) / len(logits_per_head)

            running_loss += loss.cpu().detach().numpy()
            progress.set_postfix(loss=running_loss / (step + 1), stage="validation", epoch = epoch)
            batch_probs.append(mean_logits.sigmoid().to('cpu').numpy())

    return np.concatenate(batch_probs)[:, 0]

In [None]:
class Collate:
    """Collate function that pads a batch to the length of its longest sample.

    Args:
        tokenizer: Stored on the instance; not consulted while collating.
        is_train: Stored on the instance; not consulted while collating.
    """

    def __init__(self, tokenizer, is_train = True):
        self.tokenizer = tokenizer
        self.is_train = is_train

    def __call__(self, batch):
        """Turn a list of ``(encoding, label)`` samples into padded tensors.

        Both `input_ids` and `attention_mask` are right-padded with zeros.

        Returns:
            ``(inputs_dict, labels)``: a dict with long tensors
            `attention_mask` and `input_ids`, and a float tensor of labels.
        """
        encodings = []
        targets = []
        for sample in batch:
            encodings.append(sample[0])
            targets.append(sample[1])

        longest = max(len(encoding['input_ids']) for encoding in encodings)

        padded_mask = []
        padded_ids = []
        for encoding in encodings:
            # The pad width is derived from the mask length for both fields.
            filler = (longest - len(encoding['attention_mask'])) * [0]
            padded_mask.append(encoding['attention_mask'] + filler)
            padded_ids.append(encoding['input_ids'] + filler)

        inputs_dict = {
            "attention_mask": torch.tensor(padded_mask, dtype=torch.long),
            "input_ids": torch.tensor(padded_ids, dtype=torch.long),
        }
        labels = torch.tensor(targets, dtype=torch.float)

        return inputs_dict, labels

In [None]:
def standart_split(data, target, ltr, n_splits = 5):
    """Build group K-fold index pairs over the first `ltr` rows of `data`.

    Rows are grouped by `variantid1`, so all pairs sharing the same first
    product land in the same fold.

    Args:
        data: Pair-level DataFrame with `target` and `variantid1` columns.
        target: Accepted for call compatibility; not used.
        ltr: Number of leading rows to split.
        n_splits: Number of folds.

    Returns:
        List of ``(train_index, test_index)`` tuples, one per fold.
    """
    folder = GroupKFold(n_splits = n_splits)
    fold_iter = folder.split(
        data.loc[:ltr-1,:],
        data['target'][:ltr],
        data['variantid1'][:ltr],
    )
    return [(train_index, test_index) for train_index, test_index in fold_iter]


split_list = standart_split(data, 'target', ltr)

In [None]:
# Inference over every fine-tuned checkpoint.
#
# For each model family and each of its 5 fold checkpoints this cell predicts
#   * the fold's held-out training rows (out-of-fold predictions), and
#   * the full test set,
# once with the names in their original order (suffix _0) and once swapped
# (suffix _1). Results are collected in `dict_predicts`.
batch_size = 48
max_val_len = 512

params_valid = {'batch_size': batch_size, 'shuffle': False, 'drop_last': False, 'num_workers': 4}

dict_predicts = {}

dict_model_name = {3 : "DeepPavlov/rubert-base-cased", 4 : "DeepPavlov/rubert-base-cased-conversational", 5 : "DeepPavlov/rubert-base-cased",
                  7: "ai-forever/ruBert-base",  8: "cointegrated/LaBSE-en-ru", 9 : "cointegrated/LaBSE-en-ru" }

# Pooling passed to CustomModel.forward: 1 -> feature_1, 2 -> feature_2.
model_type = {3:1, 4:1, 5:1, 7:1, 8:2, 9:2}

# Rows from position `ltr` onwards are the test pairs; this frame is the same
# for every model and fold, so build it once.
test_df = data.loc[ltr:].reset_index(drop=True)

for num_model in [3,4,5,7,8,9]:

    model_name = dict_model_name[num_model]
    model = CustomModel(model_name, [0],pretrained = True).cuda()
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    collate_fn_val = Collate(tokenizer, False)

    # The test loaders depend only on the tokenizer, not on the fold.
    test_dataloader_0 = DataLoader( TrainDataset(test_df, tokenizer, max_val_len, 0),
                                    collate_fn  = collate_fn_val, **params_valid)
    test_dataloader_1 = DataLoader( TrainDataset(test_df, tokenizer, max_val_len, 1),
                                    collate_fn  = collate_fn_val, **params_valid)

    # Model 5 has two checkpoint series (suffixes _2 and _3); the others have one.
    if num_model != 5:
        list_names_models = [f'model_ozon_{num_model}_fold']
    else:
        list_names_models = [f'model_ozon_{num_model}_fold_{epoch}' for epoch in [2, 3]]

    for name_model in list_names_models:
        # Only feeds the loss shown on the progress bar.
        # NOTE(review): test rows presumably have NaN targets, so that loss is
        # not meaningful for the test loaders — confirm.
        criterion = nn.BCEWithLogitsLoss()

        list_predict_0 = []
        train_predict_0 = np.zeros(ltr)

        list_predict_1 = []
        train_predict_1 = np.zeros(ltr)

        for fold in range(5):
            # The literal 'fold' in the template is replaced by the fold number.
            name_model_tmp = name_model.replace('fold', str(fold))
            valid_index = split_list[fold][1]
            valid_df = data.loc[valid_index].reset_index(drop=True)
            model_dict = torch.load(f"drive/MyDrive/{name_model_tmp}_best.pt", map_location='cuda')
            model.load_state_dict(model_dict)

            valid_dataloader_0  = DataLoader( TrainDataset(valid_df, tokenizer, max_val_len, 0),
                                              collate_fn  = collate_fn_val, **params_valid)
            valid_dataloader_1  = DataLoader( TrainDataset(valid_df, tokenizer, max_val_len, 1),
                                              collate_fn  = collate_fn_val, **params_valid)

            # Out-of-fold predictions. Without these the train_predict_* arrays
            # stay all-zero, which makes the AUC printed below constant and
            # stores zeros as the training predictions.
            train_predict_0[valid_index] = make_predict(model, valid_dataloader_0, criterion, 1,
                                                        valid_df['target'], model_type[num_model])
            list_predict_0 +=  [ make_predict(model, test_dataloader_0, criterion, 1,
                                              test_df['target'], model_type[num_model]) ]

            train_predict_1[valid_index] = make_predict(model, valid_dataloader_1, criterion, 1,
                                                        valid_df['target'], model_type[num_model])
            list_predict_1 +=  [ make_predict(model, test_dataloader_1, criterion, 1,
                                              test_df['target'], model_type[num_model]) ]

            # Held-out AUC of this fold's checkpoint (original name order).
            print(roc_auc_score(data['target'][valid_index], train_predict_0[valid_index]))

        # [test preds per fold, swapped test preds per fold, OOF preds, swapped OOF preds]
        dict_predicts[name_model] = [list_predict_0, list_predict_1, train_predict_0, train_predict_1]

Some weights of the model checkpoint at DeepPavlov/rubert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9251515320817685


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9266368576806214


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9260103315482586


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9277574603411661


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9269768549836226


Some weights of the model checkpoint at DeepPavlov/rubert-base-cased-conversational were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.925410375497127


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9269935208734196


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9255944517915704


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9275653583430695


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9269938919890788


Some weights of the model checkpoint at DeepPavlov/rubert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9270003026300763


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9283930815544355


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9270545254864975


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9287923514297772


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9287902863644528


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9258020544679715


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9273983455576748


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9264013579030013


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9279452526526897


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9278852970402226


Some weights of the model checkpoint at ai-forever/ruBert-base were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.925892375389318


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9271105718402483


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9262726133073077


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.928043475500552


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9274373883924363


Some weights of the model checkpoint at cointegrated/LaBSE-en-ru were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading (…)okenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/521k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9279480268424003


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.929667132666922


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9289497568948111


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9304328633430186


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9295890952639589


Some weights of the model checkpoint at cointegrated/LaBSE-en-ru were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9289422378030987


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.930540330574491


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.929111401194118


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9310026985990814


  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1278 [00:00<?, ?it/s]

  0%|          | 0/377 [00:00<?, ?it/s]

0.9304411527654723


In [None]:
import pickle
# Persist `dict_predicts` (the per-model prediction lists built by the
# inference cell) to Google Drive as a pickle file.
with open('drive/MyDrive/preds_train_final.pickle', 'wb') as f:
    pickle.dump(dict_predicts, f)