In [None]:
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Import Libraries

In [None]:
import os

import pandas as pd
from torch.utils.data import DataLoader
from transformers import AutoTokenizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
from sklearn.metrics import f1_score

# from dataset import *
# from model import *
# from trainer import Trainer

torch.manual_seed(51)

<torch._C.Generator at 0x791c0e41e890>

In [None]:
# Relative path prefix; not referenced by any other cell visible in this notebook.
PATH = "../"
# Sequence length handed to the datasets as `max_seq_len` (texts are padded/truncated to it).
MAX_LEN = 128
# Batch size used for the train, validation and test DataLoaders.
BATCH_SIZE = 512

In [None]:
import torch
from torch.utils.data import Dataset


class FiveDataset(Dataset):
    """Map-style dataset that tokenizes review texts lazily, one item per access.

    Args:
        dataframe: table with a ``'text'`` column and, optionally, a ``'rate'``
            column holding integer class labels.
        tokenizer: object exposing ``encode_plus`` (a Hugging Face tokenizer).
        max_seq_len: every sample is padded / truncated to this many tokens.

    Each item is a dict with ``'ids'`` and ``'mask'`` long tensors, plus
    ``'targets'`` when the dataframe contains a ``'rate'`` column.
    """

    def __init__(self, dataframe, tokenizer, max_seq_len):
        self.data = dataframe
        self.text = dataframe['text'].tolist()
        # Labels are optional so the same class can wrap the unlabeled test set.
        self.targets = None
        if 'rate' in dataframe:
            self.targets = dataframe['rate'].tolist()
        self.tokenizer = tokenizer
        self.max_seq_len = max_seq_len

    def __getitem__(self, index):
        # Collapse every run of whitespace (including newlines) into one space.
        text = ' '.join(str(self.text[index]).split())

        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_seq_len,
            # `pad_to_max_length` is deprecated; `padding='max_length'` is the
            # supported spelling and matches FiveDataset2.
            padding='max_length',
            return_token_type_ids=True,
            truncation=True
        )

        item = {
            'ids': torch.tensor(inputs['input_ids'], dtype=torch.long),
            'mask': torch.tensor(inputs['attention_mask'], dtype=torch.long),
        }
        if self.targets is not None:
            item['targets'] = torch.tensor(self.targets[index], dtype=torch.long)
        return item

    def __len__(self) -> int:
        return len(self.text)

import torch
import random
from torch.utils.data import Dataset
from typing import Optional, Dict, List


import torch
import random
from torch.utils.data import Dataset
from typing import Optional, Dict


class FiveDataset2(Dataset):
    """Tokenizing dataset with optional on-the-fly text augmentation.

    Args:
        dataframe: table with a ``'text'`` column and, optionally, a ``'rate'``
            column holding integer class labels.
        tokenizer: object exposing ``encode_plus`` (a Hugging Face tokenizer).
        max_seq_len: every sample is padded / truncated to this many tokens.
        is_train: augmentation is applied only when this is True.
        aug_config: optional dict with the keys
            ``aug_prob`` (probability of augmenting a sample, default 0.3),
            ``methods`` (subset of ``'synonym'``, ``'shuffle'``, ``'typo'``,
            ``'dropout'``, applied in the listed order),
            ``synonyms`` (lower-cased word -> list of replacements),
            ``typo_prob`` (per-character swap probability, default 0.1),
            ``dropout_prob`` (per-word drop probability, default 0.1).

    Each item is a dict with ``'ids'`` and ``'mask'`` long tensors, plus
    ``'targets'`` when the dataframe contains a ``'rate'`` column.
    """

    def __init__(
        self,
        dataframe,
        tokenizer,
        max_seq_len: int,
        is_train: bool = False,
        aug_config: Optional[Dict] = None
    ):
        self.data = dataframe
        self.text = dataframe['text'].tolist()
        self.targets = None
        if 'rate' in dataframe:
            self.targets = dataframe['rate'].tolist()
        self.tokenizer = tokenizer
        self.max_seq_len = max_seq_len
        self.is_train = is_train
        self.aug_config = aug_config or {}

        # Read the augmentation settings out of the config.
        self._init_augmentation_params()

    def _init_augmentation_params(self):
        """Populate augmentation attributes from ``aug_config`` with defaults."""
        self.aug_prob = self.aug_config.get('aug_prob', 0.3)
        self.methods = self.aug_config.get('methods', [])
        self.synonyms = self.aug_config.get('synonyms', {})
        self.typo_prob = self.aug_config.get('typo_prob', 0.1)
        self.dropout_prob = self.aug_config.get('dropout_prob', 0.1)
        # Cyrillic letter -> look-alike replacements used by the 'typo' method.
        self.char_swap_map = {
            'а': ['a', '@'],
            'о': ['o', '0'],
            'е': ['e', 'ё'],
            'и': ['u', 'i'],
            'с': ['c', 's']
        }

    def __getitem__(self, index):
        # Collapse every run of whitespace (including newlines) into one space.
        text = str(self.text[index])
        text = ' '.join(text.split())

        if self.is_train and self.methods:
            text = self._apply_augmentation(text)

        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_seq_len,
            padding='max_length',
            return_token_type_ids=True,
            truncation=True
        )

        item = {
            'ids': torch.tensor(inputs['input_ids'], dtype=torch.long),
            'mask': torch.tensor(inputs['attention_mask'], dtype=torch.long)
        }

        if self.targets is not None:
            item['targets'] = torch.tensor(self.targets[index], dtype=torch.long)

        return item

    def _apply_augmentation(self, text: str) -> str:
        """Apply the configured methods in order with probability ``aug_prob``.

        Unknown method names are ignored; 'synonym' is skipped when no synonym
        dictionary was supplied.
        """
        if random.random() > self.aug_prob:
            return text

        for method in self.methods:
            if method == 'synonym' and self.synonyms:
                text = self._replace_synonyms(text)
            elif method == 'shuffle':
                text = self._shuffle_words(text)
            elif method == 'typo':
                text = self._add_typos(text)
            elif method == 'dropout':
                text = self._random_dropout(text)

        return text

    def _replace_synonyms(self, text: str) -> str:
        """Replace each whitespace-separated word found in ``synonyms``.

        Lookup uses the lower-cased token, so words with attached punctuation
        do not match. The replacement is drawn uniformly from the word's list.
        """
        replaced = []
        for word in text.split():
            options = self.synonyms.get(word.lower())
            replaced.append(random.choice(options) if options else word)
        return ' '.join(replaced)

    def _shuffle_words(self, text: str) -> str:
        """Return the text with its whitespace-separated words in random order."""
        words = text.split()
        random.shuffle(words)
        return ' '.join(words)

    def _add_typos(self, text: str) -> str:
        """Swap characters listed in ``char_swap_map`` with probability ``typo_prob``."""
        chars = []
        for ch in text:
            variants = self.char_swap_map.get(ch)
            if variants and random.random() < self.typo_prob:
                ch = random.choice(variants)
            chars.append(ch)
        return ''.join(chars)

    def _random_dropout(self, text: str) -> str:
        """Drop each word with probability ``dropout_prob``.

        If every word would be dropped the original text is returned, so the
        tokenizer never receives an empty string because of augmentation.
        """
        kept = [word for word in text.split() if random.random() >= self.dropout_prob]
        return ' '.join(kept) if kept else text

    def __len__(self) -> int:
        return len(self.text)

In [None]:
from typing import Dict

import torch
from transformers import AutoModel


class ModelForClassification(torch.nn.Module):
    """Pretrained encoder followed by a two-layer classification head.

    Args:
        model_path: name or path passed to ``AutoModel.from_pretrained``.
        config: dict with ``'num_classes'`` and ``'dropout_rate'``.

    ``model_name`` and ``config`` are kept as attributes because the trainers'
    ``save`` methods store them in the checkpoint.
    """

    # Width of the hidden layer in the classification head.
    HEAD_HIDDEN_SIZE = 768

    def __init__(self, model_path: str, config: Dict):
        super().__init__()
        self.model_name = model_path
        self.config = config
        self.n_classes = config['num_classes']
        self.dropout_rate = config['dropout_rate']
        self.bert = AutoModel.from_pretrained(self.model_name)
        # Take the encoder width from its own config rather than hard-coding
        # 312, so the head also fits encoders with a different hidden size.
        encoder_dim = self.bert.config.hidden_size
        self.pre_classifier = torch.nn.Linear(encoder_dim, self.HEAD_HIDDEN_SIZE)
        self.dropout = torch.nn.Dropout(self.dropout_rate)
        self.classifier = torch.nn.Linear(self.HEAD_HIDDEN_SIZE, self.n_classes)

    def forward(self, input_ids, attention_mask):
        """Return class logits computed from the first token's hidden state."""
        output = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        # output[0] holds the per-token hidden states; index 0 on the sequence
        # axis selects the first token of every sample.
        hidden_state = output[0][:, 0]
        hidden_state = self.pre_classifier(hidden_state)
        hidden_state = torch.relu(hidden_state)
        hidden_state = self.dropout(hidden_state)
        return self.classifier(hidden_state)

class ModelForClassification2(torch.nn.Module):
    """Pretrained encoder with a deeper head (Linear-LayerNorm-GELU, Linear-GELU).

    Args:
        model_path: name or path passed to ``AutoModel.from_pretrained``.
        config: dict with ``'num_classes'`` and ``'dropout_rate'``.
    """

    def __init__(self, model_path: str, config: Dict):
        super().__init__()
        # The trainers' save() methods read these two attributes from the
        # model; without them saving a checkpoint raises AttributeError.
        self.model_name = model_path
        self.config = config

        self.bert = AutoModel.from_pretrained(model_path)
        bert_output_dim = self.bert.config.hidden_size

        self.pre_classifier = torch.nn.Linear(bert_output_dim, bert_output_dim)
        self.layer_norm = torch.nn.LayerNorm(bert_output_dim)
        self.dropout = torch.nn.Dropout(config['dropout_rate'])
        self.additional_fc = torch.nn.Linear(bert_output_dim, bert_output_dim)
        self.classifier = torch.nn.Linear(bert_output_dim, config['num_classes'])

    def forward(self, input_ids, attention_mask):
        """Return class logits computed from the first token's hidden state."""
        output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Index 0 on the sequence axis selects the first token of every sample.
        pooled = output.last_hidden_state[:, 0]

        pooled = self.pre_classifier(pooled)
        pooled = self.layer_norm(pooled)
        pooled = torch.nn.functional.gelu(pooled)
        pooled = self.dropout(pooled)

        pooled = self.additional_fc(pooled)
        pooled = torch.nn.functional.gelu(pooled)
        pooled = self.dropout(pooled)

        return self.classifier(pooled)

In [None]:
from typing import Dict

import torch
from numpy import asarray
from torch.nn import CrossEntropyLoss
from torch.optim import Adam
from tqdm.notebook import tqdm


class Trainer:
    """Minimal train / validate / predict loop for a sequence classifier.

    The model is called as ``model(ids, mask)`` and must return class logits.
    Batches are dicts with ``'ids'`` and ``'mask'`` tensors, plus ``'targets'``
    for training and validation.

    Config keys read by this class:
        ``n_epochs``, ``lr``, ``device`` - required;
        ``weight_decay`` - optional, default 0.0 (passed to Adam);
        ``verbose`` - optional, default True (wraps loaders in tqdm bars).
    """

    def __init__(self, config: Dict):
        self.config = config
        self.n_epochs = config['n_epochs']
        self.optimizer = None
        # The optimizer is built lazily in fit() because the model is only
        # known then. `weight_decay` used to be accepted in the config but
        # ignored; it is now forwarded to Adam.
        self.opt_fn = lambda model: Adam(
            model.parameters(),
            lr=config['lr'],
            weight_decay=config.get('weight_decay', 0.0),
        )
        self.model = None
        self.history = None
        self.loss_fn = CrossEntropyLoss()
        self.device = config['device']
        self.verbose = config.get('verbose', True)

    def fit(self, model, train_dataloader, val_dataloader):
        """Train for ``n_epochs`` and return the model switched to eval mode.

        ``history['train_loss']`` collects one value per training batch;
        ``history['val_loss']`` and ``history['val_acc']`` one value per epoch.
        """
        self.model = model.to(self.device)
        self.optimizer = self.opt_fn(self.model)
        self.history = {
            'train_loss': [],
            'val_loss': [],
            'val_acc': []
        }

        for epoch in range(self.n_epochs):
            print(f"Epoch {epoch + 1}/{self.n_epochs}")
            train_info = self.train_epoch(train_dataloader)
            val_info = self.val_epoch(val_dataloader)
            self.history['train_loss'].extend(train_info['loss'])
            self.history['val_loss'].append(val_info['loss'])
            self.history['val_acc'].append(val_info['acc'])
        return self.model.eval()

    def train_epoch(self, train_dataloader):
        """Run one optimization pass; return ``{'loss': [per-batch losses]}``."""
        self.model.train()
        losses = []
        if self.verbose:
            train_dataloader = tqdm(train_dataloader)
        for batch in train_dataloader:
            ids = batch['ids'].to(self.device, dtype=torch.long)
            mask = batch['mask'].to(self.device, dtype=torch.long)
            targets = batch['targets'].to(self.device, dtype=torch.long)

            outputs = self.model(ids, mask)
            loss = self.loss_fn(outputs, targets)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            loss_val = loss.item()
            if self.verbose:
                train_dataloader.set_description(f"Loss={loss_val:.3}")
            losses.append(loss_val)
        return {'loss': losses}

    def val_epoch(self, val_dataloader):
        """Evaluate on the whole loader; return ``{'acc': float, 'loss': float}``.

        Loss and accuracy are computed once over the concatenated logits of all
        batches, not averaged per batch.
        """
        self.model.eval()
        all_logits = []
        all_labels = []
        if self.verbose:
            val_dataloader = tqdm(val_dataloader)
        with torch.no_grad():
            for batch in val_dataloader:
                ids = batch['ids'].to(self.device, dtype=torch.long)
                mask = batch['mask'].to(self.device, dtype=torch.long)
                targets = batch['targets'].to(self.device, dtype=torch.long)
                outputs = self.model(ids, mask)
                all_logits.append(outputs)
                all_labels.append(targets)
        all_labels = torch.cat(all_labels)
        all_logits = torch.cat(all_logits)
        loss = self.loss_fn(all_logits, all_labels).item()
        acc = (all_logits.argmax(1) == all_labels).float().mean().item()
        print(acc)
        if self.verbose:
            val_dataloader.set_description(f"Loss={loss:.3}; Acc:{acc:.3}")
        return {
            'acc': acc,
            'loss': loss
        }

    def predict(self, test_dataloader):
        """Return a numpy array with the argmax class index of every sample.

        Raises:
            RuntimeError: if no model has been fitted or loaded yet.
        """
        if self.model is None:
            raise RuntimeError("You should train the model first")
        self.model.eval()
        predictions = []
        with torch.no_grad():
            for batch in test_dataloader:
                ids = batch['ids'].to(self.device, dtype=torch.long)
                mask = batch['mask'].to(self.device, dtype=torch.long)
                outputs = self.model(ids, mask)
                predictions.extend(outputs.argmax(1).tolist())
        return asarray(predictions)

    def save(self, path: str):
        """Write model weights plus the configs needed to rebuild it to ``path``.

        The model must expose ``config`` and ``model_name`` attributes.

        Raises:
            RuntimeError: if no model has been fitted or loaded yet.
        """
        if self.model is None:
            raise RuntimeError("You should train the model first")
        checkpoint = {
            "config": self.model.config,
            "trainer_config": self.config,
            "model_name": self.model.model_name,
            # Remember the architecture so load() can rebuild the same class.
            "model_class": type(self.model).__name__,
            "model_state_dict": self.model.state_dict()
        }
        torch.save(checkpoint, path)

    @staticmethod
    def _resolve_model_class(name: str):
        """Look up a ``torch.nn.Module`` subclass by name in this module's globals."""
        candidate = globals().get(name)
        if not (isinstance(candidate, type) and issubclass(candidate, torch.nn.Module)):
            raise RuntimeError(f"Unknown model class {name!r} in checkpoint")
        return candidate

    @classmethod
    def load(cls, path: str, model_cls=None):
        """Rebuild a trainer and its model from a checkpoint written by ``save``.

        Args:
            path: checkpoint file.
            model_cls: class called as ``model_cls(model_name, config)``. When
                omitted, the class name stored in the checkpoint is used;
                checkpoints without one fall back to ``ModelForClassification``.

        Raises:
            RuntimeError: if a required key is missing or the stored class
                name cannot be resolved.
        """
        # Load onto CPU first so a checkpoint written on a GPU machine can be
        # restored on a machine without one.
        ckpt = torch.load(path, map_location="cpu")
        keys = ["config", "trainer_config", "model_name", "model_state_dict"]
        for key in keys:
            if key not in ckpt:
                raise RuntimeError(f"Missing key {key} in checkpoint")
        if model_cls is None:
            model_cls = cls._resolve_model_class(
                ckpt.get("model_class", "ModelForClassification")
            )
        new_model = model_cls(ckpt['model_name'], ckpt["config"])
        new_model.load_state_dict(ckpt["model_state_dict"])

        trainer_config = dict(ckpt["trainer_config"])
        # The stored device may not exist on this machine.
        if str(trainer_config["device"]).startswith("cuda") and not torch.cuda.is_available():
            trainer_config["device"] = "cpu"
        new_trainer = cls(trainer_config)
        new_trainer.model = new_model.to(new_trainer.device)
        return new_trainer


from typing import Dict
import torch
from numpy import asarray
from torch.nn import CrossEntropyLoss
from torch.optim import Adam
from tqdm.notebook import tqdm

class Trainer2:
    """Training loop with gradual unfreezing of the encoder, scored by weighted F1.

    The model is called as ``model(ids, mask)``, must return class logits and
    must expose its encoder as ``model.bert`` with ``bert.encoder.layer``.
    Batches are dicts with ``'ids'`` and ``'mask'`` tensors, plus ``'targets'``
    for training and validation.

    Config keys read by this class:
        ``n_epochs``, ``lr``, ``device`` - required;
        ``weight_decay`` - optional, default 0.0 (passed to Adam);
        ``verbose`` - optional, default True (wraps loaders in tqdm bars);
        ``unfreeze_start_epoch`` - optional, default 2 (0-based epoch index at
        which the first encoder layer is unfrozen);
        ``unfreeze_every`` - optional, default 2 (epochs between unfreezing one
        more layer).
    """

    def __init__(self, config: Dict):
        self.config = config
        self.n_epochs = config['n_epochs']
        self.optimizer = None
        # The optimizer is built lazily in fit() because the model is only
        # known then. `weight_decay` used to be accepted in the config but
        # ignored; it is now forwarded to Adam.
        self.opt_fn = lambda model: Adam(
            model.parameters(),
            lr=config['lr'],
            weight_decay=config.get('weight_decay', 0.0),
        )
        self.model = None
        self.history = None
        self.loss_fn = CrossEntropyLoss()
        self.device = config['device']
        self.verbose = config.get('verbose', True)

        # Gradual-unfreezing schedule.
        self.unfreeze_start_epoch = config.get('unfreeze_start_epoch', 2)
        self.unfreeze_every = config.get('unfreeze_every', 2)

    def _freeze_bert(self):
        """Disable gradients for every parameter of ``model.bert``."""
        for param in self.model.bert.parameters():
            param.requires_grad = False

    def _unfreeze_bert_layers(self, epoch: int):
        """Enable gradients for the last N encoder layers according to ``epoch``.

        Nothing changes before ``unfreeze_start_epoch``. From then on one more
        layer (counted from the top) is unfrozen every ``unfreeze_every``
        epochs, capped at the number of encoder layers. The pooler, when
        present, is unfrozen too; this method never touches the embeddings.
        """
        if epoch < self.unfreeze_start_epoch:
            return

        encoder_layers = self.model.bert.encoder.layer
        # Guard against unfreeze_every <= 0, which would divide by zero.
        step = max(1, self.unfreeze_every)
        layers_to_unfreeze = min(
            (epoch - self.unfreeze_start_epoch) // step + 1,
            len(encoder_layers)
        )

        # Reset every encoder layer to frozen ...
        for layer in encoder_layers:
            for param in layer.parameters():
                param.requires_grad = False

        # ... then re-enable only the last N layers.
        for layer in encoder_layers[-layers_to_unfreeze:]:
            for param in layer.parameters():
                param.requires_grad = True

        # Unfreeze the pooler if the encoder has one.
        pooler = getattr(self.model.bert, 'pooler', None)
        if pooler is not None:
            for param in pooler.parameters():
                param.requires_grad = True

    def fit(self, model, train_dataloader, val_dataloader):
        """Train for ``n_epochs`` and return the model switched to eval mode.

        ``history['train_loss']`` collects one value per training batch;
        ``history['val_loss']`` and ``history['val_acc']`` one value per epoch.
        Note that ``'val_acc'`` holds the weighted F1 score returned by
        ``val_epoch`` (the key name is kept for compatibility).
        """
        self.model = model.to(self.device)
        self._freeze_bert()  # the encoder starts fully frozen
        self.optimizer = self.opt_fn(self.model)
        self.history = {
            'train_loss': [],
            'val_loss': [],
            'val_acc': []
        }

        for epoch in range(self.n_epochs):
            print(f"Epoch {epoch + 1}/{self.n_epochs}")

            # Update which encoder layers are trainable for this epoch.
            self._unfreeze_bert_layers(epoch)

            train_info = self.train_epoch(train_dataloader)
            val_info = self.val_epoch(val_dataloader)

            self.history['train_loss'].extend(train_info['loss'])
            self.history['val_loss'].append(val_info['loss'])
            self.history['val_acc'].append(val_info['acc'])

        return self.model.eval()

    def train_epoch(self, train_dataloader):
        """Run one optimization pass; return ``{'loss': [per-batch losses]}``."""
        self.model.train()
        losses = []
        if self.verbose:
            train_dataloader = tqdm(train_dataloader)
        for batch in train_dataloader:
            ids = batch['ids'].to(self.device, dtype=torch.long)
            mask = batch['mask'].to(self.device, dtype=torch.long)
            targets = batch['targets'].to(self.device, dtype=torch.long)

            outputs = self.model(ids, mask)
            loss = self.loss_fn(outputs, targets)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            loss_val = loss.item()
            if self.verbose:
                train_dataloader.set_description(f"Loss={loss_val:.3}")
            losses.append(loss_val)
        return {'loss': losses}

    def val_epoch(self, val_dataloader):
        """Evaluate on the whole loader; return ``{'acc': weighted F1, 'loss': float}``.

        Loss and F1 are computed once over the concatenated logits of all
        batches. The score is returned under ``'acc'`` for compatibility with
        callers that expect that key.
        """
        self.model.eval()
        all_logits = []
        all_labels = []
        if self.verbose:
            val_dataloader = tqdm(val_dataloader)
        with torch.no_grad():
            for batch in val_dataloader:
                ids = batch['ids'].to(self.device, dtype=torch.long)
                mask = batch['mask'].to(self.device, dtype=torch.long)
                targets = batch['targets'].to(self.device, dtype=torch.long)
                outputs = self.model(ids, mask)
                all_logits.append(outputs)
                all_labels.append(targets)

        # Concatenate and move to CPU: f1_score works on numpy arrays.
        all_labels = torch.cat(all_labels).cpu()
        all_logits = torch.cat(all_logits).cpu()

        loss = self.loss_fn(all_logits, all_labels).item()
        acc = f1_score(
            all_labels.numpy(),
            all_logits.argmax(1).numpy(),
            average='weighted'
        )
        print(acc)

        if self.verbose:
            val_dataloader.set_description(f"Loss={loss:.3}; F1:{acc:.3}")
        return {'acc': acc, 'loss': loss}

    def predict(self, test_dataloader):
        """Return a numpy array with the argmax class index of every sample.

        Raises:
            RuntimeError: if no model has been fitted or loaded yet.
        """
        if self.model is None:
            raise RuntimeError("You should train the model first")
        self.model.eval()
        predictions = []
        with torch.no_grad():
            for batch in test_dataloader:
                ids = batch['ids'].to(self.device, dtype=torch.long)
                mask = batch['mask'].to(self.device, dtype=torch.long)
                outputs = self.model(ids, mask)
                predictions.extend(outputs.argmax(1).tolist())
        return asarray(predictions)

    def save(self, path: str):
        """Write model weights plus the configs needed to rebuild it to ``path``.

        The model must expose ``config`` and ``model_name`` attributes.

        Raises:
            RuntimeError: if no model has been fitted or loaded yet.
        """
        if self.model is None:
            raise RuntimeError("You should train the model first")
        checkpoint = {
            "config": self.model.config,
            "trainer_config": self.config,
            "model_name": self.model.model_name,
            # Remember the architecture so load() can rebuild the same class.
            "model_class": type(self.model).__name__,
            "model_state_dict": self.model.state_dict()
        }
        torch.save(checkpoint, path)

    @staticmethod
    def _resolve_model_class(name: str):
        """Look up a ``torch.nn.Module`` subclass by name in this module's globals."""
        candidate = globals().get(name)
        if not (isinstance(candidate, type) and issubclass(candidate, torch.nn.Module)):
            raise RuntimeError(f"Unknown model class {name!r} in checkpoint")
        return candidate

    @classmethod
    def load(cls, path: str, model_cls=None):
        """Rebuild a trainer and its model from a checkpoint written by ``save``.

        Args:
            path: checkpoint file.
            model_cls: class called as ``model_cls(model_name, config)``. When
                omitted, the class name stored in the checkpoint is used;
                checkpoints without one fall back to ``ModelForClassification``.

        Raises:
            RuntimeError: if a required key is missing or the stored class
                name cannot be resolved.
        """
        # Load onto CPU first so a checkpoint written on a GPU machine can be
        # restored on a machine without one.
        ckpt = torch.load(path, map_location="cpu")
        keys = ["config", "trainer_config", "model_name", "model_state_dict"]
        for key in keys:
            if key not in ckpt:
                raise RuntimeError(f"Missing key {key} in checkpoint")
        if model_cls is None:
            model_cls = cls._resolve_model_class(
                ckpt.get("model_class", "ModelForClassification")
            )
        new_model = model_cls(ckpt['model_name'], ckpt["config"])
        new_model.load_state_dict(ckpt["model_state_dict"])

        trainer_config = dict(ckpt["trainer_config"])
        # The stored device may not exist on this machine.
        if str(trainer_config["device"]).startswith("cuda") and not torch.cuda.is_available():
            trainer_config["device"] = "cpu"
        new_trainer = cls(trainer_config)
        new_trainer.model = new_model.to(new_trainer.device)
        return new_trainer

# Loading data

In [None]:
# Read the labelled training set and the test set from the working directory.
train_data = pd.read_csv("train.csv")
test_data = pd.read_csv("test.csv")

# Preview the first rows of the training frame.
train_data.head()

Unnamed: 0,rate,text
0,4,Очень понравилось. Были в начале марта с соба...
1,5,В целом магазин устраивает.\nАссортимент позво...
2,5,"Очень хорошо что открылась 5 ка, теперь не над..."
3,3,Пятёрочка громко объявила о том как она заботи...
4,3,"Тесно, вечная сутолока, между рядами трудно ра..."


# Label encoding

In [None]:
# Encode the ratings as contiguous class indices for training.
# The untouched ratings are kept in `rate_raw`, so re-running this cell re-fits
# the encoder on the original labels instead of on already-encoded ones (which
# would make `le.inverse_transform` return class indices rather than ratings).
le = LabelEncoder()

if "rate_raw" not in train_data.columns:
    train_data["rate_raw"] = train_data["rate"]
train_data["rate"] = le.fit_transform(train_data["rate_raw"])
train_data.head()

Unnamed: 0,rate,text
0,3,Очень понравилось. Были в начале марта с соба...
1,4,В целом магазин устраивает.\nАссортимент позво...
2,4,"Очень хорошо что открылась 5 ка, теперь не над..."
3,2,Пятёрочка громко объявила о том как она заботи...
4,2,"Тесно, вечная сутолока, между рядами трудно ра..."


# Train Test split

In [None]:
# Hold out 20% of the labelled rows for validation; the fixed random_state keeps the split reproducible.
train_split, val_split = train_test_split(train_data, test_size=0.2, random_state=42)

# Loading tokenizer from pretrained

In [None]:
# Tokenizer for the same pretrained checkpoint name that the model cell uses.
# NOTE(review): truncation is also requested per call in the datasets' encode_plus;
# confirm that `truncation` / `do_lower_case` here have the intended effect for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    "cointegrated/rubert-tiny2", truncation=True, do_lower_case=True)

# Creating datasets and dataloaders

In [None]:
# # Конфигурация аугментации
# AUG_CONFIG = {
#     'aug_prob': 0.5,  # Вероятность применения аугментаций
#     'methods': ['synonym', 'shuffle', 'typo', 'dropout'],  # Методы аугментации
#     'synonyms': {  # Словарь синонимов
#         'хороший': ['отличный', 'прекрасный', 'неплохой'],
#         'плохой': ['ужасный', 'отвратительный', 'негодный']
#     }
# }

# # Создание датасета
# train_dataset = FiveDataset(
#     dataframe=train_df,
#     tokenizer=tokenizer,
#     max_seq_len=512,
#     is_train=True,
#     aug_config=AUG_CONFIG
# )

In [None]:
# Wrap each split in a FiveDataset that tokenizes to MAX_LEN tokens on access.
# NOTE: the following cell builds these same three datasets again before creating the loaders.
train_dataset = FiveDataset(train_split, tokenizer, MAX_LEN)
val_dataset = FiveDataset(val_split, tokenizer, MAX_LEN)
test_dataset = FiveDataset(test_data, tokenizer, MAX_LEN)

In [None]:
# One tokenizing dataset per split, all sharing the tokenizer and MAX_LEN.
train_dataset, val_dataset, test_dataset = (
    FiveDataset(frame, tokenizer, MAX_LEN)
    for frame in (train_split, val_split, test_data)
)

# Only the training loader shuffles; validation and test keep row order so
# that predictions line up with the input rows.
train_params = dict(batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
test_params = dict(batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

train_dataloader = DataLoader(train_dataset, **train_params)
val_dataloader = DataLoader(val_dataset, **test_params)
test_dataloader = DataLoader(test_dataset, **test_params)

In [None]:
# AUG_CONFIG = {
#     'aug_prob': 0.5,
#     'methods': ['synonym', 'shuffle'],
#     'synonyms': {
#         'отлично': ['прекрасно', 'замечательно'],
#         'плохо': ['ужасно', 'отвратительно']
#     }
# }

# # Создание датасетов
# train_dataset = FiveDataset2(
#     dataframe=train_dataset,
#     tokenizer=tokenizer,
#     max_seq_len=512,
#     is_train=True,  # Включаем аугментацию только для тренировочных данных
#     aug_config=AUG_CONFIG
# )

# val_dataset = FiveDataset2(
#     dataframe=val_dataset,
#     tokenizer=tokenizer,
#     max_seq_len=512,
#     is_train=False  # Без аугментации для валидации
# )

# test_dataset = FiveDataset2(
#     dataframe=test_dataset,
#     tokenizer=tokenizer,
#     max_seq_len=512,
#     is_train=False  # Без аугментации для теста
# )

# # Параметры DataLoader остаются без изменений
# train_params = {
#     "batch_size": 32,
#     "shuffle": True,
#     "num_workers": 0
# }

# test_params = {
#     "batch_size": 32,
#     "shuffle": False,
#     "num_workers": 0
# }

# train_dataloader = DataLoader(train_dataset, **train_params)
# val_dataloader = DataLoader(val_dataset, **test_params)
# test_dataloader = DataLoader(test_dataset, **test_params)

# Loading pretrained model from Huggingface

In [None]:
# Head configuration: number of output classes and the dropout rate used in the head.
config = {
    "num_classes": 5,
    "dropout_rate": 0.1
}
# Build the classifier on top of the pretrained encoder (weights are fetched by AutoModel.from_pretrained).
model = ModelForClassification2(
    "cointegrated/rubert-tiny2",
    config=config
)

# Creating Trainer object and fitting the model

In [None]:
# Training hyper-parameters; which keys are actually consumed is defined by Trainer2.__init__.
trainer_config = {
    "lr": 1e-4,
    "n_epochs": 11,
    "weight_decay": 1e-6,
    "batch_size": BATCH_SIZE,
    # Use the GPU when one is available, otherwise fall back to CPU.
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "seed": 51,
    # Gradual-unfreezing schedule read by Trainer2.
    'unfreeze_start_epoch': 2,
    'unfreeze_every': 2
}
t = Trainer2(trainer_config)

In [None]:
# Train for the configured number of epochs; fit() returns the model in eval mode,
# so the notebook displays its repr as the cell output.
t.fit(
    model,
    train_dataloader,
    val_dataloader
)

Epoch 1/11


  0%|          | 0/153 [00:00<?, ?it/s]

  0%|          | 0/39 [00:00<?, ?it/s]

0.5934116388705512
Epoch 2/11


  0%|          | 0/153 [00:00<?, ?it/s]



  0%|          | 0/39 [00:00<?, ?it/s]

0.6050574548102888
Epoch 3/11


  0%|          | 0/153 [00:00<?, ?it/s]



  0%|          | 0/39 [00:00<?, ?it/s]

0.6178785792327212
Epoch 4/11


  0%|          | 0/153 [00:00<?, ?it/s]



  0%|          | 0/39 [00:00<?, ?it/s]

0.6202721817882835
Epoch 5/11


  0%|          | 0/153 [00:00<?, ?it/s]



  0%|          | 0/39 [00:00<?, ?it/s]

0.6417459334784509
Epoch 6/11


  0%|          | 0/153 [00:00<?, ?it/s]



  0%|          | 0/39 [00:00<?, ?it/s]

0.6279875433150468
Epoch 7/11


  0%|          | 0/153 [00:00<?, ?it/s]



  0%|          | 0/39 [00:00<?, ?it/s]

0.6437255616992205
Epoch 8/11


  0%|          | 0/153 [00:00<?, ?it/s]



  0%|          | 0/39 [00:00<?, ?it/s]

0.6556342978529089
Epoch 9/11


  0%|          | 0/153 [00:00<?, ?it/s]



  0%|          | 0/39 [00:00<?, ?it/s]

0.6324019881466554
Epoch 10/11


  0%|          | 0/153 [00:00<?, ?it/s]



  0%|          | 0/39 [00:00<?, ?it/s]

0.6518539049443086
Epoch 11/11


  0%|          | 0/153 [00:00<?, ?it/s]



  0%|          | 0/39 [00:00<?, ?it/s]

0.653428271213767


ModelForClassification2(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(83828, 312, padding_idx=0)
      (position_embeddings): Embedding(2048, 312)
      (token_type_embeddings): Embedding(2, 312)
      (LayerNorm): LayerNorm((312,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-2): 3 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=312, out_features=312, bias=True)
              (key): Linear(in_features=312, out_features=312, bias=True)
              (value): Linear(in_features=312, out_features=312, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=312, out_features=312, bias=True)
              (LayerNorm): LayerNorm((312,), eps=1e-12, el

# Save model

In [None]:
# Persist the trained weights together with the model / trainer configs.
t.save("baseline_model.ckpt")

# Load pretrained Model

In [None]:
# Rebuild a trainer (and its model) from the checkpoint file; this rebinds `t`.
# NOTE(review): the model trained above is a ModelForClassification2 fitted with Trainer2;
# confirm that Trainer.load reconstructs the same architecture before relying on its predictions.
t = Trainer.load("baseline_model.ckpt")

Some weights of the model checkpoint at cointegrated/rubert-tiny2 were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


# Get testset predictions


In [None]:
# Predicted class indices (encoded labels) for the test set, in loader order.
predictions = t.predict(test_dataloader)

# Create submission


In [None]:
# Fill the submission template with predictions, then map the encoded class
# indices back to the original rating values with the fitted LabelEncoder.
sample_submission = pd.read_csv("sample_submission.csv")
sample_submission["rate"] = predictions
sample_submission.rate = le.inverse_transform(sample_submission.rate)
sample_submission.head()

Unnamed: 0,index,rate
0,0,5
1,1,5
2,2,5
3,3,4
4,4,1


In [None]:
# Write the submission file without the pandas index column.
sample_submission.to_csv("submission_to_send.csv", index=False)