In [1]:
pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import gc

import pandas as pd
import numpy as np

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.optim import AdamW
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModel #AutoModelForSequenceClassification
from transformers import get_linear_schedule_with_warmup

def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[0] #First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
    sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
    return sum_embeddings / sum_mask

# sentences = ['Привет! Как твои дела?',
#              'А правда, что 42 твое любимое число?']

# model = AutoModel.from_pretrained("sberbank-ai/sbert_large_mt_nlu_ru")

# encoded_input = tokenizer(sentences, padding='max_length', truncation=True, max_length=24, return_tensors='pt')

# with torch.no_grad():
#     model_output = model(**encoded_input)

# sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])

In [4]:
company = 'DSKY'
tg = pd.read_parquet('/content/drive/MyDrive/Диссертация/Парсеры сайтов/tg_raw.parquet')
comp = tg[tg[company] == True].copy()
comp.drop(columns=['views', 'forwards', 'fwd_from', 'source'], inplace=True)
comp.dropna(inplace=True)
comp = comp[comp['date'] < '2022-01-01'].copy()
comp = comp[comp['date'] >= '2017-02-10'].copy()
cols = ['date', 'message'] + [col for col in comp.columns if f'{company}_' in col]
comp = comp[cols].copy()
train_data, test_data = comp.iloc[:int(0.8*len(comp)), :], comp.iloc[int(0.8*len(comp)):, :]
cols = [col for col in comp.columns if f'{company}_' in col][-4]

In [5]:
train_texts, val_texts = train_data['message'].values, test_data['message'].values
train_target, val_target = train_data[cols].values, test_data[cols].values

### Большой Bert

In [6]:
tokenizer = AutoTokenizer.from_pretrained("sberbank-ai/sbert_large_mt_nlu_ru")

In [7]:
class BertClassifier(nn.Module):

    def __init__(self, n_classes=7, freeze=True):

        super(BertClassifier, self).__init__()

        self.bert = AutoModel.from_pretrained("sberbank-ai/sbert_large_mt_nlu_ru")
        self.fc1 = nn.Linear(2048, 512)
        self.fc2 = nn.Linear(2560, 1024)
        self.fc3 = nn.Linear(1024, n_classes)

        self.drop1 = nn.Dropout(0.1)
        self.drop2 = nn.Dropout(0.1)

        self.act = nn.ReLU()

        if freeze:
          for param in self.bert.parameters():
            param.requires_grad = False

    def forward(self, input_id, token_type, mask):
        model_output = self.bert(input_id, token_type, mask)
        sentence_embeddings = mean_pooling(model_output, mask)
        embeddings = torch.cat([model_output[1], sentence_embeddings], dim=1)
        embeddings1 = self.fc1(embeddings)
        embeddings = torch.cat([embeddings, embeddings1], dim=1)
        embeddings = self.act(embeddings)
        embeddings = self.drop1(embeddings)
        embeddings = self.fc2(embeddings)
        embeddings = self.act(embeddings)
        embeddings = self.drop2(embeddings)
        output = self.fc3(embeddings)

        return output

In [8]:
class TgDataset(Dataset):
    def __init__(self, texts, targets):
    
      self.labels = targets.astype(int)
      self.texts = [tokenizer(text, 
                          padding='max_length', max_length = 256, truncation=True,
                          return_tensors="pt") for text in texts]

    def __len__(self):
        return len(self.labels)


    def __getitem__(self, idx):

        batch_texts = self.texts[idx]
        batch_y = self.labels[idx]

        return batch_texts, batch_y

In [9]:
# def train(model, train_dataloader, val_dataloader, optimizer, criterion, device, epochs, scheduler=None):

#     mean_1_min, mean_5_min, mean_10_min, mean_15_min, mean_30_min, mean_1_hour, mean_1_day = train_target.mean(axis=0)

#     model = model.to(device)

#     for epoch_num in range(epochs):
#         model.train()

#         acc_1_min_train = 0
#         acc_5_min_train = 0
#         acc_10_min_train = 0
#         acc_15_min_train = 0
#         acc_30_min_train = 0
#         acc_1_hour_train = 0
#         acc_1_day_train = 0
#         total_loss_train = 0

#         for train_input, train_label in tqdm(train_dataloader):

#             train_label = train_label.to(device)

#             print(train_input.keys())
#             input_id = train_input['input_ids'].squeeze(1).to(device)
#             token_type = train_input['token_type_ids'].squeeze(1).to(device)
#             mask = train_input['attention_mask'].squeeze(1).to(device)

#             output = model(input_id, token_type, mask)
#             batch_loss = criterion(output, train_label.float())
#             total_loss_train += batch_loss.item()
#             out = torch.sigmoid(output)

#             acc_1_min_train += ((out[:,0] > mean_1_min).int() == train_label[:,0]).sum().item()
#             acc_5_min_train += ((out[:,1] > mean_5_min).int() == train_label[:,1]).sum().item()
#             acc_10_min_train += ((out[:,2] > mean_10_min).int() == train_label[:,2]).sum().item()
#             acc_15_min_train += ((out[:,3] > mean_15_min).int() == train_label[:,3]).sum().item()
#             acc_30_min_train += ((out[:,4] > mean_30_min).int() == train_label[:,4]).sum().item()
#             acc_1_hour_train += ((out[:,5] > mean_1_hour).int() == train_label[:,5]).sum().item()
#             acc_1_day_train += ((out[:,6] > mean_1_day).int() == train_label[:,6]).sum().item()

#             model.zero_grad()
#             batch_loss.backward()
#             optimizer.step()

#             if scheduler:
#                 scheduler.step()
      
#         acc_1_min_val = 0
#         acc_5_min_val = 0
#         acc_10_min_val = 0
#         acc_15_min_val = 0
#         acc_30_min_val = 0
#         acc_1_hour_val = 0
#         acc_1_day_val = 0
#         total_loss_val = 0

#         with torch.no_grad():
#             model.eval()
#             for val_input, val_label in tqdm(val_dataloader):

#                 val_label = val_label.to(device)
#                 input_id = val_input['input_ids'].squeeze(1).to(device)
#                 token_type = val_input['token_type_ids'].squeeze(1).to(device)
#                 mask = val_input['attention_mask'].squeeze(1).to(device)

#                 output = model(input_id, token_type, mask)

#                 batch_loss = criterion(output, val_label.float())
#                 total_loss_val += batch_loss.item()
#                 out = torch.sigmoid(output)

#                 acc_1_min_val += ((out[:,0] > mean_1_min).int() == val_label[:,0]).sum().item()
#                 acc_5_min_val += ((out[:,1] > mean_5_min).int() == val_label[:,1]).sum().item()
#                 acc_10_min_val += ((out[:,2] > mean_10_min).int() == val_label[:,2]).sum().item()
#                 acc_15_min_val += ((out[:,3] > mean_15_min).int() == val_label[:,3]).sum().item()
#                 acc_30_min_val += ((out[:,4] > mean_30_min).int() == val_label[:,4]).sum().item()
#                 acc_1_hour_val += ((out[:,5] > mean_1_hour).int() == val_label[:,5]).sum().item()
#                 acc_1_day_val += ((out[:,6] > mean_1_day).int() == val_label[:,6]).sum().item()
      
#             print(
#                 f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / len(train_dataset): .5f} \
#                 | Train Acc 1 min: {acc_1_min_train / len(train_dataset): .5f} \
#                 | Train Acc 5 min: {acc_5_min_train / len(train_dataset): .5f} \
#                 | Train Acc 10 min: {acc_10_min_train / len(train_dataset): .5f} \
#                 | Train Acc 15 min: {acc_15_min_train / len(train_dataset): .5f} \
#                 | Train Acc 30 min: {acc_30_min_train / len(train_dataset): .5f} \
#                 | Train Acc 1 hour: {acc_1_hour_train / len(train_dataset): .5f} \
#                 | Train Acc 1 day: {acc_1_day_train / len(train_dataset): .5f} \
#                 | Val Loss: {total_loss_val / len(val_dataset): .5f} \
#                 | Val Acc 1 min: {acc_1_min_val / len(val_dataset): .5f} \
#                 | Val Acc 5 min: {acc_5_min_val / len(val_dataset): .5f} \
#                 | Val Acc 10 min: {acc_10_min_val / len(val_dataset): .5f} \
#                 | Val Acc 15 min: {acc_15_min_val / len(val_dataset): .5f} \
#                 | Val Acc 30 min: {acc_30_min_val / len(val_dataset): .5f} \
#                 | Val Acc 1 hour: {acc_1_hour_val / len(val_dataset): .5f} \
#                 | Val Acc 1 day: {acc_1_day_val / len(val_dataset): .5f}')

In [10]:
def train(model, train_dataloader, val_dataloader, optimizer, criterion, device, epochs, scheduler=None):

    mean_1_day = train_target.mean(axis=0)

    model = model.to(device)

    for epoch_num in range(epochs):
        model.train()

        acc_1_day_train = 0
        total_loss_train = 0

        for train_input, train_label in tqdm(train_dataloader):
            train_label = train_label.to(device)

            input_id = train_input['input_ids'].squeeze(1).to(device)
            token_type = train_input['token_type_ids'].squeeze(1).to(device)
            mask = train_input['attention_mask'].squeeze(1).to(device)

            output = model(input_id, token_type, mask)
            batch_loss = criterion(output, train_label.float().unsqueeze(1))
            total_loss_train += batch_loss.item()
            out = torch.sigmoid(output)

            acc_1_day_train += ((out.squeeze(1) > mean_1_day).int() == train_label).sum().item()

            model.zero_grad()
            batch_loss.backward()
            optimizer.step()

            if scheduler:
                scheduler.step()
      
        acc_1_day_val = 0
        total_loss_val = 0

        with torch.no_grad():
            model.eval()
            for val_input, val_label in tqdm(val_dataloader):

                val_label = val_label.to(device)
                input_id = val_input['input_ids'].squeeze(1).to(device)
                token_type = val_input['token_type_ids'].squeeze(1).to(device)
                mask = val_input['attention_mask'].squeeze(1).to(device)

                output = model(input_id, token_type, mask)

                batch_loss = criterion(output, val_label.float().unsqueeze(1))
                total_loss_val += batch_loss.item()
                out = torch.sigmoid(output)

                acc_1_day_val += ((out.squeeze(1) > mean_1_day).int() == val_label).sum().item()
      
            print(
                f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / len(train_dataset): .5f} \
                | Train Acc 1 day: {acc_1_day_train / len(train_dataset): .5f} \
                | Val Loss: {total_loss_val / len(val_dataset): .5f} \
                | Val Acc 1 day: {acc_1_day_val / len(val_dataset): .5f}')

In [11]:
batch_size = 32
epochs = 5
train_dataset, val_dataset = TgDataset(train_texts, train_target), TgDataset(val_texts, val_target)
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size)
total_steps = len(train_dataloader) * epochs

In [12]:
n_classes = 1
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = BertClassifier(n_classes)
LR = 2e-5
optimizer = AdamW(model.parameters(), lr=LR)
criterion = nn.BCEWithLogitsLoss()
scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps = 0,#int(0.1*total_steps),
                                            num_training_steps = total_steps)

In [13]:
train(model, train_dataloader, val_dataloader, optimizer, criterion, device, 1)

100%|██████████| 46/46 [01:06<00:00,  1.45s/it]
100%|██████████| 12/12 [00:16<00:00,  1.37s/it]

Epochs: 1 | Train Loss:  0.02175                 | Train Acc 1 day:  0.48623                 | Val Loss:  0.02212                 | Val Acc 1 day:  0.35813





In [14]:
batch_size = 6
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size)

In [15]:
for param in model.bert.parameters():
    param.requires_grad = True

In [16]:
for papram in model.bert.embeddings.parameters():
    param.requires_grad = False

In [None]:
# torch.save(model.state_dict(), '/content/drive/MyDrive/Диссертация/tg_sbert_7_cls.pth')

In [None]:
# model.load_state_dict(torch.load('/content/drive/MyDrive/tg_sbert_7_cls.pth'))

In [17]:
train(model, train_dataloader, val_dataloader, optimizer, criterion, device, epochs, scheduler)

100%|██████████| 242/242 [04:00<00:00,  1.01it/s]
100%|██████████| 61/61 [00:18<00:00,  3.38it/s]


Epochs: 1 | Train Loss:  0.11508                 | Train Acc 1 day:  0.49518                 | Val Loss:  0.11044                 | Val Acc 1 day:  0.51240


100%|██████████| 242/242 [04:00<00:00,  1.01it/s]
100%|██████████| 61/61 [00:18<00:00,  3.38it/s]


Epochs: 2 | Train Loss:  0.11425                 | Train Acc 1 day:  0.46763                 | Val Loss:  0.11044                 | Val Acc 1 day:  0.51240


100%|██████████| 242/242 [04:00<00:00,  1.01it/s]
100%|██████████| 61/61 [00:18<00:00,  3.38it/s]


Epochs: 3 | Train Loss:  0.11407                 | Train Acc 1 day:  0.49449                 | Val Loss:  0.11044                 | Val Acc 1 day:  0.51240


100%|██████████| 242/242 [03:59<00:00,  1.01it/s]
100%|██████████| 61/61 [00:18<00:00,  3.38it/s]


Epochs: 4 | Train Loss:  0.11407                 | Train Acc 1 day:  0.50138                 | Val Loss:  0.11044                 | Val Acc 1 day:  0.51240


100%|██████████| 242/242 [04:00<00:00,  1.01it/s]
100%|██████████| 61/61 [00:18<00:00,  3.38it/s]

Epochs: 5 | Train Loss:  0.11396                 | Train Acc 1 day:  0.51309                 | Val Loss:  0.11044                 | Val Acc 1 day:  0.51240





In [None]:
torch.save(model.state_dict(), f'/content/drive/MyDrive/Диссертация/sbert_tg_{company}.pth')

### Маленькие Bert'ы

In [None]:
tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/distilrubert-tiny-cased-conversational-v1")

Downloading (…)okenizer_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/175 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/382k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [None]:
class BertClassifierTiny(nn.Module):

    def __init__(self, n_classes=7б freeze=True):

        super(BertClassifierTiny, self).__init__()

        self.bert = AutoModel.from_pretrained("DeepPavlov/distilrubert-tiny-cased-conversational-v1")
        self.fc1 = nn.Linear(264*2, 132)
        self.fc2 = nn.Linear(264*2+132, 264)
        self.fc3 = nn.Linear(264, n_classes)

        self.drop1 = nn.Dropout(0.1)
        self.drop2 = nn.Dropout(0.1)

        self.act = nn.ReLU()

        if freeze:
          for param in model.bert.parameters():
            param.requires_grad = False

    def forward(self, input_id, mask):
        model_output = self.bert(input_id, mask)
        sentence_embeddings = mean_pooling(model_output, mask)
        embeddings = torch.cat([model_output[0][:,0,:], sentence_embeddings], dim=1)
        embeddings1 = self.fc1(embeddings)
        embeddings = torch.cat([embeddings, embeddings1], dim=1)
        embeddings = self.act(embeddings)
        embeddings = self.drop1(embeddings)
        embeddings = self.fc2(embeddings)
        embeddings = self.act(embeddings)
        embeddings = self.drop2(embeddings)
        output = self.fc3(embeddings)

        return output

In [None]:
class TgDataset(Dataset):
    def __init__(self, texts, targets):
    
      self.labels = targets.astype(int)
      self.texts = [tokenizer(text, 
                          padding='max_length', max_length = 128, truncation=True,
                          return_tensors="pt") for text in texts]

    def __len__(self):
        return len(self.labels)


    def __getitem__(self, idx):

        batch_texts = self.texts[idx]
        batch_y = self.labels[idx]

        return batch_texts, batch_y

In [None]:
# def train(model, train_dataloader, val_dataloader, optimizer, criterion, device, epochs, scheduler=None):

#     mean_1_min, mean_5_min, mean_10_min, mean_15_min, mean_30_min, mean_1_hour, mean_1_day = train_target.mean(axis=0)

#     model = model.to(device)

#     for epoch_num in range(epochs):
#         model.train()

#         acc_1_min_train = 0
#         acc_5_min_train = 0
#         acc_10_min_train = 0
#         acc_15_min_train = 0
#         acc_30_min_train = 0
#         acc_1_hour_train = 0
#         acc_1_day_train = 0
#         total_loss_train = 0

#         for train_input, train_label in tqdm(train_dataloader):

#             train_label = train_label.to(device)

#             input_id = train_input['input_ids'].squeeze(1).to(device)
#             mask = train_input['attention_mask'].squeeze(1).to(device)

#             output = model(input_id, mask)
#             batch_loss = criterion(output, train_label.float())
#             total_loss_train += batch_loss.item()
#             out = torch.sigmoid(output)

#             acc_1_min_train += ((out[:,0] > mean_1_min).int() == train_label[:,0]).sum().item()
#             acc_5_min_train += ((out[:,1] > mean_5_min).int() == train_label[:,1]).sum().item()
#             acc_10_min_train += ((out[:,2] > mean_10_min).int() == train_label[:,2]).sum().item()
#             acc_15_min_train += ((out[:,3] > mean_15_min).int() == train_label[:,3]).sum().item()
#             acc_30_min_train += ((out[:,4] > mean_30_min).int() == train_label[:,4]).sum().item()
#             acc_1_hour_train += ((out[:,5] > mean_1_hour).int() == train_label[:,5]).sum().item()
#             acc_1_day_train += ((out[:,6] > mean_1_day).int() == train_label[:,6]).sum().item()

#             model.zero_grad()
#             batch_loss.backward()
#             optimizer.step()

#             if scheduler:
#                 scheduler.step()
      
#         acc_1_min_val = 0
#         acc_5_min_val = 0
#         acc_10_min_val = 0
#         acc_15_min_val = 0
#         acc_30_min_val = 0
#         acc_1_hour_val = 0
#         acc_1_day_val = 0
#         total_loss_val = 0

#         with torch.no_grad():
#             model.eval()
#             for val_input, val_label in tqdm(val_dataloader):

#                 val_label = val_label.to(device)
#                 input_id = val_input['input_ids'].squeeze(1).to(device)
#                 mask = val_input['attention_mask'].squeeze(1).to(device)

#                 output = model(input_id, mask)

#                 batch_loss = criterion(output, val_label.float())
#                 total_loss_val += batch_loss.item()
#                 out = torch.sigmoid(output)

#                 acc_1_min_val += ((out[:,0] > mean_1_min).int() == val_label[:,0]).sum().item()
#                 acc_5_min_val += ((out[:,1] > mean_5_min).int() == val_label[:,1]).sum().item()
#                 acc_10_min_val += ((out[:,2] > mean_10_min).int() == val_label[:,2]).sum().item()
#                 acc_15_min_val += ((out[:,3] > mean_15_min).int() == val_label[:,3]).sum().item()
#                 acc_30_min_val += ((out[:,4] > mean_30_min).int() == val_label[:,4]).sum().item()
#                 acc_1_hour_val += ((out[:,5] > mean_1_hour).int() == val_label[:,5]).sum().item()
#                 acc_1_day_val += ((out[:,6] > mean_1_day).int() == val_label[:,6]).sum().item()
      
#             print(
#                 f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / len(train_dataset): .5f} \
#                 | Train Acc 1 min: {acc_1_min_train / len(train_dataset): .5f} \
#                 | Train Acc 5 min: {acc_5_min_train / len(train_dataset): .5f} \
#                 | Train Acc 10 min: {acc_10_min_train / len(train_dataset): .5f} \
#                 | Train Acc 15 min: {acc_15_min_train / len(train_dataset): .5f} \
#                 | Train Acc 30 min: {acc_30_min_train / len(train_dataset): .5f} \
#                 | Train Acc 1 hour: {acc_1_hour_train / len(train_dataset): .5f} \
#                 | Train Acc 1 day: {acc_1_day_train / len(train_dataset): .5f} \
#                 | Val Loss: {total_loss_val / len(val_dataset): .5f} \
#                 | Val Acc 1 min: {acc_1_min_val / len(val_dataset): .5f} \
#                 | Val Acc 5 min: {acc_5_min_val / len(val_dataset): .5f} \
#                 | Val Acc 10 min: {acc_10_min_val / len(val_dataset): .5f} \
#                 | Val Acc 15 min: {acc_15_min_val / len(val_dataset): .5f} \
#                 | Val Acc 30 min: {acc_30_min_val / len(val_dataset): .5f} \
#                 | Val Acc 1 hour: {acc_1_hour_val / len(val_dataset): .5f} \
#                 | Val Acc 1 day: {acc_1_day_val / len(val_dataset): .5f}')

In [None]:
def train(model, train_dataloader, val_dataloader, optimizer, criterion, device, epochs, scheduler=None):
    mean_1_day = train_target.mean(axis=0)

    model = model.to(device)

    for epoch_num in range(epochs):
        model.train()

        acc_1_day_train = 0
        total_loss_train = 0

        for train_input, train_label in tqdm(train_dataloader):

            train_label = train_label.to(device)

            input_id = train_input['input_ids'].squeeze(1).to(device)
            mask = train_input['attention_mask'].squeeze(1).to(device)

            output = model(input_id, mask)
            batch_loss = criterion(output, train_label.float())
            total_loss_train += batch_loss.item()
            out = torch.sigmoid(output)

            acc_1_day_train += ((out[:,0] > mean_1_day).int() == train_label[:,0]).sum().item()

            model.zero_grad()
            batch_loss.backward()
            optimizer.step()

            if scheduler:
                scheduler.step()
      
        acc_1_day_val = 0
        total_loss_val = 0

        with torch.no_grad():
            model.eval()
            for val_input, val_label in tqdm(val_dataloader):

                val_label = val_label.to(device)
                input_id = val_input['input_ids'].squeeze(1).to(device)
                mask = val_input['attention_mask'].squeeze(1).to(device)

                output = model(input_id, mask)

                batch_loss = criterion(output, val_label.float())
                total_loss_val += batch_loss.item()
                out = torch.sigmoid(output)

                acc_1_day_val += ((out[:,0] > mean_1_day).int() == val_label[:,0]).sum().item()
      
            print(
                f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / len(train_dataset): .5f} \
                | Train Acc 1 day: {acc_1_day_train / len(train_dataset): .5f} \
                | Val Loss: {total_loss_val / len(val_dataset): .5f} \
                | Val Acc 1 day: {acc_1_day_val / len(val_dataset): .5f}')

In [None]:
batch_size = 32
epochs = 5
train_dataset, val_dataset = TgDataset(train_texts, train_target), TgDataset(val_texts, val_target)
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size)
total_steps = len(train_dataloader) * epochs

In [None]:
n_classes = 7
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = BertClassifierTiny(n_classes)
LR = 2e-5
optimizer = AdamW(model.parameters(), lr=LR)
criterion = nn.BCEWithLogitsLoss()
scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps = int(0.1*total_steps),
                                            num_training_steps = total_steps)

Some weights of the model checkpoint at DeepPavlov/distilrubert-tiny-cased-conversational-v1 were not used when initializing DistilBertModel: ['vocab_projector.bias', 'vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
train(model, train_dataloader, val_dataloader, optimizer, criterion, device, 1)

100%|██████████| 46/46 [00:01<00:00, 25.21it/s]
100%|██████████| 12/12 [00:00<00:00, 89.87it/s]

Epochs: 1 | Train Loss:  0.02205                 | Train Acc 1 min:  0.43182                 | Train Acc 5 min:  0.53237                 | Train Acc 10 min:  0.42287                 | Train Acc 15 min:  0.43388                 | Train Acc 30 min:  0.42218                 | Train Acc 1 hour:  0.44628                 | Train Acc 1 day:  0.47314                 | Val Loss:  0.02297                 | Val Acc 1 min:  0.35989                 | Val Acc 5 min:  0.62912                 | Val Acc 10 min:  0.34890                 | Val Acc 15 min:  0.33516                 | Val Acc 30 min:  0.35989                 | Val Acc 1 hour:  0.37363                 | Val Acc 1 day:  0.51099





In [None]:
torch.save(model.state_dict(), '/content/drive/MyDrive/Диссертация/tg_tiny_7_cls.pth')

In [None]:
for param in model.bert.parameters():
  param.requires_grad = False

In [None]:
train(model, train_dataloader, val_dataloader, optimizer, criterion, device, 15)

100%|██████████| 46/46 [00:01<00:00, 23.54it/s]
100%|██████████| 12/12 [00:00<00:00, 84.81it/s]


Epochs: 1 | Train Loss:  0.02205                 | Train Acc 1 min:  0.43664                 | Train Acc 5 min:  0.54132                 | Train Acc 10 min:  0.42493                 | Train Acc 15 min:  0.43388                 | Train Acc 30 min:  0.42218                 | Train Acc 1 hour:  0.44628                 | Train Acc 1 day:  0.47521                 | Val Loss:  0.02297                 | Val Acc 1 min:  0.35989                 | Val Acc 5 min:  0.62912                 | Val Acc 10 min:  0.34890                 | Val Acc 15 min:  0.33516                 | Val Acc 30 min:  0.35989                 | Val Acc 1 hour:  0.37363                 | Val Acc 1 day:  0.51099


100%|██████████| 46/46 [00:01<00:00, 24.80it/s]
100%|██████████| 12/12 [00:00<00:00, 83.40it/s]


Epochs: 2 | Train Loss:  0.02202                 | Train Acc 1 min:  0.43388                 | Train Acc 5 min:  0.55028                 | Train Acc 10 min:  0.42424                 | Train Acc 15 min:  0.43388                 | Train Acc 30 min:  0.42355                 | Train Acc 1 hour:  0.44766                 | Train Acc 1 day:  0.46970                 | Val Loss:  0.02297                 | Val Acc 1 min:  0.35989                 | Val Acc 5 min:  0.62912                 | Val Acc 10 min:  0.34890                 | Val Acc 15 min:  0.33516                 | Val Acc 30 min:  0.35989                 | Val Acc 1 hour:  0.37363                 | Val Acc 1 day:  0.51099


100%|██████████| 46/46 [00:01<00:00, 24.49it/s]
100%|██████████| 12/12 [00:00<00:00, 87.85it/s]


Epochs: 3 | Train Loss:  0.02205                 | Train Acc 1 min:  0.43182                 | Train Acc 5 min:  0.55096                 | Train Acc 10 min:  0.42424                 | Train Acc 15 min:  0.43388                 | Train Acc 30 min:  0.42287                 | Train Acc 1 hour:  0.44904                 | Train Acc 1 day:  0.46970                 | Val Loss:  0.02297                 | Val Acc 1 min:  0.35989                 | Val Acc 5 min:  0.62912                 | Val Acc 10 min:  0.34890                 | Val Acc 15 min:  0.33516                 | Val Acc 30 min:  0.35989                 | Val Acc 1 hour:  0.37363                 | Val Acc 1 day:  0.51099


100%|██████████| 46/46 [00:01<00:00, 24.12it/s]
100%|██████████| 12/12 [00:00<00:00, 62.94it/s]


Epochs: 4 | Train Loss:  0.02203                 | Train Acc 1 min:  0.43457                 | Train Acc 5 min:  0.56956                 | Train Acc 10 min:  0.42355                 | Train Acc 15 min:  0.43388                 | Train Acc 30 min:  0.42287                 | Train Acc 1 hour:  0.45041                 | Train Acc 1 day:  0.49311                 | Val Loss:  0.02297                 | Val Acc 1 min:  0.35989                 | Val Acc 5 min:  0.62912                 | Val Acc 10 min:  0.34890                 | Val Acc 15 min:  0.33516                 | Val Acc 30 min:  0.35989                 | Val Acc 1 hour:  0.37363                 | Val Acc 1 day:  0.51099


100%|██████████| 46/46 [00:02<00:00, 20.45it/s]
100%|██████████| 12/12 [00:00<00:00, 46.82it/s]


Epochs: 5 | Train Loss:  0.02204                 | Train Acc 1 min:  0.43457                 | Train Acc 5 min:  0.54270                 | Train Acc 10 min:  0.42424                 | Train Acc 15 min:  0.43457                 | Train Acc 30 min:  0.42355                 | Train Acc 1 hour:  0.44697                 | Train Acc 1 day:  0.47245                 | Val Loss:  0.02297                 | Val Acc 1 min:  0.35989                 | Val Acc 5 min:  0.62912                 | Val Acc 10 min:  0.34890                 | Val Acc 15 min:  0.33516                 | Val Acc 30 min:  0.35989                 | Val Acc 1 hour:  0.37363                 | Val Acc 1 day:  0.51099


100%|██████████| 46/46 [00:02<00:00, 19.76it/s]
100%|██████████| 12/12 [00:00<00:00, 82.22it/s]


Epochs: 6 | Train Loss:  0.02206                 | Train Acc 1 min:  0.43664                 | Train Acc 5 min:  0.53650                 | Train Acc 10 min:  0.42355                 | Train Acc 15 min:  0.43388                 | Train Acc 30 min:  0.42287                 | Train Acc 1 hour:  0.44835                 | Train Acc 1 day:  0.49105                 | Val Loss:  0.02297                 | Val Acc 1 min:  0.35989                 | Val Acc 5 min:  0.62912                 | Val Acc 10 min:  0.34890                 | Val Acc 15 min:  0.33516                 | Val Acc 30 min:  0.35989                 | Val Acc 1 hour:  0.37363                 | Val Acc 1 day:  0.51099


100%|██████████| 46/46 [00:01<00:00, 25.29it/s]
100%|██████████| 12/12 [00:00<00:00, 88.39it/s]


Epochs: 7 | Train Loss:  0.02207                 | Train Acc 1 min:  0.43044                 | Train Acc 5 min:  0.53650                 | Train Acc 10 min:  0.42355                 | Train Acc 15 min:  0.43388                 | Train Acc 30 min:  0.42424                 | Train Acc 1 hour:  0.44766                 | Train Acc 1 day:  0.45730                 | Val Loss:  0.02297                 | Val Acc 1 min:  0.35989                 | Val Acc 5 min:  0.62912                 | Val Acc 10 min:  0.34890                 | Val Acc 15 min:  0.33516                 | Val Acc 30 min:  0.35989                 | Val Acc 1 hour:  0.37363                 | Val Acc 1 day:  0.51099


100%|██████████| 46/46 [00:01<00:00, 25.84it/s]
100%|██████████| 12/12 [00:00<00:00, 87.96it/s]


Epochs: 8 | Train Loss:  0.02206                 | Train Acc 1 min:  0.43320                 | Train Acc 5 min:  0.53581                 | Train Acc 10 min:  0.42355                 | Train Acc 15 min:  0.43388                 | Train Acc 30 min:  0.42355                 | Train Acc 1 hour:  0.44559                 | Train Acc 1 day:  0.47176                 | Val Loss:  0.02297                 | Val Acc 1 min:  0.35989                 | Val Acc 5 min:  0.62912                 | Val Acc 10 min:  0.34890                 | Val Acc 15 min:  0.33516                 | Val Acc 30 min:  0.35989                 | Val Acc 1 hour:  0.37363                 | Val Acc 1 day:  0.51099


100%|██████████| 46/46 [00:01<00:00, 25.84it/s]
100%|██████████| 12/12 [00:00<00:00, 88.05it/s]


Epochs: 9 | Train Loss:  0.02206                 | Train Acc 1 min:  0.43388                 | Train Acc 5 min:  0.54201                 | Train Acc 10 min:  0.42355                 | Train Acc 15 min:  0.43388                 | Train Acc 30 min:  0.42287                 | Train Acc 1 hour:  0.44697                 | Train Acc 1 day:  0.47727                 | Val Loss:  0.02297                 | Val Acc 1 min:  0.35989                 | Val Acc 5 min:  0.62912                 | Val Acc 10 min:  0.34890                 | Val Acc 15 min:  0.33516                 | Val Acc 30 min:  0.35989                 | Val Acc 1 hour:  0.37363                 | Val Acc 1 day:  0.51099


100%|██████████| 46/46 [00:01<00:00, 26.09it/s]
100%|██████████| 12/12 [00:00<00:00, 86.56it/s]


Epochs: 10 | Train Loss:  0.02203                 | Train Acc 1 min:  0.43113                 | Train Acc 5 min:  0.55096                 | Train Acc 10 min:  0.42493                 | Train Acc 15 min:  0.43388                 | Train Acc 30 min:  0.42287                 | Train Acc 1 hour:  0.44766                 | Train Acc 1 day:  0.47176                 | Val Loss:  0.02297                 | Val Acc 1 min:  0.35989                 | Val Acc 5 min:  0.62912                 | Val Acc 10 min:  0.34890                 | Val Acc 15 min:  0.33516                 | Val Acc 30 min:  0.35989                 | Val Acc 1 hour:  0.37363                 | Val Acc 1 day:  0.51099


100%|██████████| 46/46 [00:01<00:00, 25.96it/s]
100%|██████████| 12/12 [00:00<00:00, 86.85it/s]


Epochs: 11 | Train Loss:  0.02203                 | Train Acc 1 min:  0.43457                 | Train Acc 5 min:  0.55647                 | Train Acc 10 min:  0.42355                 | Train Acc 15 min:  0.43388                 | Train Acc 30 min:  0.42287                 | Train Acc 1 hour:  0.44904                 | Train Acc 1 day:  0.48416                 | Val Loss:  0.02297                 | Val Acc 1 min:  0.35989                 | Val Acc 5 min:  0.62912                 | Val Acc 10 min:  0.34890                 | Val Acc 15 min:  0.33516                 | Val Acc 30 min:  0.35989                 | Val Acc 1 hour:  0.37363                 | Val Acc 1 day:  0.51099


100%|██████████| 46/46 [00:01<00:00, 24.98it/s]
100%|██████████| 12/12 [00:00<00:00, 79.66it/s]


Epochs: 12 | Train Loss:  0.02206                 | Train Acc 1 min:  0.43526                 | Train Acc 5 min:  0.55028                 | Train Acc 10 min:  0.42355                 | Train Acc 15 min:  0.43388                 | Train Acc 30 min:  0.42287                 | Train Acc 1 hour:  0.44628                 | Train Acc 1 day:  0.47452                 | Val Loss:  0.02297                 | Val Acc 1 min:  0.35989                 | Val Acc 5 min:  0.62912                 | Val Acc 10 min:  0.34890                 | Val Acc 15 min:  0.33516                 | Val Acc 30 min:  0.35989                 | Val Acc 1 hour:  0.37363                 | Val Acc 1 day:  0.51099


100%|██████████| 46/46 [00:01<00:00, 25.01it/s]
100%|██████████| 12/12 [00:00<00:00, 79.30it/s]


Epochs: 13 | Train Loss:  0.02202                 | Train Acc 1 min:  0.43182                 | Train Acc 5 min:  0.56474                 | Train Acc 10 min:  0.42424                 | Train Acc 15 min:  0.43388                 | Train Acc 30 min:  0.42355                 | Train Acc 1 hour:  0.45041                 | Train Acc 1 day:  0.47727                 | Val Loss:  0.02297                 | Val Acc 1 min:  0.35989                 | Val Acc 5 min:  0.62912                 | Val Acc 10 min:  0.34890                 | Val Acc 15 min:  0.33516                 | Val Acc 30 min:  0.35989                 | Val Acc 1 hour:  0.37363                 | Val Acc 1 day:  0.51099


100%|██████████| 46/46 [00:01<00:00, 25.62it/s]
100%|██████████| 12/12 [00:00<00:00, 88.24it/s]


Epochs: 14 | Train Loss:  0.02204                 | Train Acc 1 min:  0.42975                 | Train Acc 5 min:  0.54959                 | Train Acc 10 min:  0.42355                 | Train Acc 15 min:  0.43388                 | Train Acc 30 min:  0.42355                 | Train Acc 1 hour:  0.44559                 | Train Acc 1 day:  0.47314                 | Val Loss:  0.02297                 | Val Acc 1 min:  0.35989                 | Val Acc 5 min:  0.62912                 | Val Acc 10 min:  0.34890                 | Val Acc 15 min:  0.33516                 | Val Acc 30 min:  0.35989                 | Val Acc 1 hour:  0.37363                 | Val Acc 1 day:  0.51099


100%|██████████| 46/46 [00:01<00:00, 26.00it/s]
100%|██████████| 12/12 [00:00<00:00, 85.06it/s]

Epochs: 15 | Train Loss:  0.02205                 | Train Acc 1 min:  0.43388                 | Train Acc 5 min:  0.53168                 | Train Acc 10 min:  0.42355                 | Train Acc 15 min:  0.43388                 | Train Acc 30 min:  0.42287                 | Train Acc 1 hour:  0.44972                 | Train Acc 1 day:  0.47590                 | Val Loss:  0.02297                 | Val Acc 1 min:  0.35989                 | Val Acc 5 min:  0.62912                 | Val Acc 10 min:  0.34890                 | Val Acc 15 min:  0.33516                 | Val Acc 30 min:  0.35989                 | Val Acc 1 hour:  0.37363                 | Val Acc 1 day:  0.51099





In [None]:
model()

In [None]:
model.load_state_dict(torch.load('/content/drive/MyDrive/tg_sbert_7_cls.pth'))