In [1]:
import pandas as pd
import numpy as np

import youtokentome as yttm

from functools import partial, reduce
import operator

from src.utils import *
from src.vars import *
from src.model import Classifier
from src.dataset import BinaryDataset, BinaryEvenDataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch.optim as optim
from torch import nn
from torch.utils import data
import torch
from torch import Tensor


from tqdm.notebook import tqdm


from sklearn.model_selection import train_test_split

### params

In [3]:
# --- Training-loop hyperparameters ---
num_of_epochs = 10       # number of passes of the train/val loop
batch_size = 32          # batch size handed to create_dataloader
epsilon = 1e-4           # early-stopping threshold on the epoch-to-epoch change of the train loss
learning_rate = 0.001    # initial Adam learning rate (halved by the LR scheduler)

# --- Model hyperparameters (forwarded to Classifier) ---
word_embedding_dim = 64
hidden_dim = 128
topk = 3

# --- Reproducibility ---
# Same value as the random_state used for train_test_split, so the data split
# and the torch/numpy random streams are pinned for every Restart & Run All.
# NOTE(review): if the dataset classes draw from Python's `random`, seed that too.
random_seed = 42
torch.manual_seed(random_seed)
np.random.seed(random_seed)

### data

In [2]:
# Read the tab-separated training table.
raw_frame = pd.read_csv(train_file, sep='\t')

# BPE tokenizer plus the textual form of the separator token.
tokenizer = yttm.BPE(model=tokenizer_path)
sep_token = tokenizer.id_to_subword(sep_token_id)

# `clue` builds one lower-cased text per row (presumably joining columns with
# sep_token -- see src.utils); each text is then encoded with the tokenizer.
encoded_texts = clue(raw_frame, sep_token, to_lower=True).apply(tokenizer.encode)
labels = raw_frame[label_column]

# Fixed 80/20 train/validation split.
X_train, X_test, y_train, y_test = train_test_split(
    encoded_texts, labels, test_size=0.20, random_state=42
)

# Drop the intermediates; only the split parts are needed from here on.
del raw_frame, encoded_texts, labels

In [4]:
# Wrap both halves of the split in the dataset class.
train_data = BinaryEvenDataset(X_train, y_train)
val_data = BinaryEvenDataset(X_test, y_test)

# Both datasets must agree on `small_label`, otherwise the two phases
# would not be comparable.
assert train_data.small_label == val_data.small_label, "something gone wrong in train/val dataset label asignment"

# One loader per phase, keyed by the phase name used in the training loop.
data_loader = {
    phase: create_dataloader(dataset, batch_size, pad_token_id)
    for phase, dataset in (('train', train_data), ('val', val_data))
}

### model

In [26]:
# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Per-phase metric history and the best validation accuracy seen so far.
stats = {phase: {'acc': [], 'loss': []} for phase in ('train', 'val')}
best_acc = 0

# The embedding table is sized from the tokenizer vocabulary.
word_vocab_size = tokenizer.vocab_size()
print('Word vocab size:', word_vocab_size)


# Binary classifier (two output classes).
model = Classifier(
    word_vocab_size=word_vocab_size,
    word_embedding_dim=word_embedding_dim,
    hidden_dim=hidden_dim,
    target_size=2,
    padding_idx=pad_token_id,
    topk=topk,
)
model.to(device)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# The scheduler is stepped with validation accuracy (mode='max'): the learning
# rate is halved whenever an epoch fails to improve by at least 1e-2 (absolute).
lr_scheduler = ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.5,
    patience=0,
    threshold=1e-2,
    threshold_mode='abs',
    verbose=True,
)

Word vocab size: 128


### training pipeline

In [28]:
def run_model(model, device, optimizer, loss_function,
              data_loader, is_train_phase, desc=None, verbose=True):
    """Run one full pass of the given data through the model.

    :param model: model to run
    :param device: device every batch is moved to before the forward pass
    :param optimizer: optimizer for the model; only touched in the train phase
    :param loss_function: function to calculate the loss
    :param data_loader: iterable yielding ``(sentences, targets)`` batches
    :param is_train_phase: if true, model runs in train mode and propagates
        gradients, otherwise it runs in eval mode with gradients disabled
    :param desc: description for the tqdm visualization; may be ``None``
    :param verbose: if true, wrap the loader in a tqdm bar and show the
        running mean accuracy and loss in its description
    :return: tuple ``(accuracies, losses)`` with one entry per batch
    """
    # Switch layers such as dropout to the right mode for this phase
    if is_train_phase:
        model.train()
    else:
        model.eval()

    # `desc` defaults to None, so the progress text must not assume a string
    prefix = '' if desc is None else desc + ' - '

    losses = []
    accuracies = []
    iterable = tqdm(data_loader, desc=desc) if verbose else data_loader
    for sentences, targets in iterable:
        sentences = sentences.to(device)
        targets = targets.to(device).flatten()

        # Gradients only exist (and only need clearing) while training
        if is_train_phase:
            optimizer.zero_grad()

        with torch.set_grad_enabled(is_train_phase):
            y_hat = model(sentences)
            loss = loss_function(y_hat, targets)
            # calc_accuracy comes from src.utils (presumably the batch accuracy
            # as a tensor -- confirm there)
            acc = calc_accuracy(y_hat, targets).cpu().detach().numpy()

            # backprop only in train phase
            if is_train_phase:
                loss.backward()
                optimizer.step()

        # store per-batch metrics
        losses.append(loss.item())
        accuracies.append(acc)

        # Update running metrics in the progress-bar description
        if verbose:
            iterable.set_description(prefix +
                                     f'acc: {np.mean(accuracies):.4f}; ' +
                                     f'loss: {np.mean(losses):.4f}')

    return accuracies, losses

In [29]:
# Train model: every epoch runs a train pass and a validation pass, records the
# mean metrics, steps the LR scheduler on validation accuracy, checkpoints the
# best model and stops early once the train loss has stopped moving.
for epoch in range(num_of_epochs):
    tqdm.write(f'------------ Epoch {epoch} ------------')
    for phase in ['train', 'val']:
        desc = f"{phase.title()}: Epoch #{epoch}"
        epoch_accs, epoch_losses = run_model(model, device, optimizer,
                                             loss_function, data_loader[phase],
                                             phase == 'train', desc)

        # Mean over batches is what gets tracked per epoch
        acc, loss = np.mean(epoch_accs), np.mean(epoch_losses)
        stats[phase]['acc'].append(acc)
        stats[phase]['loss'].append(loss)
    # Update learning rate.
    lr_scheduler.step(stats['val']['acc'][-1])
    # Check best model
    if stats['val']['acc'][-1] > best_acc:
        best_acc = stats['val']['acc'][-1]
        tqdm.write('Biggest val accuracy')
        tqdm.write('Saving model...')
        try:
            # NOTE(review): this pickles the whole module; loading it later
            # requires the same class definition to be importable.
            torch.save(model, model_file)
        except OSError as err:
            # A failed checkpoint (missing directory, permissions, full disk)
            # should be reported, not abort the training run.
            tqdm.write(f'Error during saving! ({err})')
        else:
            tqdm.write('Saved successfully')
    # Check loss change for early stopping. A change only exists once two
    # epochs have been recorded; with a single entry there is nothing to compare.
    train_losses = stats['train']['loss']
    if epsilon and len(train_losses) >= 2:
        loss_change = abs(train_losses[-2] - train_losses[-1])
        if loss_change < epsilon:
            print(f'Early stopping: loss change ({loss_change}) is less than {epsilon}')
            break

print('Finished...')

------------ Epoch 0 ------------


HBox(children=(FloatProgress(value=0.0, description='Train: Epoch #0', max=9155.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Val: Epoch #0', max=2276.0, style=ProgressStyle(descripti…


Biggest val accuracy
Saving model...
Saved successfully
------------ Epoch 1 ------------


HBox(children=(FloatProgress(value=0.0, description='Train: Epoch #1', max=9155.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Val: Epoch #1', max=2276.0, style=ProgressStyle(descripti…


Biggest val accuracy
Saving model...
Saved successfully
------------ Epoch 2 ------------


HBox(children=(FloatProgress(value=0.0, description='Train: Epoch #2', max=9155.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Val: Epoch #2', max=2276.0, style=ProgressStyle(descripti…


Epoch     2: reducing learning rate of group 0 to 5.0000e-04.
Biggest val accuracy
Saving model...
Saved successfully
------------ Epoch 3 ------------


HBox(children=(FloatProgress(value=0.0, description='Train: Epoch #3', max=9155.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Val: Epoch #3', max=2276.0, style=ProgressStyle(descripti…


Biggest val accuracy
Saving model...
Saved successfully
------------ Epoch 4 ------------


HBox(children=(FloatProgress(value=0.0, description='Train: Epoch #4', max=9155.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Val: Epoch #4', max=2276.0, style=ProgressStyle(descripti…


Epoch     4: reducing learning rate of group 0 to 2.5000e-04.
Biggest val accuracy
Saving model...
Saved successfully
------------ Epoch 5 ------------


HBox(children=(FloatProgress(value=0.0, description='Train: Epoch #5', max=9155.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Val: Epoch #5', max=2276.0, style=ProgressStyle(descripti…


Epoch     5: reducing learning rate of group 0 to 1.2500e-04.
Biggest val accuracy
Saving model...
Saved successfully
------------ Epoch 6 ------------


HBox(children=(FloatProgress(value=0.0, description='Train: Epoch #6', max=9155.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Val: Epoch #6', max=2276.0, style=ProgressStyle(descripti…


Epoch     6: reducing learning rate of group 0 to 6.2500e-05.
Biggest val accuracy
Saving model...
Saved successfully
------------ Epoch 7 ------------


HBox(children=(FloatProgress(value=0.0, description='Train: Epoch #7', max=9155.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Val: Epoch #7', max=2276.0, style=ProgressStyle(descripti…


Epoch     7: reducing learning rate of group 0 to 3.1250e-05.
------------ Epoch 8 ------------


HBox(children=(FloatProgress(value=0.0, description='Train: Epoch #8', max=9155.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Val: Epoch #8', max=2276.0, style=ProgressStyle(descripti…


Epoch     8: reducing learning rate of group 0 to 1.5625e-05.
Biggest val accuracy
Saving model...
Saved successfully
------------ Epoch 9 ------------


HBox(children=(FloatProgress(value=0.0, description='Train: Epoch #9', max=9155.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Val: Epoch #9', max=2276.0, style=ProgressStyle(descripti…


Epoch     9: reducing learning rate of group 0 to 7.8125e-06.
Biggest val accuracy
Saving model...
Saved successfully
Finished...


**topk = 2**

1 слой LSTM: (val) 87,99 - 89,37 - 90,57

2 слоя LSTM: (val) 84,02 - 85,32



**1 слой LSTM**

topk = 1: (val) 84,01 

topk = 3: (val) 87,94 - 89,75 - ...