In [91]:
from sentence_transformers import SentenceTransformer
from torch.utils.data import Dataset, DataLoader
from torchsummary import summary
import torch.nn.functional as F
import torch.nn as nn
import numpy as np
import torch

# Hyper-parameters shared by the network, its optimizer and the training loop.
NET_PARAMS = dict(
    hidden_shape=[50, 75],   # widths of the hidden Linear layers, in order
    learning_rate=0.003,     # initial learning rate handed to the SGD optimizer
    batch_size=64,           # mini-batch size of the training DataLoader
    transfer='sigmoid',      # activation name: 'sigmoid'; any other value selects tanh
    patience=15,             # window length used by the early-stopping check in fit()
)

class Net(nn.Module):
    """Fully connected feed-forward network.

    Layout: input layer -> zero or more hidden layers -> output layer. The
    transfer function is applied after the input layer and after every hidden
    layer; the output layer's result is returned as-is (no activation).

    Args:
        inp_shape: number of input features.
        hidden_shape: sequence of hidden-layer widths; must contain at least
            one entry because ``hidden_shape[0]`` is indexed.
        out_shape: number of output units.
        transfer: ``'sigmoid'`` selects ``torch.sigmoid``; any other value
            selects ``torch.tanh``.
    """

    def __init__(self, inp_shape, hidden_shape, out_shape, transfer=NET_PARAMS['transfer']):
        super().__init__()

        # Anything that is not exactly 'sigmoid' falls back to tanh.
        if transfer == 'sigmoid':
            self.transfer = torch.sigmoid
        else:
            self.transfer = torch.tanh

        # Layers are created in input -> hidden -> output order.
        self.input_layer = nn.Linear(inp_shape, hidden_shape[0])
        consecutive_widths = zip(hidden_shape[:-1], hidden_shape[1:])
        self.hidden_layers = nn.ModuleList(
            [nn.Linear(width_in, width_out) for width_in, width_out in consecutive_widths]
        )
        self.output_layer = nn.Linear(hidden_shape[-1], out_shape)

        self.train()

    def forward(self, x):
        """Run ``x`` through all layers and return the output layer's raw result."""
        activation = self.transfer(self.input_layer(x))

        for hidden in self.hidden_layers:
            activation = self.transfer(hidden(activation))

        return self.output_layer(activation)

class Model:
    """Intent classifier: a sentence encoder followed by a small feed-forward ``Net``.

    The encoder is loaded in ``__init__``; the network, the label mapping and
    the optimizer are created later by ``init_net``.
    """

    def __init__(self):
        # Sentence encoder used by encode_input()
        self.text2vec = SentenceTransformer('fav-kky/FERNET-C5')

        # Net (created by init_net)
        self.net = None

        # Labels: mapping target index -> label, taken from the training dataset
        self.target2label = None

        # Optimizer (created by init_net)
        self.optimizer = None

        # Loss function
        self.criterion = nn.CrossEntropyLoss()

    def encode_input(self, text):
        """Return the encoder's vector for ``text``."""
        return self.text2vec.encode(text)

    def decode_output(self, vec):
        """Return the label whose index is the argmax of ``vec``.

        ``torch.argmax`` is taken over the flattened tensor, so ``vec`` is
        expected to hold the scores of a single sample.
        """
        return self.target2label[torch.argmax(vec).item()]

    def init_net(self, dataset, print_summary=True):
        """Create the network, label mapping and SGD optimizer.

        Args:
            dataset: mapping with a ``'train'`` entry exposing ``n`` (input
                size), ``m`` (number of classes) and ``target2label``.
            print_summary: when true, print a torchsummary overview of the net.
        """
        self.net = Net(dataset['train'].n, NET_PARAMS['hidden_shape'], dataset['train'].m)
        self.target2label = dataset['train'].target2label
        self.optimizer = torch.optim.SGD(self.net.parameters(), lr=NET_PARAMS['learning_rate'])

        if print_summary:
            summary(self.net, (1, dataset['train'].n))

    def fit(self, trainloader, devloader=None, epochs=50, verbose=True, save=True):
        """Train the network.

        Args:
            trainloader: iterable of ``(x, y_true, _, _)`` training batches.
            devloader: optional iterable of ``(x, y_true, _, _)`` batches used
                for the dev loss and for early stopping. When ``None``, dev
                evaluation and early stopping are skipped.
            epochs: maximum number of epochs.
            verbose: print per-epoch losses when truthy.
            save: when true, write the net's state dict to ``'model-test'``.
        """
        train_loss_list = []
        dev_loss_list = []

        for epoch in range(1, epochs + 1):

            # Step decay: every 500 epochs the learning rate is multiplied by 0.75
            if epoch % 500 == 0:
                for g in self.optimizer.param_groups:
                    g['lr'] *= 0.75
                    print(f'Epoch {epoch}: Learning rate changed to {g["lr"]}')

            # Set model to train configuration
            self.net.train()
            epoch_train_losses = []
            for x, y_true, _, _ in trainloader:
                # Clear gradient
                self.optimizer.zero_grad()

                # Make a prediction
                y_pred = self.net(x)

                # Calculate loss
                loss = self.criterion(y_pred, y_true)

                # Calculate gradients of parameters
                loss.backward()

                # Update parameters
                self.optimizer.step()

                epoch_train_losses.append(loss.item())

            mean_train_loss = np.mean(epoch_train_losses)
            train_loss_list.append(mean_train_loss)

            # Without a dev set there is nothing to evaluate or to early-stop on
            if devloader is None:
                if verbose > 0:
                    print(f'epoch {epoch}, train loss {mean_train_loss}')
                continue

            # Set model to eval configuration; no gradients are needed here
            self.net.eval()
            epoch_dev_losses = []
            with torch.no_grad():
                for x, y_true, _, _ in devloader:
                    y_pred = self.net(x)
                    epoch_dev_losses.append(self.criterion(y_pred, y_true).item())

            mean_dev_loss = np.mean(epoch_dev_losses)
            dev_loss_list.append(mean_dev_loss)

            if verbose > 0:
                print(f'epoch {epoch}, train loss {mean_train_loss}, dev loss {mean_dev_loss}')

            # Early stopping: once more than `patience` dev losses exist, stop when
            # the current dev loss is higher than every one of the preceding
            # `patience - 1` dev losses.
            patience = NET_PARAMS['patience']
            dev_loss_tail = dev_loss_list[-patience:-1]
            if len(dev_loss_list) > patience and all(dl < mean_dev_loss for dl in dev_loss_tail):
                print(f'Early stopping, dev_loss tail: {dev_loss_tail}')
                break

        # Guard against epochs <= 0, where no loss was recorded
        if train_loss_list:
            print(f'Final loss: {train_loss_list[-1].item()}')

        if save:
            torch.save(self.net.state_dict(), 'model-test')

    def predict(self, sample, is_encoded=False):
        """Return the predicted label for one sample.

        Args:
            sample: raw text, or an already encoded tensor when ``is_encoded``.
            is_encoded: skip the sentence encoder and feed ``sample`` directly.
        """
        self.net.eval()

        if is_encoded:
            encoded = sample
        else:
            encoded = torch.from_numpy(self.encode_input(sample))

        # Inference only: no autograd graph is needed
        with torch.no_grad():
            out = self.net(encoded)
        return self.decode_output(out)

    def evaluate(self, testloader):
        """Compute loss and accuracy over ``testloader``.

        Accuracy is counted per sample, so any batch size is supported.
        The loss is the unweighted mean of the per-batch losses.

        Args:
            testloader: iterable of ``(x, y_true, _, _)`` batches.

        Returns:
            Tuple ``(loss, acc)``.
        """
        self.net.eval()
        batch_losses = []
        n_correct = 0
        n_total = 0
        with torch.no_grad():
            for x, y_true, _, _ in testloader:
                y_pred = self.net(x)
                batch_losses.append(self.criterion(y_pred, y_true).item())

                # Row-wise argmax compared against every label in the batch
                n_correct += (torch.argmax(y_pred, dim=1) == y_true).sum().item()
                n_total += y_true.shape[0]

        acc = n_correct / n_total
        loss = np.mean(batch_losses)
        print(f'Loss: {loss}, Acc: {acc}')

        return loss, acc
    
# Instantiating Model loads the 'fav-kky/FERNET-C5' SentenceTransformer encoder;
# the classifier network itself is created later by model.init_net().
model = Model()

Some weights of the model checkpoint at /Users/kitt/.cache/torch/sentence_transformers/fav-kky_FERNET-C5 were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [92]:
class IntentsDataset(Dataset):
    """Dataset of encoded sentences with integer intent targets."""

    def __init__(self, group, samples, labels, encoder, decoder, label_set=None):
        """
            group: name of the split, used only in the printed summary
            samples: list: ['sentence 1', 'sentence 2', ... 'sentence n']
            labels: list: ['label 1', 'label 2', ... 'label n']
            encoder: def for encoding input (in: <string> sentence, out: <np.ndarray> vec)
            decoder: def for decoding output (in: <np.ndarray> vec, <string[]> labels, out: <string> label)
            label_set: optional iterable with the full label vocabulary. When given,
                target indices are derived from it instead of from `labels`, so
                several splits can share one label -> index mapping even if a
                split does not contain every label. Defaults to the labels of
                this split.

            Raises ValueError if `labels` contains a label missing from `label_set`.
        """

        self.group = group
        self.samples = samples
        self.labels = labels
        self.encoder = encoder
        self.decoder = decoder

        self.samples_encoded = np.array([self.encoder(sample) for sample in self.samples])

        vocabulary = self.labels if label_set is None else label_set
        self.sorted_labels = sorted(set(vocabulary))
        self.target2label = dict(enumerate(self.sorted_labels))

        # Dict lookup instead of list.index() per sample
        label_index = {label: target for target, label in self.target2label.items()}
        unknown = sorted(set(self.labels).difference(label_index))
        if unknown:
            raise ValueError(f'Labels not present in label_set: {unknown}')
        # Despite its name this is the list of target indices, one per sample
        self.label2target = [label_index[label] for label in self.labels]

        self.x = torch.from_numpy(self.samples_encoded.reshape(-1, self.samples_encoded.shape[1]).astype('float32'))
        self.y = torch.tensor(self.label2target)

        self.p = self.x.shape[0]
        self.n = len(self.x[0])
        self.m = len(self.sorted_labels)

        # Fix the width to m so it matches the vocabulary even when the
        # highest-index label does not occur in this split
        self.y_one_hot = F.one_hot(self.y, num_classes=self.m)

        print(f'Dataset {self.group}: n = {self.n}, m = {self.m}, p = {self.p}')
        print(f'Labels: {self.target2label}')

    def __getitem__(self, index):
        """Return (encoded sample, target index, raw sample, raw label)."""
        return self.x[index], self.y[index], self.samples[index], self.labels[index]

    def __len__(self):
        """Number of samples."""
        return self.p

    def label(self, one_hot):
        """Decode `one_hot` by calling the decoder with this split's labels."""
        return self.decoder(one_hot, self.labels)

In [100]:
samples = {'train': [], 'dev': [], 'test': []}
labels = {'train': [], 'dev': [], 'test': []}
dataset = {}

# Maximum number of lines read from each split file
LIM = {'train': 2500, 'dev': 500, 'test': 750}

for group in ('train', 'dev', 'test'):
    # Read as UTF-8 explicitly instead of relying on the platform default encoding
    with open(f'data/{group}-cs.tsv', 'r', encoding='utf-8') as fr:
        # zip() with range() stops after LIM[group] lines without loading the rest of the file
        for _, line in zip(range(LIM[group]), fr):
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no sample
                continue
            sample, label = line.split('\t')
            samples[group].append(sample)
            labels[group].append(label)

    print(f'Loaded {len(samples[group])} {group} samples, creating dataset...')
    dataset[group] = IntentsDataset(
        group=group,
        samples=samples[group],
        labels=labels[group],
        encoder=model.encode_input,
        decoder=model.decode_output
    )
    
trainloader = DataLoader(dataset=dataset['train'], batch_size=NET_PARAMS['batch_size'], shuffle=True)
devloader = DataLoader(dataset=dataset['dev'], batch_size=256, shuffle=True)
testloader = DataLoader(dataset=dataset['test'], batch_size=1, shuffle=True)

model.init_net(dataset=dataset)

Loaded 2500 train samples, creating dataset...
Dataset train: n = 768, m = 25, p = 2500
Labels: {0: 'application_status', 1: 'change_language', 2: 'change_user_name', 3: 'definition', 4: 'find_phone', 5: 'flight_status', 6: 'flip_coin', 7: 'fun_fact', 8: 'improve_credit_score', 9: 'insurance_change', 10: 'maybe', 11: 'meaning_of_life', 12: 'oil_change_how', 13: 'payday', 14: 'pto_request', 15: 'replacement_card_duration', 16: 'restaurant_reservation', 17: 'shopping_list_update', 18: 'time', 19: 'timer', 20: 'transfer', 21: 'translate', 22: 'travel_alert', 23: 'what_can_i_ask_you', 24: 'where_are_you_from'}
Loaded 500 dev samples, creating dataset...
Dataset dev: n = 768, m = 25, p = 500
Labels: {0: 'application_status', 1: 'change_language', 2: 'change_user_name', 3: 'definition', 4: 'find_phone', 5: 'flight_status', 6: 'flip_coin', 7: 'fun_fact', 8: 'improve_credit_score', 9: 'insurance_change', 10: 'maybe', 11: 'meaning_of_life', 12: 'oil_change_how', 13: 'payday', 14: 'pto_request',

In [102]:
# Train for at most 5000 epochs (fit() may stop earlier through its
# early-stopping check), then report loss and accuracy on the test split.
model.fit(trainloader=trainloader, devloader=devloader, epochs=5000, verbose=True)
model.evaluate(testloader=testloader)

epoch 1, train loss 1.7693025201559067, dev loss 1.8030024766921997
epoch 2, train loss 1.7604318857192993, dev loss 1.800839364528656
epoch 3, train loss 1.752240166068077, dev loss 1.8006113767623901
epoch 4, train loss 1.7637113958597184, dev loss 1.7986257076263428
epoch 5, train loss 1.7564810782670974, dev loss 1.795855462551117
epoch 6, train loss 1.762559026479721, dev loss 1.7951326966285706
epoch 7, train loss 1.7548008978366851, dev loss 1.792678952217102
epoch 8, train loss 1.753334891796112, dev loss 1.79136723279953
epoch 9, train loss 1.7519496113061905, dev loss 1.788540542125702
epoch 10, train loss 1.7609618872404098, dev loss 1.7883279919624329
epoch 11, train loss 1.7545320212841033, dev loss 1.7858557105064392
epoch 12, train loss 1.7483060866594315, dev loss 1.7852643132209778
epoch 13, train loss 1.7530488163232802, dev loss 1.7835111021995544
epoch 14, train loss 1.7387350469827652, dev loss 1.7805202007293701
epoch 15, train loss 1.7406265437602997, dev loss 1.

(0.5278510923236609, 0.904)

In [98]:
def evaluate(net, criterion, testloader):
    """Compute loss and accuracy of ``net`` over ``testloader``.

    Accuracy is counted per sample, so any batch size is supported.
    The loss is the unweighted mean of the per-batch losses.

    Args:
        net: module called as ``net(x)`` to obtain class scores.
        criterion: loss called as ``criterion(y_pred, y_true)``.
        testloader: iterable of ``(x, y_true, _, _)`` batches.

    Returns:
        Tuple ``(loss, acc)``.
    """
    net.eval()
    batch_losses = []
    n_correct = 0
    n_total = 0
    # Evaluation only: no autograd graph is needed
    with torch.no_grad():
        for x, y_true, _, _ in testloader:
            y_pred = net(x)
            batch_losses.append(criterion(y_pred, y_true).item())

            # Row-wise argmax compared against every label in the batch
            n_correct += (torch.argmax(y_pred, dim=1) == y_true).sum().item()
            n_total += y_true.shape[0]

    acc = n_correct / n_total
    loss = np.mean(batch_losses)
    print(f'Loss: {loss}, Acc: {acc}')

    return loss, acc

# Score the trained network on the test split with the standalone helper.
evaluate(net=model.net, criterion=model.criterion, testloader=testloader)

Loss: 0.1777581220244368, Acc: 0.9733333333333334


(0.1777581220244368, 0.9733333333333334)

In [99]:
# Interactive loop: classify each typed sentence; typing 'stop' ends the loop.
while True:
    inp = input('\n>>')
    if inp == 'stop':
        break

    # Encode once and reuse the vector for both the label and the raw scores,
    # instead of running the sentence encoder twice per query.
    encoded = torch.from_numpy(model.encode_input(inp))

    # Inference only: no autograd graph is needed
    with torch.no_grad():
        print(f'>> {model.predict(encoded, is_encoded=True)}')
        print(model.net(encoded))

>> transfer
tensor([-0.2055, -3.5630,  0.2506,  3.9521, -1.0722], grad_fn=<AddBackward0>)
>> translate
tensor([ 1.2219, -1.4527, -2.4432, -1.3665,  3.5469], grad_fn=<AddBackward0>)
>> translate
tensor([ 0.7103, -1.6924, -1.2817, -0.8355,  2.6430], grad_fn=<AddBackward0>)
>> meaning_of_life
tensor([ 0.9619,  3.5216, -0.5340, -3.2572, -0.8638], grad_fn=<AddBackward0>)
>> definition
tensor([ 1.1462,  0.0531, -1.1359, -0.1523, -0.3334], grad_fn=<AddBackward0>)
>> definition
tensor([ 2.7815,  0.1539, -2.5925, -1.3863,  0.6302], grad_fn=<AddBackward0>)
>> timer
tensor([ 0.2671, -0.9565,  1.4220,  0.3834, -1.4432], grad_fn=<AddBackward0>)
