# Task 3.2

In [None]:
import requests

# download the json data file
res = requests.get('https://raw.githubusercontent.com/tobideusser/kpi-edgar/main/data/kpi_edgar.json')

In [None]:
data = res.json()

# print the data specification
print(data[0].keys())
print(data[0]['segments'][0].keys())
print(data[0]['segments'][0]['sentences'][0].keys())

dict_keys(['id_', 'segments'])
dict_keys(['id_', 'value', 'sentences'])
dict_keys(['id_', 'value', 'split_type', 'unique_id', 'words', 'entities_anno_iobes', 'entities_anno_iobes_ids', 'entities_anno', 'entities_anno_secondary', 'relations_anno', 'relations_anno_secondary'])


In [None]:
data[0]['segments'][0]['sentences'][0]

{'id_': 4,
 'value': '(4) Includes $ 6.7 billion of revenue recognized in 2021 that was included in deferred revenue as of September 26, 2020, $ 5.0 billion of revenue recognized in 2020 that was included in deferred revenue as of September 28, 2019, and $ 5.9 billion of revenue recognized in 2019 that was included in deferred revenue as of September 29, 2018.',
 'split_type': 'train',
 'unique_id': 'AAPL_10-K_0000320193-21-000105.txt_15_4',
 'words': [{'id_': 0,
   'value': '(',
   'prefix': None,
   'suffix': None,
   'info': None,
   'value_numeric': None,
   'value_masked': None,
   'is_numeric': False,
   'is_currency': False,
   'unit': None,
   'multiplier': None},
  {'id_': 1,
   'value': '4',
   'prefix': None,
   'suffix': None,
   'info': None,
   'value_numeric': 4.0,
   'value_masked': '<NUM>',
   'is_numeric': True,
   'is_currency': None,
   'unit': None,
   'multiplier': None},
  {'id_': 2,
   'value': ')',
   'prefix': None,
   'suffix': None,
   'info': None,
   'valu

In [None]:
!pip install transformers
!pip install sacremoses

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# Subword tokenization

Let us apply two tokenizer models, BertTokenizer and moses, and compare the output.

In [None]:
import torchtext
import transformers
from transformers import BertTokenizer

# example of two tokenizers for Sub-Word Tokenizing
tokenizer_bert = BertTokenizer.from_pretrained("bert-base-uncased")
tokenizer_moses = torchtext.data.get_tokenizer("moses")

sentence = data[0]['segments'][0]['sentences'][0]['value']
print(sentence)
print('moses\n', tokenizer_moses(sentence))
print('bert', '\n', tokenizer_bert.tokenize(sentence))

(4) Includes $ 6.7 billion of revenue recognized in 2021 that was included in deferred revenue as of September 26, 2020, $ 5.0 billion of revenue recognized in 2020 that was included in deferred revenue as of September 28, 2019, and $ 5.9 billion of revenue recognized in 2019 that was included in deferred revenue as of September 29, 2018.
moses
 ['(', '4', ')', 'Includes', '$', '6.7', 'billion', 'of', 'revenue', 'recognized', 'in', '2021', 'that', 'was', 'included', 'in', 'deferred', 'revenue', 'as', 'of', 'September', '26', ',', '2020', ',', '$', '5.0', 'billion', 'of', 'revenue', 'recognized', 'in', '2020', 'that', 'was', 'included', 'in', 'deferred', 'revenue', 'as', 'of', 'September', '28', ',', '2019', ',', 'and', '$', '5.9', 'billion', 'of', 'revenue', 'recognized', 'in', '2019', 'that', 'was', 'included', 'in', 'deferred', 'revenue', 'as', 'of', 'September', '29', ',', '2018', '.']
bert 
 ['(', '4', ')', 'includes', '$', '6', '.', '7', 'billion', 'of', 'revenue', 'recognized', '

# Problem with tokenization

The provided dataset was already tokenized using a BertTokenizer combined with a regex algorithm to properly extract numerical data. 

But as we can see, original BertTokenizer breaks the numbers with a decimal part: 

'6.7' -> '6', '.', '7'

We could use moses tokenizer, but for some sentences it produces a different token sequence from the dataset, so we will have to manually relabel the dataset.

To actually use the provided labels, we will have to use the pre-tokenized words from the dataset.

# Preprocessing

We extract the words and labels from the json file and rearrange them into lists

In [None]:
# preprocessing the data

sentence_list = []
iobes_ids_list = []
for data_entry in data:

  for segment in data_entry['segments']:

    if segment and 'sentences' in segment and segment['sentences']:
      # print(segment)
      for sentence in segment['sentences']:

        if 'words' in sentence:
          sentence_words = []
          sentence_labels = sentence['entities_anno_iobes_ids']

          for word in sentence['words']:
            sentence_words.append(word['value'])
            
          if sentence_words and sentence_labels:
            sentence_list.append(sentence_words)
            iobes_ids_list.append(sentence_labels)

In [None]:
# results of preprocessing

print('N of sentences:', len(sentence_list), '| N of labels:', len(iobes_ids_list))

print(sentence_list[-5:])
print(iobes_ids_list[-5:])

N of sentences: 1158 | N of labels: 1158
[['The', 'table', 'below', 'provides', 'additional', 'detail', 'for', 'those', '21', 'projects', ',', 'which', 'total', '$', '3,181', 'million', '.'], ['(', '2', ')', 'Includes', 'premiums', 'of', '$', '148', 'million', 'in', '2020', '.'], ['(', '3', ')', 'Includes', 'premiums', 'of', '$', '87', 'million', 'in', '2020', 'and', '$', '92', 'million', 'in', '2019', '.'], ['The', 'reconciliation', 'between', 'income', 'tax', 'expense', '(', 'credit', ')', 'and', 'a', 'theoretical', 'U.S', '.', 'tax', 'computed', 'by', 'applying', 'a', 'rate', 'of', '21', 'percent', 'for', '2020', ',', '2019', 'and', '2018', 'is', 'as', 'follows', ':', '(', '1', ')', '2020', 'includes', 'the', 'impact', 'of', 'an', 'increase', 'in', 'valuation', 'allowance', 'of', '$', '647', 'million', 'in', 'non-U.S', '.', 'and', '$', '115', 'million', 'in', 'U.S', '.', 'state', 'jurisdictions', '.'], ['(', '2', ')', '2019', 'includes', 'taxes', 'less', 'than', 'the', 'theoretical'

# Model setup

In [None]:
import pandas as pd
import numpy as np
import transformers
from transformers import BertTokenizer, BertModel, get_linear_schedule_with_warmup
import torch
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import torch.nn as nn
from sklearn.metrics import accuracy_score
import pickle as pkl
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

In [None]:
class Config:
    CLS = [101]
    SEP = [102]
    VALUE_TOKEN = [0]
    MAX_LEN = 128
    TRAIN_BATCH_SIZE = 32
    VAL_BATCH_SIZE = 8
    EPOCHS = 4
    TOKENIZER = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=False)

# Dataset setup

Here we prepare the dataset class for training of BERT. 

1) We tokenize the words using BertTokenizer

2) We add special symbols, and pad the sentences and labels so that each sentence has 128 words/labels

3) We mask the original words with 1s and padded words with 0s

In [None]:
class Dataset:
  
  def __init__(self, texts, tags):
    
    #Texts: [['Diana', 'is', 'a', 'girl], ['she', 'plays', 'football']]
    #tags: [[0, 1, 2, 5], [1, 3, 5]]
    
    self.texts = texts
    self.tags = tags
  
  def __len__(self):
    return len(self.texts)

  def __getitem__(self, index):
    texts = self.texts[index]
    tags = self.tags[index]

    #Tokenise
    ids = []
    target_tag = []

    for i, s in enumerate(texts):
        inputs = Config.TOKENIZER.encode(s, add_special_tokens=False)
     
        input_len = len(inputs)
        ids.extend(inputs)
        target_tag.extend(input_len * [tags[i]])
    
    #To Add Special Tokens, subtract 2 from MAX_LEN
    ids = ids[:Config.MAX_LEN - 2]
    target_tag = target_tag[:Config.MAX_LEN - 2]

    #Add Sepcial Tokens
    ids = Config.CLS + ids + Config.SEP
    target_tags = Config.VALUE_TOKEN + target_tag + Config.VALUE_TOKEN

    mask = [1] * len(ids)
    token_type_ids = [0] * len(ids)

    #Add Padding if the input_len is small

    padding_len = Config.MAX_LEN - len(ids)
    ids = ids + ([0] * padding_len)
    target_tags = target_tags + ([0] * padding_len)
    mask = mask + ([0] * padding_len)
    token_type_ids = token_type_ids + ([0] * padding_len)

    return {
        "ids" : torch.tensor(ids, dtype=torch.long),
        "mask" : torch.tensor(mask, dtype=torch.long),
        "token_type_ids" : torch.tensor(token_type_ids, dtype=torch.long),
        "target_tags" : torch.tensor(target_tags, dtype=torch.long)
      }

# Train-test split

In [None]:
from sklearn.model_selection import train_test_split

#Train Test Split
X_train, X_test, y_train, y_test = train_test_split(sentence_list, iobes_ids_list, test_size=0.15)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15)

#Create DataLoaders
train_dataset = Dataset(texts = X_train, tags = y_train)
val_dataset = Dataset(texts = X_val, tags = y_val)
train_data_loader = DataLoader(train_dataset, batch_size=Config.TRAIN_BATCH_SIZE)
val_data_loader = DataLoader(val_dataset, batch_size=Config.VAL_BATCH_SIZE)

for i, data_ in enumerate(train_data_loader):
    print(data_)
    break

{'ids': tensor([[ 101,  100, 1022,  ...,    0,    0,    0],
        [ 101,  100, 1999,  ...,    0,    0,    0],
        [ 101,  100, 1997,  ...,    0,    0,    0],
        ...,
        [ 101,  100, 3858,  ...,    0,    0,    0],
        [ 101,  100,  100,  ...,    0,    0,    0],
        [ 101,  100, 2760,  ...,    0,    0,    0]]), 'mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]]), 'token_type_ids': tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]]), 'target_tags': tensor([[ 0,  0,  0,  ...,  0,  0,  0],
        [ 0, 28,  0,  ...,  0,  0,  0],
        [ 0,  0,  0,  ...,  0,  0,  0],
        ...,
        [ 0,  0,  0,  ...,  0,  0,  0],
        [ 0, 

# Functions for training and evaluation

In [None]:
device =  "cuda" if torch.cuda.is_available() else "cpu"

def train_fn(train_data_loader, model, optimizer, device, scheduler):
    #Train the Model
    model.train()
    loss_ = 0
    for data in tqdm(train_data_loader, total = len(train_data_loader)):
        for i, j in data.items():
            data[i] = j.to(device)

        #Backward Propagation
        optimizer.zero_grad()
        _, loss = model(**data)
        loss.backward()
        optimizer.step()
        scheduler.step()
        loss_ += loss.item()
    return model, loss_ / len(train_data_loader)

def val_fn(val_data_loader, model, optimizer, device, scheduler):
    model.eval()
    loss_ = 0
    for data in tqdm(val_data_loader, total = len(val_data_loader)):
        for i, j in data.items():
            data[i] = j.to(device)
        _, loss = model(**data)
        loss_ += loss.item()
    return loss_ / len(val_data_loader)

# BERT model

Our model includes BertModel, a dropout layer, and a linear layer with 768 inputs and an output neuron for each tag number

In [None]:
class NERBertModel(nn.Module):
    
    def __init__(self, num_tag):
        super(NERBertModel, self).__init__()
        self.num_tag = num_tag
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.bert_drop = nn.Dropout(0.3)
        self.out_tag = nn.Linear(768, self.num_tag)
        
    def forward(self, ids, mask, token_type_ids, target_tags):
        output, _ = self.bert(ids, attention_mask=mask, token_type_ids=token_type_ids, return_dict=False)
        bert_out = self.bert_drop(output) 
        tag = self.out_tag(bert_out)
    
        #Calculate the loss
        Critirion_Loss = nn.CrossEntropyLoss()
        active_loss = mask.view(-1) == 1
        active_logits = tag.view(-1, self.num_tag)
        active_labels = torch.where(active_loss, target_tags.view(-1), torch.tensor(Critirion_Loss.ignore_index).type_as(target_tags))
        loss = Critirion_Loss(active_logits, active_labels)
        return output, tag, loss

In [None]:
import numpy as np

#Model Architecture

num_tag = max(np.unique([item for sublist in iobes_ids_list for item in sublist])) + 1
print(num_tag)
model = NERBertModel(num_tag=num_tag)
model.to(device)

45


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


NERBertModel(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)

# Function for hyperparameters

In [None]:
def get_hyperparameters(model, ff):

    # ff: full_finetuning
    if ff:
        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "gamma", "beta"]
        optimizer_grouped_parameters = [
            {
                "params": [
                    p for n, p in param_optimizer if not any(nd in n for nd in no_decay)
                ],
                "weight_decay_rate": 0.01,
            },
            {
                "params": [
                    p for n, p in param_optimizer if any(nd in n for nd in no_decay)
                ],
                "weight_decay_rate": 0.0,
            },
        ]
    else:
        param_optimizer = list(model.named_parameters())
        optimizer_grouped_parameters = [{"params": [p for n, p in param_optimizer]}]

    return optimizer_grouped_parameters

In [None]:
# Set hyperparameters (optimizer, weight decay, learning rate)
FULL_FINETUNING = False
optimizer_grouped_parameters = get_hyperparameters(model, FULL_FINETUNING)
optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=1e-1)
num_train_steps = int(len(X_train) / Config.TRAIN_BATCH_SIZE * Config.EPOCHS)
scheduler = get_linear_schedule_with_warmup(
    optimizer, 
    num_warmup_steps=0, 
    num_training_steps=num_train_steps
)

# Training of BERT

In [None]:
for epoch in range(Config.EPOCHS):
    model, train_loss = train_fn(train_data_loader, model, optimizer, device, scheduler)
    val_loss = val_fn(val_data_loader, model, optimizer, device, scheduler)
    print(f"Epoch: {epoch + 1}, Train_loss: {train_loss}, Val_loss: {val_loss}")

  0%|          | 0/27 [00:06<?, ?it/s]


KeyboardInterrupt: ignored

Each epoch of training BERT model takes about 1 hour. The loss noticeably improved after 3 iterations, but we could not wait any longer. For NER recognition we will use the untrained version.

# Encoder model from Task 1

We tried to train our model from Task 1, but it performed much worse than BERT and also took a long time to train. The code is almost identical to the code for BERT (replace self.bert = BertModel.from_pretrained('bert-base-uncased') with any transformer), we will not show it here.

# Output
Here is an example output of an untrained BERT model

In [None]:
import string

def prediction(test_sentence, model):

    Token_inputs = Config.TOKENIZER.encode(test_sentence, add_special_tokens=False)
    test_dataset =  Dataset(test_sentence, tags= [[1] * len(test_sentence)])
    # num_tag = len(le.classes_)

    # print(test_sentence)
    # print(Token_inputs)
   
    with torch.no_grad():
        data = test_dataset[0]
        for i, j in data.items():
            data[i] = j.to(device).unsqueeze(0)
        output, tag, _ = model(**data)
        return output
        # print(tag)
        # print(tag.argmax(2).cpu().numpy().reshape(-1)[1:len(Token_inputs)+1])

In [None]:
bert_output = prediction(X_train[0], model)
print('BERT output:', bert_output)
print('labels:', y_train[0])
' '.join(X_train[0])

BERT output: tensor([[[-0.1689,  0.2608, -0.1726,  ...,  0.0096,  0.4342,  0.5926],
         [ 0.0257,  0.5536,  0.4179,  ..., -0.1947,  0.3685,  0.2144],
         [ 0.1301,  0.3480,  0.4897,  ..., -0.2039,  0.5030, -0.0019],
         ...,
         [-0.1569,  0.2610,  0.4485,  ..., -0.1336,  0.1430,  0.0371],
         [-0.0396,  0.1616,  0.4300,  ..., -0.2126,  0.1323,  0.5248],
         [-0.0524,  0.2202,  0.4112,  ..., -0.1602,  0.1982,  0.3317]]])
labels: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 26, 26, 26, 26, 26, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 41, 42, 42, 43, 0]


'Note 8 – Property , Plant and Equipment , Net Our property , plant and equipment , net , consisted of the following ( in millions ) : As of December 31 , 2018 , the table above included $ 1.69 billion of gross build-to-suit lease assets .'

# Named Entity Recognition

We will make a new dataset with the results of BERT. 

1) For NER task, we will focus on binary classification of words in a sentence: KPI / NOT KPI. 

2) We will use the outputs of BERT as features for classification.

3) We again pad each sentence with empty strings to set the number of words to 128. For each word we will predict whether it is KPI or not (1 or 0).

3) We apply PCA to reduce the number of BERT features for a sentence from 768 * 128 = 98304 down to ~700. 


5) For classification we will use a small NN with a few linear layers.

# KPI dataset

In [None]:
from sklearn.decomposition import PCA

class DatasetKPI:
  
  def __init__(self, text_inputs, tags, bert_model, n_features = None):
    self.text_inputs = text_inputs
    self.bert_results = []
    self.n_features = n_features
    
    for text_input in tqdm(text_inputs):
      self.bert_results.append(prediction(text_input, bert_model))

    self.reduce_bert_dimensionality(self.bert_results)
    self.tags = tags

  def reduce_bert_dimensionality(self, bert_results):
    if not self.n_features:
      self.n_features = 0.97

    pca = PCA(n_components = self.n_features)
    bert_res = []
    for i in bert_results:
      # print(i)
      bert_res.append(np.array(torch.flatten(i)))

    bert_res = np.array(bert_res)
    # print(bert_res.shape)
    
    self.bert_results = pca.fit_transform(np.array(bert_res))
    self.n_features = pca.n_components_
    print('n features:', self.n_features)
  
  def __len__(self):
    return len(self.text_inputs)

  def __getitem__(self, index):
    texts = self.text_inputs[index]
    bert_results = self.bert_results[index]
    tags = self.tags[index]

    texts = texts[:Config.MAX_LEN]
    tags = tags[:Config.MAX_LEN]

    #Add Padding if the length of texts is small
    padding_len = Config.MAX_LEN - len(texts)
    texts = texts + ([''] * padding_len)

    tags = [bool(tag) for tag in tags]
    tags = tags + ([0] * padding_len)

    return {
        "text" : texts,
        "bert_result" : bert_results,
        "tags" : torch.tensor(tags)
        }

In [None]:
dataset_kpi = DatasetKPI(X_train[:750], y_train[:750], model)

100%|██████████| 750/750 [06:24<00:00,  1.95it/s]


n features: 639


# Train/test split

In [None]:
train_set_kpi = torch.utils.data.Subset(dataset_kpi, range(600))
test_set_kpi = torch.utils.data.Subset(dataset_kpi, range(600, len(dataset_kpi)))

data_loader_kpi_train = DataLoader(train_set_kpi, batch_size=Config.TRAIN_BATCH_SIZE)
data_loader_kpi_test = DataLoader(test_set_kpi, batch_size=1)

# NN architecture

For classification we use a small network with 3 linear layers and a dropout layer

In [None]:
class Network(nn.Module):
  def __init__(self):
    super(Network, self).__init__()
    self.linear1 = nn.Linear(dataset_kpi.n_features, 128)
    self.linear2 = nn.Linear(128, 128)
    self.dropout = nn.Dropout(0.7)
    self.linear3 = nn.Linear(128, Config.MAX_LEN)
    
  def forward(self, x):
    x = torch.relu(self.linear1(x))
    x = torch.relu(self.linear2(x))
    x = self.dropout(x)
    x = torch.relu(self.linear3(x))
    return x

# Training

In [None]:
clf = Network()

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(clf.parameters(), lr=0.1)

epochs = 100
for epoch in range(epochs):
  running_loss = 0.0
  for i, data in enumerate(data_loader_kpi_train, 0):
    inputs = data['bert_result']
    labels = data['tags']
    # set optimizer to zero grad to remove previous epoch gradients
    optimizer.zero_grad()
    
    # forward propagation
    outputs = clf(inputs)
    # print(outputs)
    loss = criterion(outputs, labels.float())
    
    # backward propagation
    loss.backward()
    
    # optimize
    optimizer.step()
    running_loss += loss.item()
    
  # display statistics
  print(f'{epoch + 1} loss: {running_loss}')

1 loss: 15.076046407222748
2 loss: 14.306605219841003
3 loss: 13.992307186126709
4 loss: 13.808817386627197
5 loss: 13.687402486801147
6 loss: 13.601940214633942
7 loss: 13.534206986427307
8 loss: 13.47859799861908
9 loss: 13.43634682893753
10 loss: 13.398392677307129
11 loss: 13.36576509475708
12 loss: 13.340641677379608
13 loss: 13.323532342910767
14 loss: 13.301436066627502
15 loss: 13.287679135799408
16 loss: 13.271123111248016
17 loss: 13.256970942020416
18 loss: 13.248479187488556
19 loss: 13.238181471824646
20 loss: 13.226947546005249
21 loss: 13.221908152103424
22 loss: 13.21670913696289
23 loss: 13.210373282432556
24 loss: 13.20313811302185
25 loss: 13.19961005449295
26 loss: 13.196849048137665
27 loss: 13.193075716495514
28 loss: 13.18871396780014
29 loss: 13.187800407409668
30 loss: 13.186232209205627
31 loss: 13.184845447540283
32 loss: 13.182960569858551
33 loss: 13.180581033229828
34 loss: 13.180615901947021
35 loss: 13.17891538143158
36 loss: 13.177353143692017
37 loss: 

#Sample output

If our prediction for a word is > 0, we round it up to 1. As a result, our output can be either 0 or 1.

In [None]:
for i in range(3):
  inputs = test_set_kpi[i]['bert_result']
  output = clf(torch.tensor(inputs))
  print('prediction', (output > 0).int())
  print('label', test_set_kpi[i]['tags'])

prediction tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0], dtype=torch.int32)
label tensor([0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0,
        0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0])
prediction tensor([0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       

# Model evaluation

Let us see how many KPI indicators were classified correctly

In [None]:
hits_kpi = 0
hits_all = 0
misses_kpi = 0
misses_all = 0

for batch in data_loader_kpi_test:
  for i in range(len(batch)):
    entry = batch
    inputs = entry['bert_result'][0]
    label_list = entry['tags'][0].int()
    output = clf(inputs)
    prediction_list = (output > 0).int()

    for prediction, label in zip(prediction_list, label_list):
      if label == prediction:
        hits_all += 1
      if label and label == prediction:
        hits_kpi += 1
      if label != prediction:
        misses_all += 1
      if label and label != prediction:
        misses_kpi += 1

print(f'Accuracy {hits_all / (hits_all + misses_all):.3f}')
print(f'F1 score: {hits_kpi / (hits_kpi + misses_kpi):.3f}')

Accuracy 0.841
F1 score: 0.127
