In [1]:
%run NER_common.ipynb

You should consider upgrading via the 'pip install --upgrade pip' command.[0m
You should consider upgrading via the 'pip install --upgrade pip' command.[0m
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [2]:
!pip install flair

You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [1]:
import seqeval.metrics
from tqdm import tqdm_notebook

import numpy as np
from itertools import chain, islice
from collections import Counter
from collections import defaultdict
from functools import partial

In [3]:
import flair
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, Dataset, DataLoader
import pytorch_lightning as pl
from test_tube import Experiment
import argparse
import os.path

In [5]:
from tagging import *

In [6]:
import abc

In [7]:
original_inputs, original_targets = read_data('data/data.txt')

In [8]:
experiment_config = argparse.Namespace()
experiment_config.basic_model = 'BILSTM-flair'

experiment_config.replace_urls = False
experiment_config.replace_numbers = False
experiment_config.split_hashtags = False
experiment_config.split_nicknames = False

experiment_config.val_batch_size = 24
experiment_config.train_batch_size = 16
experiment_config.lr = 2e-4
experiment_config.gradient_acccumulation_steps = 1
experiment_config.gradient_clipping_norm = 5.0
experiment_config.n_epochs = 12
experiment_config.emb_dropout_type = 'locked'
experiment_config.emb_dropout = 0.5
experiment_config.bilstm_hidden_size = 2048
experiment_config.bilstm_dropout = 0.2
experiment_config.bilstm_n_layers = 1
experiment_config.pre_fc_dropout = 0.2
experiment_config.flair_embeddings = 'flair+glove' # bert
experiment_config.flair_embeddings_id = 'medium'
experiment_config.save_embedding_strategy = 'gpu'
# experiment_config.mask_additional_wordpieces = True

experiment_dir = 'NER_experiments/'
experiment_config.experiment_name = 'BILSTM-over-flair'

In [9]:
split_names = ['train', 'val', 'test']

In [10]:
input_flair_sentences = [flair.data.Sentence(' '.join(inp)) for inp in original_inputs]

In [11]:
for fl, inp in zip(input_flair_sentences, original_inputs):
    assert len(fl) == len(inp)

In [12]:
inputs_split, flair_sentences_split, targets_split = split_to_dicts([original_inputs, input_flair_sentences, original_targets],
                                                                 (train_indices, val_indices, test_indices), 
                                                                 split_names)

In [13]:
unique_tags = count_tags(original_targets)

In [14]:
target_vocab = Vocab.from_id2word(unique_tags, unk_index=None, n_specials=0)

In [15]:
experiment_config.n_classes = len(target_vocab)

In [16]:
def numericalize_targets(targets, target_vocab):
    target_ids = target_vocab.numericalize(targets)
    return target_ids

In [17]:
target_ids = numericalize_targets(original_targets, target_vocab)

In [18]:
target_ids_split = split_to_dicts([target_ids], (train_indices, val_indices, test_indices), split_names)[0]

In [19]:
sequence_lengths = [len(tgt) for tgt in original_targets]
lengths_split = split_to_dicts([sequence_lengths], (train_indices, val_indices, test_indices), split_names)[0]

In [20]:
print(max(sequence_lengths))

41


In [21]:
class FlairDataset(Dataset):
    def __init__(self, input_sentences, target_ids, input_lengths):
        self.input_sentences = input_sentences
        self.target_ids = target_ids
        self.input_lengths = input_lengths
        assert len(input_lengths) == len(target_ids) == len(input_sentences)
        
    def __len__(self):
        return len(self.input_sentences)
    
    def __getitem__(self, idx):
        return self.input_sentences[idx], self.target_ids[idx], self.input_lengths[idx]
    
    @staticmethod
    def collate( examples):
        list_of_sentences = []
        list_of_targets = []
        list_of_lengths = []
        list_of_masks = []
        for sent, tgt, length in examples:
            list_of_sentences.append(sent)
            list_of_targets.append(torch.tensor(tgt, dtype=torch.long))
            list_of_lengths.append(length)
            list_of_masks.append(torch.ones(length, dtype=torch.bool))
            
        target_tensor = torch.nn.utils.rnn.pad_sequence(list_of_targets, batch_first=True)
        mask_tensor = torch.nn.utils.rnn.pad_sequence(list_of_masks, batch_first=True)
        lengths_tensor = torch.tensor(list_of_lengths, dtype=torch.long)
        return list_of_sentences, target_tensor, lengths_tensor, mask_tensor

In [22]:
def make_flair_embeddings(experiment_config):
    """
    Creates FlairEmbeddings object, depending on the configuration.
    Sets config.embedding_dim equal to the used embeddings.
    """
    if experiment_config.flair_embeddings == 'flair':
        flair_embeddings_forward = flair.embeddings.FlairEmbeddings('news-forward')
        flair_embeddings_backward = flair.embeddings.FlairEmbeddings('news-backward')
        flair_stacked_embeddings = flair.embeddings.StackedEmbeddings([flair_embeddings_forward, flair_embeddings_backward])
    elif experiment_config.flair_embeddings == 'flair+glove':
        flair_embeddings_forward = flair.embeddings.FlairEmbeddings('news-forward')
        flair_embeddings_backward = flair.embeddings.FlairEmbeddings('news-backward')
        glove_embeddings = flair.embeddings.WordEmbeddings('glove')
        flair_stacked_embeddings = flair.embeddings.StackedEmbeddings([flair_embeddings_forward, flair_embeddings_backward, glove_embeddings])
    else:
        raise ValueError("Unknown type of embeddings {}".format(experiment_config.flair_embeddings))
        
    experiment_config.embedding_dim = flair_stacked_embeddings.embedding_length
    return flair_stacked_embeddings

In [24]:
train_dataset = FlairDataset(flair_sentences_split['train'], target_ids_split['train'], lengths_split['train'])
val_dataset = FlairDataset(flair_sentences_split['val'], target_ids_split['val'], lengths_split['val'])
test_dataset = FlairDataset(flair_sentences_split['test'], target_ids_split['test'], lengths_split['test'])

In [25]:
# list_of_sentences, target_tensor, lengths_tensor, mask_tensor

In [26]:
class FlairEncoder(nn.Module):
    def __init__(self, flair_embeddings):
        super().__init__()
        self.flair_embeddings = flair_embeddings
        
    def forward(self, encoder_inputs):
        """
        Produces embeddings for every positions in the input texts.
        Args:
            encoder_inputs (Tuple: (list_of_sentences, length_tensor)):
                list_of_sentences: a list of flair.data.Sentence objects
                length_tensor: a list or tensor, containing the length of every sentence in list_of_sentences
        Output:
            encoder_states (FloatTensor): a tensor, containing embeddings for every positions
        Shape:
            encoder_states: (batch_size, padded_length, embedding_dim)
        """
        list_of_sentences, length_tensor = encoder_inputs
        with torch.no_grad():
            self.flair_embeddings.embed(list_of_sentences)
            
        padding_length = torch.max(length_tensor).item()
        
        batch_size = len(list_of_sentences)
        
        sentence_tensor = torch.zeros(
            batch_size,
            padding_length,
            self.flair_embeddings.embedding_length,
            dtype=torch.float,
            device=length_tensor.device,
        )
        for s_id, sentence in enumerate(list_of_sentences):
            # fill values with word embeddings
            sentence_tensor[s_id][: len(sentence)] = torch.cat(
                [token.get_embedding().unsqueeze(0) for token in sentence],
                dim=0)
            
        return sentence_tensor

In [27]:
class BiLSTMFlairLightning(pl.LightningModule):
    def __init__(self, config):
        super().__init__()
        self.config = config
        flair_embeddings = make_flair_embeddings(config)
        encoder = FlairEncoder(flair_embeddings)
        tagger = BiLSTMTagger(config)
        self.model = TaggerWithEncoder(encoder, tagger)
        
    def encoder_tagger_inputs(self, list_of_sentences, length_tensor):
        return (list_of_sentences, length_tensor), (length_tensor,)
        
    def forward(self, list_of_sentences, length_tensor):
        encoder_inputs = (list_of_sentences, length_tensor)
        tagger_inputs = (length_tensor,)
        return self.model(encoder_inputs, tagger_inputs)
    
    def compute_loss_on_batch(self, batch):
        list_of_sentences, target_tensor, lengths_tensor, mask_tensor = batch
        logits = self(list_of_sentences, lengths_tensor)
        
        loss_mask = ~mask_tensor
        loss = masked_crossentropy_loss(logits, target_tensor, loss_mask)
        return loss
    
    def training_step(self, batch, batch_nb):
        # REQUIRED
        self.train()
        loss = self.compute_loss_on_batch(batch)
        return {'loss': loss}

    def validation_step(self, batch, batch_nb):
        # OPTIONAL
        loss = self.compute_loss_on_batch(batch)
        list_of_sentences, target_tensor, lengths_tensor, mask_tensor =  batch
        encoder_inputs = (list_of_sentences, lengths_tensor)
        tagger_inputs = (lengths_tensor,)
         # encoder_inputs, tagger_inputs, mask_tensor, target_vocab
        predicted_tags = self.model.predict_tags(encoder_inputs, tagger_inputs, mask_tensor, target_vocab)
        
        return {'val_loss': loss, 'tags': predicted_tags}

    def validation_end(self, outputs):
        # OPTIONAL
        avg_loss = torch.tensor([x['val_loss'] for x in outputs]).mean()
        predictions = list(chain.from_iterable(x['tags'] for x in outputs))
        f1_score = seqeval.metrics.f1_score(targets_split['val'], predictions)
        
        print(seqeval.metrics.classification_report(targets_split['val'], predictions))
        metrics = {'avg_val_loss': avg_loss.item(), 'f1': f1_score}
        metrics_to_write = dict(metrics, epoch=self.trainer.current_epoch+1)
#         metrics.update(self.trainer.tng_tqdm_dic)
        
#         scalar_metrics = self.trainer.__metrics_to_scalars(
#                     metrics, blacklist=self.trainer.__log_vals_blacklist())
        
        assert self.experiment
        self.experiment.log(metrics_to_write)
        self.experiment.save()
        
        return metrics
    
    def predict_loader(self, loader, device='cuda'):
        result = []
        self.eval()
        with torch.no_grad():
            for batch in loader:
                list_of_sentences, target_tensor, lengths_tensor, mask_tensor =  batch
                lengths_tensor = lengths_tensor.to(device)
                mask_tensor = mask_tensor.to(device)
                encoder_inputs = (list_of_sentences, lengths_tensor)
                tagger_inputs = (lengths_tensor,)
                
                predicted_tags = self.model.predict_tags(encoder_inputs, tagger_inputs, mask_tensor, target_vocab)
                result.extend(predicted_tags)
        
        return result
    
    def configure_optimizers(self):
        # REQUIRED
        # can return multiple optimizers and learning_rate schedulers
        return torch.optim.Adam(self.parameters(), lr=self.config.lr)

    @pl.data_loader
    def tng_dataloader(self):
        # REQUIRED
        assert isinstance(train_dataset, FlairDataset)
        return DataLoader(train_dataset, batch_size=self.config.train_batch_size, shuffle=True, collate_fn=FlairDataset.collate)

    @pl.data_loader
    def val_dataloader(self):
        # OPTIONAL
        assert isinstance(val_dataset, FlairDataset)
        return DataLoader(val_dataset, batch_size=self.config.val_batch_size, shuffle=False, collate_fn=FlairDataset.collate)

In [28]:
!nvidia-smi

Wed Sep  4 12:33:24 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.67       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 108...  Off  | 00000000:05:00.0 Off |                  N/A |
| 21%   27C    P8     9W / 250W |     10MiB / 11178MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [29]:
# def bilstm_test():
#     bilstm = BiLSTMFlairLightning(experiment_config)
#     bilstm = bilstm.cuda()
#     predictions = bilstm_predict_tags_for_loader(bilstm, bilstm.val_dataloader, target_vocab)
#     print(len(predictions))
#     print(predictions[100])
#     del bilstm
#     torch.cuda.empty_cache()

In [30]:
# bilstm_test()

In [31]:
# del bilstm_model
import gc
gc.collect()
torch.cuda.empty_cache()

In [32]:
exp = Experiment(save_dir=experiment_dir, name=experiment_config.experiment_name)

In [33]:
print(exp.version)

5


In [34]:
bilstm_model = BiLSTMFlairLightning(experiment_config)

  "num_layers={}".format(dropout, num_layers))


In [35]:
exp.argparse(experiment_config)
exp.save()

In [36]:
bilstm_model = bilstm_model.cuda()

In [37]:
checkpoint_path = f'{experiment_dir}/{experiment_config.experiment_name}/version_{exp.version}/checkpoint'

In [38]:
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    filepath= checkpoint_path,
    save_best_only=True,
    verbose=True,
    monitor='f1',
    mode='max'
)

early_stop = pl.callbacks.EarlyStopping(
        monitor='f1',
        patience=5,
        verbose=True,
        mode='max'
)

In [39]:
trainer = pl.Trainer(experiment=exp,
                     max_nb_epochs=experiment_config.n_epochs,
                     gpus=[0],
                     gradient_clip=experiment_config.gradient_clipping_norm,
                     early_stop_callback=early_stop,
                     accumulate_grad_batches=experiment_config.gradient_acccumulation_steps,
                     add_log_row_interval=100,
                     checkpoint_callback=checkpoint_callback)

VISIBLE GPUS: '0'
gpu available: True, used: True


In [40]:
trainer.fit(bilstm_model)

 20%|██        | 1/5 [00:00<00:00,  5.34it/s]

                                                 Name               Type  \
0                                               model  TaggerWithEncoder   
1                                       model.encoder       FlairEncoder   
2                      model.encoder.flair_embeddings  StackedEmbeddings   
3     model.encoder.flair_embeddings.list_embedding_0    FlairEmbeddings   
4   model.encoder.flair_embeddings.list_embedding_...      LanguageModel   
5   model.encoder.flair_embeddings.list_embedding_...            Dropout   
6   model.encoder.flair_embeddings.list_embedding_...          Embedding   
7   model.encoder.flair_embeddings.list_embedding_...               LSTM   
8   model.encoder.flair_embeddings.list_embedding_...             Linear   
9     model.encoder.flair_embeddings.list_embedding_1    FlairEmbeddings   
10  model.encoder.flair_embeddings.list_embedding_...      LanguageModel   
11  model.encoder.flair_embeddings.list_embedding_...            Dropout   
12  model.en

  0%|          | 0/361 [00:00<01:08,  5.26it/s]

             precision    recall  f1-score   support

      other       0.01      0.01      0.01       110
    company       0.00      0.01      0.00        84
   facility       0.00      0.00      0.00        58
        loc       0.01      0.01      0.01       156
     person       0.00      0.00      0.00       131
    product       0.00      0.00      0.00        38
musicartist       0.00      0.00      0.00        33
 sportsteam       0.00      0.00      0.00        21
      movie       0.00      0.00      0.00        17
     tvshow       0.00      0.00      0.00        11

  micro avg       0.00      0.00      0.00       659
  macro avg       0.00      0.00      0.00       659



100%|██████████| 361/361 [01:46<00:00,  6.31it/s, avg_val_loss=0.249, batch_nb=324, epoch=0, f1=0.283, gpu=0, loss=0.289, v_nb=5]

             precision    recall  f1-score   support

      other       0.00      0.00      0.00       110
    company       0.33      0.12      0.18        84
   facility       0.04      0.03      0.04        58
        loc       0.44      0.49      0.46       156
     person       0.64      0.43      0.51       131
    product       0.00      0.00      0.00        38
musicartist       0.00      0.00      0.00        33
 sportsteam       0.00      0.00      0.00        21
      movie       0.00      0.00      0.00        17
     tvshow       0.00      0.00      0.00        11

  micro avg       0.40      0.22      0.28       659
  macro avg       0.28      0.22      0.24       659

save callback...

Epoch 00001: f1 improved from -inf to 0.28319, saving model to NER_experiments//BILSTM-over-flair/version_5/checkpoint/_ckpt_epoch_1.ckpt


100%|██████████| 361/361 [00:20<00:00, 16.36it/s, avg_val_loss=0.189, batch_nb=324, epoch=1, f1=0.459, gpu=0, loss=0.208, v_nb=5]

             precision    recall  f1-score   support

      other       0.26      0.23      0.24       110
    company       0.57      0.43      0.49        84
   facility       0.35      0.29      0.32        58
        loc       0.61      0.69      0.65       156
     person       0.72      0.66      0.69       131
    product       0.00      0.00      0.00        38
musicartist       0.00      0.00      0.00        33
 sportsteam       0.38      0.14      0.21        21
      movie       0.00      0.00      0.00        17
     tvshow       0.00      0.00      0.00        11

  micro avg       0.51      0.42      0.46       659
  macro avg       0.45      0.42      0.43       659

save callback...

Epoch 00002: f1 improved from 0.28319 to 0.45935, saving model to NER_experiments//BILSTM-over-flair/version_5/checkpoint/_ckpt_epoch_2.ckpt


100%|██████████| 361/361 [00:20<00:00, 16.06it/s, avg_val_loss=0.169, batch_nb=324, epoch=2, f1=0.476, gpu=0, loss=0.187, v_nb=5]

             precision    recall  f1-score   support

      other       0.39      0.24      0.29       110
    company       0.51      0.48      0.49        84
   facility       0.40      0.24      0.30        58
        loc       0.61      0.70      0.65       156
     person       0.77      0.60      0.68       131
    product       0.16      0.11      0.13        38
musicartist       0.00      0.00      0.00        33
 sportsteam       0.40      0.10      0.15        21
      movie       0.00      0.00      0.00        17
     tvshow       0.00      0.00      0.00        11

  micro avg       0.56      0.42      0.48       659
  macro avg       0.48      0.42      0.44       659

save callback...

Epoch 00003: f1 improved from 0.45935 to 0.47569, saving model to NER_experiments//BILSTM-over-flair/version_5/checkpoint/_ckpt_epoch_3.ckpt


100%|██████████| 361/361 [00:20<00:00, 16.58it/s, avg_val_loss=0.163, batch_nb=324, epoch=3, f1=0.502, gpu=0, loss=0.161, v_nb=5]

             precision    recall  f1-score   support

      other       0.29      0.37      0.33       110
    company       0.69      0.40      0.51        84
   facility       0.44      0.53      0.48        58
        loc       0.73      0.67      0.70       156
     person       0.75      0.61      0.68       131
    product       0.16      0.08      0.11        38
musicartist       0.11      0.03      0.05        33
 sportsteam       0.78      0.33      0.47        21
      movie       0.00      0.00      0.00        17
     tvshow       0.00      0.00      0.00        11

  micro avg       0.55      0.46      0.50       659
  macro avg       0.54      0.46      0.49       659

save callback...

Epoch 00004: f1 improved from 0.47569 to 0.50166, saving model to NER_experiments//BILSTM-over-flair/version_5/checkpoint/_ckpt_epoch_4.ckpt


100%|██████████| 361/361 [00:21<00:00, 15.55it/s, avg_val_loss=0.148, batch_nb=324, epoch=4, f1=0.572, gpu=0, loss=0.149, v_nb=5]

             precision    recall  f1-score   support

      other       0.50      0.34      0.40       110
    company       0.61      0.64      0.63        84
   facility       0.47      0.62      0.54        58
        loc       0.79      0.77      0.78       156
     person       0.76      0.65      0.70       131
    product       0.18      0.13      0.15        38
musicartist       0.25      0.03      0.05        33
 sportsteam       0.42      0.52      0.47        21
      movie       0.00      0.00      0.00        17
     tvshow       0.00      0.00      0.00        11

  micro avg       0.62      0.53      0.57       659
  macro avg       0.58      0.53      0.54       659

save callback...

Epoch 00005: f1 improved from 0.50166 to 0.57166, saving model to NER_experiments//BILSTM-over-flair/version_5/checkpoint/_ckpt_epoch_5.ckpt


  0%|          | 1/361 [00:00<00:23, 15.23it/s, avg_val_loss=0.151, batch_nb=0, epoch=6, f1=0.55, gpu=0, loss=0.131, v_nb=5]     

             precision    recall  f1-score   support

      other       0.37      0.43      0.39       110
    company       0.62      0.57      0.59        84
   facility       0.54      0.55      0.55        58
        loc       0.72      0.76      0.74       156
     person       0.77      0.63      0.69       131
    product       0.13      0.11      0.12        38
musicartist       0.33      0.06      0.10        33
 sportsteam       0.78      0.33      0.47        21
      movie       1.00      0.06      0.11        17
     tvshow       0.00      0.00      0.00        11

  micro avg       0.59      0.52      0.55       659
  macro avg       0.59      0.52      0.53       659

save callback...

Epoch 00006: f1 did not improve


100%|██████████| 361/361 [00:21<00:00, 15.19it/s, avg_val_loss=0.148, batch_nb=324, epoch=6, f1=0.584, gpu=0, loss=0.111, v_nb=5]

             precision    recall  f1-score   support

      other       0.44      0.37      0.40       110
    company       0.67      0.55      0.60        84
   facility       0.69      0.57      0.62        58
        loc       0.71      0.76      0.73       156
     person       0.77      0.73      0.75       131
    product       0.30      0.16      0.21        38
musicartist       0.30      0.09      0.14        33
 sportsteam       0.41      0.52      0.46        21
      movie       0.67      0.12      0.20        17
     tvshow       0.00      0.00      0.00        11

  micro avg       0.63      0.54      0.58       659
  macro avg       0.60      0.54      0.56       659

save callback...

Epoch 00007: f1 improved from 0.57166 to 0.58361, saving model to NER_experiments//BILSTM-over-flair/version_5/checkpoint/_ckpt_epoch_7.ckpt


100%|██████████| 361/361 [00:20<00:00, 15.52it/s, avg_val_loss=0.151, batch_nb=324, epoch=7, f1=0.585, gpu=0, loss=0.097, v_nb=5]

             precision    recall  f1-score   support

      other       0.44      0.39      0.41       110
    company       0.64      0.64      0.64        84
   facility       0.56      0.67      0.61        58
        loc       0.79      0.78      0.79       156
     person       0.71      0.79      0.74       131
    product       0.11      0.21      0.15        38
musicartist       0.55      0.18      0.27        33
 sportsteam       0.44      0.57      0.50        21
      movie       0.17      0.06      0.09        17
     tvshow       0.00      0.00      0.00        11

  micro avg       0.58      0.59      0.58       659
  macro avg       0.58      0.59      0.58       659

save callback...

Epoch 00008: f1 improved from 0.58361 to 0.58478, saving model to NER_experiments//BILSTM-over-flair/version_5/checkpoint/_ckpt_epoch_8.ckpt


  0%|          | 1/361 [00:00<00:23, 15.26it/s, avg_val_loss=0.148, batch_nb=0, epoch=9, f1=0.582, gpu=0, loss=0.085, v_nb=5]    

             precision    recall  f1-score   support

      other       0.51      0.37      0.43       110
    company       0.61      0.61      0.61        84
   facility       0.58      0.53      0.56        58
        loc       0.79      0.73      0.76       156
     person       0.73      0.75      0.74       131
    product       0.29      0.26      0.27        38
musicartist       0.23      0.15      0.18        33
 sportsteam       0.45      0.43      0.44        21
      movie       0.17      0.06      0.09        17
     tvshow       0.00      0.00      0.00        11

  micro avg       0.62      0.55      0.58       659
  macro avg       0.59      0.55      0.57       659

save callback...

Epoch 00009: f1 did not improve


100%|██████████| 361/361 [00:21<00:00, 15.23it/s, avg_val_loss=0.164, batch_nb=324, epoch=9, f1=0.599, gpu=0, loss=0.065, v_nb=5]

             precision    recall  f1-score   support

      other       0.49      0.41      0.45       110
    company       0.68      0.56      0.61        84
   facility       0.60      0.59      0.59        58
        loc       0.76      0.78      0.77       156
     person       0.79      0.71      0.75       131
    product       0.26      0.24      0.25        38
musicartist       0.47      0.21      0.29        33
 sportsteam       0.50      0.48      0.49        21
      movie       0.75      0.18      0.29        17
     tvshow       0.00      0.00      0.00        11

  micro avg       0.64      0.56      0.60       659
  macro avg       0.63      0.56      0.59       659

save callback...

Epoch 00010: f1 improved from 0.58478 to 0.59854, saving model to NER_experiments//BILSTM-over-flair/version_5/checkpoint/_ckpt_epoch_10.ckpt


  0%|          | 1/361 [00:00<00:24, 14.85it/s, avg_val_loss=0.166, batch_nb=0, epoch=11, f1=0.569, gpu=0, loss=0.059, v_nb=5]    

             precision    recall  f1-score   support

      other       0.32      0.42      0.36       110
    company       0.74      0.58      0.65        84
   facility       0.50      0.57      0.53        58
        loc       0.77      0.72      0.75       156
     person       0.76      0.76      0.76       131
    product       0.37      0.29      0.32        38
musicartist       0.27      0.18      0.22        33
 sportsteam       0.38      0.62      0.47        21
      movie       0.50      0.18      0.26        17
     tvshow       0.07      0.09      0.08        11

  micro avg       0.57      0.57      0.57       659
  macro avg       0.59      0.57      0.57       659

save callback...

Epoch 00011: f1 did not improve


100%|██████████| 361/361 [00:21<00:00, 15.02it/s, avg_val_loss=0.162, batch_nb=324, epoch=11, f1=0.593, gpu=0, loss=0.046, v_nb=5]

             precision    recall  f1-score   support

      other       0.47      0.42      0.44       110
    company       0.72      0.55      0.62        84
   facility       0.60      0.57      0.58        58
        loc       0.75      0.76      0.76       156
     person       0.83      0.73      0.78       131
    product       0.20      0.21      0.20        38
musicartist       0.31      0.33      0.32        33
 sportsteam       0.45      0.48      0.47        21
      movie       0.75      0.18      0.29        17
     tvshow       0.00      0.00      0.00        11

  micro avg       0.63      0.56      0.59       659
  macro avg       0.63      0.56      0.59       659

save callback...

Epoch 00012: f1 did not improve


1

## Inference

In [41]:
import os

In [42]:
checkpoint_file = os.listdir(checkpoint_path)[0]
assert checkpoint_file.startswith('_ckpt_')
checkpoint_file_path = checkpoint_path + '/' + checkpoint_file
print(checkpoint_file_path)

tags_path = f'{experiment_dir}/{experiment_config.experiment_name}/version_{exp.version}/meta_tags.csv'
print(tags_path)

NER_experiments//BILSTM-over-flair/version_5/checkpoint/_ckpt_epoch_10.ckpt
NER_experiments//BILSTM-over-flair/version_5/meta_tags.csv


100%|██████████| 361/361 [00:36<00:00, 15.02it/s, avg_val_loss=0.162, batch_nb=324, epoch=11, f1=0.593, gpu=0, loss=0.046, v_nb=5]

In [43]:
device = 'cuda'

In [44]:
bilstm_model = BiLSTMFlairLightning.load_from_metrics(checkpoint_file_path, tags_path, on_gpu=False)
bilstm_model.freeze()
bilstm_model = bilstm_model.to(device)

  "num_layers={}".format(dropout, num_layers))


In [45]:
test_loader = DataLoader(test_dataset, batch_size=bilstm_model.config.val_batch_size, collate_fn=FlairDataset.collate)

In [46]:
test_pred = bilstm_model.predict_loader(test_loader, device)

In [47]:
report = seqeval.metrics.classification_report(targets_split['test'], test_pred, digits=3)

In [48]:
test_report_save_path =f'{experiment_dir}/{experiment_config.experiment_name}/version_{exp.version}/test_report.txt'

In [49]:
with open(test_report_save_path, 'w+') as of:
    of.write(report)

In [50]:
def predict_for_tokens(model: BiLSTMFlairLightning, tokens, device='cuda'):
    sentence = flair.data.Sentence(' '.join(tokens))
    mask = torch.ones(1, len(sentence)).type(torch.bool).to(device)
    lengths = torch.tensor([len(sentence)]).to(device)
    encoder_inputs, tagger_inputs = model.encoder_tagger_inputs([sentence], lengths)
    model.eval()
    with torch.no_grad():
        tags = bilstm_model.model.predict_tags(encoder_inputs, tagger_inputs, mask, target_vocab)
    return tags[0]


In [51]:
import nltk

In [52]:
text_example = "Satellite imagery this morning of now Category 5 Hurricane Dorian approaching the Abaco Islands in the northern Bahamas. For the latest on Dorian visit http://hurricanes.gov"

In [53]:
tokens_example = nltk.tokenize.TweetTokenizer().tokenize(text_example)
print(tokens_example)

['Satellite', 'imagery', 'this', 'morning', 'of', 'now', 'Category', '5', 'Hurricane', 'Dorian', 'approaching', 'the', 'Abaco', 'Islands', 'in', 'the', 'northern', 'Bahamas', '.', 'For', 'the', 'latest', 'on', 'Dorian', 'visit', 'http://hurricanes.gov']


In [54]:
prediction = predict_for_tokens(bilstm_model, tokens_example)

In [55]:
for token, tag in zip(tokens_example, prediction):
    print(token, tag)

Satellite O
imagery O
this O
morning O
of O
now O
Category O
5 O
Hurricane O
Dorian O
approaching O
the O
Abaco B-geo-loc
Islands I-geo-loc
in O
the O
northern O
Bahamas B-geo-loc
. O
For O
the O
latest O
on O
Dorian B-person
visit O
http://hurricanes.gov O
