In [1]:
import itertools

import torch
import torch.optim as optim
from allennlp.data.dataset_readers.conll2003 import Conll2003DatasetReader
from allennlp.data.iterators import BasicIterator
from allennlp.data.token_indexers import SingleIdTokenIndexer
from allennlp.data.token_indexers import TokenCharactersIndexer
from allennlp.data.token_indexers.elmo_indexer import ELMoTokenCharactersIndexer
from allennlp.data.tokenizers.word_tokenizer import WordTokenizer
from allennlp.data.vocabulary import Vocabulary
from allennlp.nn.activations import Activation
from allennlp.models.simple_tagger import SimpleTagger
from allennlp.modules.attention import LinearAttention, BilinearAttention, DotProductAttention
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper, StackedSelfAttentionEncoder
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding, ElmoTokenEmbedder
from allennlp.modules.seq2vec_encoders import CnnEncoder
from allennlp.modules.token_embedders.token_characters_encoder import TokenCharactersEncoder
from allennlp.predictors import SimpleSeq2SeqPredictor
from allennlp.training.trainer import Trainer
from allennlp.nn.activations import Activation

from tqdm import tqdm as tqdm
from allennlp.data import DatasetReader
from allennlp.models.archival import load_archive

from allennlp.models.archival import archive_model

import os

In [None]:
CUDA_DEVICE = 0

reader = Conll2003DatasetReader(
    tag_label='pos',
    token_indexers={
        "tokens": SingleIdTokenIndexer(lowercase_tokens=True),
        "token_characters": TokenCharactersIndexer(),
        "elmo" : ELMoTokenCharactersIndexer()
    }
)

train_dataset = reader.read('data/train.pos')
validation_dataset = reader.read('data/dev.pos')
test_dataset = reader.read('data/test.pos')

vocab = Vocabulary.from_instances(train_dataset + validation_dataset)

In [3]:
vocab.get_vocab_size('tokens')

17968

In [4]:
options_file = ('https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json')
weight_file = ('https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5')

elmo_embedder = ElmoTokenEmbedder(options_file, weight_file)
token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                            pretrained_file="https://allennlp.s3.amazonaws.com/datasets/glove/glove.6B.50d.txt.gz", 
                            embedding_dim=50, 
                            trainable=True)

char_embedder = Embedding(num_embeddings=vocab.get_vocab_size('token_characters'), embedding_dim=16)
char_cnn_encoder = CnnEncoder(16, 128, (3,), conv_layer_activation=Activation.by_name("relu")())
char_embedding = TokenCharactersEncoder(char_embedder, char_cnn_encoder)

# Pass in the ElmoTokenEmbedder instance instead
word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding ,"elmo": elmo_embedder, "token_characters": char_embedding})

In [5]:
global_encoder = PytorchSeq2SeqWrapper(torch.nn.LSTM(1202, 200, 2, dropout=0.5, bidirectional=True, batch_first=True))

In [6]:
model = SimpleTagger(
    vocab = vocab,
    text_field_embedder=word_embeddings,
    encoder=global_encoder
)

In [7]:
model.to(device=CUDA_DEVICE)

SimpleTagger(
  (text_field_embedder): BasicTextFieldEmbedder(
    (token_embedder_tokens): Embedding()
    (token_embedder_elmo): ElmoTokenEmbedder(
      (_elmo): Elmo(
        (_elmo_lstm): _ElmoBiLm(
          (_token_embedder): _ElmoCharacterEncoder(
            (char_conv_0): Conv1d(16, 32, kernel_size=(1,), stride=(1,))
            (char_conv_1): Conv1d(16, 32, kernel_size=(2,), stride=(1,))
            (char_conv_2): Conv1d(16, 64, kernel_size=(3,), stride=(1,))
            (char_conv_3): Conv1d(16, 128, kernel_size=(4,), stride=(1,))
            (char_conv_4): Conv1d(16, 256, kernel_size=(5,), stride=(1,))
            (char_conv_5): Conv1d(16, 512, kernel_size=(6,), stride=(1,))
            (char_conv_6): Conv1d(16, 1024, kernel_size=(7,), stride=(1,))
            (_highways): Highway(
              (_layers): ModuleList(
                (0): Linear(in_features=2048, out_features=4096, bias=True)
                (1): Linear(in_features=2048, out_features=4096, bias=True)
     

In [8]:
optimizer = optim.Adam(model.parameters(), lr=0.001)
iterator = BasicIterator(batch_size=32)

iterator.index_with(vocab)

In [9]:
trainer = Trainer(model=model,
                  optimizer=optimizer,
                  iterator=iterator,
                  train_dataset=train_dataset,
                  validation_dataset=validation_dataset,
                  num_epochs=10,
                  model_save_interval=5,
                  num_serialized_models_to_keep=3,
                  serialization_dir='log',
                  cuda_device=CUDA_DEVICE)

trainer.train()

You provided a validation dataset but patience was set to None, meaning that early stopping is disabled
accuracy: 0.9369, accuracy3: 0.9905, loss: 0.2313 ||: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 392/392 [02:27<00:00,  2.65it/s]
accuracy: 0.9549, accuracy3: 0.9951, loss: 0.1791 ||: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:12<00:00,  4.91it/s]
accuracy: 0.9648, accuracy3: 0.9969, loss: 0.1157 ||: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 392/392 [02:30<00:00,  2.61it/s]
accuracy: 0.9580, accuracy3: 0.9965, loss: 0.1645 ||: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:13<00:00,  4.83it/s]
accuracy: 0.9746, accuracy3: 0.9

{'best_epoch': 3,
 'peak_cpu_memory_MB': 0,
 'peak_gpu_0_memory_MB': 7899,
 'training_duration': '0:23:50.881127',
 'training_start_epoch': 1,
 'training_epochs': 8,
 'epoch': 9,
 'training_accuracy': 0.9911439980059333,
 'training_accuracy3': 0.999784953593963,
 'training_loss': 0.02030466815898651,
 'training_cpu_memory_MB': 0.0,
 'training_gpu_0_memory_MB': 7790,
 'validation_accuracy': 0.9605566600397615,
 'validation_accuracy3': 0.99610337972167,
 'validation_loss': 0.23313352734678322,
 'best_validation_accuracy': 0.9609940357852883,
 'best_validation_accuracy3': 0.9963021868787276,
 'best_validation_loss': 0.1627850676221507}

In [None]:
!wsl cp simple_tagger_pos.json log/config.json
vocab.save_to_files(os.path.join("log", "vocabulary"))
archive_model("log")

In [None]:
mymodel = load_archive('log/model.tar.gz', 0)

In [None]:
mymodel.model.eval()
dataset_reader_params = mymodel.config["dataset_reader"]
dataset_reader = DatasetReader.from_params(dataset_reader_params)

In [19]:
exmpl = dataset_reader.read("exmpl.txt")
mymodel.model.forward_on_instance(exmpl[0])

1it [00:00, ?it/s]


{'logits': array([[-5.4476541e-01, -2.3911664e+00, -1.8559260e+00,  1.5307439e+01,
         -6.1277914e+00,  2.4244719e+00,  3.9000884e-01, -2.7262022e+00,
          4.5130742e-01,  3.5875578e+00, -1.2541283e+00,  1.4740413e-01,
         -8.0464646e-02,  6.1601615e-01,  7.1576577e-01,  9.2166936e-01,
         -6.5594444e+00],
        [ 1.5166509e+00, -4.3368793e+00,  1.4236359e+01, -2.3689458e+00,
          9.0023530e-01,  4.3981639e-01, -3.1461663e+00,  2.8808618e+00,
          3.6919122e+00, -5.1226094e-04, -3.6571250e+00, -2.3723128e+00,
         -3.7944725e+00, -6.3167974e-02, -6.2558693e-01, -3.0446333e-01,
         -7.1437802e+00],
        [ 1.1854206e+01, -4.3794072e-01, -1.5564039e+00,  5.7527445e-02,
         -4.2922239e+00, -2.8847775e+00,  2.9597430e+00, -4.6049258e-01,
         -2.7570608e+00,  7.2745398e-02, -2.4443436e+00, -5.4268966e+00,
         -1.5672175e+00, -4.1237864e+00, -2.5658522e+00, -1.2483094e+00,
         -2.7547548e+00],
        [-1.5941940e-01,  1.5526590e