In [1]:
import tensorflow as tf
import numpy as np

from LMNets.models import LanguageModelAWD, LanguageModelMoS
from LMNets.losses import lm_loss_sparse
from LMNets.logging import log_scalar
from LMNets.optimize import minimize_w_clipping
from LMNets.data import penn_treebank, wikitext, iterator

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
tf.enable_eager_execution()

# Helpers

In [3]:
def batch_score(model, x, y, seq_len):
    y_, _ =  model.forward(x, seq_len)
    return tf.pow(2,tf.losses.sparse_softmax_cross_entropy(y, logits=y_))

def compute_score(model, tokens, bs, bptt):
    scores = []
    for x, y, seq_len in iterator.get_bptt_batch_iterator(tokens, bs, bptt):
        scores.append(batch_score(model, x, y, seq_len))
    return np.mean(scores)

def train(model, train_tokens, val_tokens, vocab, batch_size=32, val_batch_size=32, epochs=1):
    summary_writer = tf.contrib.summary.create_file_writer("logs", flush_millis=10000)
    summary_writer.set_as_default()
    global_step = tf.train.get_or_create_global_step()
    
    curr_val_score = compute_score(model, val_tokens, val_batch_size, bptt)
    iterations = 0

    for epoch in range(epochs):
        for x, y, seq_len in iterator.get_bptt_batch_iterator(train_tokens, batch_size, bptt):
            global_step.assign_add(1)
            minimize_w_clipping(optimizer, model, lm_loss_sparse, x, y, seq_len, logging=True, it=iterations, log_every=10)
            iterations += 1
        
        # Compute validation score
        score = compute_score(model, val_tokens, val_batch_size, bptt)
        log_scalar('val_perplex', score)
        print("Validation score is {0}".format(score))

        if score < curr_val_score:
            curr_val_score = score
            model.save_model()

# Penn Treebank training

In [None]:
max_voc = 100000
bptt = 10

n_epochs = 10

In [None]:
p_tr_tokens, p_val_tokens, p_test_tokens, p_vocab = penn_treebank.load_data(max_voc)

In [None]:
voc_size = len(p_vocab)

In [None]:
awd_model = LanguageModelAWD(voc_size)

In [None]:
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)

In [None]:
awd_model.restore_model(ckpt='./checkpoint_awd/')

In [None]:
train(awd_model, p_tr_tokens, p_val_tokens, p_vocab, batch_size=256, epochs=100,  val_batch_size=256)

# Best Validation Score

In [None]:
compute_score(awd_model, p_val_tokens, 256, bptt)

# Test Score

In [None]:
compute_score(awd_model, p_test_tokens, 256, bptt)

# WikiText Training

In [4]:
max_voc = 100000
bptt = 10

In [5]:
w_tr_tokens, w_val_tokens, w_test_tokens, w_vocab = wikitext.load_data(max_voc)

In [8]:
voc_size = len(w_vocab)

In [9]:
awd_model = LanguageModelAWD(voc_size)

In [10]:
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)

In [17]:
train(awd_model, w_tr_tokens, w_val_tokens, w_vocab, batch_size=256, epochs=100,  val_batch_size=256)

In [14]:
awd_model.save_model()

In [15]:
compute_score(awd_model, w_val_tokens, 256, bptt)

167.55423

In [16]:
compute_score(awd_model, w_test_tokens, 256, bptt)

164.8462