In [1]:
import os, dill, time, random, numpy as np
from keras_gpt_2 import load_trained_model_from_checkpoint, get_bpe_from_files, generate

In [2]:
model_folder = 'models/124M'
config_path = os.path.join(model_folder, 'hparams.json')
checkpoint_path = os.path.join(model_folder, 'model.ckpt')
encoder_path = os.path.join(model_folder, 'encoder.json')
vocab_path = os.path.join(model_folder, 'vocab.bpe')


print('Load model from checkpoint...')
model = load_trained_model_from_checkpoint(config_path, checkpoint_path)
print('Load BPE from files...')
bpe = get_bpe_from_files(encoder_path, vocab_path)

Load model from checkpoint...
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Load BPE from files...


In [3]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Input (InputLayer)              [(None, None)]       0                                            
__________________________________________________________________________________________________
Embed-Token (EmbeddingRet)      [(None, None, 768),  38597376    Input[0][0]                      
__________________________________________________________________________________________________
Embed-Token-Pos (PositionEmbedd (None, None, 768)    786432      Embed-Token[0][0]                
__________________________________________________________________________________________________
Encode-0-MultiHeadAtt-Norm (Lay (None, None, 768)    1536        Embed-Token-Pos[0][0]            
______________________________________________________________________________________________

In [4]:
print('Generate text...')
start = time.time()
output = generate(model, bpe, ['From the day forth, my arm'], length=100, top_k=7, temperature=0.7)
end = time.time()
print(output[0])
print("\r\nprediction took " + str(end - start) + " seconds")

Generate text...
From the day forth, my arm would be held out, but it could not reach it; it must have had to rest for two months. But the man had no strength, and was so feeble and weak. He was a cripplesman; but, as I had seen, it could never have been so; he must never have had to work; but the doctor would tell his side of the truth. The patient could never walk or run, he must walk; he must never walk and run or walk again."



prediction took 41.48360753059387 seconds


In [5]:
print('Download shakespeare dataset...')
#get dataset
try:
    import urllib.request
    resource = urllib.request.urlopen("https://github.com/karpathy/char-rnn/blob/master/data/tinyshakespeare/input.txt")
    content =  resource.read().decode(resource.headers.get_content_charset())
    f = open('shakespeare.txt', 'w+')
    f.write(str(content))
    f.close()
except:
    pass
txt = open('shakespeare.txt', 'r')
dataset = txt.read()
txt.close()

Download shakespeare dataset...


In [6]:
print('Tokenize dataset...')
try:
    with open('tokens', 'rb') as f:
        tokens = dill.load(f)
except:
    tokens = bpe.encode(dataset) #takes a moment
    with open('tokens', 'wb') as f:
        dill.dump(tokens, f)

Tokenize dataset...


In [7]:
dictionary = list(set(tokens))
vocabulary_size = max(dictionary)
print('# of words in dataset: ' + str(len(tokens)))
print('# of tokens in dictionary: ' + str(vocabulary_size))

# of words in dataset: 340666
# of tokens in dictionary: 50255


In [8]:
def get_batch(LENGTH, batch_size):
    encoder_input_batch = []
    target_output_batch = []
    for _ in range(batch_size):
        random_start = int(random.random() * (len(tokens) - (LENGTH * 2)) + 1)
        
        encoder_input = tokens[random_start : random_start + LENGTH]
        encoder_input_batch.append(encoder_input)
        target_output = tokens[random_start + 1 : random_start + LENGTH + 1]
        target_output_batch.append(target_output)

    yield np.asarray(encoder_input_batch), np.expand_dims(np.asarray(target_output_batch), 2)

In [9]:
from keras_gpt_2.backend import backend as K
from keras_gpt_2.backend import optimizers, losses, callbacks

def perplexity(y_true, y_pred):
    """
    Popular metric for evaluating language modelling architectures.
    More info: http://cs224d.stanford.edu/lecture_notes/LectureNotes4.pdf
    """
    cross_entropy = K.sparse_categorical_crossentropy(y_true, y_pred)
    return K.mean(K.exp(K.mean(cross_entropy, axis=-1)))    

def lr_scheduler(epoch, lr):
    return lr * 0.9999

optimizer = optimizers.Adam(lr=0.001, beta_1=0.6, beta_2=0.999)
callbacks = [callbacks.LearningRateScheduler(lr_scheduler, verbose=0)]
loss = losses.sparse_categorical_crossentropy
model.compile(optimizer, loss, metrics=[perplexity])

In [10]:
epoch = 1
while epoch < 1000:
    x, y = next(get_batch(1024, 1))
    start = time.time()
    train_result = model.train_on_batch(x, y)
    train_result = model.fit(x, y, epochs = 1, callbacks = callbacks, verbose = 0)
    end = time.time()

    print(
        'epoch: ' + str(epoch)
        + '\t| loss: ' + "{:.3f}".format(train_result.history['loss'][0])
        + '\t| perplexity: ' + "{:.3f}".format(train_result.history['perplexity'][0])
        + '\t| time: ' + "{:.3f}".format(end - start) + 's'
        + '\t| lr: ' + "{:.7f}".format(train_result.history['lr'][0])
    )
    epoch = epoch + 1

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
epoch: 1	| loss: 8.016	| perplexity: 3030.255	| time: 59.123s	| lr: 0.0009999
epoch: 2	| loss: 6.974	| perplexity: 1068.365	| time: 30.130s	| lr: 0.0009998


KeyboardInterrupt: 

In [11]:
print('Generate text...')
start = time.time()
output = generate(model, bpe, ['From the day forth, my arm'], length=100, top_k=7, temperature=0.7)
end = time.time()
print(output[0])
print("\r\nprediction took " + str(end - start) + " seconds")

Generate text...
From the day forth, my arm-somewhing, it would have been more time for him. The only one to make a great, unedited out the last day of the time. I thought that was the best time I had had, I have a heart.
 I had been in an out, though it is still alive! I was too afraid of a long, long, but it seemed to make his head-ended in his heart-in.






 I am here too, he had his

prediction took 43.76806616783142 seconds
