In [1]:
from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io
import argparse

Using TensorFlow backend.


In [2]:
!git clone --quiet https://github.com/KCL-Health-NLP/nlp_examples.git 

fatal: destination path 'nlp_examples' already exists and is not an empty directory.


In [0]:

## Shakespeare example
DEFAULT_TEXT_FILE="nlp_examples/ann/lstm/shakespeare.txt"

## MTsamples example
#DEFAULT_TEXT_FILE="nlp_examples/ann/lstm/mtsamples.txt"

In [0]:
## Shakespeare example
DEFAULT_MODEL_FILE='nlp_examples/ann/lstm/shakespeare.h5'

## MTSamples example
DEFAULT_MODEL_FILE='nlp_examples/ann/lstm/mtsamples.h5'

In [0]:
DEFAULT_EPOCHS=60

In [0]:
DESCRIPTION = """
Example script to build a model from example texts, and
generate synthetic texts from that model, using LSTMs.
At least 20 epochs are required before the generated text
starts sounding coherent.
It is recommended to run this script on GPU, as recurrent
networks are quite computationally intensive.
Make sure your corpus has at least ~100k characters.
~1M is better.
Based on a script from the Keras Team at
https://github.com/keras-team/keras/tree/master/examples
"""

In [0]:
def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [0]:
def on_epoch_end(epoch, logs):
    # Function invoked at end of each epoch. Prints generated text.
    print()
    print('----- Generating text after Epoch: %d' % epoch)
    generate_text()

In [0]:
def generate_text():
    # Create a seed and generate some examples
    start_index = random.randint(0, len(text) - maxlen - 1)
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('----- diversity:', diversity)

        generated = ''
        sentence = text[start_index: start_index + maxlen]
        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        for i in range(400):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            generated += next_char
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

In [0]:
epochs=DEFAULT_EPOCHS
path=DEFAULT_TEXT_FILE
model = load_model(DEFAULT_MODEL_FILE)

In [28]:
with io.open(path, encoding='utf-8') as f:
    text = f.read().lower()
print('corpus length:', len(text))

corpus length: 599984


In [29]:
chars = sorted(list(set(text)))
print('total chars:', len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

total chars: 58


In [30]:
# cut the text in semi-redundant sequences of maxlen characters
maxlen = 40
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

nb sequences: 199982


In [31]:
generate_text()

----- diversity: 0.2
----- Generating with seed: "y mass. in retrospect sellar enlargement"
y mass. in retrospect sellar enlargement of the left protocol.  there is a last and and consistent with an and specific to the left parietal and which shows consistent with a lower and and latera

  after removing the cwd from sys.path.


l complex and probable to the right hemisume with and the patient was mild thickening with and brain in the left lower extremities.

course:
 the patient was and present and posterior and spontained through the left protocol and a sprain in the 
----- diversity: 0.5
----- Generating with seed: "y mass. in retrospect sellar enlargement"
y mass. in retrospect sellar enlargement with right probea.

left parotid and intermittent stress into the left front presentation.  there is a followup was compatible with and significant prior examination of the right highddlinal cord scan and interval with and dr. minimal and left plantar sinuses without disc stational perfusion and presentation.

findings:
  this is a produre that this pain.

the vessel or memogia is of left probeat
----- diversity: 1.0
----- Generating with seed: "y mass. in retrospect sellar enlargement"
y mass. in retrospect sellar enlargement y/o bene-cele on the worded through no fall, but wadoxit sourses, and collification afee