In [1]:
'''Example script to generate text from Nietzsche's writings.

At least 20 epochs are required before the generated text
starts sounding coherent.

It is recommended to run this script on GPU, as recurrent
networks are quite computationally intensive.

If you try this script on new data, make sure your corpus
has at least ~100k characters. ~1M is better.
'''

from __future__ import print_function
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.layers import LSTM
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys

path = get_file('nietzsche.txt', origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt")
text = open(path).read().lower()
print('corpus length:', len(text))

chars = sorted(list(set(text)))
print('total chars:', len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

# cut the text in semi-redundant sequences of maxlen characters
maxlen = 40
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

print('Vectorization...')
X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1


# build the model: 2 stacked LSTM
print('Build model...')
model = Sequential()
model.add(LSTM(512, return_sequences=True, input_shape=(maxlen, len(chars))))
model.add(LSTM(512, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

def sample(a, temperature=1.0):
    # helper function to sample an index from a probability array
    a = np.log(a) / temperature
    a = np.exp(a) / np.sum(np.exp(a))
    return np.argmax(np.random.multinomial(1, a, 1))


Using TensorFlow backend.


corpus length: 600901
total chars: 59
nb sequences: 200287
Vectorization...
Build model...


In [2]:

# train the model, output generated text after each iteration
for iteration in range(1, 60):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(X, y, batch_size=128, nb_epoch=1)

    start_index = random.randint(0, len(text) - maxlen - 1)

    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print()
        print('----- diversity:', diversity)

        generated = ''
        sentence = text[start_index: start_index + maxlen]
        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        for i in range(400):
            x = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x[0, t, char_indices[char]] = 1.

            preds = model.predict(x, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            generated += next_char
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()


--------------------------------------------------
Iteration 1
Epoch 1/1

----- diversity: 0.2
----- Generating with seed: "--this book has been read most
indiffere"
--this book has been read most
indiffered

ValueError: sum(pvals[:-1]) > 1.0

In [46]:
from io import StringIO
import sys

class Hider:
    def __init__(self, channels=('stdout',)):
        self._stomach = StringIO()
        self._orig = {ch : None for ch in channels}

    def __enter__(self):
        for ch in self._orig:
            self._orig[ch] = getattr(sys, ch)
            setattr(sys, ch, self)
        return self

    def write(self, string):
        self._stomach.write(string)

    def flush(self):
        pass

    def autopsy(self):
        return self._stomach.getvalue()

    def __exit__(self, *args):
        for ch in self._orig:
            setattr(sys, ch, self._orig[ch])

In [1]:
from __future__ import print_function
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.layers import LSTM
from keras.layers.core import Reshape
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from data_interpreter_Keras import dataInterpreter, metaDataEndomondo
from inputManager import inputManager
import matplotlib.pyplot as plt

zMultiple = 5

data_path = "../multimodalDBM/endomondoHR_proper.json"
summaries_dir = "logs"
endoFeatures = ["sport", "heart_rate", "gender", "altitude", "time_elapsed", "distance", "new_workout", "derived_speed", "userId"]
trainValTestSplit = [0.8, 0.1, 0.1]
targetAtt = "heart_rate"
inputOrderNames = [x for x in endoFeatures if x!=targetAtt]
trimmed_workout_len = 450
num_steps = 128
batch_size_m = 64


endo_reader = dataInterpreter(fn=data_path, scaleVals=True, trimmed_workout_length=trimmed_workout_len)
endo_reader.buildDataSchema(endoFeatures, targetAtt, trainValTestSplit, zMultiple)
input_dim = endo_reader.getInputDim(targetAtt)
target_dim = endo_reader.getTargetDim(targetAtt)

num_samples = int((trimmed_workout_len*endo_reader.numDataPoints))

print('Build model...')
model = Sequential()
#model.add(Reshape((batch_size_m, num_steps, input_dim), batch_input_shape=(batch_size_m*num_steps, input_dim)))
model.add(LSTM(128, return_sequences=True, batch_input_shape=(batch_size_m, num_steps, input_dim), stateful=True))
model.add(Dropout(0.2))
model.add(LSTM(128, return_sequences=True, stateful=True))
model.add(Dropout(0.2))
model.add(Dense(target_dim))
model.add(Activation('linear'))

model.compile(loss='mean_squared_error', optimizer='rmsprop')
print("Endomodel Built!")

Using TensorFlow backend.


Loading metadata
Metadata loaded
('Number of data points: ', 180656)
Build model...
Endomodel Built!


In [None]:
epoch_train_scores = []
epoch_valid_scores = []
for iteration in range(1, 60):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    trainDataGen = endo_reader.endoIteratorSupervised(batch_size_m, num_steps, "train")  # A generator over the endomondo data
    train_losses = []
    print("Training")
    #fit_generator(dataGen, num_samples, 1)
    num_generator_runs = 0
    for X, y in trainDataGen:
        #input_data = np.reshape(X, (batch_size_m, num_steps, input_dim), order='C')
        #target_data = np.reshape(y, (batch_size_m, num_steps, target_dim), order='C')
        #model.fit(input_data, target_data, batch_size=batch_size_m, nb_epoch=1)
        #model.fit(X, y, batch_size=batch_size_m, nb_epoch=1)
        batch_loss = model.train_on_batch(X,y)
        train_losses.append(batch_loss)
        print("-", end='')
        #num_generator_runs+=1
        #print("Current batch loss: " + str(batch_loss) + "    average loss for current epoch: " + str(np.mean(train_losses)), end='\n')
        #print("\r")
        #print("\033[F]")
    #print("num generator runs: ", num_generator_runs)
    epoch_train_loss = np.mean(train_losses)
    print("\nTraining loss: " + str(epoch_train_loss))
    epoch_train_scores.append(epoch_train_loss)
    
    validDataGen = endo_reader.endoIteratorSupervised(batch_size_m, num_steps, "valid")
    valid_losses = []
    print("Validating")
    #evaluate_generator(dataGen, num_samples, 1)
    for X, y in validDataGen:
        batch_loss = model.test_on_batch(X, y)
        valid_losses.append(batch_loss)
        print("-", end='')
        num_generator_runs+=1
        #print("Current batch loss: " + str(batch_loss) + "    average loss for current epoch: " + str(np.mean(valid_losses)), end='\n')
    epoch_valid_loss = np.mean(valid_losses)
    print("num generator runs: ", num_generator_runs)
    print("\nValidation loss: " + str(epoch_valid_loss))
    epoch_valid_scores.append(np.mean(valid_losses))
        
    
    
    #start_index = random.randint(0, len(text) - maxlen - 1)
    """
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print()
        print('----- diversity:', diversity)
        
        testDataGen = endo_reader.endoIteratorSupervised(batch_size_m, num_steps, trainValidTest)  # A generator over the endomondo data
        X, y = testDataGen.next()
        generated = ''
        sentence = text[start_index: start_index + maxlen]
        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        for i in range(400):
            x = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x[0, t, char_indices[char]] = 1.

            preds = model.predict(x, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            generated += next_char
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()
        """


--------------------------------------------------
Iteration 1
Training
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [2]:
#Get a data point for plotting the progress
pred_gen = endo_reader.endoIteratorSupervised(batch_size_m, num_steps, "test")
prediction_inputs = []
prediction_targets = []
for i in range(trimmed_workout_len):
    timestep = pred_gen.next()
    prediction_inputs.append(timestep[0])
    prediction_targets.append(timestep[1])

for iteration in range(1, 60):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    trainDataGen = endo_reader.endoIteratorSupervised(batch_size_m, num_steps, "train")  # A generator over the endomondo data
    print("Training")
    fit_generator(trainDataGen, int(num_samples*trainValTestSplit[0]), 1)
    
    validDataGen = endo_reader.endoIteratorSupervised(batch_size_m, num_steps, "valid")
    
    #fit_generator(trainDataGen, int(num_samples*trainValTestSplit[0]), 1, validation_data=validDataGen, nb_val_samples = int(num_samples*trainValTestSplit[0]), nb_worker=)
    
    print("Validating")
    evaluate_generator(dataGen, int(num_samples*trainValTestSplit[1]), 1)
    
    print("Predicting")
    predictions = model.predict_on_batch(prediction_inputs)
    #Now plot the predictions vs the targets...
    indices=np.array(range(len(predictions)))
    plt.figure(num=None, figsize=(15, 10), dpi=100, facecolor='w', edgecolor='k')
    plt.plot(indices, predictions, 'r--', indices, prediction_targets, 'b--')
    plt.title("Model predictions: Red   Targets: Blue")
    plt.show()

NameError: name 'endo_reader' is not defined

In [2]:
for iteration in range(1, 60):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    
    trainDataGen = endo_reader.endoIteratorSupervised(batch_size_m, num_steps, "train")  # A generator over the endomondo data    
    validDataGen = endo_reader.endoIteratorSupervised(batch_size_m, num_steps, "valid")
    
    model.fit_generator(trainDataGen, int(num_samples*trainValTestSplit[0]), 1, validation_data=validDataGen, nb_val_samples = int(num_samples*trainValTestSplit[1]))



--------------------------------------------------
Iteration 1
Epoch 1/1

Exception in thread Thread-4:
Traceback (most recent call last):
  File "/usr/lib/python2.7/threading.py", line 810, in __bootstrap_inner
    self.run()
  File "/usr/lib/python2.7/threading.py", line 763, in run
    self.__target(*self.__args, **self.__kwargs)
  File "/usr/local/lib/python2.7/dist-packages/keras/engine/training.py", line 429, in data_generator_task
    generator_output = next(self._generator)
StopIteration



ValueError: output of generator should be a tuple (x, y, sample_weight) or (x, y). Found: None