In [1]:
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import initializers
import numpy as np
from tensorflow.keras.callbacks import ModelCheckpoint, Callback, TensorBoard

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Load the TensorBoard notebook extension so the %tensorboard magic used
# later in this notebook is available.
%load_ext tensorboard

In [3]:
filename = "wonderland.txt"

# Read the whole corpus inside a context manager so the file handle is closed
# deterministically instead of being left open until garbage collection.
# NOTE(review): no encoding is passed, so the platform default is used —
# confirm it matches the encoding of the text file.
with open(filename) as corpus_file:
    raw_text = corpus_file.read()

# Lower-case everything so that upper- and lower-case letters share one
# vocabulary entry.
raw_text = raw_text.lower()

In [4]:
# Distinct characters of the corpus in sorted order; a character's position
# in this list is its integer code.
chars = sorted(set(raw_text))

# Lookup tables in both directions: character -> code and code -> character.
char_to_int = {symbol: code for code, symbol in enumerate(chars)}
int_to_char = dict(enumerate(chars))

In [5]:
# Corpus length in characters and the number of distinct characters in it.
n_chars, n_vocab = len(raw_text), len(chars)

print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  462145
Total Vocab:  90


In [6]:
seq_length = 100

# Encode the corpus once up front. Each training window is then a plain list
# slice instead of a fresh round of per-character dictionary lookups (the
# same character used to be looked up once for every window containing it).
encoded_text = [char_to_int[char] for char in raw_text]

# Slide a window of seq_length characters over the text one step at a time:
# the window is the input pattern, the character right after it is the target.
window_starts = range(0, n_chars - seq_length, 1)

dataX = [encoded_text[start:start + seq_length] for start in window_starts]
dataY = [encoded_text[start + seq_length] for start in window_starts]

n_patterns = len(dataX)

print("Total Patterns: ", n_patterns)

Total Patterns:  462045


In [7]:
# reshape X to be [samples, time steps, features]
X = np.reshape(dataX, (n_patterns, seq_length, 1))

# normalize: character codes run from 0 to n_vocab - 1, so dividing by the
# vocabulary size scales every input into [0, 1)
X = X / float(n_vocab)

# one hot encode the output variable
# num_classes is pinned to the vocabulary size: left to inference, the width
# of y would be max(dataY) + 1, which is smaller than n_vocab whenever the
# highest character code never occurs as a target.
y = to_categorical(dataY, num_classes=n_vocab)

In [8]:
# One LSTM layer of 256 units over the (time steps, features) input, dropout
# of 0.2 on its output, and a softmax layer with one unit per column of y.
model = Sequential([
    LSTM(256, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])

model.compile(optimizer='adam', loss='categorical_crossentropy')

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [9]:
class print_callback(Callback):
    """Keras callback that prints a sample of generated text during training.

    A sample is printed after every ``dist``-th epoch (starting with the
    first) and after the final epoch of the run.
    """

    def __init__(self, data, int_to_char, dist):
        """Store the seed patterns and decoding table.

        Args:
            data: Sequence of seed patterns, each a list of integer
                character codes.
            int_to_char: Mapping from integer code back to its character.
            dist: Number of epochs between two printed samples.
        """
        # Let the Keras base class set up its own attributes.
        super().__init__()
        self.data = data
        self.int_to_char = int_to_char
        self.dist = dist

    def gen_text(self, size=100):
        """Generate ``size`` characters greedily from a randomly chosen seed.

        Uses ``self.model``, which Keras assigns when the callback is attached
        to a training run, so this is meant to be called during ``fit``.

        Args:
            size: Number of characters to generate.

        Returns:
            The generated characters joined into one string.
        """
        # Same scaling as the training inputs: the decoding table has exactly
        # one entry per vocabulary symbol.
        vocab_size = float(len(self.int_to_char))

        # The upper bound of randint is exclusive, so len(self.data) makes
        # every stored pattern (including the last one) eligible as a seed.
        start = np.random.randint(0, len(self.data))

        # Work on a copy: the rolling window below must never modify the
        # caller's pattern list.
        pattern = list(self.data[start])

        text = []
        for _ in range(size):
            x = np.reshape(pattern, (1, len(pattern), 1)) / vocab_size
            prediction = self.model.predict(x, verbose=0)
            # Greedy decoding: always take the most probable next character.
            index = int(np.argmax(prediction))
            text.append(self.int_to_char[index])
            # Slide the window: drop the oldest code, append the new one.
            pattern = pattern[1:] + [index]
        return "".join(text)

    def on_epoch_end(self, epoch, logs=None):
        """Print a 200-character sample on every ``dist``-th and the last epoch."""
        if epoch % self.dist == 0 or epoch == self.params["epochs"] - 1:
            print(f'epoch {epoch}/{self.params["epochs"]}:')
            gen = self.gen_text(200)
            print('Generated text: ', gen, sep='')

In [10]:
# Checkpointing: the weights are written whenever the training loss reaches a
# new minimum; epoch number and loss value are formatted into the file name.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"

checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# Callbacks handed to fit: checkpointing, a text sample every 5 epochs, and
# TensorBoard logging into the "logs" directory.
text_sampler = print_callback(dataX, int_to_char, 5)
tensorboard_logger = TensorBoard(log_dir='logs')

callbacks_list = [checkpoint, text_sampler, tensorboard_logger]

In [11]:
# Train for 20 epochs in batches of 128 with the callbacks defined above; the
# History object returned by fit is displayed as the cell's result.
model.fit(
    X,
    y,
    batch_size=128,
    epochs=20,
    callbacks=callbacks_list,
    verbose=True,
)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 1/20
Epoch 00001: loss improved from inf to 3.13979, saving model to weights-improvement-01-3.1398.hdf5
epoch 0/20:
Generated text: что сылоран верао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ваоао ва
Epoch 2/20
Epoch 00002: loss improved from 3.13979 to 3.03301, saving model to weights-improvement-02-3.0330.hdf5
Epoch 3/20
Epoch 00003: loss improved from 3.03301 to 2.91212, saving model to weights-improvement-03-2.9121.hdf5
Epoch 4/20
Epoch 00004: loss improved from 2.91212 to 2.71458, saving model to weights-improvement-04-2.7146.hdf5
Epoch 5/20
Epoch 00005: loss improved from 2.71458 to 2.51789, saving model to weights-improvement-05-2.5179.hdf5
Epoch 6/20
Epoch 00006: loss improved from 2.51789 to 2.38829, saving model to weights-improvement-06-2.3883.hd

<tensorflow.python.keras.callbacks.History at 0x1db47584a88>

In [19]:
%tensorboard --logdir /logs

Reusing TensorBoard on port 6006 (pid 7020), started 0:03:32 ago. (Use '!kill 7020' to kill it.)

In [17]:
# NOTE(review): hard-coded PID copied from an earlier TensorBoard status
# message; the recorded output shows no such process existed when this ran.
!kill 7020

kill: 7020: No such process
