In [1]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

In [6]:
# Toy corpus of three short sentences.  The first two sentences are followed
# by two consecutive newline characters and the string ends with a single
# newline, so splitting on '\n' also yields empty entries between sentences.
text = (
    "He is sick\n\n"
    "That is so sick\n\n"
    "The sick cat is dying\n"
)

In [7]:
# Learn the word -> integer mapping from the corpus (passed as one document).
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])

# Word indices start at 1, so one extra slot is needed to cover index 0.
vocab_size = 1 + len(tokenizer.word_index)
print("Size of vocabulary : %d" % (vocab_size,))

Size of vocabulary : 9


In [8]:
# Show the learned word -> index mapping.
print(tokenizer.word_index)

{'is': 1, 'sick': 2, 'he': 3, 'that': 4, 'so': 5, 'the': 6, 'cat': 7, 'dying': 8}


In [25]:
# Build the training samples: for every line of the corpus, emit each prefix
# of its encoded word sequence that contains at least two tokens (some context
# plus the word to predict).  Blank lines encode to an empty list and therefore
# contribute no samples.
sequences = []
for line in text.split('\n'):
    # texts_to_sequences expects a list of texts; wrap the single line in a
    # list (not a set literal) and take its one encoded result.
    encoded = tokenizer.texts_to_sequences([line])[0]
    # Prefix lengths 2..len(encoded); the final token of each prefix is later
    # split off as the label.
    sequences.extend(encoded[:end] for end in range(2, len(encoded) + 1))

In [28]:
# Show every generated prefix sample, followed by the total sample count.
print(sequences, "\nCount of sample : ", len(sequences))

[[3, 1], [3, 1, 2], [4, 1], [4, 1, 5], [4, 1, 5, 2], [6, 2], [6, 2, 7], [6, 2, 7, 1], [6, 2, 7, 1, 8]] 
Count of sample :  9


In [29]:
# The longest sample determines the width every sample is padded to.
max_len = max(map(len, sequences))
print("Maximum length of sample : {}".format(max_len))

Maximum length of sample : 5


In [30]:
# Left-pad ('pre') every sample with zeros up to max_len so the word to
# predict always ends up in the last column.
sequences = pad_sequences(
    sequences,
    maxlen=max_len,
    padding='pre',
)

In [31]:
# Inspect the padded samples (one row per sample).
print(sequences)

[[0 0 0 3 1]
 [0 0 3 1 2]
 [0 0 0 4 1]
 [0 0 4 1 5]
 [0 4 1 5 2]
 [0 0 0 6 2]
 [0 0 6 2 7]
 [0 6 2 7 1]
 [6 2 7 1 8]]


In [40]:
# Split each padded sample into its context (every column but the last) and
# the word to predict (the last column).
sequences = np.array(sequences)
X, y = sequences[:, :-1], sequences[:, -1]

In [41]:
# Show the input contexts, then the integer label belonging to each row.
print(X,"\n")
print(y) # y holds the labels: the index of the word that follows each row of X

[[0 0 0 3]
 [0 0 3 1]
 [0 0 0 4]
 [0 0 4 1]
 [0 4 1 5]
 [0 0 0 6]
 [0 0 6 2]
 [0 6 2 7]
 [6 2 7 1]] 

[1 2 1 5 2 2 7 1 8]


In [42]:
# One-hot encode the labels so they line up with the model's softmax output of
# width vocab_size.  The integer labels are re-read from the last column of
# `sequences` instead of from `y` itself, so re-running this cell does not
# one-hot encode an already one-hot matrix.
y = to_categorical(sequences[:, -1], num_classes=vocab_size)

In [43]:
# Inspect the one-hot labels: one row per sample, one column per vocabulary index.
print(y)

[[0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1.]]


In [55]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, SimpleRNN

In [57]:
# Model hyperparameters.
embedding_dim = 10   # width of each word vector produced by the Embedding layer
hidden_units = 32    # number of units in the SimpleRNN layer

# Next-word predictor: embed the word indices, feed them through a simple RNN,
# and emit a softmax score for every vocabulary index.
model = Sequential([
    Embedding(vocab_size, embedding_dim),
    SimpleRNN(hidden_units),
    Dense(vocab_size, activation='softmax'),
])

In [58]:
# Categorical cross-entropy pairs with the one-hot labels; accuracy is tracked
# alongside the loss.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# verbose=2 logs one line per epoch.  fit() stays the last expression of the
# cell, so the History object it returns is shown as the cell output.
model.fit(
    X,
    y,
    epochs=200,
    verbose=2,
)

Epoch 1/200
1/1 - 0s - loss: 2.1925 - accuracy: 0.1111 - 475ms/epoch - 475ms/step
Epoch 2/200
1/1 - 0s - loss: 2.1774 - accuracy: 0.1111 - 14ms/epoch - 14ms/step
Epoch 3/200
1/1 - 0s - loss: 2.1621 - accuracy: 0.1111 - 19ms/epoch - 19ms/step
Epoch 4/200
1/1 - 0s - loss: 2.1468 - accuracy: 0.1111 - 11ms/epoch - 11ms/step
Epoch 5/200
1/1 - 0s - loss: 2.1312 - accuracy: 0.2222 - 21ms/epoch - 21ms/step
Epoch 6/200
1/1 - 0s - loss: 2.1153 - accuracy: 0.3333 - 27ms/epoch - 27ms/step
Epoch 7/200
1/1 - 0s - loss: 2.0990 - accuracy: 0.6667 - 20ms/epoch - 20ms/step
Epoch 8/200
1/1 - 0s - loss: 2.0823 - accuracy: 0.6667 - 10ms/epoch - 10ms/step
Epoch 9/200
1/1 - 0s - loss: 2.0650 - accuracy: 0.6667 - 18ms/epoch - 18ms/step
Epoch 10/200
1/1 - 0s - loss: 2.0471 - accuracy: 0.6667 - 19ms/epoch - 19ms/step
Epoch 11/200
1/1 - 0s - loss: 2.0284 - accuracy: 0.6667 - 28ms/epoch - 28ms/step
Epoch 12/200
1/1 - 0s - loss: 2.0090 - accuracy: 0.6667 - 16ms/epoch - 16ms/step
Epoch 13/200
1/1 - 0s - loss: 1.988

Epoch 103/200
1/1 - 0s - loss: 0.5930 - accuracy: 0.8889 - 10ms/epoch - 10ms/step
Epoch 104/200
1/1 - 0s - loss: 0.5837 - accuracy: 0.8889 - 8ms/epoch - 8ms/step
Epoch 105/200
1/1 - 0s - loss: 0.5743 - accuracy: 0.8889 - 11ms/epoch - 11ms/step
Epoch 106/200
1/1 - 0s - loss: 0.5651 - accuracy: 0.8889 - 10ms/epoch - 10ms/step
Epoch 107/200
1/1 - 0s - loss: 0.5558 - accuracy: 0.8889 - 9ms/epoch - 9ms/step
Epoch 108/200
1/1 - 0s - loss: 0.5467 - accuracy: 0.8889 - 10ms/epoch - 10ms/step
Epoch 109/200
1/1 - 0s - loss: 0.5376 - accuracy: 0.8889 - 11ms/epoch - 11ms/step
Epoch 110/200
1/1 - 0s - loss: 0.5285 - accuracy: 0.8889 - 9ms/epoch - 9ms/step
Epoch 111/200
1/1 - 0s - loss: 0.5195 - accuracy: 0.8889 - 9ms/epoch - 9ms/step
Epoch 112/200
1/1 - 0s - loss: 0.5106 - accuracy: 0.8889 - 11ms/epoch - 11ms/step
Epoch 113/200
1/1 - 0s - loss: 0.5018 - accuracy: 0.8889 - 10ms/epoch - 10ms/step
Epoch 114/200
1/1 - 0s - loss: 0.4931 - accuracy: 0.8889 - 9ms/epoch - 9ms/step
Epoch 115/200
1/1 - 0s - l

<keras.callbacks.History at 0x2534668b8b0>