<a href="https://colab.research.google.com/github/Piyush01Bhatt/Deep-Learning/blob/master/Text_Generation_Using_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This is a notebook explaining text generation using LSTM(Long Short Term Memory) networks. I will be using famous poem of John Keats : A Thing of Beauty as my data. Lstm network will be implemented using Keras(Tensorflow).

In [0]:
!pip install tensorflow-gpu==2.0.0-beta1



In [0]:
import tensorflow as tf
import numpy as np
from keras.utils import np_utils

In [0]:
poem = """A THING of beauty is a joy forever:	
        Its loveliness increases; it will never	
        Pass into nothingness; but still will keep	
        A bower quiet for us, and a sleep	
        Full of sweet dreams, and health, and quiet breathing.	        
        Therefore, on every morrow, are we wreathing	
        A flowery band to bind us to the earth,	
        Spite of despondence, of the inhuman dearth	
        Of noble natures, of the gloomy days,	
        Of all the unhealthy and o’er-darkened ways	        
        Made for our searching: yes, in spite of all,	
        Some shape of beauty moves away the pall	
        From our dark spirits. Such the sun, the moon,	
        Trees old and young, sprouting a shady boon	
        For simple sheep; and such are daffodils	        
        With the green world they live in; and clear rills	
        That for themselves a cooling covert make	
        ’Gainst the hot season; the mid-forest brake,	
        Rich with a sprinkling of fair musk-rose blooms:	
        And such too is the grandeur of the dooms	        
        We have imagined for the mighty dead;	
        All lovely tales that we have heard or read:	
        An endless fountain of immortal drink,	
        Pouring unto us from the heaven’s brink."""

In [0]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [0]:
tokenizer = Tokenizer()
corpus = poem.lower().split("\n")
print(f'Corpus length = {len(corpus)}')
tokenizer.fit_on_texts(corpus)
print(tokenizer.word_index)

Corpus length = 24
{'the': 1, 'of': 2, 'a': 3, 'and': 4, 'for': 5, 'us': 6, 'we': 7, 'all': 8, 'such': 9, 'beauty': 10, 'is': 11, 'will': 12, 'quiet': 13, 'are': 14, 'to': 15, 'spite': 16, 'our': 17, 'in': 18, 'from': 19, 'with': 20, 'that': 21, 'have': 22, 'thing': 23, 'joy': 24, 'forever': 25, 'its': 26, 'loveliness': 27, 'increases': 28, 'it': 29, 'never': 30, 'pass': 31, 'into': 32, 'nothingness': 33, 'but': 34, 'still': 35, 'keep': 36, 'bower': 37, 'sleep': 38, 'full': 39, 'sweet': 40, 'dreams': 41, 'health': 42, 'breathing': 43, 'therefore': 44, 'on': 45, 'every': 46, 'morrow': 47, 'wreathing': 48, 'flowery': 49, 'band': 50, 'bind': 51, 'earth': 52, 'despondence': 53, 'inhuman': 54, 'dearth': 55, 'noble': 56, 'natures': 57, 'gloomy': 58, 'days': 59, 'unhealthy': 60, 'o’er': 61, 'darkened': 62, 'ways': 63, 'made': 64, 'searching': 65, 'yes': 66, 'some': 67, 'shape': 68, 'moves': 69, 'away': 70, 'pall': 71, 'dark': 72, 'spirits': 73, 'sun': 74, 'moon': 75, 'trees': 76, 'old': 77, '

In [0]:
print(len(tokenizer.word_index))

126


In [0]:
sequences = np.array(tokenizer.texts_to_sequences(corpus))
print(sequences[0])
padded = pad_sequences(sequences,truncating='post',padding='post')
print(padded[0])

[3, 23, 2, 10, 11, 3, 24, 25]
[ 3 23  2 10 11  3 24 25  0  0]


In [0]:
predictor = padded[:,:-1]
labels = padded[:,1:]
print(f'predictor sequence = {predictor[0]}')
print(f'label sequence = {labels[0]}')

predictor sequence = [ 3 23  2 10 11  3 24 25  0]
label sequence = [23  2 10 11  3 24 25  0  0]


In [0]:
print(predictor.shape)
n_patterns = predictor.shape[0]
seq_length = predictor.shape[1]

(24, 9)


In [0]:
# reshape X to be [samples, time steps, features]
X = np.reshape(predictor, (n_patterns, seq_length))
# one hot encode the output variable
y = np_utils.to_categorical(labels)

In [0]:
X.shape

(24, 9)

In [0]:
y.shape

(24, 9, 127)

In [0]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(127,10))
model.add(tf.keras.layers.LSTM(150,return_sequences=True))
model.add(tf.keras.layers.Dropout(0.1))
model.add(tf.keras.layers.Dense(127,activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 10)          1270      
_________________________________________________________________
lstm_1 (LSTM)                (None, None, 150)         96600     
_________________________________________________________________
dropout_1 (Dropout)          (None, None, 150)         0         
_________________________________________________________________
dense_1 (Dense)              (None, None, 127)         19177     
Total params: 117,047
Trainable params: 117,047
Non-trainable params: 0
_________________________________________________________________


In [0]:
model.fit(X, y, epochs=1000, verbose=1)

W0810 15:01:13.877632 140518828152704 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 24 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/

<tensorflow.python.keras.callbacks.History at 0x7fccacf1bf60>

In [0]:
start_string = 'grandeur'
str_encoded = tokenizer.texts_to_sequences([start_string])
print(str_encoded)

[[108]]


In [0]:
model.predict_classes(str_encoded)[0][0]

2

In [0]:
reverse = {}
for key in tokenizer.word_index:
  reverse[tokenizer.word_index[key]] = key
print(reverse)

{1: 'the', 2: 'of', 3: 'a', 4: 'and', 5: 'for', 6: 'us', 7: 'we', 8: 'all', 9: 'such', 10: 'beauty', 11: 'is', 12: 'will', 13: 'quiet', 14: 'are', 15: 'to', 16: 'spite', 17: 'our', 18: 'in', 19: 'from', 20: 'with', 21: 'that', 22: 'have', 23: 'thing', 24: 'joy', 25: 'forever', 26: 'its', 27: 'loveliness', 28: 'increases', 29: 'it', 30: 'never', 31: 'pass', 32: 'into', 33: 'nothingness', 34: 'but', 35: 'still', 36: 'keep', 37: 'bower', 38: 'sleep', 39: 'full', 40: 'sweet', 41: 'dreams', 42: 'health', 43: 'breathing', 44: 'therefore', 45: 'on', 46: 'every', 47: 'morrow', 48: 'wreathing', 49: 'flowery', 50: 'band', 51: 'bind', 52: 'earth', 53: 'despondence', 54: 'inhuman', 55: 'dearth', 56: 'noble', 57: 'natures', 58: 'gloomy', 59: 'days', 60: 'unhealthy', 61: 'o’er', 62: 'darkened', 63: 'ways', 64: 'made', 65: 'searching', 66: 'yes', 67: 'some', 68: 'shape', 69: 'moves', 70: 'away', 71: 'pall', 72: 'dark', 73: 'spirits', 74: 'sun', 75: 'moon', 76: 'trees', 77: 'old', 78: 'young', 79: 'sp

In [0]:
num_words = 20
text = [start_string]
word = str_encoded
for i in range(num_words):
  word = model.predict_classes(word)
  text.append(reverse[word[0][0]])
print(text)

['grandeur', 'of', 'noble', 'have', 'our', 'flowery', 'simple', 'old', 'old', 'old', 'old', 'old', 'old', 'old', 'old', 'old', 'old', 'old', 'old', 'old', 'old']


The prediction are a little gibberish and repetitive after a certain length. Its probably because of lack of data. Training the model on more corpus can result in more meaningful sentences and less repetitive words

In [0]:
!wget https://storage.googleapis.com/laurencemoroney-blog.appspot.com/irish-lyrics-eof.txt

--2019-08-11 07:50:20--  https://storage.googleapis.com/laurencemoroney-blog.appspot.com/irish-lyrics-eof.txt
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.142.128, 2607:f8b0:400e:c06::80
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.142.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 68970 (67K) [text/plain]
Saving to: ‘irish-lyrics-eof.txt’


2019-08-11 07:50:20 (73.7 MB/s) - ‘irish-lyrics-eof.txt’ saved [68970/68970]



In [0]:
with open('irish-lyrics-eof.txt') as f:
  data = f.read().splitlines()
print(data[0:5])

['Come all ye maidens young and fair', 'And you that are blooming in your prime', 'Always beware and keep your garden fair', 'Let no man steal away your thyme', 'For thyme it is a precious thing']


In [0]:
len(data)

1692

In [0]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data)
print(tokenizer.word_index)



In [0]:
sequences = np.array(tokenizer.texts_to_sequences(data))
print(sequences[0])
padded = pad_sequences(sequences,truncating='post',padding='pre')
print(padded[0])

[51, 12, 96, 1217, 48, 2, 69]
[   0    0    0    0    0    0    0    0    0   51   12   96 1217   48
    2   69]


In [0]:
padded.shape

(1692, 16)

In [0]:
seq = padded[:,:-1]
labels = padded[:,-1]

In [0]:
print(f'{seq[0]}..........{labels[0]}')

[   0    0    0    0    0    0    0    0    0   51   12   96 1217   48
    2]..........69


In [0]:
seq.shape

(1692, 15)

In [0]:
labels.shape

(1692,)

In [0]:
# one hot encode the output variable
labels = np_utils.to_categorical(labels)

In [0]:
labels.shape

(1692, 2686)

In [0]:
len(tokenizer.word_index)

2689

In [0]:
#model
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(2690,64))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(150)))
model.add(tf.keras.layers.Dropout(0.1))
model.add(tf.keras.layers.Dense(2686,activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, None, 64)          172160    
_________________________________________________________________
bidirectional (Bidirectional (None, 300)               258000    
_________________________________________________________________
dropout_2 (Dropout)          (None, 300)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 2686)              808486    
Total params: 1,238,646
Trainable params: 1,238,646
Non-trainable params: 0
_________________________________________________________________


In [0]:
model.fit(seq, labels, epochs=60, verbose=1)

Train on 1692 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


<tensorflow.python.keras.callbacks.History at 0x7fcc5c8019e8>

In [0]:
reverse_dic = {value:key for key,value in tokenizer.word_index.items()}

In [0]:
def generate_text(seed,num_sent):
  for _ in range(num_sent):
    token_list = tokenizer.texts_to_sequences([seed])[0]
    token_list = pad_sequences([token_list],maxlen=15,padding='pre')
    predicted = model.predict_classes(token_list,verbose=0)[0]
    #print(f'predicted = {predicted}')
    output_word = ""
    if predicted in reverse_dic:
      output_word = reverse_dic[predicted]
    seed += " " + output_word
  return seed


In [0]:
generated_text = generate_text("that is the way i like",100)

In [0]:
generated_text

'that is the way i like athy malone lass mooncoin taken married bubblin here eyes eyes eyes eyes frame man year tears locality jewel runaway barley yet eyes eyes frisky sweet rest rest rest rest eye eyes after fulfill kilkenny after show bran caubeen entwine been moving man laughing laughing year eye eyes eyes eyes eyes eyes eyes eyes eyes frame tie rest sod pride year year hill derry craw wobblin wobblin caubeen caubeen molly grey ra sent sent sent sent sent trace boyne frame rocky ground sod sod pride unseen stick sod pride pride canal canal play wonder unseen play sends grey play unseen tory'

## character level generator

In [0]:
corpus = open('irish-lyrics-eof.txt', 'rb').read().decode(encoding='utf-8')

In [0]:
corpus[0:100]

'Come all ye maidens young and fair\nAnd you that are blooming in your prime\nAlways beware and keep yo'

In [0]:
char_tokenizer = Tokenizer(char_level=True)
char_tokenizer.fit_on_texts(corpus)
print(char_tokenizer.word_index)

{' ': 1, 'e': 2, 'a': 3, 'o': 4, 't': 5, 'n': 6, 'i': 7, 'r': 8, 'h': 9, 's': 10, 'l': 11, 'd': 12, '\n': 13, 'y': 14, 'm': 15, 'w': 16, 'u': 17, 'g': 18, 'f': 19, 'b': 20, 'c': 21, ',': 22, 'p': 23, 'v': 24, 'k': 25, '.': 26, 'j': 27, '-': 28, '!': 29, ';': 30, 'q': 31, 'x': 32, '?': 33, 'z': 34, ':': 35, 'í': 36, 'ú': 37, 'ó': 38, '3': 39, '(': 40, ')': 41, '�': 42, '1': 43, '8': 44, '0': 45, 'á': 46, '&': 47, '2': 48}


In [0]:
len(char_tokenizer.word_index)

48

In [0]:
char_sequences = np.array(char_tokenizer.texts_to_sequences(corpus))
print(char_sequences[0:10])

[[21]
 [ 4]
 [15]
 [ 2]
 [ 1]
 [ 3]
 [11]
 [11]
 [ 1]
 [14]]


In [0]:
len(corpus)

68953

In [0]:
slce = char_sequences[0:12]
type(slce)

numpy.ndarray

In [0]:
slce.T[0]

array([21,  4, 15,  2,  1,  3, 11, 11,  1, 14,  2,  1])

In [0]:
len(char_sequences)

68953

In [0]:
seq_list = np.empty((0,90), int)
for i in range(0,(len(corpus)//90)):
  k = i*90
  seq_list = np.append(seq_list,[char_sequences[k:k+90].T[0]],axis=0)
print(seq_list[0])

[21  4 15  2  1  3 11 11  1 14  2  1 15  3  7 12  2  6 10  1 14  4 17  6
 18  1  3  6 12  1 19  3  7  8 13  3  6 12  1 14  4 17  1  5  9  3  5  1
  3  8  2  1 20 11  4  4 15  7  6 18  1  7  6  1 14  4 17  8  1 23  8  7
 15  2 13  3 11 16  3 14 10  1 20  2 16  3  8  2  1  3]


In [0]:
seq_list.shape

(766, 90)

In [0]:
inp = seq_list[:,:-1]
oup = seq_list[:,1:]
print(f'input seq = {inp[0]}')
print(f'output seq = {oup[0]}')

input seq = [21  4 15  2  1  3 11 11  1 14  2  1 15  3  7 12  2  6 10  1 14  4 17  6
 18  1  3  6 12  1 19  3  7  8 13  3  6 12  1 14  4 17  1  5  9  3  5  1
  3  8  2  1 20 11  4  4 15  7  6 18  1  7  6  1 14  4 17  8  1 23  8  7
 15  2 13  3 11 16  3 14 10  1 20  2 16  3  8  2  1]
output seq = [ 4 15  2  1  3 11 11  1 14  2  1 15  3  7 12  2  6 10  1 14  4 17  6 18
  1  3  6 12  1 19  3  7  8 13  3  6 12  1 14  4 17  1  5  9  3  5  1  3
  8  2  1 20 11  4  4 15  7  6 18  1  7  6  1 14  4 17  8  1 23  8  7 15
  2 13  3 11 16  3 14 10  1 20  2 16  3  8  2  1  3]


In [0]:
print(f'input shape = {inp.shape}')
print(f'output shape = {oup.shape}')

input shape = (766, 89)
output shape = (766, 89)


In [0]:
y = np_utils.to_categorical(oup)
print(y.shape)

(766, 89, 49)


In [0]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(49,64))
model.add(tf.keras.layers.LSTM(150,return_sequences=True))
model.add(tf.keras.layers.Dropout(0.1))
model.add(tf.keras.layers.Dense(49,activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 64)          3136      
_________________________________________________________________
lstm (LSTM)                  (None, None, 150)         129000    
_________________________________________________________________
dropout (Dropout)            (None, None, 150)         0         
_________________________________________________________________
dense (Dense)                (None, None, 49)          7399      
Total params: 139,535
Trainable params: 139,535
Non-trainable params: 0
_________________________________________________________________


In [0]:
model.fit(inp, y, epochs=1000, verbose=1)

W0811 07:55:41.263885 140718941046656 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 766 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71

<tensorflow.python.keras.callbacks.History at 0x7ffaf83b0828>

In [0]:
seed = 'hello'
seed = np.array(char_tokenizer.texts_to_sequences(seed)).T[0]
print(seed)

[ 9  2 11 11  4]


In [0]:
seed = seed.reshape(1,5)

In [0]:
seed.shape

(1, 5)

In [0]:
seed[0][seed.shape[1]-1].reshape(1,1)

array([[4]])

In [0]:
predictions = model(seed)

In [0]:
predictions

<tf.Tensor: id=172446, shape=(1, 5, 49), dtype=float32, numpy=
array([[[4.27416076e-08, 6.69673309e-02, 6.72277391e-01, 9.55607221e-02,
         2.00760737e-03, 3.84445325e-03, 6.00696922e-07, 1.08973354e-01,
         2.53442279e-03, 6.03642945e-07, 2.07328718e-04, 3.47583809e-05,
         4.51505999e-04, 3.41840750e-05, 1.08152314e-03, 1.43344188e-02,
         6.07154698e-06, 9.91225243e-03, 2.11077649e-03, 6.17920232e-05,
         2.75258295e-04, 3.68304172e-04, 1.80761572e-02, 3.97519871e-05,
         8.12248572e-07, 4.62523985e-06, 1.85449710e-04, 7.72030216e-06,
         9.09944283e-05, 5.34409832e-04, 2.48425408e-07, 1.82843269e-08,
         1.49653102e-11, 8.99441588e-09, 1.11337602e-06, 1.56662439e-09,
         2.06337722e-06, 1.60665992e-09, 1.10562496e-05, 2.91611464e-08,
         1.30208747e-07, 1.88802929e-07, 1.95287875e-08, 1.48406594e-08,
         2.82168742e-08, 4.37569270e-09, 3.54604225e-07, 1.21732583e-07,
         7.61738491e-14],
        [9.42701611e-11, 6.85415685

In [0]:
predictions = tf.squeeze(predictions,0)

In [0]:
temperature = 1.0
predictions = predictions / temperature
predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()
input_eval = tf.expand_dims([predicted_id], 0)

In [0]:
input_eval.numpy()

array([[30]], dtype=int32)

In [0]:
my_pred = model.predict_classes(seed)

In [0]:
my_pred

array([[ 2,  1,  2,  1, 16]])

In [0]:
predicted_id

33

In [0]:
predictions.shape

TensorShape([5, 49])

In [0]:
model.predict_classes([[16]])

array([[7]])

In [0]:
rev_char_dic = {value:key for key,value in char_tokenizer.word_index.items()}

In [0]:
def gen_text(seed,num_chars,temperature=1.0):
  txt = []
  input_eval = np.array(char_tokenizer.texts_to_sequences(seed)).T[0].reshape(1,-1)
  print(input_eval)
  for _ in range(num_chars):
    predictions = model(input_eval)
      # remove the batch dimension
    predictions = tf.squeeze(predictions, 0)

      # using a multinomial distribution to predict the word returned by the model
    predictions = predictions / temperature
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()
    #print(predicted_id)
      # We pass the predicted word as the next input to the model
      # along with the previous hidden state
    input_eval = tf.expand_dims([predicted_id], 0)
    if predicted_id == 0:
      predicted_id = 1
    txt.append(rev_char_dic[predicted_id])
  return (seed + ''.join(txt))
  

In [0]:
txt = gen_text("come all",200,0.043)
print(txt)

[[21  4 15  2  1  3 11 11]]
come all angon,
an the úthe moon the the re the o?112.
ango the the ,
aingo;
ango
and the ;
bome the the the ! .
an fo .
ang,
any the the o come fo óín ve o o the & fango 812
an e the d the pangle an xe : win


't'