In [55]:
from numpy import array
import numpy as np
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import Embedding

In [2]:
# Source data: a small three-sentence corpus about deep learning that is used
# as the only training text in this notebook. Each sentence ends with an
# explicit "\n" escape in addition to the physical line break of the literal.
data="""Deep learning is a kind of machine learning, which is mostly used with certain kinds of neural networks\n
As with other kinds of machine-learning, learning sessions can be unsupervised, semi-supervised, or supervised\n 
In many cases, structures are organised so that there is at least one intermediate layer, between the input layer and the output layer\n"""


In [3]:
# Learn the word -> integer-id vocabulary from the corpus, then encode the
# whole corpus as one flat list of word ids.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])
# One text goes in, so exactly one id sequence comes back; unpack it directly.
(encoded,) = tokenizer.texts_to_sequences([data])
encoded

[10,
 1,
 2,
 11,
 12,
 3,
 5,
 1,
 13,
 2,
 14,
 15,
 6,
 16,
 7,
 3,
 17,
 18,
 19,
 6,
 20,
 7,
 3,
 5,
 1,
 1,
 21,
 22,
 23,
 24,
 25,
 8,
 26,
 8,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 2,
 36,
 37,
 38,
 39,
 4,
 40,
 9,
 41,
 4,
 42,
 9,
 43,
 4]

In [11]:
# Number of distinct words plus one: the word ids seen in `encoded` start at 1,
# so the extra slot lets every id be used directly as a class index
# (presumably index 0 is reserved by the tokenizer -- see the Keras docs).
vocab_size = 1 + len(tokenizer.word_index)

In [5]:
# Slide a two-word window over the encoded corpus: every training sample is a
# [current_word_id, next_word_id] pair.
sequences = [list(pair) for pair in zip(encoded, encoded[1:])]

print("Count of all Sequences: %d" % len(sequences))

Count of all Sequences: 56


In [6]:
# Inspect the [current_word_id, next_word_id] training pairs built above.
sequences

[[10, 1],
 [1, 2],
 [2, 11],
 [11, 12],
 [12, 3],
 [3, 5],
 [5, 1],
 [1, 13],
 [13, 2],
 [2, 14],
 [14, 15],
 [15, 6],
 [6, 16],
 [16, 7],
 [7, 3],
 [3, 17],
 [17, 18],
 [18, 19],
 [19, 6],
 [6, 20],
 [20, 7],
 [7, 3],
 [3, 5],
 [5, 1],
 [1, 1],
 [1, 21],
 [21, 22],
 [22, 23],
 [23, 24],
 [24, 25],
 [25, 8],
 [8, 26],
 [26, 8],
 [8, 27],
 [27, 28],
 [28, 29],
 [29, 30],
 [30, 31],
 [31, 32],
 [32, 33],
 [33, 34],
 [34, 35],
 [35, 2],
 [2, 36],
 [36, 37],
 [37, 38],
 [38, 39],
 [39, 4],
 [4, 40],
 [40, 9],
 [9, 41],
 [41, 4],
 [4, 42],
 [42, 9],
 [9, 43],
 [43, 4]]

In [7]:
# Turn the list of pairs into a 2-D NumPy array so its columns can be sliced.
sequences = np.array(sequences)

In [8]:
# Column 0 is the input word id; column 1 is the id of the word that follows it.
x = sequences[:, 0]
y = sequences[:, 1]

In [9]:
# Input word ids (first element of every pair).
x

array([10,  1,  2, 11, 12,  3,  5,  1, 13,  2, 14, 15,  6, 16,  7,  3, 17,
       18, 19,  6, 20,  7,  3,  5,  1,  1, 21, 22, 23, 24, 25,  8, 26,  8,
       27, 28, 29, 30, 31, 32, 33, 34, 35,  2, 36, 37, 38, 39,  4, 40,  9,
       41,  4, 42,  9, 43])

In [10]:
# Target word ids (second element of every pair), still as plain integers.
y

array([ 1,  2, 11, 12,  3,  5,  1, 13,  2, 14, 15,  6, 16,  7,  3, 17, 18,
       19,  6, 20,  7,  3,  5,  1,  1, 21, 22, 23, 24, 25,  8, 26,  8, 27,
       28, 29, 30, 31, 32, 33, 34, 35,  2, 36, 37, 38, 39,  4, 40,  9, 41,
        4, 42,  9, 43,  4])

In [12]:
# One-hot encode the target word ids: one row per training pair, one column
# per vocabulary index.
# The targets are taken from `sequences[:, 1]` instead of from `y` itself so
# that re-running this cell does not one-hot encode an already encoded matrix.
y = to_categorical(sequences[:, 1], num_classes=vocab_size)
y

array([[0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [13]:
# Sanity check: expected to be (number of training pairs, vocab_size).
y.shape

(56, 44)

In [14]:
# Column for word id 1: holds 1.0 on every sample whose next word has id 1.
y[:,1]

array([1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.], dtype=float32)

In [46]:

from keras.backend import softmax
# Next-word model: a single word id is embedded into 10 dimensions, passed
# through an LSTM with 100 units, and mapped to a probability distribution
# over the vocabulary.
model = Sequential()
model.add(Embedding(vocab_size, 10, input_length=1))
model.add(LSTM(100))
# The built-in "softmax" activation name is used rather than a function
# object taken from keras.backend.
model.add(Dense(vocab_size, activation="softmax"))
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()

Model: "sequential_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_14 (Embedding)    (None, 1, 10)             440       
                                                                 
 lstm_16 (LSTM)              (None, 100)               44400     
                                                                 
 dense_13 (Dense)            (None, 44)                4444      
                                                                 
Total params: 49,284
Trainable params: 49,284
Non-trainable params: 0
_________________________________________________________________
None


In [47]:
# Multi-class next-word prediction against one-hot targets: categorical
# cross-entropy loss, Adam optimizer, accuracy reported during training.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [48]:
# Train on the (word id -> one-hot next word) pairs for 1000 epochs; the
# returned History object is shown as the cell output.
model.fit(x=x, y=y, epochs=1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x7f1a52f5a1d0>

In [84]:
from numpy.core.multiarray import result_type
def generate_seq(model, tokenizer, text, n_pred):
  """Extend a seed text by repeatedly predicting the next word.

  Each step feeds the id of the most recent word to the model and appends the
  word whose index has the highest predicted score.

  Args:
    model: object whose ``predict`` returns one row of per-index scores for
      each word id it is given.
    tokenizer: fitted tokenizer exposing ``texts_to_sequences`` and
      ``word_index`` (word -> integer id).
    text: seed text. If it contains several words, only the last
      in-vocabulary word conditions the first prediction.
    n_pred: maximum number of words to append.

  Returns:
    The seed text followed by the generated words, separated by single
    spaces. Generation stops early when the current word is not in the
    vocabulary or when the predicted index has no associated word.
  """
  # Invert the vocabulary once instead of scanning word_index on every step.
  index_to_word = {index: word for word, index in tokenizer.word_index.items()}

  in_text, res = text, text
  for _ in range(n_pred):
    encoded = tokenizer.texts_to_sequences([in_text])[0]
    if not encoded:
      # Nothing in-vocabulary to condition on; predicting on an empty batch
      # would fail, so stop here.
      break

    # The model consumes one word at a time, so keep only the latest word id.
    # This also makes multi-word seeds work instead of producing several
    # predictions that cannot be compared against a single index.
    encoded = array(encoded[-1:])
    yp = int(np.argmax(model.predict(encoded), axis=-1).ravel()[-1])

    out_word = index_to_word.get(yp)
    if out_word is None:
      # The predicted index maps to no word, so there is nothing to append
      # and nothing to feed into the next step.
      break

    in_text, res = out_word, res + " " + out_word

  return res




In [91]:

# Seed the generator with a single word and append three predicted words.
print(generate_seq(model, tokenizer, 'layer', 3))

layer between the output
