<a href="https://colab.research.google.com/github/SUNNYTHAKURCODE/MACHINELEARNING/blob/master/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from numpy import array
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding

# generate a sequence from the model
def generate_seq(model, tokenizer, seed_text, n_words):
	"""Generate text one word at a time from a one-word-in, one-word-out model.

	Args:
		model: object exposing ``predict(x, verbose=0)`` that returns, for each
			input row, one score per vocabulary index.
		tokenizer: object exposing ``texts_to_sequences`` and ``word_index``
			(a word -> integer index mapping).
		seed_text: text to start from. If it holds several words, only the last
			one is fed to the model, since the model consumes a single word.
		n_words: maximum number of words to append.

	Returns:
		``seed_text`` followed by the generated words, separated by single spaces.
		Generation stops early when the current word is not in the vocabulary or
		the predicted index has no word attached to it.
	"""
	# build the index -> word lookup once rather than scanning the vocabulary every step
	index_to_word = {index: word for word, index in tokenizer.word_index.items()}
	in_text, result = seed_text, seed_text
	# generate a fixed number of words
	for _ in range(n_words):
		# encode the text as integer
		encoded = tokenizer.texts_to_sequences([in_text])[0]
		if not encoded:
			# nothing the model knows about: stop instead of predicting on empty input
			break
		# one sample, one timestep: only the most recent word drives the prediction
		encoded = array([[encoded[-1]]])
		# predict_classes() is gone from current Keras; take the argmax of predict()
		probabilities = model.predict(encoded, verbose=0)
		yhat = int(probabilities.argmax(axis=-1)[0])
		# map predicted word index to word
		out_word = index_to_word.get(yhat)
		if out_word is None:
			# predicted index has no word (e.g. index 0): nothing sensible to append
			break
		# append to input
		in_text, result = out_word, result + ' ' + out_word
	return result

# source text
data = """ Jack and Jill went up the hill\n
		To fetch a pail of water\n
		Jack fell down and broke his crown\n
		And Jill came tumbling after\n """
# integer encode text
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])
encoded = tokenizer.texts_to_sequences([data])[0]
# determine the vocabulary size (one more than the number of distinct words,
# so the largest word index is still a valid class / embedding row)
vocab_size = len(tokenizer.word_index) + 1
print('Vocabulary Size: %d' % vocab_size)
# create word -> word sequences: every adjacent pair (previous word, next word)
sequences = [encoded[i-1:i+1] for i in range(1, len(encoded))]
print('Total Sequences: %d' % len(sequences))
# split into X (input word) and y (word to predict)
sequences = array(sequences)
X, y = sequences[:,0],sequences[:,1]
# one hot encode outputs
y = to_categorical(y, num_classes=vocab_size)
# define model: embedding -> LSTM -> softmax over the vocabulary
model = Sequential()
model.add(Embedding(vocab_size, 10, input_length=1))
model.add(LSTM(50))
model.add(Dense(vocab_size, activation='softmax'))
# summary() prints the table itself and returns None; wrapping it in print()
# only adds a stray "None" line to the output
model.summary()
# compile network
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit network
model.fit(X, y, epochs=500, verbose=2)
# evaluate: generate 6 words starting from 'Jack'
print(generate_seq(model, tokenizer, 'Jack', 6))

Using TensorFlow backend.


Vocabulary Size: 22
Total Sequences: 24
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 1, 10)             220       
_________________________________________________________________
lstm_1 (LSTM)                (None, 50)                12200     
_________________________________________________________________
dense_1 (Dense)              (None, 22)                1122      
Total params: 13,542
Trainable params: 13,542
Non-trainable params: 0
_________________________________________________________________
None


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/500
 - 1s - loss: 3.0905 - accuracy: 0.1250
Epoch 2/500
 - 0s - loss: 3.0899 - accuracy: 0.1250
Epoch 3/500
 - 0s - loss: 3.0892 - accuracy: 0.2500
Epoch 4/500
 - 0s - loss: 3.0885 - accuracy: 0.2500
Epoch 5/500
 - 0s - loss: 3.0877 - accuracy: 0.1250
Epoch 6/500
 - 0s - loss: 3.0869 - accuracy: 0.1250
Epoch 7/500
 - 0s - loss: 3.0862 - accuracy: 0.1250
Epoch 8/500
 - 0s - loss: 3.0854 - accuracy: 0.1250
Epoch 9/500
 - 0s - loss: 3.0846 - accuracy: 0.1250
Epoch 10/500
 - 0s - loss: 3.0837 - accuracy: 0.1250
Epoch 11/500
 - 0s - loss: 3.0829 - accuracy: 0.1250
Epoch 12/500
 - 0s - loss: 3.0820 - accuracy: 0.1250
Epoch 13/500
 - 0s - loss: 3.0812 - accuracy: 0.1250
Epoch 14/500
 - 0s - loss: 3.0803 - accuracy: 0.1250
Epoch 15/500
 - 0s - loss: 3.0794 - accuracy: 0.1250
Epoch 16/500
 - 0s - loss: 3.0785 - accuracy: 0.1250
Epoch 17/500
 - 0s - loss: 3.0775 - accuracy: 0.1250
Epoch 18/500
 - 0s - loss: 3.0766 - accuracy: 0.1250
Epoch 19/500
 - 0s - loss: 3.0756 - accuracy: 0.1250
Ep

In [3]:
from numpy import array
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding

# generate a sequence from a language model
def generate_seq(model, tokenizer, max_length, seed_text, n_words):
	"""Extend ``seed_text`` word by word using a fixed-length-input language model.

	Args:
		model: object exposing ``predict(x, verbose=0)`` that returns, for each
			input row, one score per vocabulary index.
		tokenizer: object exposing ``texts_to_sequences`` and ``word_index``
			(a word -> integer index mapping).
		max_length: number of word indices the model takes as input; the running
			text is pre-padded to this length before each prediction.
		seed_text: text to start from.
		n_words: maximum number of words to append.

	Returns:
		``seed_text`` followed by the generated words, separated by single spaces.
		Generation stops early when the predicted index has no word attached to it.
	"""
	# build the index -> word lookup once rather than scanning the vocabulary every step
	index_to_word = {index: word for word, index in tokenizer.word_index.items()}
	in_text = seed_text
	# generate a fixed number of words
	for _ in range(n_words):
		# encode the text as integer
		encoded = tokenizer.texts_to_sequences([in_text])[0]
		# pre-pad sequences to a fixed length
		encoded = pad_sequences([encoded], maxlen=max_length, padding='pre')
		# predict_classes() is gone from current Keras; take the argmax of predict()
		probabilities = model.predict(encoded, verbose=0)
		yhat = int(probabilities.argmax(axis=-1)[0])
		# map predicted word index to word
		out_word = index_to_word.get(yhat)
		if out_word is None:
			# predicted index has no word (e.g. index 0); the input would not change
			# on the next step, so stop instead of appending empty words
			break
		# append to input
		in_text += ' ' + out_word
	return in_text

# source text
data = """ Jack and Jill went up the hill\n
		To fetch a pail of water\n
		Jack fell down and broke his crown\n
		And Jill came tumbling after\n """
# integer encode sequences of words
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])
encoded = tokenizer.texts_to_sequences([data])[0]
# retrieve vocabulary size (one more than the number of distinct words,
# so the largest word index is still a valid class / embedding row)
vocab_size = len(tokenizer.word_index) + 1
print('Vocabulary Size: %d' % vocab_size)
# encode 2 words -> 1 word: every window of three consecutive words
sequences = [encoded[i-2:i+1] for i in range(2, len(encoded))]
print('Total Sequences: %d' % len(sequences))
# pad sequences
max_length = max([len(seq) for seq in sequences])
sequences = pad_sequences(sequences, maxlen=max_length, padding='pre')
print('Max Sequence Length: %d' % max_length)
# split into input and output elements: the last column is the word to predict
sequences = array(sequences)
X, y = sequences[:,:-1],sequences[:,-1]
y = to_categorical(y, num_classes=vocab_size)
# the model sees every word of a window except the final one
input_length = max_length - 1
# define model: embedding -> LSTM -> softmax over the vocabulary
model = Sequential()
model.add(Embedding(vocab_size, 10, input_length=input_length))
model.add(LSTM(50))
model.add(Dense(vocab_size, activation='softmax'))
# summary() prints the table itself and returns None; wrapping it in print()
# only adds a stray "None" line to the output
model.summary()
# compile network
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit network
model.fit(X, y, epochs=500, verbose=2)
# evaluate model on a few seeds: (seed text, number of words to generate)
for seed_text, n_words in [('Jack and', 5), ('And Jill', 3), ('fell down', 5), ('pail of', 5)]:
	print(generate_seq(model, tokenizer, input_length, seed_text, n_words))

Vocabulary Size: 22
Total Sequences: 23
Max Sequence Length: 3
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 2, 10)             220       
_________________________________________________________________
lstm_2 (LSTM)                (None, 50)                12200     
_________________________________________________________________
dense_2 (Dense)              (None, 22)                1122      
Total params: 13,542
Trainable params: 13,542
Non-trainable params: 0
_________________________________________________________________
None


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/500
 - 0s - loss: 3.0916 - accuracy: 0.0000e+00
Epoch 2/500
 - 0s - loss: 3.0907 - accuracy: 0.0435
Epoch 3/500
 - 0s - loss: 3.0897 - accuracy: 0.0435
Epoch 4/500
 - 0s - loss: 3.0888 - accuracy: 0.0435
Epoch 5/500
 - 0s - loss: 3.0879 - accuracy: 0.0435
Epoch 6/500
 - 0s - loss: 3.0870 - accuracy: 0.0435
Epoch 7/500
 - 0s - loss: 3.0861 - accuracy: 0.0870
Epoch 8/500
 - 0s - loss: 3.0852 - accuracy: 0.0870
Epoch 9/500
 - 0s - loss: 3.0842 - accuracy: 0.0870
Epoch 10/500
 - 0s - loss: 3.0833 - accuracy: 0.1304
Epoch 11/500
 - 0s - loss: 3.0823 - accuracy: 0.1304
Epoch 12/500
 - 0s - loss: 3.0814 - accuracy: 0.1304
Epoch 13/500
 - 0s - loss: 3.0804 - accuracy: 0.1304
Epoch 14/500
 - 0s - loss: 3.0794 - accuracy: 0.1304
Epoch 15/500
 - 0s - loss: 3.0783 - accuracy: 0.1304
Epoch 16/500
 - 0s - loss: 3.0773 - accuracy: 0.1304
Epoch 17/500
 - 0s - loss: 3.0762 - accuracy: 0.1304
Epoch 18/500
 - 0s - loss: 3.0751 - accuracy: 0.1304
Epoch 19/500
 - 0s - loss: 3.0739 - accuracy: 0.130