In [160]:
import numpy
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM
from keras.utils.np_utils import to_categorical
import pandas as pd

In [161]:
# Load the play and lower-case it so upper/lower variants of a letter share
# one entry in the character vocabulary.
filename = "macbeth.txt"

# The context manager closes the file handle as soon as the read finishes
# instead of leaving it open until the garbage collector reclaims it.
with open(filename) as text_file:
    text = text_file.read().lower()

In [162]:
text

"1606\nthe tragedy of macbeth\n\n\nby william shakespeare\n\n\n\ndramatis personae\n\n  duncan, king of scotland\n  macbeth, thane of glamis and cawdor, a general in the king's\narmy\n  lady macbeth, his wife\n  macduff, thane of fife, a nobleman of scotland\n  lady macduff, his wife\n  malcolm, elder son of duncan\n  donalbain, younger son of duncan\n  banquo, thane of lochaber, a general in the king's army\n  fleance, his son\n  lennox, nobleman of scotland\n  ross, nobleman of scotland\n  menteith nobleman of scotland\n  angus, nobleman of scotland\n  caithness, nobleman of scotland\n  siward, earl of northumberland, general of the english forces\n  young siward, his son\n  seyton, attendant to macbeth\n  hecate, queen of the witches\n  the three witches\n  boy, son of macduff \n  gentlewoman attending on lady macbeth\n  an english doctor\n  a scottish doctor\n  a sergeant\n  a porter\n  an old man\n  the ghost of banquo and other apparitions\n  lords, gentlemen, officers, soldiers,

In [172]:
# Drop every newline character. NOTE: nothing is inserted in its place, so
# words on either side of a line break are fused together.
text = "".join(text.split("\n"))

In [173]:
text

"1606the tragedy of macbethby william shakespearedramatis personae  duncan, king of scotland  macbeth, thane of glamis and cawdor, a general in the king'sarmy  lady macbeth, his wife  macduff, thane of fife, a nobleman of scotland  lady macduff, his wife  malcolm, elder son of duncan  donalbain, younger son of duncan  banquo, thane of lochaber, a general in the king's army  fleance, his son  lennox, nobleman of scotland  ross, nobleman of scotland  menteith nobleman of scotland  angus, nobleman of scotland  caithness, nobleman of scotland  siward, earl of northumberland, general of the english forces  young siward, his son  seyton, attendant to macbeth  hecate, queen of the witches  the three witches  boy, son of macduff   gentlewoman attending on lady macbeth  an english doctor  a scottish doctor  a sergeant  a porter  an old man  the ghost of banquo and other apparitions  lords, gentlemen, officers, soldiers, murtherers, attendants,     and messengers"

In [174]:
# Character vocabulary: every distinct character of the text, in sorted order.
unique_chars = list(set(text))
unique_chars.sort()

print("total unique Charts:", len(unique_chars))

total unique Charts: 29


In [166]:
unique_chars

[' ',
 "'",
 ',',
 '0',
 '1',
 '6',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'w',
 'x',
 'y']

In [167]:
# Two lookup tables over the sorted vocabulary:
#   char_to_int: character -> position in unique_chars
#   int_to_char: position in unique_chars -> character
char_to_int = {symbol: index for index, symbol in enumerate(unique_chars)}
int_to_char = dict(enumerate(unique_chars))

In [168]:
char_to_int

{' ': 0,
 "'": 1,
 ',': 2,
 '0': 3,
 '1': 4,
 '6': 5,
 'a': 6,
 'b': 7,
 'c': 8,
 'd': 9,
 'e': 10,
 'f': 11,
 'g': 12,
 'h': 13,
 'i': 14,
 'k': 15,
 'l': 16,
 'm': 17,
 'n': 18,
 'o': 19,
 'p': 20,
 'q': 21,
 'r': 22,
 's': 23,
 't': 24,
 'u': 25,
 'w': 26,
 'x': 27,
 'y': 28}

In [122]:
int_to_char

{0: ' ',
 1: "'",
 2: ',',
 3: '0',
 4: '1',
 5: '6',
 6: 'a',
 7: 'b',
 8: 'c',
 9: 'd',
 10: 'e',
 11: 'f',
 12: 'g',
 13: 'h',
 14: 'i',
 15: 'k',
 16: 'l',
 17: 'm',
 18: 'n',
 19: 'o',
 20: 'p',
 21: 'q',
 22: 'r',
 23: 's',
 24: 't',
 25: 'u',
 26: 'w',
 27: 'x',
 28: 'y'}

In [None]:
'''
Prepare the training data as shown below: each input is a window of 50
consecutive characters and its label is the character that immediately
follows that window. (The examples use the raw text; the notebook removes
"\n" from the text before the windows are actually built.)

Input: 1606\nthe tragedy of macbeth\n\n\nby william shakespea
Label: r

Input: 606\nthe tragedy of macbeth\n\n\nby william shakespear
Label: e

Input: 06\nthe tragedy of macbeth\n\n\nby william shakespeare
Label: \n

Input: 6\nthe tragedy of macbeth\n\n\nby william shakespeare\n
Label: \n

Input: \nthe tragedy of macbeth\n\n\nby william shakespeare\n\n
Label: \n

Input: the tragedy of macbeth\n\n\nby william shakespeare\n\n\n
Label: \n

Input: he tragedy of macbeth\n\n\nby william shakespeare\n\n\n\n
Label: d

Input: e tragedy of macbeth\n\n\nby william shakespeare\n\n\n\nd
Label: r
'''

In [125]:
# Build the supervised dataset with a sliding window over the text:
#   X[k] = integer codes of the 50 characters starting at position k
#   Y[k] = integer code of the character right after that window
X, Y = [], []

for start in range(len(text) - 50):
    stop = start + 50
    window_codes = [char_to_int[symbol] for symbol in text[start:stop]]
    X.append(window_codes)
    Y.append(char_to_int[text[stop]])

In [126]:
# Display the first few encoded samples (integer codes) as a sanity check.
# Rows are read straight from X; copying the whole dataset on every
# iteration just to pick one row is unnecessary work.
# min(...) keeps the loop valid when fewer than five samples exist.
for i in range(min(5, len(X))):
    print("Input:", X[i])
    print("Label:", Y[i])

    print("\n")

Input: [4, 5, 3, 5, 24, 13, 10, 0, 24, 22, 6, 12, 10, 9, 28, 0, 19, 11, 0, 17, 6, 8, 7, 10, 24, 13, 7, 28, 0, 26, 14, 16, 16, 14, 6, 17, 0, 23, 13, 6, 15, 10, 23, 20, 10, 6, 22, 10, 9, 22]
Label: 6


Input: [5, 3, 5, 24, 13, 10, 0, 24, 22, 6, 12, 10, 9, 28, 0, 19, 11, 0, 17, 6, 8, 7, 10, 24, 13, 7, 28, 0, 26, 14, 16, 16, 14, 6, 17, 0, 23, 13, 6, 15, 10, 23, 20, 10, 6, 22, 10, 9, 22, 6]
Label: 17


Input: [3, 5, 24, 13, 10, 0, 24, 22, 6, 12, 10, 9, 28, 0, 19, 11, 0, 17, 6, 8, 7, 10, 24, 13, 7, 28, 0, 26, 14, 16, 16, 14, 6, 17, 0, 23, 13, 6, 15, 10, 23, 20, 10, 6, 22, 10, 9, 22, 6, 17]
Label: 6


Input: [5, 24, 13, 10, 0, 24, 22, 6, 12, 10, 9, 28, 0, 19, 11, 0, 17, 6, 8, 7, 10, 24, 13, 7, 28, 0, 26, 14, 16, 16, 14, 6, 17, 0, 23, 13, 6, 15, 10, 23, 20, 10, 6, 22, 10, 9, 22, 6, 17, 6]
Label: 24


Input: [24, 13, 10, 0, 24, 22, 6, 12, 10, 9, 28, 0, 19, 11, 0, 17, 6, 8, 7, 10, 24, 13, 7, 28, 0, 26, 14, 16, 16, 14, 6, 17, 0, 23, 13, 6, 15, 10, 23, 20, 10, 6, 22, 10, 9, 22, 6, 17, 6, 24]
Label

In [127]:

# Display the first few samples decoded back to characters as a sanity check.
# Only the row being shown is decoded; decoding every row of X on each
# iteration to print a single one is unnecessary work.
# min(...) keeps the loop valid when fewer than five samples exist.
for i in range(min(5, len(X))):
    print("Input:", [int_to_char[code] for code in X[i]])
    print("Label:", int_to_char[Y[i]])

    print("\n")

Input: ['1', '6', '0', '6', 't', 'h', 'e', ' ', 't', 'r', 'a', 'g', 'e', 'd', 'y', ' ', 'o', 'f', ' ', 'm', 'a', 'c', 'b', 'e', 't', 'h', 'b', 'y', ' ', 'w', 'i', 'l', 'l', 'i', 'a', 'm', ' ', 's', 'h', 'a', 'k', 'e', 's', 'p', 'e', 'a', 'r', 'e', 'd', 'r']
Label: a


Input: ['6', '0', '6', 't', 'h', 'e', ' ', 't', 'r', 'a', 'g', 'e', 'd', 'y', ' ', 'o', 'f', ' ', 'm', 'a', 'c', 'b', 'e', 't', 'h', 'b', 'y', ' ', 'w', 'i', 'l', 'l', 'i', 'a', 'm', ' ', 's', 'h', 'a', 'k', 'e', 's', 'p', 'e', 'a', 'r', 'e', 'd', 'r', 'a']
Label: m


Input: ['0', '6', 't', 'h', 'e', ' ', 't', 'r', 'a', 'g', 'e', 'd', 'y', ' ', 'o', 'f', ' ', 'm', 'a', 'c', 'b', 'e', 't', 'h', 'b', 'y', ' ', 'w', 'i', 'l', 'l', 'i', 'a', 'm', ' ', 's', 'h', 'a', 'k', 'e', 's', 'p', 'e', 'a', 'r', 'e', 'd', 'r', 'a', 'm']
Label: a


Input: ['6', 't', 'h', 'e', ' ', 't', 'r', 'a', 'g', 'e', 'd', 'y', ' ', 'o', 'f', ' ', 'm', 'a', 'c', 'b', 'e', 't', 'h', 'b', 'y', ' ', 'w', 'i', 'l', 'l', 'i', 'a', 'm', ' ', 's', 'h', 'a', 

In [128]:
# Reshape, normalise and one-hot encode.
#   X_modified: (samples, 50 timesteps, 1 feature), codes divided by the
#               vocabulary size so every value lies in [0, 1).
#   Y_modified: one-hot targets, one column per vocabulary entry.
n_vocab = len(unique_chars)

X_modified = numpy.reshape(X, (len(X), 50, 1))
X_modified = X_modified / float(n_vocab)

# Pin the one-hot width to the vocabulary size. Without num_classes the width
# is inferred from the largest label present in Y, so a text whose
# highest-coded character never appears as a label would yield fewer output
# units than int_to_char has entries.
Y_modified = to_categorical(Y, num_classes=n_vocab)

In [129]:
X_modified.shape

(916, 50, 1)

In [130]:
X_modified[0]

array([[0.13793103],
       [0.17241379],
       [0.10344828],
       [0.17241379],
       [0.82758621],
       [0.44827586],
       [0.34482759],
       [0.        ],
       [0.82758621],
       [0.75862069],
       [0.20689655],
       [0.4137931 ],
       [0.34482759],
       [0.31034483],
       [0.96551724],
       [0.        ],
       [0.65517241],
       [0.37931034],
       [0.        ],
       [0.5862069 ],
       [0.20689655],
       [0.27586207],
       [0.24137931],
       [0.34482759],
       [0.82758621],
       [0.44827586],
       [0.24137931],
       [0.96551724],
       [0.        ],
       [0.89655172],
       [0.48275862],
       [0.55172414],
       [0.55172414],
       [0.48275862],
       [0.20689655],
       [0.5862069 ],
       [0.        ],
       [0.79310345],
       [0.44827586],
       [0.20689655],
       [0.51724138],
       [0.34482759],
       [0.79310345],
       [0.68965517],
       [0.34482759],
       [0.20689655],
       [0.75862069],
       [0.344

In [131]:
# Stacked LSTM next-character classifier:
#   LSTM(300, returns the full sequence) -> Dropout(0.2)
#   -> LSTM(300) -> Dropout(0.2) -> softmax over the one-hot label columns.
# The input shape (timesteps, features) is taken from the prepared array.
model = Sequential([
    LSTM(300, input_shape=X_modified.shape[1:], return_sequences=True),
    Dropout(0.2),
    LSTM(300),
    Dropout(0.2),
    Dense(Y_modified.shape[1], activation='softmax'),
])

model.compile(optimizer='adam', loss='categorical_crossentropy')

In [132]:
# Train on the prepared windows: a single pass over the data in mini-batches of 30.
model.fit(x=X_modified, y=Y_modified, batch_size=30, epochs=1)



<tensorflow.python.keras.callbacks.History at 0x16f932ef860>

In [133]:
# Pick a random training window to seed text generation.
# randint's upper bound is exclusive, so len(X) makes every sample --
# including the last one -- eligible (and a one-sample dataset no longer raises).
start_index = numpy.random.randint(0, len(X))

# Copy the row instead of aliasing it: the seed is modified during generation
# and that must not alter the training data stored in X.
new_string = list(X[start_index])

In [153]:
# Wrap the first normalised sample in a batch of one: shape (1, 50, 1).
# NOTE: the name `input` shadows the Python builtin; it is kept because the
# prediction cell reads it.
input = X_modified[0].reshape((1, 50, 1))

In [156]:
# Run the model on the one-sample batch and take the position of the largest
# output value as the predicted character code.
probabilities = model.predict(input, verbose=0)
pred_index = numpy.argmax(probabilities)

pred_index

0

In [157]:
# Decode the predicted index computed above rather than a hard-coded literal,
# so this cell stays correct when the prediction changes.
int_to_char[pred_index]

' '

In [170]:
# Generate 50 characters: predict the next character from the current window,
# then slide the window forward by one step and repeat.
#   new_string      rolling window of integer codes (seed on entry; the most
#                   recent codes on exit)
#   generated_text  the predicted characters joined into one string
n_vocab = float(len(unique_chars))
generated_chars = []

for i in range(50):
    # Feed the *current* window, shaped and scaled like the training inputs.
    # Using a fixed sample here would give the same prediction on every step.
    x = numpy.reshape(new_string, (1, len(new_string), 1))
    x = x / n_vocab

    # predicting
    pred_index = int(numpy.argmax(model.predict(x, verbose=0)))
    char_out = int_to_char[pred_index]
    generated_chars.append(char_out)

    # Advance the window by building a new list: drop the oldest code, add the
    # prediction. Appending in place could modify a row of X if the seed
    # still refers to one.
    new_string = list(new_string[1:]) + [pred_index]

generated_text = "".join(generated_chars)

In [171]:
new_string  # Predictions come out as index 0, i.e. the space character ' ' -- presumably because the model was trained for only one epoch.

[]