Text Generation

In [26]:
#Import Dependencies
import numpy
import sys
import nltk
nltk.download('stopwords')
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [27]:
# Load the raw corpus into memory.
# The context manager closes the file handle as soon as the text is read, and
# the explicit encoding stops the result from depending on the platform's
# default locale (assumes the text file is UTF-8 - confirm for other sources).
with open("frankenstein.txt", encoding="utf-8") as corpus_file:
    file=corpus_file.read()

In [28]:
# tokenization
def tokenize_words(input):
    """Normalise raw text into a space-separated string of content words.

    The text is lower-cased, split into runs of word characters, and every
    token that appears in NLTK's English stop-word list is dropped.

    Args:
        input: Raw text to clean (a str).

    Returns:
        The surviving tokens, in their original order, joined by single spaces.
    """
    # Build the stop-word collection once per call. Evaluating
    # stopwords.words('english') inside the filter would redo that work for
    # every token, and a set makes each membership test constant-time.
    stop_words=set(stopwords.words('english'))
    tokenizer=RegexpTokenizer(r'\w+')
    tokens=tokenizer.tokenize(input.lower())
    return " ".join(token for token in tokens if token not in stop_words)

processed_inputs=tokenize_words(file)

In [29]:
# Character vocabulary: every distinct character of the cleaned text in
# sorted order, plus a lookup from each character to its position in that order.
chars=sorted(set(processed_inputs))
char_to_num={char: position for position, char in enumerate(chars)}

In [30]:
# Sanity check: report the size of the cleaned text and of the vocabulary
input_len, vocab_len=len(processed_inputs), len(chars)
print("Total number of characters:", input_len)
print("Total vocab:",vocab_len)

Total number of characters: 195700
Total vocab: 41


In [31]:
# Sequence length: number of characters in each input window
seq_length=100
# Empty accumulators for the encoded input windows and their targets
x_data, y_data=[], []

In [32]:
# Slide a seq_length-wide window over the text one character at a time.
# Each window (encoded as integers) is an input pattern; the character that
# immediately follows it is the target.
# The accumulators are reset here so that re-running this cell rebuilds the
# patterns instead of appending a second copy of every one.
x_data=[]
y_data=[]
for i in range(0,input_len-seq_length,1):
    in_seq=processed_inputs[i:i+seq_length]
    out_seq=processed_inputs[i+seq_length]
    x_data.append([char_to_num[char] for char in in_seq])
    y_data.append(char_to_num[out_seq])

n_patterns=len(x_data)
print("Total Patterns:", n_patterns)

Total Patterns: 195600


In [33]:
# Turn the encoded windows into an array of shape (n_patterns, seq_length, 1)
# and divide by the vocabulary size so every code lands in [0, 1)
X=numpy.reshape(x_data,(n_patterns, seq_length,1))/float(vocab_len)

In [34]:
# One-hot encode the targets.
# num_classes is pinned to the vocabulary size: left unset, to_categorical
# sizes the matrix from the largest label present in y_data, so the width
# (and therefore the output layer built from y.shape[1]) would shrink if the
# highest-numbered character never occurred as a target.
y = to_categorical(y_data, num_classes=vocab_len)

In [35]:
# Model: three stacked 256-unit LSTM layers, each followed by 20% dropout,
# finishing with a softmax layer that has one unit per one-hot target column
model=Sequential([
    LSTM(256, input_shape=(X.shape[1],X.shape[2]), return_sequences=True),
    Dropout(0.2),
    LSTM(256, return_sequences=True),
    Dropout(0.2),
    LSTM(256),
    Dropout(0.2),
    Dense(y.shape[1],activation='softmax'),
])

In [36]:
# Configure training: Adam optimizer with categorical cross-entropy loss
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
)

In [37]:
# Checkpoint callback: monitors the training loss and, because
# save_best_only is set, writes to `filepath` only when the loss improves
filepath='model_weights_saved.keras'
checkpoint=ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
desired_callbacks=[checkpoint]

In [38]:
# Train the network; the checkpoint callback runs alongside training
model.fit(
    X,
    y,
    batch_size=256,
    epochs=100,
    callbacks=desired_callbacks,
)

Epoch 1/100
[1m765/765[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step - loss: 2.9491
Epoch 1: loss improved from inf to 2.85765, saving model to model_weights_saved.keras
[1m765/765[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 81ms/step - loss: 2.9490
Epoch 2/100
[1m764/765[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 85ms/step - loss: 2.6263
Epoch 2: loss improved from 2.85765 to 2.57741, saving model to model_weights_saved.keras
[1m765/765[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 85ms/step - loss: 2.6262
Epoch 3/100
[1m764/765[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 85ms/step - loss: 2.4314
Epoch 3: loss improved from 2.57741 to 2.37873, saving model to model_weights_saved.keras
[1m765/765[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 85ms/step - loss: 2.4312
Epoch 4/100
[1m764/765[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 85ms/step - loss: 2.2383
Epoch 4: loss improved from 2.37873 to 2.19952

<keras.src.callbacks.history.History at 0x79e268178610>

In [40]:
# Restore the checkpointed weights into the model, then compile it again
# with the same loss and optimizer as before
filename='model_weights_saved.keras'
model.load_weights(filename)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
)

In [41]:
# Reverse lookup (integer code -> character) for decoding model output
num_to_char=dict(enumerate(chars))

In [42]:
# Pick a random training window to seed the generator.
# numpy.random.randint excludes its upper bound, so passing len(x_data)
# (rather than len(x_data)-1) lets the final pattern be selected as well.
start=numpy.random.randint(0,len(x_data))
# Take a copy: `pattern` is modified during generation and must not alias the
# stored training sequence in x_data.
pattern=list(x_data[start])
print("Random Seed :")
print("\"",''.join([num_to_char[value] for value in pattern]),"\"")

Random Seed :
" used embers nearly extinguished night came found pleasure fire gave light well heat discovery elemen "


In [43]:
# Generate 1000 characters, one at a time.
# Each step feeds the current window to the model, emits the most probable
# next character, and slides the window forward by one position.
for step in range(1000):
    # Same preprocessing as the training inputs: (1, window length, 1), scaled
    x=numpy.reshape(pattern,(1,len(pattern),1))
    x=x/float(vocab_len)
    prediction=model.predict(x, verbose=0)
    # Greedy decoding; int() keeps `pattern` a list of plain Python ints
    index=int(numpy.argmax(prediction))
    result=num_to_char[index]
    sys.stdout.write(result)
    # Build a new window instead of appending in place, so a list that
    # `pattern` may share with x_data is never mutated
    pattern=pattern[1:]+[index]

t rest nature spent wiolently language completely descended also formed formed sufferings remained spoke procure companion saw supply wretch saw felt saw words consideration whose existence soon perceived considerably shall content father gentle breeze sea life appeared despair exes spirits one man soon perceived considerably shall content father gentle breeze sea life appeared despair exes spirits one man soon perceived considerably shall content father gentle breeze sea life appeared despair exes spirits one man soon perceived considerably shall content father gentle breeze sea life appeared despair exes spirits one man soon perceived considerably shall content father gentle breeze sea life appeared despair exes spirits one man soon perceived considerably shall content father gentle breeze sea life appeared despair exes spirits one man soon perceived considerably shall content father gentle breeze sea life appeared despair exes spirits one man soon perceived considerably shall conten

In [None]:
# This is the generated text for 100 epochs.
# It is stored as a string (adjacent literals are concatenated into one line)
# so that this cell is valid Python and does not break "Run All".
generated_text_100_epochs = (
    "t rest nature spent wiolently language completely descended also formed formed "
    "sufferings remained spoke procure companion saw supply wretch saw felt saw words "
    "consideration whose existence soon perceived considerably shall content father "
    "gentle breeze sea life appeared despair exes spirits one man soon perceived "
    "considerably shall content father gentle breeze sea life appeared despair exes "
    "spirits one man soon perceived considerably shall content father gentle breeze "
    "sea life appeared despair exes spirits one man soon perceived considerably shall "
    "content father gentle breeze sea life appeared despair exes spirits one man soon "
    "perceived considerably shall content father gentle breeze sea life appeared "
    "despair exes spirits one man soon perceived considerably shall content father "
    "gentle breeze sea life appeared despair exes spirits one man soon perceived "
    "considerably shall content father gentle breeze sea life appeared despair exes "
    "spirits one man soon perceived considerably shall conten"
)
