In [None]:
import random
import numpy as np
import pickle
import pandas as pd

from nltk.tokenize import RegexpTokenizer
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Activation
from tensorflow.keras.optimizers import RMSprop

In [None]:
filepath = "whatsapp_chat.txt"
# Read the whole chat export in one go. The context manager closes the file
# handle as soon as the text is in memory (the original left it open for the
# lifetime of the kernel).
# NOTE(review): encoding is pinned to UTF-8 so the result does not depend on
# the platform default; WhatsApp exports are presumably UTF-8 -- confirm.
with open(filepath, 'r', encoding='utf-8') as file:
  partial_text = file.read()

In [None]:
# Word tokenizer: every maximal run of word characters (\w+) is one token,
# so punctuation and whitespace act purely as separators.
tokenizer = RegexpTokenizer(r"\w+")

# Lower-case the corpus first so the vocabulary is case-insensitive.
lowercased_text = partial_text.lower()
tokens = tokenizer.tokenize(lowercased_text)


In [None]:
# Vocabulary: the distinct tokens of the corpus (np.unique returns them sorted).
unique_tokens = np.unique(tokens)

# Reverse lookup: token -> its position in `unique_tokens`.
unique_token_index = dict(zip(unique_tokens, range(len(unique_tokens))))


In [None]:
# Context length: each training sample is `n_words` consecutive tokens and
# its label is the token that immediately follows them.
n_words = 10

# Every start offset that still leaves room for a full window plus its label.
window_starts = range(len(tokens) - n_words)
input_words = [tokens[start:start + n_words] for start in window_starts]
next_words = [tokens[start + n_words] for start in window_starts]


In [None]:
# Boolean one-hot tensors, allocated empty here and filled in afterwards.
#   X: (number of samples, n_words, vocabulary size) -- the context windows
#   Y: (number of samples, vocabulary size)          -- the following token
X = np.zeros(
  shape=(len(input_words), n_words, len(unique_tokens)),
  dtype=np.bool_,
)
Y = np.zeros(
  shape=(len(next_words), len(unique_tokens)),
  dtype=np.bool_,
)

In [None]:
# One-hot encode the training data: for each sample, switch on the vocabulary
# slot of every context token in X and of the target token in Y.
for sample_idx, (context, target) in enumerate(zip(input_words, next_words)):
  for position, token in enumerate(context):
    X[sample_idx, position, unique_token_index[token]] = True
  Y[sample_idx, unique_token_index[target]] = True

In [None]:
# Two stacked 128-unit LSTM layers followed by a softmax over the vocabulary.
# The first LSTM returns its full output sequence so that the second LSTM
# receives one vector per time step.
model = Sequential([
  LSTM(128, input_shape=(n_words, len(unique_tokens)), return_sequences=True),
  LSTM(128),
  Dense(len(unique_tokens)),
  Activation('softmax'),
])


In [None]:
# Training setup: categorical cross-entropy against the one-hot targets,
# optimised with RMSprop, reporting accuracy during training.
model.compile(
  optimizer=RMSprop(learning_rate=0.005),
  loss='categorical_crossentropy',
  metrics=["accuracy"],
)

# Fit on the full one-hot data set, reshuffling the samples every epoch.
model.fit(
  X,
  Y,
  batch_size=128,
  epochs=25,
  shuffle=True,
)

In [None]:
def predict_next_word(input_text,n_best):
  """Return the vocabulary indices of the most likely next words.

  Parameters
  ----------
  input_text : str
      Whitespace-separated context words. The text is lower-cased and only
      the last `n_words` words are used, so longer inputs no longer overflow
      the fixed-size model input.
  n_best : int
      Number of candidates to return. Values larger than the number of
      model outputs are clamped to it.

  Returns
  -------
  numpy.ndarray
      `n_best` indices into `unique_tokens`, in no particular order
      (np.argpartition partitions, it does not sort).

  Raises
  ------
  KeyError
      If a context word is not present in `unique_token_index`.
  """
  # The model input has exactly n_words time steps; keep the most recent words.
  context = input_text.lower().split()[-n_words:]

  # One-hot encode into a local buffer. It is deliberately not called `X`, so
  # it cannot be confused with the global training tensor of that name.
  encoded = np.zeros((1,n_words,len(unique_tokens)))
  for position,word in enumerate(context):
    encoded[0,position,unique_token_index[word]]=1

  predictions =  model.predict(encoded)[0]

  # np.argpartition raises when asked for more elements than exist.
  n_best = min(n_best,len(predictions))
  return np.argpartition(predictions,-n_best)[-n_best:]


In [None]:
def generate_text(input_text,text_length,creativity =3):
  """Extend `input_text` by `text_length` generated words.

  Parameters
  ----------
  input_text : str
      Seed text; its whitespace-separated words start the output.
  text_length : int
      Number of words to append.
  creativity : int, default 3
      For each step, the next word is drawn uniformly at random from the
      `creativity` best candidates returned by `predict_next_word`.

  Returns
  -------
  str
      The seed words followed by the generated words, joined by spaces.
  """
  word_sequence = input_text.split()
  for _ in range(text_length):
    # Context = the LAST n_words tokens of everything produced so far.
    # The earlier fixed-offset window advanced by one per appended word, so a
    # short seed never gained context and a long seed was predicted from its
    # first n_words tokens rather than its most recent ones.
    context_tokens = tokenizer.tokenize(" ".join(word_sequence).lower())[-n_words:]
    sub_sequence = " ".join(context_tokens)
    try :
      choice = unique_tokens[random.choice(predict_next_word(sub_sequence,creativity))]
    except Exception:
      # Best-effort fallback (e.g. a word missing from the vocabulary):
      # pick any vocabulary word. `Exception` instead of a bare `except`
      # so that KeyboardInterrupt / SystemExit still stop the generation.
      choice = random.choice(unique_tokens)
    word_sequence.append(choice)
  return " ".join(word_sequence)

In [None]:
# Demo runs. A notebook cell only displays its last bare expression, so both
# results are printed explicitly; otherwise the first text would be generated
# and silently discarded.
print(generate_text("Hello",15,5)) #length=15,best 5
print(generate_text("Sunday",10,3)) #length =10,best 3