In [128]:
%%time
import numpy as np 
import os
import pickle as pk
import matplotlib.pyplot as plt
from random import randint
# Machine Learning
import tensorflow as tf
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras import initializers, regularizers, constraints, optimizers, layers
from keras.models import load_model

Wall time: 0 ns


In [135]:
class Predictor:
    ''' Loads a trained language model, its tokenizer and the text sequences,
    and uses them to generate new text from a seed sequence.

    Typical use: call load_network, load_encoder and load_sequences once,
    then call generate_sequence.
    '''

    # Folder generate_sequence writes to when no `dest` is supplied.
    DEFAULT_OUTPUT_DIR = r'E:\Documents\My Projects\Text Generation\Generated Text'

    def __init__(self):
        self.encoder = None       # tokenizer exposing texts_to_sequences / word_index
        self.model = None         # network returned by load_model
        self.sequences = None     # list of whitespace-separated text sequences
        self.seq_length = None    # word count of the first sequence minus one
        self.seed_text = None     # last seed chosen by generate_seed
        self.encoded_seed = None  # last padded integer encoding of a seed
        self.section = None       # index of the sequence used as the seed, if known
        self.result = None        # text produced by the last generate_sequence call

    def load_sequences(self,path, binary = True):
        ''' Load the text sequences from a pickle file (binary = True) or from
        a plain text file holding one sequence per line (binary = False).

        Returns (sequences, seq_length), where seq_length is the number of
        words in the first sequence minus one.

        Raises ValueError if the file holds no sequences.
        '''
        if binary:
            # NOTE: unpickling can execute arbitrary code - only load trusted files
            with open(path,'rb') as file:
                sequences = pk.load(file)
        else:
            with open(path,'r') as file:
                sequences = file.read().split('\n')
            # a file ending in a newline would otherwise yield a trailing empty sequence
            if sequences and sequences[-1] == '':
                sequences.pop()

        if len(sequences) == 0:
            raise ValueError(f'No sequences found in: {path}')

        self.sequences = sequences
        print(f'Sequences loaded from: {path}')

        # presumably every sequence is the model input plus one target word,
        # hence the "- 1" - TODO confirm against the training code
        self.seq_length = len(self.sequences[0].split()) - 1
        return self.sequences, self.seq_length

    def load_encoder(self,path):
        ''' Unpickle the tokenizer stored at `path`, keep it on the instance and return it. '''
        # NOTE: unpickling can execute arbitrary code - only load trusted files
        with open(path,'rb') as file:
            self.encoder = pk.load(file)
            print(f'Encoder loaded from: {path}')

        return self.encoder

    def load_network(self,path):
        ''' Load the saved network at `path` with load_model, keep it on the instance and return it. '''
        self.model = load_model(path)
        return self.model

    def generate_seed(self,sequences = None, index = None):
        ''' Pick one sequence to use as the seed text.

        sequences: sequences to choose from (defaults to self.sequences). The
                   (sequences, seq_length) tuple returned by load_sequences is
                   accepted as well.
        index:     position of the sequence to use; a random one is chosen
                   when omitted.

        Stores the choice in self.seed_text / self.section and returns the seed.
        Raises ValueError when there is nothing to choose from.
        '''
        pool = self.sequences if sequences is None else sequences

        # callers may hand over the raw return value of load_sequences
        if isinstance(pool, tuple) and len(pool) == 2 and isinstance(pool[0], (list, tuple)):
            pool = pool[0]

        if pool is None or len(pool) == 0:
            raise ValueError('No sequences available to generate a seed from')

        if index is not None:
            section = index
        else:
            # randint is inclusive on both ends, so the last valid index is len - 1
            section = randint(0, len(pool) - 1)

        self.seed_text = pool[section]
        self.section = section
        print(f'Generated from section: {section}')
        return self.seed_text

    def pad_input_sequence(self,seed = None, maxlen = None):
        ''' Encode `seed` (defaults to self.seed_text) to integers with the
        loaded tokenizer and pad / truncate it to `maxlen` (defaults to
        self.seq_length). Truncation drops words from the front.

        Returns the padded array, which is also stored in self.encoded_seed.
        Raises TypeError if no encoder has been loaded.
        '''
        # the seed text must be encoded to integers using
        # the same tokenizer that was used when training the model
        if self.encoder is None:
            raise TypeError(f'Encoder can not be of type: {self.encoder}')

        seed = self.seed_text if seed is None else seed
        maxlen = self.seq_length if maxlen is None else maxlen

        encoded = self.encoder.texts_to_sequences([seed])[0]
        self.encoded_seed = pad_sequences([encoded], maxlen = maxlen, truncating='pre')
        return self.encoded_seed

    def generate_sequence(self, model = None, seed = None, seq_len = None, output_len = 100, save_data = True, section = None, dest = None, filename = None):
        ''' Generate `output_len` words, feeding every predicted word back into
        the model input.

        model:      network to predict with (defaults to self.model)
        seed:       seed text; when omitted one is taken from the loaded sequences
        seq_len:    input length the seed is padded / truncated to
                    (defaults to self.seq_length)
        output_len: number of words to generate
        save_data:  when True the seed and the generated text are written to a file
        section:    index of the sequence to use as the seed when `seed` is omitted
        dest:       output folder (defaults to DEFAULT_OUTPUT_DIR)
        filename:   output file name; the user is prompted when it is omitted

        Returns the generated text, which is also stored in self.result.
        Raises TypeError if no model or encoder is available.
        '''
        model = self.model if model is None else model
        if model is None:
            raise TypeError(f'Model can not be of type: {model}')
        if self.encoder is None:
            raise TypeError(f'Encoder can not be of type: {self.encoder}')
        seq_len = self.seq_length if seq_len is None else seq_len

        self.section = section
        seed_text = self.generate_seed(index = section) if seed is None else seed

        # build the index -> word lookup once instead of scanning the
        # vocabulary for every generated word (first match wins, as before)
        index_to_word = {}
        for word, index in self.encoder.word_index.items():
            index_to_word.setdefault(index, word)

        input_text = seed_text
        words = []
        for _ in range(output_len):
            # encode the running text as a fixed-length integer sequence
            encoded = self.pad_input_sequence(input_text, maxlen = seq_len)

            # most probable class; predict_classes is not available in recent Keras
            probabilities = model.predict(encoded, verbose = 0)
            predicted_index = int(np.argmax(probabilities, axis = -1)[0])

            # an index without a word (e.g. the padding index) maps to ''
            predicted_word = index_to_word.get(predicted_index, '')

            # the next prediction is conditioned on the word just predicted
            input_text += ' ' + predicted_word
            words.append(predicted_word)

        self.result = ' '.join(words)

        if save_data:
            dest = self.DEFAULT_OUTPUT_DIR if dest is None else dest
            if filename is None:
                filename = input("Enter the filename: ")
            # an empty answer falls back to the section-based name in saveIO
            filename = filename.strip() or None
            # save the original seed, not the seed with the generated words appended
            self.saveIO((seed_text, self.result), dest, name = filename)
        return self.result

    def saveIO(self, data, dest, name = None):
        ''' Save the model input and output to a text file.

        data: (input, output) pair
        dest: the destination folder
        name: the file name; when omitted, self.section is used as the name

        Raises ValueError if `data` is not a pair and TypeError if neither a
        name nor a section is available.
        '''
        if len(data) != 2:
            raise ValueError('Data must be the input and output from the model')
        x, y = data

        # if a filename is not provided, fall back to the section number
        if name is None:
            if self.section is None:
                raise TypeError(f"Name cannot be of type: {None}")
            name = str(self.section)

        dest = os.path.join(dest, name)
        # utf-8 so text outside the platform default code page does not fail on write
        with open(dest, 'w', encoding = 'utf-8') as file:
            file.write(f'Input seed: {str(x)}\n')
            file.write(f'Generated text: {str(y)}\n')

        print(f'File saved to: {dest}')
                
                
        
        

In [136]:
# Locations of the serialized sequences, the pickled encoder and the saved
# network. These are absolute paths on the author's machine; adjust them
# before re-running the notebook elsewhere.
seq_path = r'E:\Documents\My Projects\Text Generation\data\HEAM.seq'
encoder_path = r'E:\Documents\My Projects\Text Generation\Models\encoder.pkl'
model_path = r'E:\Documents\My Projects\Text Generation\Models\BiLSTM_Language_Generation.hdf5'

# instantiate a predictor and load the network, the encoder and the sequences into it
p = Predictor()
p.load_network(model_path)
p.load_encoder(encoder_path)
# load_sequences returns (sequences, seq_length), so `seq` is that 2-tuple,
# not the bare list of sequences
seq = p.load_sequences(seq_path)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Encoder loaded from: E:\Documents\My Projects\Text Generation\Models\encoder.pkl
Sequences loaded from: E:\Documents\My Projects\Text Generation\data\HEAM.seq


In [137]:
# Pick a random seed sequence. `seq` is the value returned by load_sequences
# in the loading cell, i.e. the (sequences, seq_length) tuple.
seed = p.generate_seed(sequences=seq)
# the seed is a string, so this is its length in characters, not in words
print(len(seed))
seed

Generated from section: 72992
287


'body budgets if their brain serves up more glucose than their body needs a quick scamper up a tree will bring their energy level back into balance humans are unique in that we can regulate the budget without moving using purely mental concepts but when this skill fails you remember that'

In [138]:
# NOTE(review): `index` is not passed to generate_sequence below, so it has
# no effect on the generated text - confirm whether section=index was intended
index = 307
# save_data defaults to True, so this call prompts for a filename and writes
# the seed and the generated text to disk
gen = p.generate_sequence(seed = seed)

Enter the filename: test.txt
test.txt
File saved to: E:\Documents\My Projects\Text Generation\Generated Text\test.txt


In [139]:
# show the generated text returned by generate_sequence in the previous cell
print(gen)

you too can categorize you have unpleasant affect when you feel ill and when you might get a parent to limit with even if you learn a few true or parent even even or influence your conceptual system from your body budget as a matter of depression your favorite promotion you other street or quick alive and if you know that your perception is highly variable your eyes with your boss your inner bag and creating your genes nevertheless you construct your experiences and corrected by your incoming sensory input your brain predicts you might have more finely based on
