In [1]:
%%time
import numpy as np 
import os
import pickle as pk
import matplotlib.pyplot as plt
from random import randint
# Machine Learning
import tensorflow as tf
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras import initializers, regularizers, constraints, optimizers, layers
from keras.models import load_model

Wall time: 2.92 s


Using TensorFlow backend.


In [37]:
class Predictor:
    ''' class to load models, encoders, and other methods that help in making predictions

    load_network(), load_encoder() and load_sequences() populate the instance;
    generate_sequence() then produces new text one word at a time.
    '''
    def __init__(self):
        self.encoder = None       # object exposing texts_to_sequences() and word_index
        self.model = None         # network returned by load_model()
        self.sequences = None     # text sequences read by load_sequences()
        self.seq_length = None    # number of tokens the encoded seed is cut to
        self.seed_text = None     # last seed picked by generate_seed()
        self.encoded_seed = None  # last result of pad_input_sequence()

    def load_sequences(self,path, binary = True):
        ''' function that is used to load the text sequences from either
        a binary pickle file or a text file. returns a list of sequences
        and the expected sequence length

        path   -- location of the sequence file
        binary -- True to unpickle the file, False to read it as plain text
                  with one sequence per line

        returns the tuple (sequences, seq_length)
        '''

        # checks if the file is serialized or not
        if binary:
            with open(path,'rb') as file:
                # loads the sequences
                # NOTE: unpickling can execute arbitrary code -- only load trusted files
                self.sequences = pk.load(file)
                print(f'Sequences loaded from: {path}')

        # loads the file from an unserialized format
        else:
            with open(path,'r') as file:
                doc = file.read()
                self.sequences = doc.split('\n')

        # seq_length is an int: the number of whitespace separated tokens in the
        # first sequence minus one (presumably the last token of every sequence
        # is the word the network was trained to predict -- TODO confirm)
        self.seq_length = len(self.sequences[0].split()) - 1
        return self.sequences, self.seq_length

    def load_encoder(self,path):
        ''' unpickles the encoder stored at path, keeps it on the instance and returns it '''
        with open(path,'rb') as file:
            # NOTE: unpickling can execute arbitrary code -- only load trusted files
            self.encoder = pk.load(file)
            print(f'Encoder loaded from: {path}')

        return self.encoder

    def load_network(self,path):
        ''' loads the saved network at path with load_model, keeps it on the instance and returns it '''
        self.model = load_model(path)
        return self.model

    def generate_seed(self,sequences = None):
        ''' picks one sequence at random, stores it in self.seed_text and returns it

        sequences -- the sequences to draw from, defaults to self.sequences.
                     The (sequences, seq_length) tuple returned by
                     load_sequences is accepted as well and is unwrapped.

        raises TypeError when there are no sequences to draw from and
        ValueError (from randint) when the collection is empty
        '''
        sequences = self.sequences if sequences is None else sequences
        if sequences is None:
            raise TypeError('No sequences available: call load_sequences or pass sequences')

        # callers sometimes hand over the whole return value of load_sequences
        if (isinstance(sequences, tuple) and len(sequences) == 2
                and isinstance(sequences[1], int) and not isinstance(sequences[0], str)):
            sequences = sequences[0]

        # randint includes both end points, so the last valid index is len - 1
        section = randint(0, len(sequences) - 1)
        # index the collection that was actually sampled, not always self.sequences
        self.seed_text = sequences[section]
        print(f'Generated from section: {section}')
        return self.seed_text

    def pad_input_sequence(self,seed = None, seq_len = None):
        ''' encodes seed with the loaded encoder and fits it to a fixed length

        seed    -- text to encode, defaults to self.seed_text
        seq_len -- length to fit the encoded text to, defaults to self.seq_length

        returns the result of pad_sequences, which is also stored in
        self.encoded_seed. raises TypeError when no encoder has been loaded
        '''
        # the seed text must be encoded to integers using
        # the same tokenizer that we used when training the model.
        if self.encoder is None:
            raise TypeError(f'Encoder can not be of type: {self.encoder}')

        # load the input sequence
        seed = self.seed_text if seed is None else seed
        maxlen = self.seq_length if seq_len is None else seq_len
        self.encoded_seed = self.encoder.texts_to_sequences([seed])[0]
        # Truncate the sequence to a fixed length; truncating='pre' is passed so
        # the cut happens at the start and the most recent words are kept
        self.encoded_seed = pad_sequences([self.encoded_seed], maxlen = maxlen, truncating='pre')
        return self.encoded_seed

    @staticmethod
    def _predict_index(model, encoded):
        ''' returns the index of the most likely next word as a plain int '''
        if hasattr(model, 'predict_classes'):
            classes = model.predict_classes(encoded, verbose = 0)
        else:
            # predict_classes is missing on newer Keras models; argmax over the
            # predicted scores is the replacement for a multi-class output
            # (assumes one score per vocabulary index -- TODO confirm)
            classes = np.argmax(model.predict(encoded, verbose = 0), axis = -1)
        return int(np.asarray(classes).ravel()[0])

    def generate_sequence(self, model = None, seed = None, seq_len = None, output_len = 100):
        ''' generates output_len new words that continue the seed text

        model      -- network used for the predictions, defaults to self.model
        seed       -- text to continue, a random seed is drawn with
                      generate_seed() when it is not given
        seq_len    -- number of tokens fed to the network, defaults to self.seq_length
        output_len -- number of words to generate

        returns the generated words joined by single spaces (the seed itself
        is not part of the result). raises TypeError when no encoder is loaded
        '''

        # all of the input values are set to none by default so the first step is to handle this
        model = self.model if model is None else model
        seq_len = self.seq_length if seq_len is None else seq_len
        input_text = self.generate_seed() if seed is None else seed

        if self.encoder is None:
            raise TypeError(f'Encoder can not be of type: {self.encoder}')

        # invert the vocabulary once instead of scanning it for every word;
        # setdefault keeps the first word seen for an index, like the scan did
        index_to_word = {}
        for word, index in self.encoder.word_index.items():
            index_to_word.setdefault(index, word)

        # the list that the output sequence will be loaded into
        result = list()

        # generate a fixed number of words
        for _ in range(output_len):
            # encode the text as integer
            encoded = self.pad_input_sequence(input_text, seq_len = seq_len)

            # predict the index of the next word with the model that was asked for
            pred = self._predict_index(model, encoded)

            # map predicted word index to word, '' when the index has no word
            predicted_word = index_to_word.get(pred, '')

            # append to the input text (this is so that our next predicted word is based on the word we just predicted)
            input_text += ' ' + predicted_word
            result.append(predicted_word) # this list will be our newly generated sequence

        return ' '.join(result)

In [38]:
# every artefact lives under one project folder, so the locations are derived from it
PROJECT_DIR = r'E:\Documents\My Projects\Text Generation'
seq_path = PROJECT_DIR + r'\data\HEAM.seq'
encoder_path = PROJECT_DIR + r'\Models\encoder.pkl'
model_path = PROJECT_DIR + r'\Models\BiLSTM_Language_Generation.hdf5'

# build the predictor, then attach the network, the encoder and the sequences
p = Predictor()
p.load_network(model_path)
p.load_encoder(encoder_path)
# load_sequences returns (sequences, seq_length); seq keeps that whole tuple
seq = p.load_sequences(seq_path)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Encoder loaded from: E:\Documents\My Projects\Text Generation\Models\encoder.pkl
Sequences loaded from: E:\Documents\My Projects\Text Generation\data\HEAM.seq


In [54]:
# seq is the (sequences, seq_length) tuple that load_sequences returned
seed = p.generate_seed(sequences=seq)
# len() of the seed counts characters, not words
print(len(seed))
seed

Generated from section: 112847
290


'of the hunt for the second inevitability of the mind is that you have concepts because the human brain is wired to construct a conceptual system you build concepts for the smallest physical details like fleeting bits of light and sound and for incredibly complex ideas like and not to bring'

In [55]:
# continue the seed text; output_len is left at its default of 100 words
gen = p.generate_sequence(seed = seed)

In [56]:
# gen holds only the newly generated words, the seed is not included
print(gen)

your boss and deep out of wavelengths but usually they can benefit in the circumstances you have a banana to his friend you want to get worse but if we try to mention greater intelligent of emotion people who showed them of animals alone like ideas when you see what he gave it or if someone else i am correct angry so not wrong feel them or danger is rendered emotionally longterm learn an emotional creatures who encourage us to learn goalbased concepts like is an instance of anger for someone just like you might use a question for about
