In [1]:
from tensorflow.keras.models import load_model

In [2]:
# Load the saved encoder and decoder models from their .h5 files
# (paths are relative to the notebook's working directory).

encoder_model = load_model("enc_model_spell.h5")
decoder_model = load_model("dec_model_spell.h5")



In [3]:
encoder_model.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 38, 27)]          0         
                                                                 
 lstm (LSTM)                 [(None, 150),             106800    
                              (None, 150),                       
                              (None, 150)]                       
                                                                 
Total params: 106,800
Trainable params: 106,800
Non-trainable params: 0
_________________________________________________________________


In [4]:
import numpy as np

In [5]:
import re

In [6]:
# Matches every character that is NOT a lowercase ASCII letter or whitespace.
# Written as a raw string: "\s" inside a normal string literal is an invalid
# escape sequence, which Python warns about and plans to reject in the future.
re_special_char = r"[^a-z\s]"

In [7]:
def text_preprocessing(word):
    """Normalise a single word before it is encoded.

    The value is coerced to ``str``, lower-cased, and every character matched
    by ``re_special_char`` is removed.

    Args:
        word: The value to clean; non-string values are converted with ``str``.

    Returns:
        str: The lower-cased text with the matched characters stripped out.
    """
    lowered = str(word).lower()
    return re.sub(re_special_char, "", lowered)

In [8]:
# Base vocabulary: the space character followed by the 26 lowercase letters.
chars = [" "] + [chr(code) for code in range(ord("a"), ord("z") + 1)]

In [34]:
# Lookup tables between characters and their one-hot positions (filled in below).
char_to_pos, pos_to_char = {}, {}

In [35]:
# Register every base character under its index, in both directions.
for i, each_char in enumerate(chars):
    char_to_pos[each_char], pos_to_char[i] = i, each_char

In [36]:
# Number of base characters; also the first index not yet used in the lookup tables.
count = len(chars)

In [37]:
count

27

In [38]:
# Decoder-only control tokens: tab is fed as the start token, newline ends decoding.
dec_codes = list("\t\n")

In [39]:
# Sequence-length limits: encoder input width and the cap used when decoding.
max_enc_len, max_dec_len = 38, 40

In [40]:
# Give the decoder-only control tokens the indices right after the base characters.
# The index is derived from len(chars) instead of a running counter, so re-running
# this cell always produces the same mapping (the old version kept incrementing
# `count`, shifting the tokens to 29/30 on a second run).
for offset, each_char in enumerate(dec_codes):
    token_pos = len(chars) + offset
    char_to_pos[each_char] = token_pos
    pos_to_char[token_pos] = each_char

# Keep `count` equal to the total vocabulary size, as it was after the original loop.
count = len(chars) + len(dec_codes)

In [41]:
char_to_pos

{' ': 0,
 'a': 1,
 'b': 2,
 'c': 3,
 'd': 4,
 'e': 5,
 'f': 6,
 'g': 7,
 'h': 8,
 'i': 9,
 'j': 10,
 'k': 11,
 'l': 12,
 'm': 13,
 'n': 14,
 'o': 15,
 'p': 16,
 'q': 17,
 'r': 18,
 's': 19,
 't': 20,
 'u': 21,
 'v': 22,
 'w': 23,
 'x': 24,
 'y': 25,
 'z': 26,
 '\t': 27,
 '\n': 28}

In [56]:
pos_to_char

{0: ' ',
 1: 'a',
 2: 'b',
 3: 'c',
 4: 'd',
 5: 'e',
 6: 'f',
 7: 'g',
 8: 'h',
 9: 'i',
 10: 'j',
 11: 'k',
 12: 'l',
 13: 'm',
 14: 'n',
 15: 'o',
 16: 'p',
 17: 'q',
 18: 'r',
 19: 's',
 20: 't',
 21: 'u',
 22: 'v',
 23: 'w',
 24: 'x',
 25: 'y',
 26: 'z',
 27: '\t',
 28: '\n'}

-----------------------

In [18]:
# Sample input word for walking through one prediction by hand.
input_text = "pricea"

In [20]:
# Encoder input for a single word: shape (1, max_enc_len, len(chars)), all zeros for now.

enc_ip_data = np.zeros((1, max_enc_len, len(chars)), dtype = "float32")

In [21]:
enc_ip_data.shape

(1, 38, 27)

In [23]:
# One-hot encode the word: the second axis is the character's position in the
# word, the third axis is its index in the vocabulary.
for each_col, each_char in enumerate(input_text):
    enc_ip_data[0, each_col, char_to_pos[each_char]] = 1

In [24]:
enc_ip_data

array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]], dtype=float32)

In [25]:
# Run the encoder once; the two arrays it returns are passed to the decoder as its initial states.
encoder_state_a, encoder_state_c = encoder_model.predict(enc_ip_data, verbose = 0)

In [28]:
# Single-step decoder input: one-hot over the base characters plus the two control tokens.
target_seq = np.zeros((1, 1, len(chars) + 2), dtype = "float32")

In [42]:
# Mark the start token (tab) as the first decoder input.
target_seq[0,0,char_to_pos["\t"]] = 1

In [43]:
target_seq

array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]]],
      dtype=float32)

In [47]:
# First decoding step: feed the start token together with the encoder states;
# the decoder returns character scores and its updated states.
output, (dec_state_a, dec_state_c)  = decoder_model.predict([target_seq, encoder_state_a, encoder_state_c], verbose = 0)

In [53]:
np.max(output[0][0])

0.98920584

In [60]:
# Greedy choice: index of the highest-scoring character for this step.
predicted_char_idx = np.argmax(output[0][0])

In [57]:
pos_to_char[np.argmax(output[0][0])]

'p'

In [61]:
# Build the next decoder input: a fresh one-hot vector for the character just predicted.
target_seq = np.zeros((1, 1, len(chars) + 2), dtype = "float32")
target_seq[0,0,predicted_char_idx] = 1

In [62]:
# Second decoding step: this time the decoder's own previous states are fed back in.
output, (dec_state_a, dec_state_c)  = decoder_model.predict([target_seq, dec_state_a, dec_state_c], verbose = 0)

In [63]:
# Greedy choice again for the second character.
predicted_char_idx = np.argmax(output[0][0])

In [64]:
pos_to_char[predicted_char_idx]

'r'

In [67]:
def spell_correction(word_list):
    """Run each word through the encoder/decoder models and return the decoded text.

    For every word the function:

    1. normalises it with ``text_preprocessing`` (lower-casing, stripping
       special characters) so inference sees the same kind of text as the
       lookup tables were built for;
    2. one-hot encodes it into a ``(1, max_enc_len, len(chars))`` array,
       skipping characters that have no encoder index and truncating to
       ``max_enc_len`` characters (previously these cases raised
       ``KeyError`` / ``IndexError``);
    3. obtains the two initial state arrays from ``encoder_model``;
    4. decodes greedily with ``decoder_model``, one character per step,
       starting from the tab start token and stopping at the newline end
       token or after ``max_dec_len + 1`` characters.

    Args:
        word_list: Iterable of words to correct.

    Returns:
        list[str]: One decoded string per input word, in input order, with
        the end token removed.
    """
    enc_vocab_size = len(chars)      # width of the encoder one-hot vectors
    dec_vocab_size = len(chars) + 2  # decoder also knows the start/end tokens
    start_pos = char_to_pos["\t"]

    result = []
    for input_txt in word_list:
        cleaned = text_preprocessing(input_txt)

        # Keep only characters the encoder can represent. The control tokens
        # have indices >= enc_vocab_size and would overflow the one-hot axis.
        encodable = [
            char_to_pos[each_char]
            for each_char in cleaned
            if char_to_pos.get(each_char, enc_vocab_size) < enc_vocab_size
        ][:max_enc_len]

        enc_ip_data = np.zeros((1, max_enc_len, enc_vocab_size), dtype="float32")
        for each_col, pos in enumerate(encodable):
            enc_ip_data[0, each_col, pos] = 1

        # The encoder's output states seed the decoder; afterwards the decoder
        # feeds its own states back in on every step.
        state_a, state_c = encoder_model.predict(enc_ip_data, verbose=0)

        target_seq = np.zeros((1, 1, dec_vocab_size), dtype="float32")
        target_seq[0, 0, start_pos] = 1
        decoded_sentence = ""

        # Same length cap as the original flag-based loop, which stopped once
        # len(decoded_sentence) exceeded max_dec_len.
        for _ in range(max_dec_len + 1):
            decoder_outputs, (state_a, state_c) = decoder_model.predict(
                [target_seq, state_a, state_c], verbose=0
            )
            char_idx = int(np.argmax(decoder_outputs[0, 0, :]))
            char = pos_to_char[char_idx]
            if char == "\n":  # end token: the word is complete
                break
            decoded_sentence += char

            # The character just predicted becomes the next decoder input.
            target_seq = np.zeros((1, 1, dec_vocab_size), dtype="float32")
            target_seq[0, 0, char_idx] = 1

        result.append(decoded_sentence)
    return result

    

In [71]:
spell_correction(["information","price"])

['information', 'price']