#### About

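> Automatic text correction: a toy word-level encoder–decoder (seq2seq) LSTM that learns to map misspelled sentences to their corrected form.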


In [119]:
import numpy as np
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical


In [120]:

# Hand-written training corpus: each entry is (misspelled sentence, corrected sentence).
text_pairs = [
    ('i lik to go for a wal', 'i like to go for a walk'),
    ('plese buy som milk', 'please buy some milk'),
]

# Split the pairs into the two parallel lists used by the rest of the notebook.
input_texts = [noisy for noisy, _ in text_pairs]
target_texts = [clean for _, clean in text_pairs]


In [121]:
# Collect every distinct whitespace-separated word that appears in either the
# misspelled inputs or the corrected targets.
vocab = {
    word
    for sentence in input_texts + target_texts
    for word in sentence.split()
}

In [122]:
# Create word-to-index and index-to-word dictionaries.
# Word indices start at 1 because index 0 is used as the padding value.
# The vocabulary is sorted before numbering: iterating a set of strings can
# yield a different order on every interpreter run (string hash
# randomization), which would make the word <-> index mapping, and therefore
# the trained model, irreproducible across kernel restarts.
word2idx = {word: idx for idx, word in enumerate(sorted(vocab), start=1)}
idx2word = {idx: word for word, idx in word2idx.items()}

# Containers for the index sequences built from the input and target texts
encoder_input_data = []
decoder_input_data = []
decoder_target_data = []

In [123]:
# Start from empty lists so that re-running this cell neither appends
# duplicate examples nor fails once the padding cell has rebound these names
# to NumPy arrays.
encoder_input_data = []
decoder_input_data = []
decoder_target_data = []

for input_text, target_text in zip(input_texts, target_texts):
    encoder_input_seq = [word2idx[word] for word in input_text.split()]
    target_seq = [word2idx[word] for word in target_text.split()]

    # Teacher forcing: at step t the decoder sees the token from step t-1 and
    # must predict the token at step t. Index 0 (never assigned to a real
    # word) is prepended as the start marker so that the FIRST target word is
    # also predicted, and appended to the target as the end/padding marker.
    # The decoder Embedding does not mask index 0, so it is an ordinary
    # learnable embedding row.
    decoder_input_seq = [0] + target_seq
    decoder_target_seq = target_seq + [0]

    encoder_input_data.append(encoder_input_seq)
    decoder_input_data.append(decoder_input_seq)
    decoder_target_data.append(decoder_target_seq)


In [124]:

# The longest sequence on each side fixes the padded width.
max_encoder_seq_length = len(max(encoder_input_data, key=len))
max_decoder_seq_length = len(max(decoder_input_data, key=len))

# Right-pad ('post') every sequence with zeros up to that width.
encoder_input_data = pad_sequences(
    encoder_input_data, padding='post', maxlen=max_encoder_seq_length
)
decoder_input_data = pad_sequences(
    decoder_input_data, padding='post', maxlen=max_decoder_seq_length
)

# Targets are padded the same way and then one-hot encoded over the
# vocabulary plus the extra index 0.
decoder_target_data = to_categorical(
    pad_sequences(decoder_target_data, padding='post', maxlen=max_decoder_seq_length),
    num_classes=len(vocab) + 1,
)


In [125]:
# Model hyper-parameters
embedding_dim, hidden_units = 50, 128  # embedding width, LSTM state size
vocab_size = 1 + len(vocab)  # one extra slot for the padding token (index 0)


In [126]:
# Encoder: embeds the (padded) misspelled sentence and summarises it into the
# LSTM's final hidden and cell state.
encoder_inputs = Input(shape=(max_encoder_seq_length,))
# mask_zero=True marks index 0 as padding so the LSTM skips those timesteps.
# Without it, sentences shorter than max_encoder_seq_length would have their
# final state computed after consuming the trailing padding steps, and that
# final state is the only information handed to the decoder.
encoder_embedding = Embedding(vocab_size, embedding_dim, mask_zero=True)(encoder_inputs)
encoder_lstm = LSTM(hidden_units, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
# Only the states are used downstream (as the decoder's initial state).
encoder_states = [state_h, state_c]


2023-05-15 18:09:08.561394: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-05-15 18:09:08.568856: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-05-15 18:09:08.570763: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

In [127]:

# Decoder: embeds the decoder-side token sequence, runs it through an LSTM
# that starts from the encoder's final states, and projects every timestep
# onto a softmax distribution over the vocabulary.
decoder_inputs = Input(shape=(max_decoder_seq_length,))
decoder_embedding = Embedding(input_dim=vocab_size, output_dim=embedding_dim)(decoder_inputs)
decoder_lstm = LSTM(units=hidden_units, return_sequences=True, return_state=True)
# The decoder's own final states are not needed here, only the per-step outputs.
decoder_hidden_seq, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
decoder_dense = Dense(units=vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_hidden_seq)

2023-05-15 18:09:09.139709: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-05-15 18:09:09.148234: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-05-15 18:09:09.151657: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

In [128]:



# Define the model: (encoder tokens, decoder tokens) -> per-step word distribution
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Compile the model (targets are one-hot encoded, hence categorical cross-entropy)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Keras takes the validation split from the LAST samples of the data. On a
# tiny corpus (e.g. the two example pairs) a 20% split therefore removes a
# large share of the examples from training, so a validation set is only held
# out once there is enough data for the split to be meaningful.
MIN_SAMPLES_FOR_VALIDATION = 10
num_samples = len(encoder_input_data)
validation_split = 0.2 if num_samples >= MIN_SAMPLES_FOR_VALIDATION else 0.0

# Train the model; keep the History object so the loss curves can be inspected.
history = model.fit(
    [encoder_input_data, decoder_input_data],
    decoder_target_data,
    batch_size=32,
    epochs=10,
    validation_split=validation_split,
)

Epoch 1/10


2023-05-15 18:09:09.965735: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-05-15 18:09:09.970321: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-05-15 18:09:09.973884: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus



2023-05-15 18:09:17.212103: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-05-15 18:09:17.215328: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-05-15 18:09:17.220575: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f840a7d4f40>

In [129]:
# Sentence to correct. Every word must already be in word2idx; an unseen word
# raises KeyError.
input_text = 'i lik to go for a wal'

# Map words to indices, then right-pad to the encoder's fixed input width.
# The sequence is wrapped in a list to form a batch of one.
input_token_ids = list(map(word2idx.__getitem__, input_text.split()))
encoder_input_seq = pad_sequences(
    [input_token_ids], padding='post', maxlen=max_encoder_seq_length
)


In [130]:
# Predict output for the single input sequence.
# NOTE(review): the decoder side is fed the first row of decoder_input_data,
# which was built from target_texts, i.e. the model sees ground-truth tokens
# here (teacher forcing) rather than decoding on its own.
teacher_forced_decoder_input = decoder_input_data[:1]
batch_predictions = model.predict([encoder_input_seq, teacher_forced_decoder_input])
# Batch of one: keep the only element.
predicted_output = batch_predictions[0]


2023-05-15 18:09:19.977994: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-05-15 18:09:19.980370: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-05-15 18:09:19.987037: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus



In [131]:
# Convert predicted output to text: pick the most probable index at every
# timestep, drop index 0 (padding), and look the remaining indices up.
predicted_ids = np.argmax(predicted_output, axis=-1)
predicted_words = [idx2word[idx] for idx in predicted_ids if idx != 0]
predicted_text = ' '.join(predicted_words)

print('Original input:', input_text)
print('Predicted output:', predicted_text)


Original input: i lik to go for a wal
Predicted output: go go go go a walk walk
