<a href="https://colab.research.google.com/github/Rajfekar/PythonML/blob/main/TEST_ENCODER_DECODER_MODEL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
import numpy as np
import pickle
import json
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences

# ✅ Load the model
model = load_model("base_encoder_decoder_model.h5")  # or .keras

# ✅ Load tokenizers and reverse dictionary
with open("tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)

with open("tokenizer_hindi.pkl", "rb") as f:
    tokenizer_hindi = pickle.load(f)

with open("rev_token_hindi.json", "r") as f:
    reverse_tokenizer = json.load(f)

# ✅ Define the prediction function
def predict_translation(input_text):
    try:
        tokenized = tokenizer.texts_to_sequences([input_text])
        encoder_input = pad_sequences(tokenized, maxlen=260)

        sos_token_id = tokenizer_hindi.word_index.get('start', 1)
        max_len = 260
        decoder_input = np.zeros((1, max_len))
        decoder_input[0, 0] = sos_token_id

        predictions = model.predict([encoder_input, decoder_input], verbose=0)
        predicted_tokens = np.argmax(predictions, axis=-1)

        sentence = ' '.join([
            reverse_tokenizer.get(str(token), '<unk>')
            for token in predicted_tokens[0]
            if token not in [0, tokenizer_hindi.word_index.get('start'), tokenizer_hindi.word_index.get('end')]
        ])

        return sentence
    except Exception as e:
        return f"Error: {str(e)}"




In [23]:
def predict_translation(input_text):
    try:
        max_len = 260
        sos_token = tokenizer_hindi.word_index.get('start', 1)
        eos_token = tokenizer_hindi.word_index.get('end', 2)

        # Tokenize & pad encoder input
        encoder_input = tokenizer.texts_to_sequences([input_text])
        encoder_input = pad_sequences(encoder_input, maxlen=max_len, padding='post')

        # Initialize decoder input
        decoder_input = np.zeros((1, max_len))
        decoder_input[0, 0] = sos_token

        output_sentence = []

        for i in range(1, max_len):
            predictions = model.predict([encoder_input, decoder_input], verbose=0)
            print(predictions)
            predicted_token_id = np.argmax(predictions[0, i - 1, :])
            print(predicted_token_id)

            if predicted_token_id == eos_token or predicted_token_id == 0:
                break

            predicted_word = reverse_tokenizer.get(predicted_token_id, '<unk>')
            output_sentence.append(predicted_word)

            decoder_input[0, i] = predicted_token_id

        return ' '.join(output_sentence)

    except Exception as e:
        return f"Error: {str(e)}"



In [32]:

reverse_tokenizer = {v: k for k, v in tokenizer_hindi.word_index.items()}

input_text = "As for the other derivatives of sulphur"

In [33]:
print({v: k for k, v in tokenizer_hindi.word_index.items()}[3])


start


In [None]:
top_k = predictions[0, i - 1, :].argsort()[-5:][::-1]
print("Top-5 token IDs:", top_k)
print("Top-5 words:", [reverse_tokenizer.get(idx, '<unk>') for idx in top_k])


In [None]:
pred = predict_translation(input_text)
pred

[[[5.9477316e-06 2.5498490e-05 5.1641751e-07 ... 1.8433486e-12
   2.3331196e-12 2.7211302e-12]
  [9.9998385e-01 1.1670096e-06 3.7769230e-08 ... 1.5867604e-10
   4.5656029e-10 2.2535858e-10]
  [9.9999183e-01 2.0690935e-07 3.1854599e-09 ... 7.5267313e-11
   2.3823010e-10 1.2213106e-10]
  ...
  [9.9999714e-01 8.7286525e-09 9.7287902e-09 ... 2.8424204e-11
   9.5631079e-11 5.0976202e-11]
  [9.9999714e-01 8.7286685e-09 9.7287902e-09 ... 2.8424204e-11
   9.5631079e-11 5.0976202e-11]
  [9.9999714e-01 8.7286685e-09 9.7287902e-09 ... 2.8424204e-11
   9.5631079e-11 5.0976202e-11]]]
3
[[[5.9477316e-06 2.5498490e-05 5.1641751e-07 ... 1.8433486e-12
   2.3331196e-12 2.7211302e-12]
  [5.7846296e-06 1.2555216e-05 4.8414108e-07 ... 1.9710791e-12
   2.2384638e-12 2.3629907e-12]
  [9.9998313e-01 1.0117328e-06 4.9933767e-08 ... 1.7792330e-10
   5.1713422e-10 2.1267718e-10]
  ...
  [9.9999714e-01 8.7286525e-09 9.7287902e-09 ... 2.8424204e-11
   9.5631079e-11 5.0976202e-11]
  [9.9999714e-01 8.7286685e-09 9.7