In [None]:
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Embedding

# Placeholder vocabulary sizes -- swap in the real values for your corpus.
num_encoder_tokens = 1000   # source-side vocabulary size
num_decoder_tokens = 1000   # target-side vocabulary size

# Encoder: embed source token ids and keep only the LSTM's final states.
encoder_inputs = Input(shape=(None,))
enc_emb = Embedding(num_encoder_tokens, 256)(encoder_inputs)
encoder_lstm = LSTM(256, return_state=True)
# With return_state=True the layer yields (output, h, c); the two states are
# collected into a list so they can seed the decoder.
encoder_outputs, *encoder_states = encoder_lstm(enc_emb)
state_h, state_c = encoder_states

# Decoder: embed target token ids and start the LSTM from the encoder states.
decoder_inputs = Input(shape=(None,))
dec_emb = Embedding(num_decoder_tokens, 256)(decoder_inputs)
decoder_lstm = LSTM(256, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(dec_emb, initial_state=encoder_states)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Training model: (source ids, target ids) -> per-step softmax over target tokens.
model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_outputs)

# Compile (use sparse_categorical_crossentropy if targets are integers)
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
)

print("✅ Seq2Seq model built and compiled successfully!")


✅ Seq2Seq model built and compiled successfully!


In [36]:
# Install TensorFlow (if fresh Colab)
!pip install tensorflow

import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense
import numpy as np

# Hyperparameters shared by the encoder/decoder demo below
vocab_inp_size = vocab_tar_size = 5000   # source / target vocabulary sizes
embedding_dim, units = 256, 512          # embedding width, LSTM units

# ----- Encoder -----
class Encoder(tf.keras.Model):
    """Embed source token ids and run them through a single LSTM.

    Args:
        vocab_size: Number of entries in the embedding table.
        embedding_dim: Width of each embedding vector.
        enc_units: Number of LSTM units.
    """

    def __init__(self, vocab_size, embedding_dim, enc_units):
        super().__init__()
        self.enc_units = enc_units
        self.embedding = Embedding(vocab_size, embedding_dim)
        # return_sequences exposes every step's output (consumed by attention);
        # return_state additionally exposes the final hidden and cell states.
        self.lstm = LSTM(enc_units, return_sequences=True, return_state=True)

    def call(self, x):
        """Return ``(per-step outputs, final hidden state, final cell state)``."""
        embedded = self.embedding(x)
        output, state_h, state_c = self.lstm(embedded)
        return output, state_h, state_c

# ----- Bahdanau Attention -----
class BahdanauAttention(tf.keras.layers.Layer):
    """Additive attention that scores one query against a sequence of values.

    Args:
        units: Width of the two projections applied before the tanh.
    """

    def __init__(self, units):
        super().__init__()
        self.W1 = Dense(units)  # projection applied to the query
        self.W2 = Dense(units)  # projection applied to the values
        self.V = Dense(1)       # reduces each time step to a single score

    def call(self, query, values):
        """Return ``(context_vector, attention_weights)`` for ``query`` over ``values``."""
        # query: (batch, hidden_size), values: (batch, max_len, hidden_size)
        # Insert a length-1 time axis so the query broadcasts across every step.
        expanded_query = tf.expand_dims(query, 1)
        projected = self.W1(expanded_query) + self.W2(values)
        score = self.V(tf.nn.tanh(projected))
        # Normalise over the time axis so the weights of each example sum to 1.
        attention_weights = tf.nn.softmax(score, axis=1)
        # Weighted sum of the values along the time axis.
        context_vector = tf.reduce_sum(attention_weights * values, axis=1)
        return context_vector, attention_weights

# ----- Decoder -----
class Decoder(tf.keras.Model):
    """Single-step attention decoder: attend, embed, run the LSTM, project to logits.

    Args:
        vocab_size: Target vocabulary size (embedding rows and logit width).
        embedding_dim: Width of each target embedding vector.
        dec_units: Number of LSTM units; also the attention projection width.
    """

    def __init__(self, vocab_size, embedding_dim, dec_units):
        super().__init__()
        self.dec_units = dec_units
        self.embedding = Embedding(vocab_size, embedding_dim)
        self.lstm = LSTM(self.dec_units, return_sequences=True, return_state=True)
        self.fc = Dense(vocab_size)  # no activation: the outputs are raw logits
        self.attention = BahdanauAttention(self.dec_units)

    def call(self, x, hidden, enc_output, cell_state=None):
        """Run one decoding step.

        Args:
            x: Token ids for the current step. The context vector is given a
                length-1 time axis before concatenation, so ``x`` must carry
                exactly one time step per example.
            hidden: (batch, hidden_size) query used for attention.
            enc_output: Encoder outputs that attention is computed over.
            cell_state: Optional LSTM cell state. When given, the LSTM starts
                from ``[hidden, cell_state]`` so recurrent state can be carried
                from one decoding step to the next. When omitted, the LSTM runs
                from its default initial state and ``hidden`` only drives the
                attention query.

        Returns:
            ``(logits, state_h, state_c, attention_weights)`` where ``logits``
            has the time axis flattened into the batch axis.
        """
        # hidden: (batch, hidden_size)
        context_vector, attention_weights = self.attention(hidden, enc_output)
        x = self.embedding(x)
        # Prepend the context vector to the embedded token along the feature axis.
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
        if cell_state is None:
            output, state_h, state_c = self.lstm(x)
        else:
            output, state_h, state_c = self.lstm(x, initial_state=[hidden, cell_state])
        # Collapse (batch, time, units) to (batch * time, units) for the dense layer.
        output = tf.reshape(output, (-1, output.shape[2]))
        x = self.fc(output)
        return x, state_h, state_c, attention_weights

# ----- Smoke test: one encoder pass and one decoder step on random token ids -----
encoder = Encoder(vocab_size=vocab_inp_size, embedding_dim=embedding_dim, enc_units=units)
decoder = Decoder(vocab_size=vocab_tar_size, embedding_dim=embedding_dim, dec_units=units)

# Random source batch (batch=2, seq_len=10)
sample_input = tf.random.uniform(shape=(2, 10), maxval=vocab_inp_size, dtype=tf.int32)
enc_output, enc_h, enc_c = encoder(sample_input)

# One random target token per example, decoded against the encoder outputs
dec_input = tf.random.uniform(shape=(2, 1), maxval=vocab_tar_size, dtype=tf.int32)
pred, dec_h, dec_c, attn = decoder(dec_input, enc_h, enc_output)

print("✅ Encoder output shape:", enc_output.shape)
print("✅ Decoder prediction shape:", pred.shape)


✅ Encoder output shape: (2, 10, 512)
✅ Decoder prediction shape: (2, 5000)


In [37]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense
# --- Toy character-level parallel corpus ---
english_sentences = ["How are you?", "I love coding."]
hindi_sentences = ["तुम कैसे हो?", "मुझे कोडिंग पसंद है।"]
# Surround every target with the literal start/end marker text.
hindi_sentences = [f"<start> {sentence} <end>" for sentence in hindi_sentences]

# --- Character vocabularies and lookup tables ---
input_chars = sorted(set("".join(english_sentences)))
target_chars = sorted(set("".join(hindi_sentences)))
num_encoder_tokens = len(input_chars)
num_decoder_tokens = len(target_chars)
max_encoder_seq_length = max(map(len, english_sentences))
max_decoder_seq_length = max(map(len, hindi_sentences))
input_token_index = {ch: idx for idx, ch in enumerate(input_chars)}
target_token_index = {ch: idx for idx, ch in enumerate(target_chars)}
reverse_target_index = dict(enumerate(target_chars))

# --- One-hot tensors of shape (sentence, time step, character) ---
encoder_input_data = np.zeros(
    (len(english_sentences), max_encoder_seq_length, num_encoder_tokens),
    dtype="float32",
)
decoder_input_data = np.zeros(
    (len(hindi_sentences), max_decoder_seq_length, num_decoder_tokens),
    dtype="float32",
)
decoder_target_data = np.zeros_like(decoder_input_data)
for row, (source_text, target_text) in enumerate(zip(english_sentences, hindi_sentences)):
    for step, ch in enumerate(source_text):
        encoder_input_data[row, step, input_token_index[ch]] = 1.0
    for step, ch in enumerate(target_text):
        col = target_token_index[ch]
        decoder_input_data[row, step, col] = 1.0
        # The target is the decoder input shifted one step to the left.
        if step > 0:
            decoder_target_data[row, step - 1, col] = 1.0

# --- Training model: the encoder's final states initialise the decoder LSTM ---
latent_dim = 256
encoder_inputs = Input(shape=(None, num_encoder_tokens))
encoder = LSTM(latent_dim, return_state=True)
encoder_outputs, *encoder_states = encoder(encoder_inputs)
state_h, state_c = encoder_states
decoder_inputs = Input(shape=(None, num_decoder_tokens))
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
decoder_dense = Dense(num_decoder_tokens, activation="softmax")
decoder_outputs = decoder_dense(decoder_outputs)
model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_outputs)
model.compile(loss="categorical_crossentropy", optimizer="rmsprop")
model.fit(
    [encoder_input_data, decoder_input_data],
    decoder_target_data,
    batch_size=2,
    epochs=100,
)

# --- Inference models: they reuse the trained encoder/decoder layers ---
encoder_model = Model(inputs=encoder_inputs, outputs=encoder_states)
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
# Same decoder LSTM, but its initial state now comes from explicit inputs so
# the caller can feed the states back in one step at a time.
decoder_outputs, *decoder_states = decoder_lstm(
    decoder_inputs, initial_state=decoder_states_inputs)
state_h, state_c = decoder_states
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model(
    inputs=[decoder_inputs, *decoder_states_inputs],
    outputs=[decoder_outputs, *decoder_states],
)
def decode_sequence(input_seq):
    """Greedily decode one encoded source sequence into target characters.

    The decoder is seeded with the '<' character (the first character of the
    "<start>" marker) and is then fed its own arg-max prediction, one
    character per step, together with the LSTM states from the previous step.

    Args:
        input_seq: Encoder input passed to ``encoder_model.predict``.

    Returns:
        The generated characters joined into a string. The seed '<' is not
        part of the result. Decoding stops once the text ends with "<end>" or
        becomes longer than ``max_decoder_seq_length``.
    """
    # verbose=0 keeps the per-step progress bars out of the notebook output.
    states_value = list(encoder_model.predict(input_seq, verbose=0))
    target_seq = np.zeros((1, 1, num_decoder_tokens))
    target_seq[0, 0, target_token_index['<']] = 1.
    decoded_sentence = ""
    while True:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value, verbose=0)
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = reverse_target_index[sampled_token_index]
        decoded_sentence += sampled_char
        # A bare ">" also terminates the "<start>" marker, so stopping on it
        # would end decoding right after "start>". Wait for the full "<end>".
        if decoded_sentence.endswith("<end>") or len(decoded_sentence) > max_decoder_seq_length:
            break
        # Feed the sampled character and the updated states into the next step.
        target_seq = np.zeros((1, 1, num_decoder_tokens))
        target_seq[0, 0, sampled_token_index] = 1.
        states_value = [h, c]
    return decoded_sentence
# Decode every training sentence and compare it with its reference translation.
for seq_index in range(len(english_sentences)):
    input_seq = encoder_input_data[seq_index:seq_index+1]
    decoded_sentence = decode_sequence(input_seq)
    # decode_sequence is seeded with the leading "<" and returns only the
    # characters generated after it, so the reference has to drop that first
    # character; otherwise the containment check below can never succeed.
    expected = hindi_sentences[seq_index][1:]
    print("Input:", english_sentences[seq_index])
    print("Predicted:", decoded_sentence)
    print("Correct (Y/N):", "Y" if expected in decoded_sentence else "N")
    print("----")


Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 2.8449
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 145ms/step - loss: 2.8203
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 145ms/step - loss: 2.7983
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step - loss: 2.7741
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step - loss: 2.7418
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130ms/step - loss: 2.6878
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step - loss: 2.6191
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 146ms/step - loss: 2.6541
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 135ms/step - loss: 2.5578
Epoch 10/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 145ms/step - loss: 2.4963
Ep

In [43]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding, Attention, Add, Concatenate
# --- One-sentence toy corpus (word-level) ---
english_sentences = ["He is reading a book"]
hindi_sentences = ["<start> वह एक पुस्तक पढ़ रहा है। <end>"]

# --- Tokenise and pad ---
# filters='' is passed so no characters are stripped from the texts
# (the "<start>" / "<end>" markers contain '<' and '>').
inp_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
tar_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
inp_tokenizer.fit_on_texts(english_sentences)
tar_tokenizer.fit_on_texts(hindi_sentences)
encoder_input = tf.keras.preprocessing.sequence.pad_sequences(
    inp_tokenizer.texts_to_sequences(english_sentences), padding='post')
decoder_input = tf.keras.preprocessing.sequence.pad_sequences(
    tar_tokenizer.texts_to_sequences(hindi_sentences), padding='post')
# One extra slot on top of the word index for each vocabulary.
vocab_inp_size = 1 + len(inp_tokenizer.word_index)
vocab_tar_size = 1 + len(tar_tokenizer.word_index)
print("Input vocab:", vocab_inp_size)
print("Target vocab:", vocab_tar_size)

# --- Encoder: keeps every step's output so the decoder can attend over it ---
latent_dim = 128
enc_inputs = Input(shape=(None,))
enc_emb = Embedding(vocab_inp_size, latent_dim)(enc_inputs)
enc_out, *enc_states = LSTM(latent_dim, return_sequences=True, return_state=True)(enc_emb)
state_h, state_c = enc_states

# --- Decoder: embeddings attend over the encoder outputs before the LSTM ---
dec_inputs = Input(shape=(None,))
dec_emb = Embedding(vocab_tar_size, latent_dim)(dec_inputs)
# Inputs are passed as [query, value, key]; the encoder outputs serve as both value and key.
attention = Attention()([dec_emb, enc_out, enc_out])
dec_combined_attention = Concatenate(axis=-1)([dec_emb, attention])
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(dec_combined_attention, initial_state=enc_states)
decoder_dense = Dense(vocab_tar_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# The model is only built and compiled in this cell; it is never fitted.
attention_model = Model(inputs=[enc_inputs, dec_inputs], outputs=decoder_outputs)
attention_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
print("✅ Simplified Attention-based Seq2Seq model built using Keras Attention layer.")
print("\nTest Case Comparison:")
print("Input: He is reading a book")
print("Attention Model Output: (Mocked - requires training for real output)")

Input vocab: 6
Target vocab: 9
✅ Simplified Attention-based Seq2Seq model built using Keras Attention layer.

Test Case Comparison:
Input: He is reading a book
Attention Model Output: (Mocked - requires training for real output)
