# 230968080

Week 8


In [4]:
import os
import zipfile

# Archive containing the parallel-sentence data file read by the next cell.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
local_zip = '/home/mca/Desktop/230968080/hin-eng.zip'

# Extract into the current working directory. The context manager guarantees
# the archive handle is closed even if extraction raises.
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall()


In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

# --- Hyperparameters ---
batch_size = 64       # Samples per gradient update.
epochs = 50           # For a good result, 100 is better, but 50 is faster for demonstration.
latent_dim = 256      # Dimensionality of the encoding space.
num_samples = 10000   # Upper bound on the number of lines read from the data file.
data_path = "hin.txt"

# Parallel lists of sentence pairs plus the character vocabulary of each side.
input_texts, target_texts = [], []
input_characters, target_characters = set(), set()

with open(data_path, "r", encoding="utf-8") as f:
    lines = f.read().split("\n")

# The final split element is excluded (it is the empty string when the file
# ends with a newline); at most `num_samples` lines are considered.
line_limit = min(num_samples, len(lines) - 1)
for line in lines[:line_limit]:
    fields = line.split("\t")
    # Only lines with exactly three tab-separated fields are usable:
    # target sentence, input sentence, and a trailing field that is ignored.
    if len(fields) != 3:
        continue
    target_text, input_text, _ = fields

    # "\t" marks the start of a target sequence and "\n" marks its end.
    target_text = "\t" + target_text + "\n"

    input_texts.append(input_text)
    target_texts.append(target_text)

    # Grow each vocabulary with every character seen on that side.
    input_characters.update(input_text)
    target_characters.update(target_text)

# Freeze the vocabularies into sorted lists so token indices are deterministic.
input_characters = sorted(input_characters)
target_characters = sorted(target_characters)
num_encoder_tokens = len(input_characters)
num_decoder_tokens = len(target_characters)
max_encoder_seq_length = max(len(txt) for txt in input_texts)
max_decoder_seq_length = max(len(txt) for txt in target_texts)

print("Number of samples:", len(input_texts))
print("Number of unique input tokens:", num_encoder_tokens)
print("Number of unique output tokens:", num_decoder_tokens)
print("Max sequence length for inputs:", max_encoder_seq_length)
print("Max sequence length for outputs:", max_decoder_seq_length)

2025-09-26 15:09:11.630830: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-09-26 15:09:11.655349: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Number of samples: 3116
Number of unique input tokens: 91
Number of unique output tokens: 72
Max sequence length for inputs: 121
Max sequence length for outputs: 109


In [2]:
# Character -> integer index lookups used for one-hot encoding.
input_token_index = {char: i for i, char in enumerate(input_characters)}
target_token_index = {char: i for i, char in enumerate(target_characters)}

# One-hot tensors laid out as (sample, timestep, token).
encoder_input_data = np.zeros(
    (len(input_texts), max_encoder_seq_length, num_encoder_tokens), dtype="float32"
)
decoder_input_data = np.zeros(
    (len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype="float32"
)
decoder_target_data = np.zeros(
    (len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype="float32"
)

for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    # Padding offsets are derived from the text lengths instead of the loop
    # variable left over after the character loops: that variable would be
    # stale (or undefined on the first sample) whenever a text is empty.
    input_len = len(input_text)
    target_len = len(target_text)

    for t, char in enumerate(input_text):
        encoder_input_data[i, t, input_token_index[char]] = 1.0
    # Every timestep past the end of the sentence is encoded as a space.
    encoder_input_data[i, input_len:, input_token_index[" "]] = 1.0

    for t, char in enumerate(target_text):
        decoder_input_data[i, t, target_token_index[char]] = 1.0
        if t > 0:
            # The target is the decoder input shifted one timestep ahead,
            # so it does not contain the leading start character.
            decoder_target_data[i, t - 1, target_token_index[char]] = 1.0
    decoder_input_data[i, target_len:, target_token_index[" "]] = 1.0
    # The target sequence is one step shorter, so its padding starts one
    # step earlier (clamped at 0 for an empty text).
    decoder_target_data[i, max(target_len - 1, 0):, target_token_index[" "]] = 1.0

print("Data vectorization complete.")

Data vectorization complete.


In [3]:
# Encoder
encoder_inputs_lstm = keras.Input(shape=(None, num_encoder_tokens))
encoder_lstm = keras.layers.LSTM(latent_dim, return_state=True)
encoder_outputs_lstm, state_h_lstm, state_c_lstm = encoder_lstm(encoder_inputs_lstm)
encoder_states_lstm = [state_h_lstm, state_c_lstm]

# Decoder
decoder_inputs_lstm = keras.Input(shape=(None, num_decoder_tokens))
decoder_lstm = keras.layers.LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs_lstm, _, _ = decoder_lstm(decoder_inputs_lstm, initial_state=encoder_states_lstm)
decoder_dense_lstm = keras.layers.Dense(num_decoder_tokens, activation="softmax")
decoder_outputs_lstm = decoder_dense_lstm(decoder_outputs_lstm)

# full model
model_lstm = keras.Model([encoder_inputs_lstm, decoder_inputs_lstm], decoder_outputs_lstm)

model_lstm.compile(
    optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"]
)
print("--- LSTM Model Summary ---")
model_lstm.summary()

2025-09-26 15:09:15.212668: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-09-26 15:09:15.227180: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-09-26 15:09:15.227301: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

--- LSTM Model Summary ---
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, None, 91)]           0         []                            
                                                                                                  
 input_2 (InputLayer)        [(None, None, 72)]           0         []                            
                                                                                                  
 lstm (LSTM)                 [(None, 256),                356352    ['input_1[0][0]']             
                              (None, 256),                                                        
                              (None, 256)]                                                        
                                                                   

In [4]:
# GRU variant of the encoder-decoder model, built with the same structure as
# the LSTM model so the two can be trained and compared on identical data.
# NOTE(review): create_inference_models() later fetches layers from this model
# by position (layers[2], layers[3], layers[4]) — keep the construction order
# below unchanged unless that lookup is updated too.

# Encoder: reads the one-hot input sequence and returns its final state.
encoder_inputs_gru = keras.Input(shape=(None, num_encoder_tokens))
encoder_gru = keras.layers.GRU(latent_dim, return_state=True)
encoder_outputs_gru, state_h_gru = encoder_gru(encoder_inputs_gru)
encoder_states_gru = [state_h_gru] # GRU only has one state

# Decoder: starts from the encoder state and emits one output per timestep.
decoder_inputs_gru = keras.Input(shape=(None, num_decoder_tokens))
decoder_gru = keras.layers.GRU(latent_dim, return_sequences=True, return_state=True)
# The decoder's own returned state is discarded here; only the sequence output is kept.
decoder_outputs_gru, _ = decoder_gru(decoder_inputs_gru, initial_state=encoder_states_gru)
# Softmax over the target vocabulary at every timestep.
decoder_dense_gru = keras.layers.Dense(num_decoder_tokens, activation="softmax")
decoder_outputs_gru = decoder_dense_gru(decoder_outputs_gru)

# Full training model: (encoder input, decoder input) -> per-timestep token distribution.
model_gru = keras.Model([encoder_inputs_gru, decoder_inputs_gru], decoder_outputs_gru)

model_gru.compile(
    optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"]
)
print("\n--- GRU Model Summary ---")
model_gru.summary()


--- GRU Model Summary ---
Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_3 (InputLayer)        [(None, None, 91)]           0         []                            
                                                                                                  
 input_4 (InputLayer)        [(None, None, 72)]           0         []                            
                                                                                                  
 gru (GRU)                   [(None, 256),                268032    ['input_3[0][0]']             
                              (None, 256)]                                                        
                                                                                                  
 gru_1 (GRU)                 [(None, None, 256),          253440 

In [5]:
# Both models are fitted on the same tensors with the same settings so their
# training histories are directly comparable.
training_inputs = [encoder_input_data, decoder_input_data]
fit_options = dict(
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,  # Fraction of the samples held out for validation.
)

print("\n--- Training LSTM Model ---")
history_lstm = model_lstm.fit(training_inputs, decoder_target_data, **fit_options)

print("\n--- Training GRU Model ---")
history_gru = model_gru.fit(training_inputs, decoder_target_data, **fit_options)


--- Training LSTM Model ---
Epoch 1/50


2025-09-26 15:09:21.818166: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600
2025-09-26 15:09:21.898466: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2025-09-26 15:09:21.900144: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f14c002e570 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-09-26 15:09:21.900159: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 3060, Compute Capability 8.6
2025-09-26 15:09:21.903179: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-09-26 15:09:21.955928: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the p

Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50

--- Training GRU Model ---
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch

In [8]:
# --- Reverse token dictionaries (index -> character) used when decoding ---
reverse_input_char_index = {i: char for char, i in input_token_index.items()}
reverse_target_char_index = {i: char for char, i in target_token_index.items()}

In [9]:
def create_inference_models(model_type, trained_model):
    """Split a trained encoder-decoder model into separate inference models.

    The trained layers are reused and are looked up by position:
    ``layers[2]`` serves as the recurrent encoder, ``layers[3]`` as the
    recurrent decoder and ``layers[4]`` as the output projection.

    Args:
        model_type: ``'lstm'`` selects the two-state (h, c) path; any other
            value takes the single-state GRU path.
        trained_model: Model whose ``input`` holds the encoder input at
            index 0 and the decoder input at index 1.

    Returns:
        A ``(encoder_model, decoder_model)`` tuple. For LSTM the encoder
        outputs ``[h, c]`` and the decoder maps ``[tokens, h, c]`` to
        ``[probabilities, h, c]``. For GRU the encoder outputs the single
        state tensor and the decoder maps ``[tokens, h]`` to
        ``[probabilities, h]``.
    """
    is_lstm = model_type == 'lstm'

    # --- ENCODER ---
    source_tensor = trained_model.input[0]
    recurrent_outputs = trained_model.layers[2].output
    if is_lstm:
        _, enc_h, enc_c = recurrent_outputs
        encoder_model = keras.Model(source_tensor, [enc_h, enc_c])
    else:
        # The GRU encoder exposes its state as a bare tensor, not a list.
        _, enc_h = recurrent_outputs
        encoder_model = keras.Model(source_tensor, enc_h)

    # --- DECODER ---
    step_tensor = trained_model.input[1]
    h_in = keras.Input(shape=(latent_dim,))

    if not is_lstm:
        # GRU: a single state tensor goes in and a single one comes out.
        recurrent_decoder = trained_model.layers[3]
        sequence_out, h_out = recurrent_decoder(step_tensor, initial_state=h_in)
        projection = trained_model.layers[4]
        decoder_model = keras.Model(
            [step_tensor, h_in], [projection(sequence_out), h_out]
        )
        return encoder_model, decoder_model

    # LSTM: the hidden and cell states travel together.
    c_in = keras.Input(shape=(latent_dim,))
    recurrent_decoder = trained_model.layers[3]
    sequence_out, h_out, c_out = recurrent_decoder(
        step_tensor, initial_state=[h_in, c_in]
    )
    projection = trained_model.layers[4]
    decoder_model = keras.Model(
        [step_tensor, h_in, c_in], [projection(sequence_out), h_out, c_out]
    )
    return encoder_model, decoder_model

In [10]:
def decode_sequence(input_seq, encoder_model, decoder_model, model_type):
    """Greedily decode one encoded input sequence into a target string.

    Decoding starts from the tab start-of-sequence character and feeds the
    most probable character back into the decoder one step at a time.

    Args:
        input_seq: Encoder input passed unchanged to ``encoder_model.predict``
            (assumed to be a single one-hot encoded sample with a leading
            batch axis of size 1 — TODO confirm against callers).
        encoder_model: Inference encoder returning the initial decoder
            state(s).
        decoder_model: Inference decoder returning the token probabilities
            followed by the updated state(s).
        model_type: ``'lstm'`` selects the two-state path; any other value
            is handled as GRU (single state).

    Returns:
        The decoded characters concatenated into one string. The string
        includes the terminating newline character when the decoder emits
        it; otherwise decoding stops once the string is longer than
        ``max_decoder_seq_length``.
    """
    # Encode the input as state vectors. predict() is called once per decoded
    # character below, so progress output is silenced with verbose=0 to avoid
    # printing a progress bar for every step.
    states_value = encoder_model.predict(input_seq, verbose=0)

    # Generate empty target sequence of length 1, holding the start character.
    target_seq = np.zeros((1, 1, num_decoder_tokens))
    target_seq[0, 0, target_token_index["\t"]] = 1.0

    stop_condition = False
    decoded_sentence = ""
    while not stop_condition:
        if model_type == 'lstm':
            # The LSTM decoder takes and returns a pair of states.
            output_tokens, h, c = decoder_model.predict(
                [target_seq] + states_value, verbose=0
            )
            states_value = [h, c]
        else: # gru
            # The GRU decoder takes and returns a single state tensor.
            output_tokens, h = decoder_model.predict(
                [target_seq, states_value], verbose=0
            )
            states_value = h

        # Greedy choice: take the most probable character at the last step.
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = reverse_target_char_index[sampled_token_index]
        decoded_sentence += sampled_char

        # Stop on the end-of-sequence character or when the output is too long.
        if sampled_char == "\n" or len(decoded_sentence) > max_decoder_seq_length:
            stop_condition = True

        # The sampled character becomes the decoder input for the next step.
        target_seq = np.zeros((1, 1, num_decoder_tokens))
        target_seq[0, 0, sampled_token_index] = 1.0

    return decoded_sentence

In [11]:
# Build the step-wise inference encoder/decoder pair for each trained model.
encoder_model_lstm, decoder_model_lstm = create_inference_models('lstm', model_lstm)
encoder_model_gru, decoder_model_gru = create_inference_models('gru', model_gru)

In [12]:
# Decode a few fixed samples from the training data with both models and
# print the results next to the reference translation.
print("\n--- Model Comparison ---")
sample_indices = (10, 20, 30, 40, 50)
for seq_index in sample_indices:
    # Slice instead of indexing so the leading batch axis is kept.
    input_seq = encoder_input_data[seq_index : seq_index + 1]

    decoded_sentence_lstm = decode_sequence(
        input_seq, encoder_model_lstm, decoder_model_lstm, 'lstm'
    )
    decoded_sentence_gru = decode_sequence(
        input_seq, encoder_model_gru, decoder_model_gru, 'gru'
    )

    print("-" * 50)
    report_rows = (
        ("Input (Hindi):", input_texts[seq_index]),
        ("Target (English):", target_texts[seq_index].strip()),
        ("LSTM Translation:", decoded_sentence_lstm.strip()),
        ("GRU Translation:", decoded_sentence_gru.strip()),
    )
    for label, text in report_rows:
        print(label, text)


--- Model Comparison ---
--------------------------------------------------
Input (Hindi): चियर्स!
Target (English): Cheers!
LSTM Translation: I wan the prone the prook.
GRU Translation: I wan the prowe the sead.
--------------------------------------------------
Input (Hindi): अंदर आ जाओ।
Target (English): Come in.
LSTM Translation: I wan the prone the prook.
GRU Translation: I wan the prowe the sead.
--------------------------------------------------
Input (Hindi): मौज करना।
Target (English): Have fun.
LSTM Translation: I wan the prone the prook.
GRU Translation: I wan the prowe the sead.
--------------------------------------------------
Input (Hindi): पंछी उड़ते हैं।
Target (English): Birds fly.
LSTM Translation: I wan the prone the prook.
GRU Translation: I wan the prowe the sead.
--------------------------------------------------
Input (Hindi): मैं थक गया हूँ।
Target (English): I'm tired.
LSTM Translation: I wan the prone the prook.
GRU Translation: I wan the prowe the sead.
