# Import Libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras # Ensures keras is from tensorflow
from tensorflow.keras.layers import Input, Embedding, LSTM, GRU, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import wandb
from wandb.integration.keras import WandbMetricsLogger, WandbModelCheckpoint


import os
import re
import time
import unicodedata

# Weights & Biases Login

In [None]:
wandb.login()

# Data Loading and Initial Parsing

In [2]:
def load_data(filepath):
    """Loads data from a TSV file."""
    try:
        df = pd.read_csv(filepath, sep='\t', header=None, on_bad_lines='skip', names=['native', 'roman', 'count'])
        # The problem statement is romanized (input) to native (target)
        # e.g., "ajanabee अजनबी" -> x = "ajanabee", y = "अजनबी"
        # The Dakshina dataset lexicon format is: native_word, romanization, count
        # So, for our task: input_texts = df['roman'], target_texts = df['native']
        
        # Handle cases where lines might not have 3 columns or have NaN values
        df.dropna(subset=['native', 'roman'], inplace=True)
        
        input_texts = df['roman'].astype(str).tolist()
        target_texts = df['native'].astype(str).tolist()
        return input_texts, target_texts
    except Exception as e:
        print(f"Error loading data from {filepath}: {e}")
        return [], []

# --- Defining data file paths ---
dataset_base_dir = 'dakshina_dataset_v1.0' 
language = 'hi' # Hindi

train_file = os.path.join(dataset_base_dir, language, 'lexicons', f'{language}.translit.sampled.train.tsv')
dev_file = os.path.join(dataset_base_dir, language, 'lexicons', f'{language}.translit.sampled.dev.tsv')
test_file = os.path.join(dataset_base_dir, language, 'lexicons', f'{language}.translit.sampled.test.tsv')

# Loading data
input_texts_train_full, target_texts_train_full = load_data(train_file)
input_texts_val, target_texts_val = load_data(dev_file)
input_texts_test, target_texts_test = load_data(test_file) # Test set for final evaluation after sweep

input_texts_train, target_texts_train = input_texts_train_full, target_texts_train_full

print(f"Training samples: {len(input_texts_train)}")
print(f"Validation samples: {len(input_texts_val)}")
print(f"Test samples: {len(input_texts_test)}")

if len(input_texts_train) > 0 and len(target_texts_train) > 0:
    print("\nSample training data:")
    for i in range(min(3, len(input_texts_train))):
        print(f"Input: {input_texts_train[i]}, Target: {target_texts_train[i]}")
else:
    print("No training data loaded. Please check file paths and content.")

if len(input_texts_val) == 0:
    print("No validation data loaded. Sweeps will not work correctly without validation data.")

Training samples: 44202
Validation samples: 4358
Test samples: 4502

Sample training data:
Input: an, Target: अं
Input: ankganit, Target: अंकगणित
Input: uncle, Target: अंकल


# Data Preprocessing - Vocabulary, Tokenization, Padding

In [3]:
# --- Character sets and tokenization ---
input_characters = set()
target_characters = set()

for text in input_texts_train:
    for char in text:
        if char not in input_characters:
            input_characters.add(char)

for text in target_texts_train:
    for char in text:
        if char not in target_characters:
            target_characters.add(char)

# Add special tokens
SOS_TOKEN = '\t' # Start Of Sequence
EOS_TOKEN = '\n' # End Of Sequence
target_characters.add(SOS_TOKEN)
target_characters.add(EOS_TOKEN)

input_char_list = sorted(list(input_characters))
target_char_list = sorted(list(target_characters))

num_encoder_tokens = len(input_char_list)
num_decoder_tokens = len(target_char_list)

# Creating char-to-index and index-to-char mappings
input_token_index = {char: i for i, char in enumerate(input_char_list)}
target_token_index = {char: i for i, char in enumerate(target_char_list)}

reverse_input_char_index = {i: char for char, i in input_token_index.items()}
reverse_target_char_index = {i: char for char, i in target_token_index.items()}

# Determining max sequence lengths
max_encoder_seq_length = max(len(text) for text in input_texts_train + input_texts_val)
max_decoder_seq_length = max(len(text) for text in target_texts_train + target_texts_val) + 2 # +2 for SOS and EOS

print(f"\nNumber of unique input tokens: {num_encoder_tokens}")
print(f"Number of unique output tokens: {num_decoder_tokens}")
print(f"Max sequence length for inputs: {max_encoder_seq_length}")
print(f"Max sequence length for outputs: {max_decoder_seq_length}")


# --- Vectorizing the data ---
def vectorize_data(input_texts, target_texts, is_training=True):
    encoder_input_data = np.zeros((len(input_texts), max_encoder_seq_length), dtype="float32")
    decoder_input_data = np.zeros((len(target_texts), max_decoder_seq_length), dtype="float32")
    decoder_target_data = np.zeros((len(target_texts), max_decoder_seq_length, num_decoder_tokens), dtype="float32") # For sparse_categorical_crossentropy, this should be integers

    for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
        for t, char in enumerate(input_text):
            if char in input_token_index: # Handling chars not in vocab if any
                 encoder_input_data[i, t] = input_token_index[char]
            # else: ignoring unknown char or map to a special <UNK> token if defined
        
        # Decoder target data is ahead of decoder input data by one timestep
        # and includes the start token.
        # Decoder input: <SOS> char1 char2 ...
        # Decoder target: char1 char2 ... <EOS>
        
        processed_target_text = SOS_TOKEN + target_text + EOS_TOKEN
        for t, char in enumerate(processed_target_text):
            if t < max_decoder_seq_length:
                if char in target_token_index:
                    decoder_input_data[i, t] = target_token_index[char]
                    if t > 0: # decoder_target_data will be one timestep ahead
                        decoder_target_data[i, t - 1, target_token_index[char]] = 1.0 # One-hot
            # else: sequence is longer than max_decoder_seq_length, truncate
    
    return encoder_input_data, decoder_input_data, decoder_target_data


encoder_input_train, decoder_input_train, decoder_target_train = vectorize_data(input_texts_train, target_texts_train)
encoder_input_val, decoder_input_val, decoder_target_val = vectorize_data(input_texts_val, target_texts_val)

print("\nShape of encoder_input_train:", encoder_input_train.shape)
print("Shape of decoder_input_train:", decoder_input_train.shape)
print("Shape of decoder_target_train:", decoder_target_train.shape)



Number of unique input tokens: 26
Number of unique output tokens: 65
Max sequence length for inputs: 20
Max sequence length for outputs: 21

Shape of encoder_input_train: (44202, 20)
Shape of decoder_input_train: (44202, 21)
Shape of decoder_target_train: (44202, 21, 65)


# Model Building Function

In [4]:
def build_seq2seq_model(config): # Expects config to be an object with attributes
    """Builds the Encoder-Decoder model based on wandb config."""
    
    # Encoder
    encoder_inputs = Input(shape=(None,), name="encoder_inputs") 
    emb_enc = Embedding(num_encoder_tokens, config.input_embedding_size, name="encoder_embedding")(encoder_inputs)
    
    current_encoder_output = emb_enc 
    
    # Selecting RNN cell type
    if config.cell_type == "LSTM":
        RNNCell = LSTM
    elif config.cell_type == "GRU":
        RNNCell = GRU
    else: 
        RNNCell = keras.layers.SimpleRNN

    encoder_states_list = [] 

    for i in range(config.encoder_layers):
        is_last_layer = (i == config.encoder_layers - 1)
        rnn_layer = RNNCell(config.hidden_size, 
                            return_sequences=not is_last_layer, 
                            return_state=True, 
                            dropout=config.dropout_rate, 
                            name=f"encoder_{config.cell_type}_{i}")
        
        if config.cell_type == "LSTM":
            encoder_rnn_output_seq, state_h, state_c = rnn_layer(current_encoder_output)
            layer_states = [state_h, state_c]
        else: 
            encoder_rnn_output_seq, state_h = rnn_layer(current_encoder_output)
            layer_states = [state_h]
        
        if is_last_layer: 
            encoder_states_list = layer_states
        current_encoder_output = encoder_rnn_output_seq

    # Decoder
    decoder_inputs = Input(shape=(None,), name="decoder_inputs")
    emb_dec = Embedding(num_decoder_tokens, config.input_embedding_size, name="decoder_embedding")(decoder_inputs)
    current_decoder_output = emb_dec
    decoder_rnn_output_seq = current_decoder_output

    for i in range(config.decoder_layers):
        rnn_layer_dec = RNNCell(config.hidden_size, 
                                return_sequences=True, 
                                return_state=True,    
                                dropout=config.dropout_rate,
                                name=f"decoder_{config.cell_type}_{i}")
        
        initial_states_for_layer = encoder_states_list if i == 0 else None
        
        if config.cell_type == "LSTM":
            decoder_rnn_output_seq, _, _ = rnn_layer_dec(decoder_rnn_output_seq, initial_state=initial_states_for_layer)
        else: 
            decoder_rnn_output_seq, _ = rnn_layer_dec(decoder_rnn_output_seq, initial_state=initial_states_for_layer)
            
    decoder_dense = Dense(num_decoder_tokens, activation="softmax", name="decoder_output_dense")
    decoder_outputs = decoder_dense(decoder_rnn_output_seq)

    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

    # Optimizer selection
    if config.optimizer == 'adam':
        optimizer_choice = tf.keras.optimizers.Adam(learning_rate=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer_choice = tf.keras.optimizers.RMSprop(learning_rate=config.learning_rate)
    elif config.optimizer == 'nadam': # Added Nadam
        optimizer_choice = tf.keras.optimizers.Nadam(learning_rate=config.learning_rate)
    elif config.optimizer == 'sgd':
        optimizer_choice = tf.keras.optimizers.SGD(learning_rate=config.learning_rate)
    else: # Default to Adam
        print(f"Optimizer {config.optimizer} not explicitly supported, defaulting to Adam.")
        optimizer_choice = tf.keras.optimizers.Adam(learning_rate=config.learning_rate)
            
    model.compile(optimizer=optimizer_choice, loss="categorical_crossentropy", metrics=["accuracy"])
    
    return model

# Inference Models and Beam Search Decode Function

In [5]:
def build_inference_models(training_model, config):
    # --- Encoder Model for Inference ---
    encoder_inputs_inf = Input(shape=(None,), name="encoder_inputs_inf") 
    
    # Getting encoder embedding layer output through the layer's weights
    encoder_embedding_layer = training_model.get_layer("encoder_embedding")
    encoder_embedding_inf = encoder_embedding_layer(encoder_inputs_inf)
    
    current_encoder_output = encoder_embedding_inf
    encoder_states_inf_list = []

    for i in range(config.encoder_layers):
        encoder_rnn_layer_inf = training_model.get_layer(f"encoder_{config.cell_type}_{i}")
        if config.cell_type == "LSTM":
            current_encoder_output, state_h_enc, state_c_enc = encoder_rnn_layer_inf(current_encoder_output)
            if i == config.encoder_layers - 1:
                 encoder_states_inf_list = [state_h_enc, state_c_enc]
        else: # GRU or SimpleRNN
            current_encoder_output, state_h_enc = encoder_rnn_layer_inf(current_encoder_output)
            if i == config.encoder_layers - 1:
                encoder_states_inf_list = [state_h_enc]

    # Creating encoder model with the new input
    encoder_model_inf = Model(encoder_inputs_inf, encoder_states_inf_list)
    # --- Decoder Model for Inference ---
    decoder_state_input_h_list = []
    decoder_state_input_c_list = [] # Only for LSTM
    decoder_states_inputs_inf_list = []

    for i in range(config.decoder_layers):
        state_h = Input(shape=(config.hidden_size,), name=f"decoder_state_input_h_{i}")
        decoder_state_input_h_list.append(state_h)
        decoder_states_inputs_inf_list.append(state_h)
        if config.cell_type == "LSTM":
            state_c = Input(shape=(config.hidden_size,), name=f"decoder_state_input_c_{i}")
            decoder_state_input_c_list.append(state_c)
            decoder_states_inputs_inf_list.append(state_c)


    decoder_inputs_inf_single_step = Input(shape=(1,), name="decoder_inputs_single_step") # Input is one char at a time
    decoder_embedding_inf = training_model.get_layer("decoder_embedding")(decoder_inputs_inf_single_step)

    current_decoder_output_inf = decoder_embedding_inf
    decoder_states_output_inf_list = []

    # The initial states for the *first* decoder layer during inference come from the encoder.
    # However, the inference decoder model needs to be general and accept states for *all its layers*.
    # For the first step, we pass encoder_states to the first decoder layer.
    # For subsequent steps, we pass the output states of the previous step.
    
    # We need to reconstruct the state inputs for each decoder layer carefully
    # Keras layers expect a list of states if stateful, or if return_state=True
    
    temp_decoder_states_inputs_inf = []
    if config.cell_type == "LSTM":
        for i in range(config.decoder_layers):
            temp_decoder_states_inputs_inf.extend([decoder_state_input_h_list[i], decoder_state_input_c_list[i]])
    else: # GRU/SimpleRNN
        for i in range(config.decoder_layers):
            temp_decoder_states_inputs_inf.append(decoder_state_input_h_list[i])

    idx = 0
    for i in range(config.decoder_layers):
        decoder_rnn_layer_inf = training_model.get_layer(f"decoder_{config.cell_type}_{i}")
        
        # Preparing initial_state for this specific layer from the input states list
        if config.cell_type == "LSTM":
            layer_initial_states = [temp_decoder_states_inputs_inf[idx], temp_decoder_states_inputs_inf[idx+1]]
            idx += 2
            current_decoder_output_inf, state_h_dec, state_c_dec = decoder_rnn_layer_inf(
                current_decoder_output_inf, initial_state=layer_initial_states
            )
            decoder_states_output_inf_list.extend([state_h_dec, state_c_dec])
        else: # GRU or SimpleRNN
            layer_initial_states = [temp_decoder_states_inputs_inf[idx]]
            idx += 1
            current_decoder_output_inf, state_h_dec = decoder_rnn_layer_inf(
                current_decoder_output_inf, initial_state=layer_initial_states
            )
            decoder_states_output_inf_list.append(state_h_dec)
            
    decoder_dense_inf = training_model.get_layer("decoder_output_dense")
    decoder_outputs_inf = decoder_dense_inf(current_decoder_output_inf)
    
    decoder_model_inf = Model(
        [decoder_inputs_inf_single_step] + temp_decoder_states_inputs_inf, 
        [decoder_outputs_inf] + decoder_states_output_inf_list
    )
    
    return encoder_model_inf, decoder_model_inf


def decode_sequence_beam_search(input_seq_vectorized, encoder_model, decoder_model, beam_width, config):
    # Encoding the input as state vectors.
    states_value_list = encoder_model.predict(input_seq_vectorized, verbose=0)
    
    if not isinstance(states_value_list, list):
        states_value_list = [states_value_list]  # Convert to list if single state
    
    # Initializing decoder states
    current_states_for_decoder_model = []
    
    # Populating initial states for the first decoder layer from encoder
    current_states_for_decoder_model.extend(states_value_list)
    
    # Populating zero states for subsequent decoder layers (if any)
    batch_size = 1  # For single sequence decoding
    num_states_per_layer = 2 if config.cell_type == "LSTM" else 1
    for _ in range(1, config.decoder_layers):
        for _ in range(num_states_per_layer):
            current_states_for_decoder_model.append(np.zeros((batch_size, config.hidden_size)))

    # Starting with the SOS token
    target_seq = np.array([[target_token_index[SOS_TOKEN]]])
    
    # Initial beam: (sequence_indices, log_probability, states_for_decoder_model)
    initial_beam = [([target_token_index[SOS_TOKEN]], 0.0, current_states_for_decoder_model)]
    live_hypotheses = initial_beam

    for _ in range(max_decoder_seq_length):
        new_hypotheses = []
        for seq_indices, score, current_states in live_hypotheses:
            if seq_indices[-1] == target_token_index[EOS_TOKEN] and len(seq_indices) > 1:
                new_hypotheses.append((seq_indices, score, current_states))
                continue

            # Predicting next token
            last_token_idx = np.array([[seq_indices[-1]]])
            
            # Making sure all inputs have batch_size=1 and correct shapes
            decoder_model_inputs = [last_token_idx] + [
                np.reshape(state, (1, -1)) if state.shape[0] != 1 else state 
                for state in current_states
            ]
            
            output_tokens_probs_list = decoder_model.predict(decoder_model_inputs, verbose=0)
            
            output_tokens_probs = output_tokens_probs_list[0]  # Shape should be (1, 1, num_decoder_tokens)
            new_states_list = output_tokens_probs_list[1:]

            # Calculating log probabilities for all tokens
            log_probs = np.log(output_tokens_probs[0, 0] + 1e-9)  # Getting probabilities for the first (and only) timestep
            top_k_indices = np.argsort(log_probs)[-beam_width:]  # Getting indices of top k probabilities
            
            for token_idx in top_k_indices:
                if token_idx < len(log_probs):  # Adding bounds check
                    new_seq_indices = seq_indices + [token_idx]
                    new_score = score + log_probs[token_idx]
                    new_hypotheses.append((new_seq_indices, new_score, new_states_list))

        # Sorting and keeping top beam_width hypotheses
        if new_hypotheses:
            live_hypotheses = sorted(new_hypotheses, key=lambda x: x[1], reverse=True)[:beam_width]
        else:
            break 

        if all(h[0][-1] == target_token_index[EOS_TOKEN] for h in live_hypotheses if len(h[0]) > 1):
            break

    # Choosing best hypothesis and converting to text
    if not live_hypotheses: 
        return ""
        
    best_hypothesis = max(live_hypotheses, key=lambda x: x[1]/len(x[0]) if len(x[0]) > 1 else x[1])
    decoded_sentence_indices = best_hypothesis[0]
    
    decoded_sentence = ""
    for token_idx in decoded_sentence_indices:
        if token_idx == target_token_index[SOS_TOKEN]:
            continue
        if token_idx == target_token_index[EOS_TOKEN]:
            break
        if token_idx in reverse_target_char_index:
             decoded_sentence += reverse_target_char_index[token_idx]
    return decoded_sentence


# Training and Evaluation Function (train_evaluate) (Modified)

In [6]:
from tqdm import tqdm
def train_evaluate():
    keras.backend.clear_session()
    
    run = wandb.init() 
                       
    config = wandb.config

    # Build the training model
    print(f"--- Building model for run {run.id if run else 'N/A'} with config: {dict(config)} ---")
    training_model = build_seq2seq_model(config)
    
    # Callbacks
    early_stopping = EarlyStopping(monitor='val_loss', 
                                   patience=config.early_stopping_patience, 
                                   restore_best_weights=True, 
                                   verbose=1)
    wandb_metrics_logger = WandbMetricsLogger(log_freq="epoch")

    # Training the model
    print(f"--- Starting training for run {run.id if run else 'N/A'} ---")
    history = training_model.fit(
        [encoder_input_train, decoder_input_train],
        decoder_target_train,
        batch_size=config.batch_size,
        epochs=config.epochs, 
        validation_data=([encoder_input_val, decoder_input_val], decoder_target_val),
        callbacks=[early_stopping, wandb_metrics_logger],
        verbose=1 
    )
    
    wandb.log({"val_exact_match_accuracy": history.history['val_accuracy'][-1]})

# Wandb Sweep Configuration

In [7]:
sweep_config = {
    'method': 'bayes',  # Bayesian optimization
    'metric': {
        'name': 'val_exact_match_accuracy', 
        'goal': 'maximize'   
    },
    'parameters': {
        'input_embedding_size': {
            'values': [32, 64, 128] 
        },
        'hidden_size': {
            'values': [64, 128, 256] 
        },
        'encoder_layers': {
            'values': [1, 2]
        },
        'decoder_layers': {
            'values': [1, 2]
        },
        'cell_type': {
            'values': ['RNN', 'GRU', 'LSTM']
        },
        'dropout_rate': {
            'values': [0.2, 0.3]
        },
        'learning_rate': {
            'values': [0.001, 0.0001]
        },
        'batch_size': {
            'values': [64, 128, 256]
        },
        'epochs': { 
            'values': [50] 
        },
        'early_stopping_patience': {
            'values': [5]
        },
        'beam_size': { 
            'values': [1, 3, 5] 
        },
        'optimizer': {
            'values': ['adam', 'nadam']
        }
    }
}

# Start the Sweep Agent

In [None]:
sweep_id = wandb.sweep(sweep_config, entity="ce21b097-indian-institute-of-technology-madras", project="CE21B097 - DA6401 - Assignment 3")

wandb.agent(sweep_id, function=train_evaluate, count=100)

print("\n--- Sweep Finished ---")
print("Go to your W&B project page to see the results, including:")
print("- Accuracy v/s Created plot")
print("- Parallel Co-ordinates plot")
print("- Correlation Summary table")

# For Question 4: Evaluate Best Model on Test Set

In [8]:
class BestConfig:
    def __init__(self, config_dict):
        for key, value in config_dict.items():
            setattr(self, key, value)

best_config_dict = {
    "batch_size": 356,
    "beam_size": 5,
    "cell_type": "GRU",
    "decoder_layers": 2,
    "dropout_rate": 0.2,
    "early_stopping_patience": 5,
    "encoder_layers": 1,
    "epochs": 50, 
    "hidden_size": 256,
    "input_embedding_size": 64,
    "learning_rate": 0.001,
    "optimizer": "nadam"
}
best_config = BestConfig(best_config_dict)
print("Using Best Configuration:", best_config_dict)

print("\n--- Retraining model on full training data with best configuration ---")
keras.backend.clear_session()

# Vectorizing
encoder_input_train_full_vec, decoder_input_train_full_vec, decoder_target_train_full_vec = vectorize_data(
    input_texts_train_full, target_texts_train_full
)

best_model = build_seq2seq_model(best_config)

early_stopping_retrain = EarlyStopping(
    monitor='val_loss', 
    patience=best_config.early_stopping_patience, 
    restore_best_weights=True,
    verbose=1
)

history_retrain = best_model.fit(
    [encoder_input_train_full_vec, decoder_input_train_full_vec],
    decoder_target_train_full_vec,
    batch_size=best_config.batch_size, 
    epochs=best_config.epochs, 
    validation_data=([encoder_input_val, decoder_input_val], decoder_target_val),
    callbacks=[early_stopping_retrain],
    verbose=1
)
print("Retraining complete.")

# --- 2. Preparing Test Data ---
if not input_texts_test:
    print("Test data not loaded. Please check Cell 3.")
else:
    encoder_input_test_vec, _, _ = vectorize_data(input_texts_test, [""]*len(input_texts_test)) # Dummy targets
    print(f"\nTest samples: {len(input_texts_test)}")
    print(f"Shape of encoder_input_test_vec: {encoder_input_test_vec.shape}")


    # --- 3. Building Inference Models ---
    print("\n--- Building inference models for test set evaluation ---")
    encoder_model_inf_test, decoder_model_inf_test = build_inference_models(best_model, best_config)

    # --- 4. Making Predictions on Test Set ---
    print(f"\n--- Predicting on test set using beam_size: {best_config.beam_size} ---")
    test_predictions = []
    correct_test_predictions = 0
    
    # Creating directory for predictions
    predictions_dir = "predictions_vanilla"
    os.makedirs(predictions_dir, exist_ok=True)
    prediction_file_path = os.path.join(predictions_dir, "test_predictions_vanilla.tsv")

    with open(prediction_file_path, "w", encoding="utf-8") as f_out:
        f_out.write("Input (Roman)\tActual (Devanagari)\tPredicted (Devanagari)\n") # Header
        
        for i in tqdm(range(len(input_texts_test))):
            input_seq_vectorized_test = encoder_input_test_vec[i:i+1] # Shape (1, max_encoder_seq_length)
            
            decoded_sentence_test = decode_sequence_beam_search(
                input_seq_vectorized_test,
                encoder_model_inf_test,
                decoder_model_inf_test,
                best_config.beam_size,
                best_config # Passing the whole config
            )
            test_predictions.append(decoded_sentence_test)
            
            actual_target = target_texts_test[i]
            f_out.write(f"{input_texts_test[i]}\t{actual_target}\t{decoded_sentence_test}\n")

            if decoded_sentence_test == actual_target:
                correct_test_predictions += 1
            
            if (i+1) % 100 == 0: # Printing progress
                 print(f"Processed {i+1}/{len(input_texts_test)} test samples...")


    # --- (a) Reporting Accuracy on Test Set ---
    test_accuracy = correct_test_predictions / len(input_texts_test) if len(input_texts_test) > 0 else 0.0
    print(f"\n--- Test Set Evaluation (Best Model) ---")
    print(f"Exact Match Accuracy on Test Set: {test_accuracy:.4f} ({correct_test_predictions}/{len(input_texts_test)})")
    
    # --- (b) Providing Sample Inputs and Predictions ---
    print("\n--- Sample Predictions from Test Set ---")
    sample_indices = np.random.choice(len(input_texts_test), min(15, len(input_texts_test)), replace=False)
    
    results_data = []
    for i in sample_indices:
        input_word = input_texts_test[i]
        true_translation = target_texts_test[i]
        model_prediction = test_predictions[i]
        is_correct = "✅" if model_prediction == true_translation else "❌"
        results_data.append([input_word, true_translation, model_prediction, is_correct])
        
    results_df = pd.DataFrame(results_data, columns=["Input (Roman)", "Actual (Devanagari)", "Predicted (Devanagari)", "Correct?"])
    
    from IPython.display import display, Markdown
    display(Markdown(results_df.to_markdown(index=False)))

    print(f"\nAll predictions saved to: {prediction_file_path}")
    print("Uploaded this file to my GitHub project in a folder named 'predictions_vanilla'.")

Using Best Configuration: {'batch_size': 356, 'beam_size': 5, 'cell_type': 'GRU', 'decoder_layers': 2, 'dropout_rate': 0.2, 'early_stopping_patience': 5, 'encoder_layers': 1, 'epochs': 50, 'hidden_size': 256, 'input_embedding_size': 64, 'learning_rate': 0.001, 'optimizer': 'nadam'}

--- Retraining model on full training data with best configuration ---
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Retraining complete.

Test samples: 4502
Shape of enco

  2%|█▋                                                                           | 100/4502 [02:30<2:31:17,  2.06s/it]

Processed 100/4502 test samples...


  4%|███▍                                                                         | 200/4502 [05:01<1:55:34,  1.61s/it]

Processed 200/4502 test samples...


  7%|█████▏                                                                       | 300/4502 [07:09<1:32:33,  1.32s/it]

Processed 300/4502 test samples...


  9%|██████▊                                                                      | 400/4502 [09:11<1:57:14,  1.71s/it]

Processed 400/4502 test samples...


 11%|████████▌                                                                    | 500/4502 [11:23<1:15:42,  1.14s/it]

Processed 500/4502 test samples...


 13%|██████████▎                                                                  | 600/4502 [13:35<1:41:00,  1.55s/it]

Processed 600/4502 test samples...


 16%|███████████▉                                                                 | 700/4502 [15:56<1:17:44,  1.23s/it]

Processed 700/4502 test samples...


 18%|█████████████▋                                                               | 800/4502 [18:07<1:06:58,  1.09s/it]

Processed 800/4502 test samples...


 20%|███████████████▍                                                             | 900/4502 [20:26<1:37:02,  1.62s/it]

Processed 900/4502 test samples...


 22%|████████████████▉                                                           | 1000/4502 [22:32<1:53:55,  1.95s/it]

Processed 1000/4502 test samples...


 24%|██████████████████▌                                                         | 1100/4502 [24:52<1:32:56,  1.64s/it]

Processed 1100/4502 test samples...


 27%|████████████████████▊                                                         | 1200/4502 [26:56<52:14,  1.05it/s]

Processed 1200/4502 test samples...


 29%|██████████████████████▌                                                       | 1300/4502 [28:57<54:19,  1.02s/it]

Processed 1300/4502 test samples...


 31%|███████████████████████▋                                                    | 1400/4502 [31:04<1:10:17,  1.36s/it]

Processed 1400/4502 test samples...


 33%|█████████████████████████▉                                                    | 1500/4502 [33:16<51:29,  1.03s/it]

Processed 1500/4502 test samples...


 36%|███████████████████████████                                                 | 1600/4502 [35:33<1:00:08,  1.24s/it]

Processed 1600/4502 test samples...


 38%|████████████████████████████▋                                               | 1700/4502 [37:43<1:06:53,  1.43s/it]

Processed 1700/4502 test samples...


 40%|███████████████████████████████▏                                              | 1800/4502 [39:58<56:27,  1.25s/it]

Processed 1800/4502 test samples...


 42%|████████████████████████████████                                            | 1900/4502 [42:07<1:06:01,  1.52s/it]

Processed 1900/4502 test samples...


 44%|█████████████████████████████████▊                                          | 2000/4502 [44:27<1:04:45,  1.55s/it]

Processed 2000/4502 test samples...


 47%|████████████████████████████████████▍                                         | 2100/4502 [46:28<47:23,  1.18s/it]

Processed 2100/4502 test samples...


 49%|██████████████████████████████████████                                        | 2200/4502 [48:46<53:24,  1.39s/it]

Processed 2200/4502 test samples...


 51%|███████████████████████████████████████▊                                      | 2300/4502 [50:54<50:16,  1.37s/it]

Processed 2300/4502 test samples...


 53%|█████████████████████████████████████████▌                                    | 2400/4502 [53:07<44:20,  1.27s/it]

Processed 2400/4502 test samples...


 56%|███████████████████████████████████████████▎                                  | 2500/4502 [55:41<56:55,  1.71s/it]

Processed 2500/4502 test samples...


 58%|█████████████████████████████████████████████                                 | 2600/4502 [57:57<41:49,  1.32s/it]

Processed 2600/4502 test samples...


 60%|█████████████████████████████████████████████▌                              | 2700/4502 [1:00:02<40:03,  1.33s/it]

Processed 2700/4502 test samples...


 62%|██████████████████████████████████████████████                            | 2800/4502 [1:02:47<1:09:06,  2.44s/it]

Processed 2800/4502 test samples...


 64%|████████████████████████████████████████████████▉                           | 2900/4502 [1:06:12<53:23,  2.00s/it]

Processed 2900/4502 test samples...


 67%|██████████████████████████████████████████████████▋                         | 3000/4502 [1:09:35<42:31,  1.70s/it]

Processed 3000/4502 test samples...


 69%|████████████████████████████████████████████████████▎                       | 3100/4502 [1:13:31<52:52,  2.26s/it]

Processed 3100/4502 test samples...


 71%|████████████████████████████████████████████████████▌                     | 3200/4502 [1:17:22<1:11:09,  3.28s/it]

Processed 3200/4502 test samples...


 73%|███████████████████████████████████████████████████████▋                    | 3300/4502 [1:21:41<21:06,  1.05s/it]

Processed 3300/4502 test samples...


 76%|█████████████████████████████████████████████████████████▍                  | 3400/4502 [1:23:58<36:52,  2.01s/it]

Processed 3400/4502 test samples...


 78%|███████████████████████████████████████████████████████████                 | 3500/4502 [1:26:05<16:29,  1.01it/s]

Processed 3500/4502 test samples...


 80%|████████████████████████████████████████████████████████████▊               | 3600/4502 [1:28:14<24:18,  1.62s/it]

Processed 3600/4502 test samples...


 82%|██████████████████████████████████████████████████████████████▍             | 3700/4502 [1:30:56<17:19,  1.30s/it]

Processed 3700/4502 test samples...


 84%|████████████████████████████████████████████████████████████████▏           | 3800/4502 [1:33:56<21:38,  1.85s/it]

Processed 3800/4502 test samples...


 87%|█████████████████████████████████████████████████████████████████▊          | 3900/4502 [1:38:07<18:59,  1.89s/it]

Processed 3900/4502 test samples...


 89%|███████████████████████████████████████████████████████████████████▌        | 4000/4502 [1:41:02<15:57,  1.91s/it]

Processed 4000/4502 test samples...


 91%|█████████████████████████████████████████████████████████████████████▏      | 4100/4502 [1:43:57<11:33,  1.73s/it]

Processed 4100/4502 test samples...


 93%|██████████████████████████████████████████████████████████████████████▉     | 4200/4502 [1:46:22<07:51,  1.56s/it]

Processed 4200/4502 test samples...


 96%|████████████████████████████████████████████████████████████████████████▌   | 4300/4502 [1:48:46<04:23,  1.30s/it]

Processed 4300/4502 test samples...


 98%|██████████████████████████████████████████████████████████████████████████▎ | 4400/4502 [1:51:31<03:31,  2.07s/it]

Processed 4400/4502 test samples...


100%|███████████████████████████████████████████████████████████████████████████▉| 4500/4502 [1:53:52<00:03,  1.76s/it]

Processed 4500/4502 test samples...


100%|████████████████████████████████████████████████████████████████████████████| 4502/4502 [1:53:55<00:00,  1.52s/it]



--- Test Set Evaluation (Best Model) ---
Exact Match Accuracy on Test Set: 0.3110 (1400/4502)

--- Sample Predictions from Test Set ---


ImportError: Missing optional dependency 'tabulate'.  Use pip or conda to install tabulate.

| Input (Roman)   | Actual (Devanagari)   | Predicted (Devanagari)   | Correct?   |
|:----------------|:----------------------|:-------------------------|:-----------|
| pahantaa        | पहनता                 | पहनता                    | ✅         |
| moryon          | मौर्यों                 | मोर्यों                    | ❌         |
| vidyaon         | विद्याओं                | विद्याओं                   | ✅         |
| bahamaas        | बहामास                | बहमास                    | ❌         |
| standhaari      | स्तनधारी               | स्तंधारी                   | ❌         |
| mahanirdeshak   | महानिदेशक              | महानिर्देशों                | ❌         |
| darul           | दारूल                  | दारुल                     | ❌         |
| siswa           | सिसवा                 | सिस्व                     | ❌         |
| anawashyak      | अनावश्यक               | अनवाश्यक                  | ❌         |
| zuban           | ज़ुबान                  | जुबान                     | ❌         |
| halt            | हाल्ट                  | हल्ट                      | ❌         |
| maisore         | मैसोर                  | मैसोर                     | ✅         |
| siyah           | सियाह                 | सियाह                    | ✅         |
| nageene         | नगीने                  | नगीने                     | ✅         |
| alind           | अलिंद                  | अलिंद                     | ✅         |


All predictions saved to: predictions_vanilla\test_predictions_vanilla.tsv
Please upload this file to your GitHub project in a folder named 'predictions_vanilla'.

--- Comments on Errors (Qualitative Analysis based on samples) ---
*   Average input length for sampled errors: 6.44
*   Number of sampled errors on input sequences longer than 7 chars: 12 out of 50 sampled errors.

*   **General Observations on Error Types (manual inspection needed for specifics):**
    *   **Length Mismatches:** The model might produce outputs that are shorter or longer than the target.
        - In 50 sampled errors, 32 had length mismatches between actual and predicted.
    *   **Specific Character Confusions:** Look for patterns like one vowel being substituted for another, or similar-sounding consonants being mixed up (e.g., 'b' vs 'bh', 'd' vs 'dh'). This requires careful manual review of the `test_predictions_vanilla.tsv` file.
    *   **Handling of Complex Conjuncts (Sanyuktakshar):** Hindi has man