In [5]:
import os
import time

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tqdm import tqdm
from transformers import BartTokenizer, BartForConditionalGeneration

# Checkpoint locations, relative to the notebook's working directory.
LSTM_CHECKPOINT = 'Models/LSTM_Model/lstm_sinhala_grammar_checker.h5'
ADVANCED_LSTM_CHECKPOINT = 'Models/Advanced_LSTM/advanced_lstm_sinhala_grammar_checker.h5'
BART_CHECKPOINT_DIR = 'Models/Advanced_Bart/bart_sinhala_grammar_checker'

# Restore the two Keras LSTM models from their saved .h5 files.
lstm_model = load_model(LSTM_CHECKPOINT)
advanced_lstm_model = load_model(ADVANCED_LSTM_CHECKPOINT)

# The BART weights and its tokenizer files are read from the same directory.
bart_model = BartForConditionalGeneration.from_pretrained(BART_CHECKPOINT_DIR)
bart_tokenizer = BartTokenizer.from_pretrained(BART_CHECKPOINT_DIR)

# Build the word-index vocabulary shared by both LSTM models.
# NOTE(review): the vocabulary is fitted on only these four sentences, so any
# other word maps to nothing at inference time. Presumably the models were
# trained with a larger tokenizer whose indices must match these -- confirm,
# and load the saved training tokenizer here if one exists.
training_data = [
    "මම ගමට යමි",
    "ඔහු පාසලට ගියේය",
    "අපි ගඟේ දිය නෑවෙමු",
    "ඔවුන් විභාගය ජයග්‍රහණය කළහ",
]
lstm_tokenizer = Tokenizer()
lstm_tokenizer.fit_on_texts(training_data)

def test_lstm(model, tokenizer, sentence):
    """Run one sentence through a Keras sequence model and decode its output.

    The sentence is converted to word indices with ``tokenizer``, padded at
    the end to the model's input length, fed to ``model.predict`` and the
    highest-scoring index at each position is mapped back to text.

    Args:
        model: Keras model exposing ``input_shape`` and ``predict``.
            Assumes the output has a class axis last, i.e. something like
            (batch, positions, vocabulary) -- TODO confirm against the
            trained architecture.
        tokenizer: Fitted Keras ``Tokenizer`` used for both encoding the
            input and decoding the predicted indices.
        sentence: Raw input text.

    Returns:
        The decoded prediction for the single input sentence, as a string.
    """
    # The second input dimension is the sequence length the model expects.
    max_len = model.input_shape[1]

    # Tokenize and right-pad the sentence to that length.
    sequence = tokenizer.texts_to_sequences([sentence])
    padded_sequence = pad_sequences(sequence, maxlen=max_len, padding='post')

    # verbose=0 silences the per-call Keras progress bar; this function is
    # called inside a tqdm loop and timed by the caller, so the extra console
    # output would both garble the outer bar and add I/O to the measurement.
    prediction = model.predict(padded_sequence, verbose=0)
    predicted_sequence = np.argmax(prediction, axis=-1)

    # Only one sentence was submitted, so decode the first batch element.
    corrected_sentence = tokenizer.sequences_to_texts([predicted_sequence[0]])
    return corrected_sentence[0]


# Define the BART testing function
def test_bart(model, tokenizer, sentence, max_len=128):
    input_text = "grammar_error: " + sentence
    input_encoding = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True, max_length=max_len)
    outputs = model.generate(input_encoding["input_ids"], max_length=max_len, num_beams=4, early_stopping=True)
    corrected_sentence = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return corrected_sentence

# Input sentences fed to every model in the comparison below.
test_sentences = [
    "මම යනවා",
    "ඔහු පාසලට ගිහින් නැහැ",
    "අපි නෑවෙ",
    "ඔවුන් විභාගය ජයග්‍රහණය කළ",
    "අපි යැවෙයි",
    "අපි බත් කමින් ගෙදර යැවෙයි",
]

# Compare models
def _timed_call(fn, *args):
    """Call ``fn(*args)`` and return ``(result, elapsed_seconds)``.

    Uses ``time.perf_counter``, a monotonic high-resolution clock intended
    for measuring short durations, instead of the wall clock.
    """
    started = time.perf_counter()
    result = fn(*args)
    return result, time.perf_counter() - started


# One row per test sentence: each model's output and its inference latency.
# NOTE(review): the first call to each model likely includes one-time warm-up
# cost, so the first row's timings may not be representative -- consider a
# warm-up pass before timing.
results = []
for sentence in tqdm(test_sentences, desc="Testing Sentences"):
    lstm_output, lstm_time = _timed_call(
        test_lstm, lstm_model, lstm_tokenizer, sentence
    )
    # Both LSTM variants share the same tokenizer.
    advanced_lstm_output, advanced_lstm_time = _timed_call(
        test_lstm, advanced_lstm_model, lstm_tokenizer, sentence
    )
    bart_output, bart_time = _timed_call(
        test_bart, bart_model, bart_tokenizer, sentence
    )

    results.append({
        "Original Sentence": sentence,
        "LSTM Output": lstm_output,
        "LSTM Time (s)": lstm_time,
        "LSTM + Attention Output": advanced_lstm_output,
        "LSTM + Attention Time (s)": advanced_lstm_time,
        "BART Output": bart_output,
        "BART Time (s)": bart_time,
    })







Testing Sentences:   0%|          | 0/6 [00:00<?, ?it/s]



Testing Sentences:  17%|█▋        | 1/6 [00:04<00:20,  4.05s/it]



Testing Sentences:  33%|███▎      | 2/6 [00:07<00:15,  3.82s/it]



Testing Sentences:  50%|█████     | 3/6 [00:09<00:08,  2.86s/it]



Testing Sentences:  67%|██████▋   | 4/6 [00:12<00:05,  2.95s/it]



Testing Sentences:  83%|████████▎ | 5/6 [00:14<00:02,  2.53s/it]



Testing Sentences: 100%|██████████| 6/6 [00:18<00:00,  3.01s/it]


In [6]:
# Collect the per-sentence result dicts into a table (one row per sentence).
results_df = pd.DataFrame(results)

# Save results to a CSV
results_csv_path = "model_comparison_results/comparison_results.csv"
# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing.
os.makedirs(os.path.dirname(results_csv_path), exist_ok=True)
results_df.to_csv(results_csv_path, index=False)

# Report the destination and preview the table; head() shows at most the
# first five rows, so longer result sets are truncated in this display.
print(f"Results saved to '{results_csv_path}'")
results_df.head()

Results saved to 'model_comparison_results/comparison_results.csv'


Unnamed: 0,Original Sentence,LSTM Output,LSTM Time (s),LSTM + Attention Output,LSTM + Attention Time (s),BART Output,BART Time (s)
0,මම යනවා,මම,0.599059,මම,1.019694,අපි යැවෙමු,2.436017
1,ඔහු පාසලට ගිහින් නැහැ,පාසලට නෑවෙමු,0.097513,ඔහු පාසලට,0.215059,ඔහු පාසලට ොිව්නීය,3.339802
2,අපි නෑවෙ,,0.110039,අපි,0.324612,අපි නැවෙමු,1.289956
3,ඔවුන් විභාගය ජයග්‍රහණය කළ,නෑවෙමු මම ජයග්‍රහණය,0.079003,ඔවුන් විභාගය ජයග්‍රහණය,0.110041,ඔවුන් ළමය ගෙදර අපිලා,2.880533
4,අපි යැවෙයි,,0.082515,අපි,0.306373,අපි යැවෙමු,1.395645
