In [1]:
# Tokens that must never be offered as a prediction: Khmer and Latin
# punctuation marks plus the "<NUM>" marker (presumably a placeholder that
# replaces numbers during preprocessing -- confirm against the tokenizer).
# Passed as the `skip_tokens` argument of the prediction helpers.
skip_tokens = {"។", "៕", "!", "?", "^", ".", ",", "%", "...", "៖", "«", "»", "<NUM>"}

def predict_top_k_ngram(context, model, skip_tokens, k=5):
    """
    Return the k most probable next tokens for an n-gram context.

    context: str (bigram) or tuple (trigram); a list is accepted and
        converted to a tuple so it can be used as a dictionary key
    model: bigram or trigram dictionary mapping a context to a
        {token: probability} dictionary
    skip_tokens: container of tokens that must not be suggested
    k: maximum number of predictions; None returns every candidate,
        zero or a negative value returns an empty list

    Returns a list of (token, probability) tuples sorted by descending
    probability. An unknown context yields an empty list.
    """
    # Sentence-boundary markers are never useful as suggestions.
    boundary_tokens = ('<s>', '</s>')

    # A negative k would make the final slice drop items from the END of the
    # ranking instead of limiting it, so treat k <= 0 as "no predictions".
    if k is not None and k <= 0:
        return []

    # Lists are unhashable and cannot be looked up in a dict; a context built
    # by slicing a token list (e.g. tokens[-2:]) is normalised to a tuple.
    if isinstance(context, list):
        context = tuple(context)

    # `or {}` also covers a model that stores None for a context.
    candidates = model.get(context) or {}

    filtered = [
        (tok, prob)
        for tok, prob in candidates.items()
        if tok not in skip_tokens and tok not in boundary_tokens
    ]

    # list.sort is stable even with reverse=True, so equally probable tokens
    # keep the order in which the model dictionary yields them.
    filtered.sort(key=lambda x: x[1], reverse=True)
    return filtered[:k]

In [2]:
def autocomplete_bigram(sentence, model, skip_tokens, k=5):
    """
    Suggest the next word for a sentence using a bigram model.

    sentence: str; split on whitespace, so the text is expected to be
        word-segmented with spaces already
    model: bigram dictionary passed through to predict_top_k_ngram
    skip_tokens: tokens that must not be suggested
    k: maximum number of suggestions

    Returns whatever predict_top_k_ngram returns for the last word of the
    sentence, or an empty list when the sentence contains no words.
    """
    tokens = sentence.split()

    # An empty or whitespace-only sentence has no last word; indexing
    # tokens[-1] would raise IndexError, so there is nothing to predict.
    if not tokens:
        return []

    return predict_top_k_ngram(tokens[-1], model, skip_tokens, k)

In [3]:
import pickle

# --- Loading the model ---
def load_model(filepath):
    """
    Read a pickled model from disk and return it.

    filepath: path of the pickle file to open in binary mode

    Prints a confirmation line once the file has been read.

    NOTE(security): unpickling can execute arbitrary code, so only load
    files that come from a trusted source.
    """
    stream = open(filepath, 'rb')
    try:
        restored = pickle.load(stream)
    finally:
        # Release the file handle whether or not unpickling succeeded.
        stream.close()

    print(f"Model loaded from {filepath}")
    return restored

In [4]:
# Load the pickled trigram model. The path is relative, so it is resolved
# against the kernel's current working directory.
loaded_trigram = load_model('../saved_ngram/trigram_model.pkl')

# Predict using the loaded model
# The context is a tuple of two tokens and is used as the lookup key.
context = ("ខ្ញុំ", "ទៅ")  # trigram context example
top_preds = predict_top_k_ngram(context, loaded_trigram, skip_tokens, k=5)
# Show the (token, probability) pairs returned for this context (at most five).
print(top_preds)

Model loaded from ../saved_ngram/trigram_model.pkl
[('ផ្សារ', 0.25), ('សាលា', 0.25), ('ភូមិ', 0.125), ('កាន់', 0.125), ('លេងង', 0.125)]
