In [3]:
import numpy as np
from collections import defaultdict
from itertools import permutations
from typing import List

class NgramWordOrderCorrector:
    def __init__(self):
        self.trigram_counts = defaultdict(int)
        self.bigram_counts = defaultdict(int)

    def train(self, sentences: List[str]):
        """Train on correct Sinhala sentences."""
        for sentence in sentences:
            words = ['<s>', '<s>'] + sentence.split() + ['</s>']
            for i in range(len(words) - 2):
                self.trigram_counts[tuple(words[i:i + 3])] += 1
                self.bigram_counts[tuple(words[i:i + 2])] += 1

    def correct_word_order(self, sentence: str) -> str:
        words = sentence.split()
        best_order = words
        max_score = self._calculate_score(words)

        for perm in self._get_valid_permutations(words):
            perm_list = list(perm)
            score = self._calculate_score(perm_list)
            if score > max_score:
                max_score = score
                best_order = perm_list

        return ' '.join(best_order)

    def _calculate_score(self, words: List[str]) -> int:
        words = ['<s>', '<s>'] + words + ['</s>']
        score = 0
        for i in range(len(words) - 2):
            trigram = tuple(words[i:i + 3])
            score += self.trigram_counts[trigram]
        return score

    def _get_valid_permutations(self, words: List[str]):
        """Generates permutations of the sentence (constrained for short inputs)."""
        if len(words) > 7:
            return [words]
        return permutations(words)


In [4]:
if __name__ == "__main__":
    print("=== N-gram Word Order Correction ===")
    corrector = NgramWordOrderCorrector()
    corrector.train(["මම පොත කියවමි", "අපි පාසල් යමු"])
    result = corrector.correct_word_order("පොත මම කියවමි")
    print(f"Input Sentence: 'පොත මම කියවමි'")
    print(f"Corrected Sentence: '{result}'\n")


=== N-gram Word Order Correction ===
Input Sentence: 'පොත මම කියවමි'
Corrected Sentence: 'මම පොත කියවමි'



In [5]:
import re
from typing import Dict, List

class POSBasedAgreementChecker:
    def __init__(self):

        self.pos_patterns = {
            'subject': r'[මඅඔඇ][a-zA-Z\u0D80-\u0DFF]+',
            'verb': r'[a-zA-Z\u0D80-\u0DFF]+[නව]වා$'
        }


        self.agreement_rules = {
            'මම': {'suffix': 'මි', 'person': 'first', 'number': 'singular'},
            'අපි': {'suffix': 'මු', 'person': 'first', 'number': 'plural'},
            'ඔබ': {'suffix': 'යි', 'person': 'second', 'number': 'singular'},
            'ඔබලා': {'suffix': 'මු', 'person': 'second', 'number': 'plural'},
            'ඔහු': {'suffix': 'යි', 'person': 'third', 'number': 'singular'},
            'ඇය': {'suffix': 'යි', 'person': 'third', 'number': 'singular'},
            'ඔවුන්': {'suffix': 'ති', 'person': 'third', 'number': 'plural'}
        }

    def check_agreement(self, sentence: str) -> Dict:
        """Checks subject-verb agreement in a given Sinhala sentence."""
        words = sentence.split()
        subject = None
        verb = None

        for word in words:
            if re.match(self.pos_patterns['subject'], word):
                subject = word
            elif re.match(self.pos_patterns['verb'], word):
                verb = word

        if not subject or not verb:
            return {'is_correct': False, 'error': 'Missing subject or verb'}

        if subject in self.agreement_rules:
            expected_suffix = self.agreement_rules[subject]['suffix']
            if not verb.endswith(expected_suffix):
                base_verb = self._get_verb_base(verb)
                corrected_verb = base_verb + expected_suffix
                return {
                    'is_correct': False,
                    'correction': corrected_verb,
                    'error': 'Subject-verb agreement mismatch'
                }

        return {'is_correct': True}

    def _get_verb_base(self, verb: str) -> str:
        """Extract the base of the verb by removing common suffixes."""
        for ending in ['නවා', 'යි', 'මි', 'මු', 'ති']:
            if verb.endswith(ending):
                return verb[:-len(ending)]
        return verb


In [7]:
if __name__ == "__main__":
    print("=== POS-Based Subject-Verb Agreement ===")

    checker = POSBasedAgreementChecker()
    sentence1 = "මම පොත කියවමි"
    result1 = checker.check_agreement(sentence1)
    print(f"Input Sentence: '{sentence1}'")
    if result1['is_correct']:
        print("The sentence is correct.\n")
    else:
        print(f"Error: {result1['error']}")
        if 'correction' in result1:
            print(f"Suggested Correction: '{result1['correction']}'\n")

    sentence2 = "මම යනවා"
    result2 = checker.check_agreement(sentence2)
    print(f"Input Sentence: '{sentence2}'")
    if result2['is_correct']:
        print("The sentence is correct.\n")
    else:
        print(f"Error: {result2['error']}")
        if 'correction' in result2:
            print(f"Suggested Correction: '{result2['correction']}'\n")


=== POS-Based Subject-Verb Agreement ===
Input Sentence: 'මම පොත කියවමි'
Error: Missing subject or verb
Input Sentence: 'මම යනවා'
Error: Subject-verb agreement mismatch
Suggested Correction: 'යමි'

