<a href="https://colab.research.google.com/github/Rishpraveen/Natural-Language-Processing-21MID0151/blob/main/TamilLexicon.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Creating a Lexicon of words for Tamil.

In [None]:
import pandas as pd
import random

#The Tamil Script
Tamil letters come in **two** main groups:

####**роЙропро┐ро░рпЖро┤рпБродрпНродрпБроХрпНроХро│рпН (Independent Vowels)**:
Example: роЕ, роЖ, роЗ, роИ, роЙ, роК, роО, роП, роР, роТ, роУ, роФ

####**роорпЖропрпНропрпЖро┤рпБродрпНродрпБроХрпНроХро│рпН (Consonants)**:
Written with a "virama" (рпН) to indicate that they normally carry no vowel sound by themselves. For instance, роХрпН, роЩрпН,роЪрпН, роЮрпН, ...

When forming a syllable, a consonant (after removing the virama) combines with a vowel marker. For example,

- Base: роХрпН (which is actually "k" with a virama)
- Combined with the vowel "роЕ" (the inherent vowel) gives you роХ
- With other vowels, you attach a marker. For instance:
 - "роЖ" тЖТ marker ро╛ gives роХро╛
 - "роЗ" тЖТ marker ро┐ gives роХро┐

| Vowel | Marker | Resulting Form (with base "роХрпН") |
|---|---|---|
| роЕ | (none) | роХ |
| роЖ | ро╛ | роХро╛ |
| роЗ | ро┐ | роХро┐ |
| роИ | рпА | роХрпА |
| роЙ | рпБ | роХрпБ |
| роК | рпВ | роХрпВ |
| роО | рпЖ | роХрпЖ |
| роП | рпЗ | роХрпЗ |
| роР | рпИ | роХрпИ |
| роТ | рпК | роХрпК |
| роУ | рпЛ | роХрпЛ |
| роФ | рпМ | роХрпМ |

# Define Allowed Syllable Structures
For a more natural word, we want syllables that follow Tamil phonotactics. A simplified set of allowed syllable types is:

- V: A vowel-only syllable.
- CV: A consonant-vowel pair (most common).

(Note: Real Tamil can have more complex structures, but CV is a good starting point.)

In [None]:
# Define Tamil characters
tamil_vowels = ['роЕ', 'роЖ', 'роЗ', 'роИ', 'роЙ', 'роК', 'роО', 'роП', 'роР', 'роТ', 'роУ', 'роФ']
tamil_consonants = ['роХ', 'роЩ', 'роЪ', 'роЮ', 'роЯ', 'рог', 'род', 'рои', 'рок', 'роо', 'роп', 'ро░', 'ро▓', 'ро╡', 'ро┤', 'ро│', 'ро▒', 'рой']

# Vowel markers (to be added to consonants)
vowel_markers = {
    'роЕ': '',  # No marker for 'роЕ' (inherent vowel)
    'роЖ': 'ро╛',
    'роЗ': 'ро┐',
    'роИ': 'рпА',
    'роЙ': 'рпБ',
    'роК': 'рпВ',
    'роО': 'рпЖ',
    'роП': 'рпЗ',
    'роР': 'рпИ',
    'роТ': 'рпК',
    'роУ': 'рпЛ',
    'роФ': 'рпМ'
}

def generate_tamil_syllable():
    """
    Generate a single Tamil syllable.
    Returns either a vowel (V) or a consonant-vowel (CV) combination.
    """
    # Decide syllable type: V or CV (70% chance for CV)
    syllable_type = random.choices(['V', 'CV'], weights=[20, 80])[0]

    if syllable_type == 'V':
        # Just return an independent vowel (роЙропро┐ро░рпЖро┤рпБродрпНродрпБ)
        return random.choice(tamil_vowels)
    else:
        # Generate a CV combination
        consonant = random.choice(tamil_consonants)
        vowel = random.choice(tamil_vowels)

        # Add vowel marker to consonant
        marker = vowel_markers[vowel]
        if marker:
            return consonant + marker
        else:
            # For 'роЕ', just return the consonant as it already has the inherent vowel
            return consonant

def generate_tamil_word(min_syllables=2, max_syllables=4):
    """
    Generate a Tamil-like word with a random number of syllables.
    """
    num_syllables = random.randint(min_syllables, max_syllables)
    word = ''

    # First syllable can be V or CV
    word += generate_tamil_syllable()

    # For remaining syllables
    for _ in range(num_syllables - 1):
        # For middle syllables, we can add some rules to make words more natural
        # For example, avoiding too many vowels in sequence
        if word[-1] in tamil_vowels:
            # If last char is a vowel, force a CV syllable
            consonant = random.choice(tamil_consonants)
            vowel = random.choice(tamil_vowels)
            marker = vowel_markers[vowel]
            if marker:
                word += consonant + marker
            else:
                word += consonant
        else:
            word += generate_tamil_syllable()

    return word

def generate_tamil_text(num_words=5, min_syllables=1, max_syllables=4):
    """
    Generate multiple Tamil-like words.
    """
    words = []
    for _ in range(num_words):
        words.append(generate_tamil_word(min_syllables, max_syllables))

    return ' '.join(words)

# Create a visually appealing output in Colab
from IPython.display import HTML, display
import pandas as pd

def display_generated_text(num_examples=10):
    """
    Generate and display Tamil-like text examples in a nice format.
    """
    examples = []
    for i in range(num_examples):
        word_count = random.randint(1, 5)
        text = generate_tamil_text(word_count, 1, 4)
        examples.append({
            "Example": i+1,
            "Generated Text": text,
            "Word Count": word_count
        })

    df = pd.DataFrame(examples)
    display(df)

    # Also create a colorful box with a random example
    random_example = generate_tamil_text(random.randint(3, 7), 2, 4)
    html = f"""
    <div style="background: linear-gradient(45deg, #FF9671, #FFC75F);
                border-radius: 10px;
                padding: 20px;
                margin: 20px 0;
                color: white;
                font-size: 24px;
                text-align: center;
                box-shadow: 0 4px 8px rgba(0,0,0,0.1);">
        {random_example}
    </div>
    """
    display(HTML(html))

    # Add an explanation section
    explanation_html = f"""
    <div style="background: blue;
                border-left: 5px solid #4285F4;
                padding: 15px;
                margin: 20px 0;">
        <h3>Tamil Script Explanation</h3>
        <p>This generator creates Tamil-like text following these rules:</p>
        <ul>
            <li><strong>роЙропро┐ро░рпЖро┤рпБродрпНродрпБроХрпНроХро│рпН (Independent Vowels)</strong>: {', '.join(tamil_vowels)}</li>
            <li><strong>роорпЖропрпНропрпЖро┤рпБродрпНродрпБроХрпНроХро│рпН (Consonants)</strong>: {', '.join(tamil_consonants)}</li>
            <li>Vowel markers are only applied to consonants, not to independent vowels</li>
            <li>Words are formed from syllables that follow Tamil phonotactics (V or CV structure)</li>
        </ul>
    </div>
    """
    display(HTML(explanation_html))

    return examples

# Run the generation and display
print("ЁЯЗоЁЯЗ│ Tamil-Like Word Generator ЁЯЗоЁЯЗ│")
print("==================================")
print("Generating examples based on Tamil script rules...")
examples = display_generated_text(10)

# Interactive generation
from ipywidgets import interact, IntSlider

@interact(words=IntSlider(min=1, max=10, step=1, value=3, description="Words:"),
          min_syllables=IntSlider(min=1, max=5, step=1, value=1, description="Min Syllables:"),
          max_syllables=IntSlider(min=1, max=7, step=1, value=3, description="Max Syllables:"))
def generate_interactive(words, min_syllables, max_syllables):
    text = generate_tamil_text(words, min_syllables, max_syllables)
    html = f"""
    <div style="background: linear-gradient(45deg, #845EC2, #D65DB1);
                border-radius: 10px;
                padding: 15px;
                margin: 10px 0;
                color: white;
                font-size: 20px;
                text-align: center;">
        {text}
    </div>
    """
    display(HTML(html))

    # Show the breakdown of syllables
    parts = []
    for word in text.split():
        # This is a simplified syllable breakdown and won't be perfect
        syllables = []
        i = 0
        while i < len(word):
            if i+1 < len(word) and word[i+1] in ''.join(vowel_markers.values()):
                syllables.append(word[i:i+2])
                i += 2
            else:
                syllables.append(word[i])
                i += 1
        parts.append(syllables)

    breakdown_html = f"""
    <div style="background: blue;
                border: 1px solid #ddd;
                border-radius: 5px;
                padding: 10px;
                margin: 10px 0;">
        <h4>Word Breakdown:</h4>
        <ul style="list-style-type: none; padding-left: 0;">
            {"".join(f"<li>{' + '.join(word)} = {' '.join(word)}</li>" for word in parts)}
        </ul>
    </div>
    """
    display(HTML(breakdown_html))

ЁЯЗоЁЯЗ│ Tamil-Like Word Generator ЁЯЗоЁЯЗ│
Generating examples based on Tamil script rules...


Unnamed: 0,Example,Generated Text,Word Count
0,1,роЪрпЖроП ро▒рпЛро░рпЖ ро░ роирпМродрпЗ роЩро▒рпКроЯрпМрогрпБ,5
1,2,рогро┐ро▓ро╛ роЯрпВро▒рпЖроИроЯрпМ роЪрпАроК роЕроЪрпЛрокрпМ,4
2,3,роЯрпБроирпЗрокрпКроЪрпИ ро░рпЗроО рокро┐родрпАроХрпЖро▒рпА,3
3,4,ропрпЖродрпАроЪрпА роЙрогрпМроЯрпМроТ,2
4,5,роЮрпЖ,1
5,6,ро│ро┐ройрпАроЗро│рпЗ,1
6,7,ро┤ро│роЕро▒рпЖ ро▓рпБ роЪрпЖроЖропрпАроЮро┐ роорокрпКроЪрпМроЩрпК роЙ,5
7,8,роП роорпБ роирпМ,3
8,9,ро▒рпК,1
9,10,родрпИ ро┤рпМро│рпВ роЮрпЛроирпАро░рпЖроЯрпМ ро╡ро┐,4


interactive(children=(IntSlider(value=3, description='Words:', max=10, min=1), IntSlider(value=1, description=тАж

 # Syllable Generator
- above we wrote a function that generates a syllable according to the rules above. where we  decide randomly whether to create a vowel-only syllable or a consonant-vowel (CV) syllable. In a more advanced version, you might favor CV syllables or vary them based on word position.


#Word Generator Using the Syllable Generator
- Then we combined syllables to form a word. We decide on a number of syllables (say between 2 and 5) and then join them. For a more natural feel, we might allow the first syllable to sometimes be vowel-only and then force subsequent syllables to be CV.

In [None]:
# Tamil to Roman transliteration system

# Mapping dictionary for Tamil characters to Roman
romanized_map = {
    # Independent vowels
    'роЕ': 'a', 'роЖ': 'aa', 'роЗ': 'i', 'роИ': 'ii', 'роЙ': 'u', 'роК': 'uu',
    'роО': 'e', 'роП': 'ee', 'роР': 'ai', 'роТ': 'o', 'роУ': 'oo', 'роФ': 'au',

    # Consonants
    'роХ': 'k', 'роЩ': 'ng', 'роЪ': 'ch', 'роЮ': 'nj', 'роЯ': 't', 'рог': 'n',
    'род': 'th', 'рои': 'n', 'рок': 'p', 'роо': 'm', 'роп': 'y', 'ро░': 'r',
    'ро▓': 'l', 'ро╡': 'v', 'ро┤': 'zh', 'ро│': 'l', 'ро▒': 'r', 'рой': 'n','ha':'ро╣',

    # Vowel markers (combining signs)
    'ро╛': 'aa', 'ро┐': 'i', 'рпА': 'ii', 'рпБ': 'u', 'рпВ': 'uu',
    'рпЖ': 'e', 'рпЗ': 'ee', 'рпИ': 'ai', 'рпК': 'o', 'рпЛ': 'oo', 'рпМ': 'au',

    # Special characters
    'рпН': '', # pulli (removes inherent vowel)
    'роГ': 'h' ,# aaytham

}

# List of Tamil consonants
tamil_consonants = [
    'роХ', 'роЩ', 'роЪ', 'роЮ', 'роЯ', 'рог', 'род', 'рои', 'рок', 'роо',
    'роп', 'ро░', 'ро▓', 'ро╡', 'ро┤', 'ро│', 'ро▒', 'рой'
]

# List of vowel markers
vowel_markers = ['ро╛', 'ро┐', 'рпА', 'рпБ', 'рпВ', 'рпЖ', 'рпЗ', 'рпИ', 'рпК', 'рпЛ', 'рпМ', 'рпН']

def romanize_tamil_word(word):
    """
    Convert a Tamil word into its romanized form.

    Args:
        word (str): Tamil word to be romanized

    Returns:
        str: Romanized version of the Tamil word

    Notes:
        - Consonants without vowel markers take the inherent vowel 'a'
        - Consonants with pulli ('рпН') have no vowel sound
        - Consonants with vowel markers combine with those markers
        - Independent vowels are directly transliterated
    """
    romanized = ""
    i = 0

    while i < len(word):
        char = word[i]

        # Case 1: Character is a consonant
        if char in tamil_consonants:
            consonant_roman = romanized_map[char]

            # Look ahead for vowel marker or pulli
            if i + 1 < len(word) and word[i+1] in vowel_markers:
                if word[i+1] == 'рпН':  # pulli - no vowel
                    romanized += consonant_roman
                else:  # vowel marker
                    romanized += consonant_roman + romanized_map[word[i+1]]
                i += 2
            else:
                # No vowel marker: add inherent 'a'
                romanized += consonant_roman + 'a'
                i += 1

        # Case 2: Character is an independent vowel or other mapped character
        elif char in romanized_map and char not in vowel_markers:
            romanized += romanized_map[char]
            i += 1

        # Case 3: Vowel marker appearing independently (unusual, but handle it)
        elif char in vowel_markers:
            # Skip if it's a stray vowel marker (unusual case)
            i += 1

        # Case 4: Character not in our mapping
        else:
            romanized += char  # Keep as is (numbers, punctuation, etc.)
            i += 1

    return romanized

# Function to handle multiple words or sentences
def romanize_tamil_text(text):
    """
    Romanize a Tamil text (can contain multiple words).

    Args:
        text (str): Tamil text to romanize

    Returns:
        str: Romanized version of the Tamil text
    """
    words = text.split()
    romanized_words = [romanize_tamil_word(word) for word in words]
    return " ".join(romanized_words)

# Example usage
if __name__ == "__main__":
    # Test with various Tamil words
    test_words = [
        "роХро╛родро▓рпН",       # kaathal (love)
        "родрооро┐ро┤рпН",       # thamizh (Tamil)
        "ро╡рогроХрпНроХроорпН",     # vanakkam (hello)
        "роиройрпНро▒ро┐",       # nanri (thank you)
        "роЗроирпНродро┐ропро╛"      # inthiyaa (India)
    ]

    print("Tamil to Roman Transliteration Examples:")
    print("---------------------------------------")

    for word in test_words:
        romanized = romanize_tamil_word(word)
        print(f"Tamil: {word} тЖТ Romanized: {romanized}")

    # Test with a sentence
    tamil_sentence = " родрооро┐ро┤рпН роорпКро┤ро┐ рооро┐роХро╡рпБроорпН роЕро┤роХро╛ройродрпБ"  # Tamil mozhi mihavum azhahanathu
    romanized_sentence = romanize_tamil_text(tamil_sentence)
    print("\nTamil sentence:")
    print(f"Original: {tamil_sentence}")
    print(f"Romanized: {romanized_sentence}")

Tamil to Roman Transliteration Examples:
---------------------------------------
Tamil: роХро╛родро▓рпН тЖТ Romanized: kaathal
Tamil: родрооро┐ро┤рпН тЖТ Romanized: thamizh
Tamil: ро╡рогроХрпНроХроорпН тЖТ Romanized: vanakkam
Tamil: роиройрпНро▒ро┐ тЖТ Romanized: nanri
Tamil: роЗроирпНродро┐ропро╛ тЖТ Romanized: inthiyaa

Tamil sentence:
Original:  родрооро┐ро┤рпН роорпКро┤ро┐ рооро┐роХро╡рпБроорпН роЕро┤роХро╛ройродрпБ
Romanized: thamizh mozhi mikavum azhakaanathu


In [None]:
# Roman to Tamil transliteration system

# First, create the reverse mapping for simple conversions
tamil_map = {
    # Vowels (independent forms)
    'a': 'роЕ', 'aa': 'роЖ', 'i': 'роЗ', 'ii': 'роИ', 'u': 'роЙ', 'uu': 'роК',
    'e': 'роО', 'ee': 'роП', 'ai': 'роР', 'o': 'роТ', 'oo': 'роУ', 'au': 'роФ',

    # Consonants without vowels (with pulli)
    'k': 'роХрпН', 'ng': 'роЩрпН', 'ch': 'роЪрпН', 'nj': 'роЮрпН', 't': 'роЯрпН', 'n': 'роирпН',
    'th': 'родрпН', 'p': 'рокрпН', 'm': 'роорпН', 'y': 'ропрпН', 'r': 'ро░рпН', 'l': 'ро▓рпН',
    'v': 'ро╡рпН', 'zh': 'ро┤рпН', 'r': 'ро▒рпН', 'n': 'ройрпН',

    # Special character
    'h': 'роГ' , # aaytham
    # 'ha':'ро╣'
}

# Dictionary for consonants with vowels
consonant_vowel_combinations = {
    # k + vowels
    'ka': 'роХ', 'kaa': 'роХро╛', 'ki': 'роХро┐', 'kii': 'роХрпА', 'ku': 'роХрпБ', 'kuu': 'роХрпВ',
    'ke': 'роХрпЖ', 'kee': 'роХрпЗ', 'kai': 'роХрпИ', 'ko': 'роХрпК', 'koo': 'роХрпЛ', 'kau': 'роХрпМ',

    # ng + vowels
    'nga': 'роЩ', 'ngaa': 'роЩро╛', 'ngi': 'роЩро┐', 'ngii': 'роЩрпА', 'ngu': 'роЩрпБ', 'nguu': 'роЩрпВ',
    'nge': 'роЩрпЖ', 'ngee': 'роЩрпЗ', 'ngai': 'роЩрпИ', 'ngo': 'роЩрпК', 'ngoo': 'роЩрпЛ', 'ngau': 'роЩрпМ',

    # ch + vowels
    'cha': 'роЪ', 'chaa': 'роЪро╛', 'chi': 'роЪро┐', 'chii': 'роЪрпА', 'chu': 'роЪрпБ', 'chuu': 'роЪрпВ',
    'che': 'роЪрпЖ', 'chee': 'роЪрпЗ', 'chai': 'роЪрпИ', 'cho': 'роЪрпК', 'choo': 'роЪрпЛ', 'chau': 'роЪрпМ',

    # nj + vowels
    'nja': 'роЮ', 'njaa': 'роЮро╛', 'nji': 'роЮро┐', 'njii': 'роЮрпА', 'nju': 'роЮрпБ', 'njuu': 'роЮрпВ',
    'nje': 'роЮрпЖ', 'njee': 'роЮрпЗ', 'njai': 'роЮрпИ', 'njo': 'роЮрпК', 'njoo': 'роЮрпЛ', 'njau': 'роЮрпМ',

    # t + vowels
    'ta': 'роЯ', 'taa': 'роЯро╛', 'ti': 'роЯро┐', 'tii': 'роЯрпА', 'tu': 'роЯрпБ', 'tuu': 'роЯрпВ',
    'te': 'роЯрпЖ', 'tee': 'роЯрпЗ', 'tai': 'роЯрпИ', 'to': 'роЯрпК', 'too': 'роЯрпЛ', 'tau': 'роЯрпМ',

    # th + vowels
    'tha': 'род', 'thaa': 'родро╛', 'thi': 'родро┐', 'thii': 'родрпА', 'thu': 'родрпБ', 'thuu': 'родрпВ',
    'the': 'родрпЖ', 'thee': 'родрпЗ', 'thai': 'родрпИ', 'tho': 'родрпК', 'thoo': 'родрпЛ', 'thau': 'родрпМ',

    # n + vowels (multiple 'n' consonants in Tamil, using the dental 'рои' version)
    'na': 'рои', 'naa': 'роиро╛', 'ni': 'роиро┐', 'nii': 'роирпА', 'nu': 'роирпБ', 'nuu': 'роирпВ',
    'ne': 'роирпЖ', 'nee': 'роирпЗ', 'nai': 'роирпИ', 'no': 'роирпК', 'noo': 'роирпЛ', 'nau': 'роирпМ',

    # p + vowels
    'pa': 'рок', 'paa': 'рокро╛', 'pi': 'рокро┐', 'pii': 'рокрпА', 'pu': 'рокрпБ', 'puu': 'рокрпВ',
    'pe': 'рокрпЖ', 'pee': 'рокрпЗ', 'pai': 'рокрпИ', 'po': 'рокрпК', 'poo': 'рокрпЛ', 'pau': 'рокрпМ',

    # m + vowels
    'ma': 'роо', 'maa': 'рооро╛', 'mi': 'рооро┐', 'mii': 'роорпА', 'mu': 'роорпБ', 'muu': 'роорпВ',
    'me': 'роорпЖ', 'mee': 'роорпЗ', 'mai': 'роорпИ', 'mo': 'роорпК', 'moo': 'роорпЛ', 'mau': 'роорпМ',

    # y + vowels
    'ya': 'роп', 'yaa': 'ропро╛', 'yi': 'ропро┐', 'yii': 'ропрпА', 'yu': 'ропрпБ', 'yuu': 'ропрпВ',
    'ye': 'ропрпЖ', 'yee': 'ропрпЗ', 'yai': 'ропрпИ', 'yo': 'ропрпК', 'yoo': 'ропрпЛ', 'yau': 'ропрпМ',

    # r + vowels (using 'ро░')
    'ra': 'ро░', 'raa': 'ро░ро╛', 'ri': 'ро░ро┐', 'rii': 'ро░рпА', 'ru': 'ро░рпБ', 'ruu': 'ро░рпВ',
    're': 'ро░рпЖ', 'ree': 'ро░рпЗ', 'rai': 'ро░рпИ', 'ro': 'ро░рпК', 'roo': 'ро░рпЛ', 'rau': 'ро░рпМ',

    # l + vowels (using 'ро▓')
    'la': 'ро▓', 'laa': 'ро▓ро╛', 'li': 'ро▓ро┐', 'lii': 'ро▓рпА', 'lu': 'ро▓рпБ', 'luu': 'ро▓рпВ',
    'le': 'ро▓рпЖ', 'lee': 'ро▓рпЗ', 'lai': 'ро▓рпИ', 'lo': 'ро▓рпК', 'loo': 'ро▓рпЛ', 'lau': 'ро▓рпМ',

    # v + vowels
    'va': 'ро╡', 'vaa': 'ро╡ро╛', 'vi': 'ро╡ро┐', 'vii': 'ро╡рпА', 'vu': 'ро╡рпБ', 'vuu': 'ро╡рпВ',
    've': 'ро╡рпЖ', 'vee': 'ро╡рпЗ', 'vai': 'ро╡рпИ', 'vo': 'ро╡рпК', 'voo': 'ро╡рпЛ', 'vau': 'ро╡рпМ',

    # zh + vowels
    'zha': 'ро┤', 'zhaa': 'ро┤ро╛', 'zhi': 'ро┤ро┐', 'zhii': 'ро┤рпА', 'zhu': 'ро┤рпБ', 'zhuu': 'ро┤рпВ',
    'zhe': 'ро┤рпЖ', 'zhee': 'ро┤рпЗ', 'zhai': 'ро┤рпИ', 'zho': 'ро┤рпК', 'zhoo': 'ро┤рпЛ', 'zhau': 'ро┤рпМ',

    # Additional variants for different Tamil 'n' sounds
    # Alveolar 'n' (рой) + vowels
    'na': 'рой', 'naa': 'ройро╛', 'ni': 'ройро┐', 'nii': 'ройрпА', 'nu': 'ройрпБ', 'nuu': 'ройрпВ',
    'ne': 'ройрпЖ', 'nee': 'ройрпЗ', 'nai': 'ройрпИ', 'no': 'ройрпК', 'noo': 'ройрпЛ', 'nau': 'ройрпМ',

    # Retroflex 'n' (рог) + vowels
    'na': 'рог', 'naa': 'рогро╛', 'ni': 'рогро┐', 'nii': 'рогрпА', 'nu': 'рогрпБ', 'nuu': 'рогрпВ',
    'ne': 'рогрпЖ', 'nee': 'рогрпЗ', 'nai': 'рогрпИ', 'no': 'рогрпК', 'noo': 'рогрпЛ', 'nau': 'рогрпМ',

    # Retroflex 'r' (ро▒) + vowels
    'ra': 'ро▒', 'raa': 'ро▒ро╛', 'ri': 'ро▒ро┐', 'rii': 'ро▒рпА', 'ru': 'ро▒рпБ', 'ruu': 'ро▒рпВ',
    're': 'ро▒рпЖ', 'ree': 'ро▒рпЗ', 'rai': 'ро▒рпИ', 'ro': 'ро▒рпК', 'roo': 'ро▒рпЛ', 'rau': 'ро▒рпМ',

    # Retroflex 'l' (ро│) + vowels
    'la': 'ро│', 'laa': 'ро│ро╛', 'li': 'ро│ро┐', 'lii': 'ро│рпА', 'lu': 'ро│рпБ', 'luu': 'ро│рпВ',
    'le': 'ро│рпЖ', 'lee': 'ро│рпЗ', 'lai': 'ро│рпИ', 'lo': 'ро│рпК', 'loo': 'ро│рпЛ', 'lau': 'ро│рпМ'
}

# List of consonant prefixes
consonant_prefixes = [
    'k', 'ng', 'ch', 'nj', 't', 'th', 'n', 'p',
    'm', 'y', 'r', 'l', 'v', 'zh','ha'
]
# List of vowel suffixes
vowel_suffixes = [
    'a', 'aa', 'i', 'ii', 'u', 'uu',
    'e', 'ee', 'ai', 'o', 'oo', 'au'
]

def convert_roman_to_tamil(romanized_text):
    """
    Convert romanized text to Tamil.

    Args:
        romanized_text (str): The romanized text to convert

    Returns:
        str: Tamil text
    """
    words = romanized_text.split()
    tamil_words = []

    for word in words:
        tamil_word = ""
        i = 0

        while i < len(word):
            # Try to match longest possible segments
            matched = False

            # First check for consonant-vowel combinations (longest matches)
            for prefix in sorted(consonant_prefixes, key=len, reverse=True):
                if word[i:].startswith(prefix):
                    for suffix in sorted(vowel_suffixes, key=len, reverse=True):
                        combo = prefix + suffix
                        if word[i:].startswith(combo) and combo in consonant_vowel_combinations:
                            tamil_word += consonant_vowel_combinations[combo]
                            i += len(combo)
                            matched = True
                            break
                    if matched:
                        break

            # If no consonant-vowel combo matched, check for standalone vowels or consonants
            if not matched:
                # Try matching vowels (independent forms)
                for vowel in sorted(vowel_suffixes, key=len, reverse=True):
                    if word[i:].startswith(vowel) and vowel in tamil_map:
                        tamil_word += tamil_map[vowel]
                        i += len(vowel)
                        matched = True
                        break

                # Try matching consonants (with pulli)
                if not matched:
                    for consonant in sorted(consonant_prefixes, key=len, reverse=True):
                        if word[i:].startswith(consonant) and consonant in tamil_map:
                            tamil_word += tamil_map[consonant]
                            i += len(consonant)
                            matched = True
                            break

            # If still no match, keep the character as is
            if not matched:
                tamil_word += word[i]
                i += 1

        tamil_words.append(tamil_word)

    return " ".join(tamil_words)

# Example usage
if __name__ == "__main__":
    # Test with romanized Tamil words
    test_words = [
        "kaathal",      # роХро╛родро▓рпН (love)
        "thamizh",      # родрооро┐ро┤рпН (Tamil)
        "vanakkam",     # ро╡рогроХрпНроХроорпН (hello)
        "nanri",        # роиройрпНро▒ро┐ (thank you)
        "inthiyaa"      # роЗроирпНродро┐ропро╛ (India)
    ]

    print("Roman to Tamil Transliteration Examples:")
    print("---------------------------------------")

    for word in test_words:
        tamil = convert_roman_to_tamil(word)
        print(f"Romanized: {word} тЖТ Tamil: {tamil}")

    # Test with a romanized sentence
    romanized_sentence = " thamizh mozhi mihavum azhahanathu"  # родрооро┐ро┤рпН роорпКро┤ро┐ рооро┐роХро╡рпБроорпН роЕро┤роХро╛ройродрпБ
    tamil_sentence = convert_roman_to_tamil(romanized_sentence)
    print("\nRomanized sentence:")
    print(f"Original: {romanized_sentence}")
    print(f"Tamil: {tamil_sentence}")

Roman to Tamil Transliteration Examples:
---------------------------------------
Romanized: kaathal тЖТ Tamil: роХро╛родро▓рпН
Romanized: thamizh тЖТ Tamil: родрооро┐ро┤рпН
Romanized: vanakkam тЖТ Tamil: ро╡рогроХрпНроХроорпН
Romanized: nanri тЖТ Tamil: рогройрпНро▒ро┐
Romanized: inthiyaa тЖТ Tamil: роЗройрпНродро┐ропро╛

Romanized sentence:
Original:  thamizh mozhi mihavum azhahanathu
Tamil: родрооро┐ро┤рпН роорпКро┤ро┐ рооро┐hроЕро╡рпБроорпН роЕро┤hроЕрогродрпБ


**Syllable Structure:** Tamil characters are syllabic. A typical syllable is formed by combining a consonant with a vowel. When a consonant stands alone, it usually carries an inherent vowel sound or is marked with a special symbol (the "pulli") to indicate the absence of a vowel.

**Complete Words:** Genuine Tamil words usually consist of both vowels (роЙропро┐ро░рпЖро┤рпБродрпНродрпБроХрпНроХро│рпН) and consonants (роорпЖропрпНропрпЖро┤рпБродрпНродрпБроХрпНроХро│рпН). Using only consonants, as in the dummy function, produces strings that do not represent standard syllable structures, making them less natural or pronounceable.

In [None]:
# Enhanced Tamil transliteration system with grammar rules
import re

# ===== TRANSLITERATION MAPPINGS =====

# Mapping dictionary for Tamil characters to Roman
romanized_map = {
    # Independent vowels
    'роЕ': 'a', 'роЖ': 'aa', 'роЗ': 'i', 'роИ': 'ii', 'роЙ': 'u', 'роК': 'uu',
    'роО': 'e', 'роП': 'ee', 'роР': 'ai', 'роТ': 'o', 'роУ': 'oo', 'роФ': 'au',

    # Consonants
    'роХ': 'k', 'роЩ': 'ng', 'роЪ': 'ch', 'роЮ': 'nj', 'роЯ': 't', 'рог': 'n',
    'род': 'th', 'рои': 'n', 'рок': 'p', 'роо': 'm', 'роп': 'y', 'ро░': 'r',
    'ро▓': 'l', 'ро╡': 'v', 'ро┤': 'zh', 'ро│': 'l', 'ро▒': 'r', 'рой': 'n', 'ро╣': 'ha',

    # Vowel markers (combining signs)
    'ро╛': 'aa', 'ро┐': 'i', 'рпА': 'ii', 'рпБ': 'u', 'рпВ': 'uu',
    'рпЖ': 'e', 'рпЗ': 'ee', 'рпИ': 'ai', 'рпК': 'o', 'рпЛ': 'oo', 'рпМ': 'au',

    # Special characters
    'рпН': '',  # pulli (removes inherent vowel)
    'роГ': 'h'  # aaytham
}

# Reverse mapping for Roman to Tamil
tamil_map = {
    # Vowels (independent forms)
    'a': 'роЕ', 'aa': 'роЖ', 'i': 'роЗ', 'ii': 'роИ', 'u': 'роЙ', 'uu': 'роК',
    'e': 'роО', 'ee': 'роП', 'ai': 'роР', 'o': 'роТ', 'oo': 'роУ', 'au': 'роФ',

    # Consonants without vowels (with pulli)
    'k': 'роХрпН', 'ng': 'роЩрпН', 'ch': 'роЪрпН', 'nj': 'роЮрпН', 't': 'роЯрпН', 'n': 'роирпН',
    'th': 'родрпН', 'p': 'рокрпН', 'm': 'роорпН', 'y': 'ропрпН', 'r': 'ро░рпН', 'l': 'ро▓рпН',
    'v': 'ро╡рпН', 'zh': 'ро┤рпН', 'r': 'ро▒рпН', 'n': 'ройрпН',

    # Special character
    'h': 'роГ',  # aaytham
    'ha': 'ро╣'
}

# Dictionary for consonants with vowels (similar to original)
consonant_vowel_combinations = {
   # k + vowels
    'ka': 'роХ', 'kaa': 'роХро╛', 'ki': 'роХро┐', 'kii': 'роХрпА', 'ku': 'роХрпБ', 'kuu': 'роХрпВ',
    'ke': 'роХрпЖ', 'kee': 'роХрпЗ', 'kai': 'роХрпИ', 'ko': 'роХрпК', 'koo': 'роХрпЛ', 'kau': 'роХрпМ',

    # ng + vowels
    'nga': 'роЩ', 'ngaa': 'роЩро╛', 'ngi': 'роЩро┐', 'ngii': 'роЩрпА', 'ngu': 'роЩрпБ', 'nguu': 'роЩрпВ',
    'nge': 'роЩрпЖ', 'ngee': 'роЩрпЗ', 'ngai': 'роЩрпИ', 'ngo': 'роЩрпК', 'ngoo': 'роЩрпЛ', 'ngau': 'роЩрпМ',

    # ch + vowels
    'cha': 'роЪ', 'chaa': 'роЪро╛', 'chi': 'роЪро┐', 'chii': 'роЪрпА', 'chu': 'роЪрпБ', 'chuu': 'роЪрпВ',
    'che': 'роЪрпЖ', 'chee': 'роЪрпЗ', 'chai': 'роЪрпИ', 'cho': 'роЪрпК', 'choo': 'роЪрпЛ', 'chau': 'роЪрпМ',

    # nj + vowels
    'nja': 'роЮ', 'njaa': 'роЮро╛', 'nji': 'роЮро┐', 'njii': 'роЮрпА', 'nju': 'роЮрпБ', 'njuu': 'роЮрпВ',
    'nje': 'роЮрпЖ', 'njee': 'роЮрпЗ', 'njai': 'роЮрпИ', 'njo': 'роЮрпК', 'njoo': 'роЮрпЛ', 'njau': 'роЮрпМ',

    # t + vowels
    'ta': 'роЯ', 'taa': 'роЯро╛', 'ti': 'роЯро┐', 'tii': 'роЯрпА', 'tu': 'роЯрпБ', 'tuu': 'роЯрпВ',
    'te': 'роЯрпЖ', 'tee': 'роЯрпЗ', 'tai': 'роЯрпИ', 'to': 'роЯрпК', 'too': 'роЯрпЛ', 'tau': 'роЯрпМ',

    # th + vowels
    'tha': 'род', 'thaa': 'родро╛', 'thi': 'родро┐', 'thii': 'родрпА', 'thu': 'родрпБ', 'thuu': 'родрпВ',
    'the': 'родрпЖ', 'thee': 'родрпЗ', 'thai': 'родрпИ', 'tho': 'родрпК', 'thoo': 'родрпЛ', 'thau': 'родрпМ',

    # n + vowels (multiple 'n' consonants in Tamil, using the dental 'рои' version)
    'na': 'рои', 'naa': 'роиро╛', 'ni': 'роиро┐', 'nii': 'роирпА', 'nu': 'роирпБ', 'nuu': 'роирпВ',
    'ne': 'роирпЖ', 'nee': 'роирпЗ', 'nai': 'роирпИ', 'no': 'роирпК', 'noo': 'роирпЛ', 'nau': 'роирпМ',

    # p + vowels
    'pa': 'рок', 'paa': 'рокро╛', 'pi': 'рокро┐', 'pii': 'рокрпА', 'pu': 'рокрпБ', 'puu': 'рокрпВ',
    'pe': 'рокрпЖ', 'pee': 'рокрпЗ', 'pai': 'рокрпИ', 'po': 'рокрпК', 'poo': 'рокрпЛ', 'pau': 'рокрпМ',

    # m + vowels
    'ma': 'роо', 'maa': 'рооро╛', 'mi': 'рооро┐', 'mii': 'роорпА', 'mu': 'роорпБ', 'muu': 'роорпВ',
    'me': 'роорпЖ', 'mee': 'роорпЗ', 'mai': 'роорпИ', 'mo': 'роорпК', 'moo': 'роорпЛ', 'mau': 'роорпМ',

    # y + vowels
    'ya': 'роп', 'yaa': 'ропро╛', 'yi': 'ропро┐', 'yii': 'ропрпА', 'yu': 'ропрпБ', 'yuu': 'ропрпВ',
    'ye': 'ропрпЖ', 'yee': 'ропрпЗ', 'yai': 'ропрпИ', 'yo': 'ропрпК', 'yoo': 'ропрпЛ', 'yau': 'ропрпМ',

    # r + vowels (using 'ро░')
    'ra': 'ро░', 'raa': 'ро░ро╛', 'ri': 'ро░ро┐', 'rii': 'ро░рпА', 'ru': 'ро░рпБ', 'ruu': 'ро░рпВ',
    're': 'ро░рпЖ', 'ree': 'ро░рпЗ', 'rai': 'ро░рпИ', 'ro': 'ро░рпК', 'roo': 'ро░рпЛ', 'rau': 'ро░рпМ',

    # l + vowels (using 'ро▓')
    'la': 'ро▓', 'laa': 'ро▓ро╛', 'li': 'ро▓ро┐', 'lii': 'ро▓рпА', 'lu': 'ро▓рпБ', 'luu': 'ро▓рпВ',
    'le': 'ро▓рпЖ', 'lee': 'ро▓рпЗ', 'lai': 'ро▓рпИ', 'lo': 'ро▓рпК', 'loo': 'ро▓рпЛ', 'lau': 'ро▓рпМ',

    # v + vowels
    'va': 'ро╡', 'vaa': 'ро╡ро╛', 'vi': 'ро╡ро┐', 'vii': 'ро╡рпА', 'vu': 'ро╡рпБ', 'vuu': 'ро╡рпВ',
    've': 'ро╡рпЖ', 'vee': 'ро╡рпЗ', 'vai': 'ро╡рпИ', 'vo': 'ро╡рпК', 'voo': 'ро╡рпЛ', 'vau': 'ро╡рпМ',

    # zh + vowels
    'zha': 'ро┤', 'zhaa': 'ро┤ро╛', 'zhi': 'ро┤ро┐', 'zhii': 'ро┤рпА', 'zhu': 'ро┤рпБ', 'zhuu': 'ро┤рпВ',
    'zhe': 'ро┤рпЖ', 'zhee': 'ро┤рпЗ', 'zhai': 'ро┤рпИ', 'zho': 'ро┤рпК', 'zhoo': 'ро┤рпЛ', 'zhau': 'ро┤рпМ',

    # Additional variants for different Tamil 'n' sounds
    # Alveolar 'n' (рой) + vowels
    'na': 'рой', 'naa': 'ройро╛', 'ni': 'ройро┐', 'nii': 'ройрпА', 'nu': 'ройрпБ', 'nuu': 'ройрпВ',
    'ne': 'ройрпЖ', 'nee': 'ройрпЗ', 'nai': 'ройрпИ', 'no': 'ройрпК', 'noo': 'ройрпЛ', 'nau': 'ройрпМ',

    # Retroflex 'n' (рог) + vowels
    'na': 'рог', 'naa': 'рогро╛', 'ni': 'рогро┐', 'nii': 'рогрпА', 'nu': 'рогрпБ', 'nuu': 'рогрпВ',
    'ne': 'рогрпЖ', 'nee': 'рогрпЗ', 'nai': 'рогрпИ', 'no': 'рогрпК', 'noo': 'рогрпЛ', 'nau': 'рогрпМ',

    # Retroflex 'r' (ро▒) + vowels
    'ra': 'ро▒', 'raa': 'ро▒ро╛', 'ri': 'ро▒ро┐', 'rii': 'ро▒рпА', 'ru': 'ро▒рпБ', 'ruu': 'ро▒рпВ',
    're': 'ро▒рпЖ', 'ree': 'ро▒рпЗ', 'rai': 'ро▒рпИ', 'ro': 'ро▒рпК', 'roo': 'ро▒рпЛ', 'rau': 'ро▒рпМ',

    # Retroflex 'l' (ро│) + vowels
    'la': 'ро│', 'laa': 'ро│ро╛', 'li': 'ро│ро┐', 'lii': 'ро│рпА', 'lu': 'ро│рпБ', 'luu': 'ро│рпВ',
    'le': 'ро│рпЖ', 'lee': 'ро│рпЗ', 'lai': 'ро│рпИ', 'lo': 'ро│рпК', 'loo': 'ро│рпЛ', 'lau': 'ро│рпМ'

}

# ===== GRAMMATICAL CATEGORIES =====

# Classification of Rationality
rational_nouns = [
    'роЖрогрпН', 'рокрпЖрогрпН', 'рооройро┐родройрпН', 'роЕро╡ройрпН', 'роЕро╡ро│рпН', 'роиро╛ройрпН', 'роирпА', 'роиро╛роорпН', 'роирпАроЩрпНроХро│рпН',
    'роЕро╡ро░рпН', 'роЗро╡ро░рпН', 'роЙро╡ро░рпН', 'родро╛роорпН', 'родро╛роЩрпНроХро│рпН', 'ропро╛ро░рпН', 'роТро░рпБро╡ро░рпН'
]

irrational_nouns = [
    'роХрпБро┤роирпНродрпИ', 'роиро╛ропрпН', 'рокрпВройрпИ', 'рооро░роорпН', 'ро╡рпАроЯрпБ', 'роХро▓рпН', 'рокрпКро░рпБро│рпН', 'роЪрпЖроЯро┐', 'ропро╛ройрпИ'
    # Add more irrational nouns
]

# Vallinam consonants for special rules
vallinam_consonants = ['роХрпН', 'роЪрпН', 'роЯрпН', 'родрпН', 'рокрпН', 'ро▒рпН']
vallinam_with_u = ['роХрпБ', 'роЪрпБ', 'роЯрпБ', 'родрпБ', 'рокрпБ', 'ро▒рпБ']  # For Kutriyalukaram

# Case suffixes for noun declension
case_suffixes = {
    'nominative': '',
    'accusative': 'роР',
    'instrumental': 'роЖро▓рпН',
    'sociative': 'роУроЯрпБ',
    'dative': 'роХрпНроХрпБ',
    'benefactive': 'роХрпНроХро╛роХ',
    'ablative': 'роЗро▓ро┐ро░рпБроирпНродрпБ',
    'genitive': 'роЙроЯрпИроп',
    'locative': 'роЗро▓рпН',
    'vocative': 'роП'
}

# Roman equivalents of case suffixes
roman_case_suffixes = {
    'nominative': '',
    'accusative': 'ai',
    'instrumental': 'aal',
    'sociative': 'otu',
    'dative': 'kku',
    'benefactive': 'kkaaka',
    'ablative': 'ilirunthu',
    'genitive': 'utaiya',
    'locative': 'il',
    'vocative': 'ee'
}

# ===== TRANSLITERATION FUNCTIONS =====

def romanize_tamil_word(word):
    """
    Convert a Tamil word into its romanized form.

    Args:
        word (str): Tamil word to be romanized

    Returns:
        str: Romanized version of the Tamil word
    """
    romanized = ""
    i = 0

    while i < len(word):
        char = word[i]

        # List of Tamil consonants
        tamil_consonants = [
            'роХ', 'роЩ', 'роЪ', 'роЮ', 'роЯ', 'рог', 'род', 'рои', 'рок', 'роо',
            'роп', 'ро░', 'ро▓', 'ро╡', 'ро┤', 'ро│', 'ро▒', 'рой', 'ро╣'
        ]

        # List of vowel markers
        vowel_markers = ['ро╛', 'ро┐', 'рпА', 'рпБ', 'рпВ', 'рпЖ', 'рпЗ', 'рпИ', 'рпК', 'рпЛ', 'рпМ', 'рпН']

        # Case 1: Character is a consonant
        if char in tamil_consonants:
            consonant_roman = romanized_map[char]

            # Look ahead for vowel marker or pulli
            if i + 1 < len(word) and word[i+1] in vowel_markers:
                if word[i+1] == 'рпН':  # pulli - no vowel
                    romanized += consonant_roman
                else:  # vowel marker
                    romanized += consonant_roman + romanized_map[word[i+1]]
                i += 2
            else:
                # No vowel marker: add inherent 'a'
                romanized += consonant_roman + 'a'
                i += 1

        # Case 2: Character is an independent vowel or other mapped character
        elif char in romanized_map and char not in vowel_markers:
            romanized += romanized_map[char]
            i += 1

        # Case 3: Vowel marker appearing independently (unusual, but handle it)
        elif char in vowel_markers:
            # Skip if it's a stray vowel marker (unusual case)
            i += 1

        # Case 4: Character not in our mapping
        else:
            romanized += char  # Keep as is (numbers, punctuation, etc.)
            i += 1

    # Check for Kutriyalukaram (shortened 'u')
    if any(word.endswith(u_cons) for u_cons in vallinam_with_u):
        # Check if preceded by multiple letters or a nedil (long vowel)
        if len(word) > 1:
            # If it ends with one of the 'u' forms, modify the romanization
            if romanized.endswith('u'):
                romanized = romanized[:-1] + 'u╠Ж'  # Mark shortened 'u'

    # Check for Aikarakurukkam (shortened 'ai')
    if word.startswith('роР') and len(word) > 1:
        # Replace the first 'ai' with a shortened version
        romanized = romanized.replace('ai', 'ai╠п', 1)

    # Check for Aukarakurukkam (shortened 'au')
    if word.startswith('роФ') and len(word) > 1:
        # Replace the first 'au' with a shortened version
        romanized = romanized.replace('au', 'au╠п', 1)

    return romanized

def romanize_tamil_text(text):
    """
    Romanize a Tamil text (can contain multiple words).

    Args:
        text (str): Tamil text to romanize

    Returns:
        str: Romanized version of the Tamil text
    """
    words = text.split()
    romanized_words = [romanize_tamil_word(word) for word in words]
    return " ".join(romanized_words)

def convert_roman_to_tamil(romanized_text):
    """
    Convert romanized text to Tamil.

    Args:
        romanized_text (str): The romanized text to convert

    Returns:
        str: Tamil text
    """
    # List of consonant prefixes and vowel suffixes from original code
    consonant_prefixes = [
        'k', 'ng', 'ch', 'nj', 't', 'th', 'n', 'p',
        'm', 'y', 'r', 'l', 'v', 'zh', 'ha'
    ]

    vowel_suffixes = [
        'a', 'aa', 'i', 'ii', 'u', 'uu',
        'e', 'ee', 'ai', 'o', 'oo', 'au'
    ]

    words = romanized_text.split()
    tamil_words = []

    for word in words:
        # Handle special shortened forms
        word = word.replace('u╠Ж', 'u')  # Handle Kutriyalukaram
        word = word.replace('ai╠п', 'ai')  # Handle Aikarakurukkam
        word = word.replace('au╠п', 'au')  # Handle Aukarakurukkam

        tamil_word = ""
        i = 0

        while i < len(word):
            # Try to match longest possible segments
            matched = False

            # First check for consonant-vowel combinations (longest matches)
            for prefix in sorted(consonant_prefixes, key=len, reverse=True):
                if word[i:].startswith(prefix):
                    for suffix in sorted(vowel_suffixes, key=len, reverse=True):
                        combo = prefix + suffix
                        if word[i:].startswith(combo) and combo in consonant_vowel_combinations:
                            tamil_word += consonant_vowel_combinations[combo]
                            i += len(combo)
                            matched = True
                            break
                    if matched:
                        break

            # Check for case suffixes
            if not matched:
                for case, suffix in roman_case_suffixes.items():
                    if suffix and word[i:].startswith(suffix):
                        # Don't add empty suffixes (nominative)
                        if suffix:
                            # Convert Roman case suffix to Tamil
                            tamil_case_suffix = convert_case_suffix_to_tamil(case)
                            tamil_word += tamil_case_suffix
                            i += len(suffix)
                            matched = True
                            break

            # If no consonant-vowel combo matched, check for standalone vowels or consonants
            if not matched:
                # Try matching vowels (independent forms)
                for vowel in sorted(vowel_suffixes, key=len, reverse=True):
                    if word[i:].startswith(vowel) and vowel in tamil_map:
                        tamil_word += tamil_map[vowel]
                        i += len(vowel)
                        matched = True
                        break

                # Try matching consonants (with pulli)
                if not matched:
                    for consonant in sorted(consonant_prefixes, key=len, reverse=True):
                        if word[i:].startswith(consonant) and consonant in tamil_map:
                            tamil_word += tamil_map[consonant]
                            i += len(consonant)
                            matched = True
                            break

            # If still no match, keep the character as is
            if not matched:
                tamil_word += word[i]
                i += 1

        tamil_words.append(tamil_word)

    return " ".join(tamil_words)

# ===== GRAMMATICAL FUNCTIONALITY =====

def is_rational(word):
    """
    Determine if a noun is rational (роЙропро░рпНродро┐рогрпИ) or irrational (роЕроГро▒ро┐рогрпИ).

    Args:
        word (str): Tamil word to check

    Returns:
        bool: True if rational, False if irrational
    """
    # Basic check against known lists
    if word in rational_nouns:
        return True
    if word in irrational_nouns:
        return False

    # Check for common endings that indicate rational nouns
    rational_endings = ['ройрпН', 'ро│рпН', 'ро░рпН']
    for ending in rational_endings:
        if word.endswith(ending):
            return True

    # Default to irrational if unknown
    return False

def decline_noun(noun, case, is_plural=False):
    """
    Decline a Tamil noun according to case and number.

    Args:
        noun (str): Tamil noun in its base form
        case (str): Grammatical case ('nominative', 'accusative', etc.)
        is_plural (bool): Whether the noun is plural

    Returns:
        str: Declined noun in Tamil script
    """
    # Base noun without any case ending
    base = noun

    # Add plural marker if needed
    if is_plural:
        # Remove any final consonant pulli for certain nouns before adding plural
        if base.endswith('рпН'):
            base = base[:-1]
        base += 'роХро│рпН'

    # No change for nominative case
    if case == 'nominative':
        return base

    # Add euphonic increment for certain nouns and cases
    needs_increment = case in ['accusative', 'dative', 'genitive'] and not base.endswith('роорпН')

    # Add appropriate case suffix
    if needs_increment and not base.endswith('рпН'):
        # Add euphonic increment
        if is_rational(noun):
            base += 'роЗройрпН'
        else:
            base += 'роЕродрпН'

    # Special handling for certain cases
    if case == 'dative':
        # Check if the word ends with a consonant
        if base.endswith('рпН'):
            base = base[:-1]  # Remove pulli
            base += 'роЙроХрпНроХрпБ'
        else:
            base += 'роХрпНроХрпБ'
    elif case in case_suffixes:
        base += case_suffixes[case]

    return base

def convert_case_suffix_to_tamil(case):
    """
    Convert a case name to its Tamil suffix.

    Args:
        case (str): Name of the grammatical case

    Returns:
        str: Tamil suffix for the case
    """
    return case_suffixes.get(case, '')

def handle_kutriyalukaram(word):
    """
    Apply Kutriyalukaram rules (shortening of 'u' sound).

    Args:
        word (str): Tamil word to process

    Returns:
        str: Word with Kutriyalukaram rule applied
    """
    # Check if word ends with a vallinam + 'u'
    for val_u in vallinam_with_u:
        if word.endswith(val_u):
            # Check if preceded by multiple letters or long vowel
            if len(word) > 1:
                # Mark for shortened pronunciation in romanization
                return True
    return False

def handle_aaytham(word):
    """
    Apply special rules for Aaytham.

    Args:
        word (str): Tamil word to process

    Returns:
        str: Word with Aaytham rules applied
    """
    # Aaytham should be preceded by a short vowel and followed by a hard consonant
    if 'роГ' in word:
        idx = word.index('роГ')
        if idx > 0 and idx < len(word) - 1:
            # Check if preceded by short vowel and followed by hard consonant
            # This is a simplified check
            return True
    return False

# ===== DEMO FUNCTIONS =====

def demo_noun_declension():
    """
    Demonstrate the noun declension functionality.
    """
    print("\nNoun Declension Examples:")
    print("------------------------")

    nouns = [('рооройро┐родройрпН', True), ('рооро░роорпН', False), ('роиро╛ропрпН', False)]
    cases = ['nominative', 'accusative', 'dative', 'genitive', 'locative']

    for noun, is_rational in nouns:
        print(f"\nDeclension of {noun} {'(Rational)' if is_rational else '(Irrational)'}:")
        for case in cases:
            singular = decline_noun(noun, case, False)
            plural = decline_noun(noun, case, True)
            rom_singular = romanize_tamil_word(singular)
            rom_plural = romanize_tamil_word(plural)

            print(f"  {case.capitalize()}: {singular} (romanized: {rom_singular})")
            print(f"  {case.capitalize()} Plural: {plural} (romanized: {rom_plural})")

def demo_special_rules():
    """
    Demonstrate the application of special phonological rules.
    """
    print("\nSpecial Phonological Rules Examples:")
    print("---------------------------------")

    # Kutriyalukaram examples
    kutriyalukaram_words = ['ро╡ро░ро╡рпБ', 'рокро╛роЯрпБ', 'роОроЯрпБрокрпНрокрпБ']
    print("\nKutriyalukaram (shortened 'u'):")
    for word in kutriyalukaram_words:
        is_kutriyal = handle_kutriyalukaram(word)
        rom_word = romanize_tamil_word(word)
        print(f"  {word} тЖТ {rom_word} {'(shortened)' if is_kutriyal else ''}")

    # Aikarakurukkam examples
    aikarakurukkam_words = ['роРроирпНродрпБ', 'роРроорпНрокродрпБ']
    print("\nAikarakurukkam (shortened 'ai'):")
    for word in aikarakurukkam_words:
        rom_word = romanize_tamil_word(word)
        print(f"  {word} тЖТ {rom_word}")

    # Aaytham examples
    aaytham_words = ['роЕроГродрпБ', 'роОроГроХрпБ']
    print("\nAaytham:")
    for word in aaytham_words:
        has_aaytham = handle_aaytham(word)
        rom_word = romanize_tamil_word(word)
        print(f"  {word} тЖТ {rom_word} {'(with aaytham)' if has_aaytham else ''}")

def main():
    """
    Main demonstration function.
    """
    print("Enhanced Tamil Transliteration System with Grammar Rules")
    print("======================================================")

    # Tamil to Roman examples
    test_words = [
        "роХро╛родро▓рпН",       # kaathal (love)
        "родрооро┐ро┤рпН",       # thamizh (Tamil)
        "ро╡рогроХрпНроХроорпН",     # vanakkam (hello)
        "роиройрпНро▒ро┐",       # nanri (thank you)
        "роЗроирпНродро┐ропро╛"      # inthiyaa (India)
    ]

    print("\nTamil to Roman Transliteration Examples:")
    print("---------------------------------------")

    for word in test_words:
        romanized = romanize_tamil_word(word)
        print(f"Tamil: {word} тЖТ Romanized: {romanized}")

    # Roman to Tamil examples
    roman_words = [
        "kaathal",      # роХро╛родро▓рпН (love)
        "thamizh",      # родрооро┐ро┤рпН (Tamil)
        "vanakkam",     # ро╡рогроХрпНроХроорпН (hello)
        "nanri",        # роиройрпНро▒ро┐ (thank you)
        "inthiyaa"      # роЗроирпНродро┐ропро╛ (India)
    ]

    print("\nRoman to Tamil Transliteration Examples:")
    print("---------------------------------------")

    for word in roman_words:
        tamil = convert_roman_to_tamil(word)
        print(f"Romanized: {word} тЖТ Tamil: {tamil}")

    # Demonstrate noun declension
    demo_noun_declension()

    # Demonstrate special phonological rules
    demo_special_rules()

    # Sentence examples
    tamil_sentence = "роЕройрпИро╡ро░рпБроХрпНроХрпБроорпН роЗройро┐роп ро╡рогроХрпНроХроорпН"  # Hello to everyone
    romanized_sentence = romanize_tamil_text(tamil_sentence)
    print("\nTamil sentence:")
    print(f"Original: {tamil_sentence}")
    print(f"Romanized: {romanized_sentence}")

if __name__ == "__main__":
    main()

Enhanced Tamil Transliteration System with Grammar Rules

Tamil to Roman Transliteration Examples:
---------------------------------------
Tamil: роХро╛родро▓рпН тЖТ Romanized: kaathal
Tamil: родрооро┐ро┤рпН тЖТ Romanized: thamizh
Tamil: ро╡рогроХрпНроХроорпН тЖТ Romanized: vanakkam
Tamil: роиройрпНро▒ро┐ тЖТ Romanized: nanri
Tamil: роЗроирпНродро┐ропро╛ тЖТ Romanized: inthiyaa

Roman to Tamil Transliteration Examples:
---------------------------------------
Romanized: kaathal тЖТ Tamil: роХро╛родро▓рпН
Romanized: thamizh тЖТ Tamil: родрооро┐ро┤рпН
Romanized: vanakkam тЖТ Tamil: ро╡рогроХрпНроХроорпН
Romanized: nanri тЖТ Tamil: рогройрпНро▒ро┐
Romanized: inthiyaa тЖТ Tamil: роЗройрпНродро┐ропро╛

Noun Declension Examples:
------------------------

Declension of рооройро┐родройрпН (Rational):
  Nominative: рооройро┐родройрпН (romanized: manithan)
  Nominative Plural: рооройро┐родройроХро│рпН (romanized: manithanakal)
  Accusative: рооройро┐родройрпНроР (romanized: manithanai)
  Accusati

In [None]:
import re

# ===== ENHANCED TRANSLITERATION WITH GRAMMAR RULES =====
# (Includes noun declension, pluralization, and special phonological rules)

# ... [Keep previous romanized_map, tamil_map, consonant_vowel_combinations] ...

# ===== GRAMMATICAL ENHANCEMENTS =====
# Based on Tamil Grammar Handbook (Pages 1-53)

# Noun classifications from handbook (Page 54-55)
rational_nouns = [
    'рооройро┐родройрпН', 'рокрпЖрогрпН', 'роЕро╡ройрпН', 'роЕро╡ро│рпН', 'роХрпБро┤роирпНродрпИ', 'роЕро░роЪройрпН',
    'роЖроЪро┐ро░ро┐ропро░рпН', 'рооро╛рогро╡ройрпН', 'роирогрпНрокро░рпН', 'роХрпБро░рпБ'
]

irrational_nouns = [
    'рооро░роорпН', 'ро╡рпАроЯрпБ', 'роиро╛ропрпН', 'рокрпБродрпНродроХроорпН', 'рооро▓рпИ', 'роиродро┐',
    'рокрпВ', 'роХро▓рпН', 'рокро┤роорпН', 'роЖроЯрпБ'
]

# Detailed case system from handbook (Page 57-61)
case_system = {
    'nominative': {'suffix': '', 'example': 'рооро░роорпН'},
    'accusative': {'suffix': 'роР', 'example': 'рооро░родрпНродрпИ'},
    'instrumental': {'suffix': 'роЖро▓рпН', 'example': 'рооро░родрпНродро╛ро▓рпН'},
    'dative': {'suffix': 'роХрпНроХрпБ', 'example': 'рооро░родрпНродрпБроХрпНроХрпБ'},
    'ablative': {'suffix': 'роЗро▓ро┐ро░рпБроирпНродрпБ', 'example': 'рооро░родрпНродро┐ро▓ро┐ро░рпБроирпНродрпБ'},
    'genitive': {'suffix': 'роЗройрпН', 'example': 'рооро░родрпНродро┐ройрпН'},
    'locative': {'suffix': 'роЗро▓рпН', 'example': 'рооро░родрпНродро┐ро▓рпН'},
    'vocative': {'suffix': 'роП', 'example': 'рооро░роорпЗ'}
}

# Plural markers from handbook (Page 55)
plural_markers = {
    'rational': 'роХро│рпН',
    'irrational': 'роХрпНроХро│рпН'
}

# Phonological rules from handbook (Pages 9-20)
special_consonants = {
    'vallinam': ['роХрпН', 'роЪрпН', 'роЯрпН', 'родрпН', 'рокрпН', 'ро▒рпН'],
    'mellinam': ['роЩрпН', 'роЮрпН', 'рогрпН', 'роирпН', 'роорпН', 'ройрпН'],
    'idayinam': ['ропрпН', 'ро░рпН', 'ро▓рпН', 'ро╡рпН', 'ро┤рпН', 'ро│рпН']
}

def apply_plural(noun, is_rational):
    """Apply pluralization rules from handbook (Page 55)"""
    if noun.endswith('рпН'):
        base = noun[:-1]
    else:
        base = noun

    marker = plural_markers['rational'] if is_rational else plural_markers['irrational']
    return base + marker

def decline_noun(noun, case, is_plural=False, is_rational=None):
    """Enhanced noun declension based on handbook (Pages 57-61)"""
    # Determine noun type
    if is_rational is None:
        is_rational = noun in rational_nouns

    # Base form processing
    if noun.endswith('рпН'):
        base = noun[:-1]
    else:
        base = noun

    # Apply plural
    if is_plural:
        base = apply_plural(base, is_rational)

    # Get case suffix
    case_info = case_system.get(case, case_system['nominative'])
    suffix = case_info['suffix']

    # Apply phonological combinations (Page 13-20)
    # Rule: Avoid consecutive consonants with pulli
    if base.endswith('рпН') and suffix.startswith(('роЕ', 'роЗ', 'роЙ')):
        base = base[:-1]  # Remove pulli before vowel

    # Special handling for dative case (Page 59)
    if case == 'dative':
        if base.endswith(('роорпН', 'ройрпН')):
            return base + 'роХрпНроХрпБ'
        elif base.endswith('ро│рпН'):
            return base[:-1] + 'роЯрпНроХрпБ'

    return base + suffix

# Enhanced transliteration with grammar rules
def enhanced_romanize(word):
    """Romanization with grammatical awareness"""
    romanized = romanize_tamil_word(word)

    # Apply Kurikazhi rules (Page 15)
    # Shortened 'u' for vallinam consonants
    if any(word.endswith(c + 'рпБ') for c in special_consonants['vallinam']):
        romanized = romanized.replace('u', '┼н', 1)

    # Aikarakurukkam (Page 15)
    if word.startswith('роР') and len(word) > 1:
        romanized = romanized.replace('ai', 'aтБ▒', 1)

    return romanized

# ===== DEMONSTRATION =====
def grammar_aware_demo():
    """Show grammar-integrated transliteration"""
    print("\nGrammar-Enhanced Tamil Processing")
    print("=================================")

    test_nouns = [
        ('рооройро┐родройрпН', True),    # Rational
        ('рооро░роорпН', False),     # Irrational
        ('роиро╛ропрпН', False),     # Irrational
        ('рокрпЖрогрпН', True)       # Rational
    ]

    for noun, rational in test_nouns:
        print(f"\nNoun: {noun} ({'Rational' if rational else 'Irrational'})")

        # Singular forms
        for case in case_system:
            declined = decline_noun(noun, case)
            romanized = enhanced_romanize(declined)
            print(f"{case.capitalize():<12} {declined} тЖТ {romanized}")

        # Plural forms
        plural_form = decline_noun(noun, 'nominative', is_plural=True)
        print(f"\nPlural Base: {plural_form}")
        for case in case_system:
            declined_plural = decline_noun(noun, case, is_plural=True)
            romanized_plural = enhanced_romanize(declined_plural)
            print(f"{case.capitalize():<12} {declined_plural} тЖТ {romanized_plural}")

def main():
    """Main demonstration function"""
    grammar_aware_demo()
    print("\nSpecial Phonological Rules:")
    print("Kutriyalukaram (роХрпБро▒рпНро▒ро┐ропро▓рпБроХро░роорпН): рооро░родрпНродрпБ тЖТ", enhanced_romanize('рооро░родрпНродрпБ'))
    print("Aikarakurukkam (роРроХро╛ро░роХрпН роХрпБро▒рпБроХрпНроХроорпН): роРроирпНродрпБ тЖТ", enhanced_romanize('роРроирпНродрпБ'))

if __name__ == "__main__":
    main()


Grammar-Enhanced Tamil Processing

Noun: рооройро┐родройрпН (Rational)
Nominative   рооройро┐родрой тЖТ 
Accusative   рооройро┐родройроР тЖТ 
Instrumental рооройро┐родройроЖро▓рпН тЖТ 
Dative       рооройро┐родройроХрпНроХрпБ тЖТ 
Ablative     рооройро┐родройроЗро▓ро┐ро░рпБроирпНродрпБ тЖТ 
Genitive     рооройро┐родройроЗройрпН тЖТ 
Locative     рооройро┐родройроЗро▓рпН тЖТ 
Vocative     рооройро┐родройроП тЖТ 

Plural Base: рооройро┐родройроХро│рпН
Nominative   рооройро┐родройроХро│рпН тЖТ 
Accusative   рооройро┐родройроХро│рпНроР тЖТ 
Instrumental рооройро┐родройроХро│рпНроЖро▓рпН тЖТ 
Dative       рооройро┐родройроХро│роЯрпНроХрпБ тЖТ 
Ablative     рооройро┐родройроХро│роЗро▓ро┐ро░рпБроирпНродрпБ тЖТ 
Genitive     рооройро┐родройроХро│роЗройрпН тЖТ 
Locative     рооройро┐родройроХро│роЗро▓рпН тЖТ 
Vocative     рооройро┐родройроХро│рпНроП тЖТ 

Noun: рооро░роорпН (Irrational)
Nominative   рооро░роо тЖТ 
Accusative   рооро░роороР тЖТ 
Instrumental рооро░роороЖро▓рпН тЖТ 
Dative      

The **рокрпБрогро░рпНроЪрпНроЪро┐ ро╡ро┐родро┐роХро│рпН (Punarchi Vidigal)** in Tamil grammar govern the formation of compound words through phonological and orthographic adjustments. Below is a detailed explanation of the rules and special cases:

---

### **1. Types of Joining**
#### **роЗропро▓рпНрокрпБ рокрпБрогро░рпНроЪрпНроЪро┐ (Natural Joining)**  
- **Definition**: No changes occur between the base (роиро┐ро▓рпИроорпКро┤ро┐) and affixed word (ро╡ро░рпБроорпКро┤ро┐).  
- **Conditions**:  
  - Base ends in a vowel, and affixed word begins with a consonant.  
  - Example:  
    - ро╡ро╛ро┤рпИ (banana) + рооро░роорпН (tree) тЖТ ро╡ро╛ро┤рпИрооро░роорпН (banana tree).  
    - роирпАро░рпН (water) + роХрпЛрокрпНрокрпИ (cup) тЖТ роирпАро░рпНроХрпЛрокрпНрокрпИ (water cup).  

#### **ро╡ро┐роХро╛ро░рокрпН рокрпБрогро░рпНроЪрпНроЪро┐ (Modified Joining)**  
Modifications occur at the junction, categorized into:  
- **родрпЛройрпНро▒ро▓рпН (Insertion)**:  
  - A glide consonant (ропрпН, ро╡рпН) or nasal (роЩрпН, роорпН) is inserted between vowels or consonant clusters.  
  - Examples:  
    - рооро╛ (mango) + роЕроЯро┐ (base) тЖТ рооро╛ро╡роЯро┐ (m─Бvaс╕Нi) [ро╡рпН inserted].  
    - рокрпВ (flower) + роХрпКроЯро┐ (vine) тЖТ рокрпВроЩрпНроХрпКроЯро┐ (p┼лс╣Еkoс╣нi) [роЩрпН inserted].  

- **родро┐ро░ро┐родро▓рпН (Transformation)**:  
  - The final consonant of the base changes to harmonize with the affixed word.  
  - Examples:  
    - рооро░роорпН (tree) + роХро┐ро│рпИ (branch) тЖТ рооро░роХрпНроХро┐ро│рпИ (marakkiс╕╖ai) [final роорпН тЖТ роХрпН].  
    - рокро▓рпН (tooth) + рокрпВ (flower) тЖТ рокро▓рпНрокрпВ (palp┼л) [final ро▓рпН remains].  

- **роХрпЖроЯрпБродро▓рпН (Deletion)**:  
  - A letter is dropped from the base or affixed word.  
  - Examples:  
    - родрпЖроЩрпНроХрпБ (coconut tree) + роХро╛ропрпН (fruit) тЖТ родрпЗроЩрпНроХро╛ропрпН (th─Уng─Бy) [final роЙ deleted].  
    - ро╡роЯроХрпНроХрпБ (north) + роХро┐ро┤роХрпНроХрпБ (east) тЖТ ро╡роЯроХро┐ро┤роХрпНроХрпБ (vaс╕Нakizhakku) [final роХрпБ deleted].  

---

### **2. Phonological Adjustments**
#### **Final Sound of Base + Initial Sound of Affixed Word**  
- **Vowel + Vowel**: Insert ропрпН or ро╡рпН.  
  - Example: роХрпЛ (king) + роЕро░роЪройрпН (ruler) тЖТ роХрпЛропро░роЪройрпН (k┼Нyaracaс╣Й).  
- **Vowel + Consonant**: Natural joining (no change).  
  - Example: рооро▓рпИ (mountain) + роПро▒рпБ (climb) тЖТ рооро▓рпИропрпЗро▒рпБ (malaiy─Ус╣Яu).  
- **Consonant + Vowel**: Double the consonant or transform.  
  - Example: роиро╛роЯрпБ (country) + роорпКро┤ро┐ (language) тЖТ роиро╛роЯрпНроЯрпБроорпКро┤ро┐ (n─Бс╣нс╣нumoс╕╗i).  
- **Consonant + Consonant**: Insert a vowel or modify.  
  - Example: роорогрпН (earth) + роХрпЛро╡ро┐ро▓рпН (temple) тЖТ роорогрпНроХрпЛро╡ро┐ро▓рпН (maс╣Зk┼Нvil).  

---

### **3. Specific Compound Categories**
#### **родро┐роЪрпИрокрпН рокрпЖропро░рпНрокрпН рокрпБрогро░рпНроЪрпНроЪро┐ (Directional Compounds)**  
- Combine directional terms with deletions.  
  - Example: ро╡роЯроХрпНроХрпБ (north) + роорпЗро▒рпНроХрпБ (west) тЖТ ро╡роЯроорпЗро▒рпНроХрпБ (northwest).  

#### **роорпИропрпАро▒рпНро▒рпБрокрпН рокрогрпНрокрпБрокрпН рокрпЖропро░рпНрокрпН рокрпБрогро░рпНроЪрпНроЪро┐ (Adjectival Compounds)**  
- Form adjectives by combining qualifiers.  
  - Example: рооро╛ (great) + рокрпЖро░ро┐роп (big) тЖТ рооро╛рокрпЖро░ро┐роп (m─Бperiya, "huge").  

#### **рокрпВрокрпНрокрпЖропро░рпНрокрпН рокрпБрогро░рпНроЪрпНроЪро┐ (Floral Compounds)**  
- Insert роЩрпН when рокрпВ (flower) combines with another word.  
  - Example: рокрпВ + рооро▓ро░рпН (blossom) тЖТ рокрпВроЩрпНроХрпКроЯро┐ (p┼лс╣Еkoс╣нi, "flower vine").  

#### **родрпЗроЩрпНроХро╛ропрпН рокрпБрогро░рпНроЪрпНроЪро┐ (Coconut Compounds)**  
- Special deletion and vowel elongation.  
  - Example: родрпЖроЩрпНроХрпБ (theс╣Еgu) + роХро╛ропрпН (k─Бy) тЖТ родрпЗроЩрпНроХро╛ропрпН (th─Уng─Бy, "coconut").  

---

### **4. Special Cases and Exceptions**
- **роЖропрпНродроорпН (роГ)**: Words ending with роГ may drop it or merge.  
  - Example: роЕроГродрпБ (that) + роОрой (like) тЖТ роЕродрпБрокрпЛро▓рпН (athup┼Нla, "like that").  
- **Nasal Consonants**: Final роорпН, ройрпН, рогрпН may change to stops (роХрпН, роЯрпН).  
  - Example: роЪро╛рооро┐ (god) + роХрпЛро╡ро┐ро▓рпН (temple) тЖТ роЪро╛рооро┐ропро╛ро░рпН роХрпЛро╡ро┐ро▓рпН (s─Бmiy─Бr k┼Нvil).  
- **Loanwords**: Borrowed words follow Tamil rules.  
  - Example: роЯрпА (tea) + роХрпЛрокрпНрокрпИ (cup) тЖТ роЯрпАроХрпНроХрпЛрокрпНрокрпИ (с╣н─лkk┼Нppai).  

---

### **5. Importance of Rules**  
- **Clarity**: Prevents ambiguity (e.g., рооро░родрпНродро┐ро▓рпИ vs. рооро░ роЗро▓рпИ).  
- **Euphony**: Ensures smooth pronunciation.  
- **Consistency**: Standardizes compound formation.  

---

### **Summary**  
The рокрпБрогро░рпНроЪрпНроЪро┐ ро╡ро┐родро┐роХро│рпН ensure systematic and harmonious word formation in Tamil. By applying rules of insertion, transformation, deletion, and category-specific adjustments, Tamil maintains its phonetic elegance and semantic precision. Mastery of these rules is essential for correct usage in literature, speech, and everyday communication

In [None]:
class TamilGrammarRules:
    # ... (existing class code remains the same)

    def ends_with_vowel(self, word):
        """Check if a word ends with a vowel or vowel sign."""
        if not word:
            return False
        vowels = {'роЕ', 'роЖ', 'роЗ', 'роИ', 'роЙ', 'роК', 'роО', 'роП', 'роР', 'роТ', 'роУ', 'роФ'}
        vowel_signs = {'ро┐', 'рпА', 'рпБ', 'рпВ', 'рпЖ', 'рпЗ', 'рпИ', 'рпК', 'рпЛ', 'ро╛', 'рпМ'}
        return word[-1] in vowels or word[-1] in vowel_signs

    def starts_with_consonant(self, word):
        """Check if a word starts with a consonant."""
        if not word:
            return False
        vowels = {'роЕ', 'роЖ', 'роЗ', 'роИ', 'роЙ', 'роК', 'роО', 'роП', 'роР', 'роТ', 'роУ', 'роФ'}
        vowel_signs = {'ро┐', 'рпА', 'рпБ', 'рпВ', 'рпЖ', 'рпЗ', 'рпИ', 'рпК', 'рпЛ', 'ро╛', 'рпМ'}
        return word[0] not in vowels and word[0] not in vowel_signs

    def get_last_vowel(self, word):
        """Get the last vowel (or equivalent) in a word."""
        vowels = {'роЕ', 'роЖ', 'роЗ', 'роИ', 'роЙ', 'роК', 'роО', 'роП', 'роР', 'роТ', 'роУ', 'роФ'}
        vowel_signs = {'ро┐', 'рпА', 'рпБ', 'рпВ', 'рпЖ', 'рпЗ', 'рпИ', 'рпК', 'рпЛ', 'ро╛', 'рпМ'}
        vowel_map = {
            'ро┐': 'роЗ', 'рпА': 'роИ', 'рпБ': 'роЙ', 'рпВ': 'роК',
            'рпЖ': 'роО', 'рпЗ': 'роП', 'рпИ': 'роР', 'рпК': 'роТ',
            'рпЛ': 'роУ', 'ро╛': 'роЖ', 'рпМ': 'роФ'
        }
        for char in reversed(word):
            if char in vowels:
                return char
            elif char in vowel_signs:
                return vowel_map.get(char, None)
        return None

    def get_first_vowel(self, word):
        """Get the first vowel (or equivalent) in a word."""
        vowels = {'роЕ', 'роЖ', 'роЗ', '', 'роЙ', 'роК', 'роО', 'роП', 'роР', 'роТ', 'роУ', 'роФ'}
        vowel_signs = {'ро┐', 'рпА', 'рпБ', 'рпВ', 'рпЖ', 'рпЗ', 'рпИ', 'рпК', 'рпЛ', 'ро╛', 'рпМ'}
        vowel_map = {
            'ро┐': 'роЗ', 'рпА': 'роИ', 'рпБ': 'роЙ', 'рпВ': 'роК',
            'рпЖ': 'роО', 'рпЗ': 'роП', 'рпИ': 'роР', 'рпК': 'роТ',
            'рпЛ': 'роУ', 'ро╛': 'роЖ', 'рпМ': 'роФ'
        }
        for char in word:
            if char in vowels:
                return char
            elif char in vowel_signs:
                return vowel_map.get(char, None)
        return None

    def apply_sandhi_rules(self, word1, word2):
        ##"""Apply Tamil sandhi rules to combine two words [[2]][[3]][[5]]"""
        # Natural Joining (роЗропро▓рпНрокрпБ рокрпБрогро░рпНроЪрпНроЪро┐)
        if self.ends_with_vowel(word1) and self.starts_with_consonant(word2):
            return word1 + word2

        # Modified Joining - Vowel + Vowel (родрпЛройрпНро▒ро▓рпН)
        elif self.ends_with_vowel(word1) and self.starts_with_vowel(word2):
            last_vowel = self.get_last_vowel(word1)
            first_vowel = self.get_first_vowel(word2)

            # Insert glide based on vowel combination
            if last_vowel == 'роЖ' and first_vowel == 'роЕ':
                return word1 + 'ро╡рпН' + word2
            elif last_vowel in ['роУ', 'роТ'] and first_vowel == 'роЕ':
                return word1 + 'ропрпН' + word2
            elif word1 == 'рокрпВ':
                return 'рокрпВроЩрпН' + word2  # Floral compound rule [[6]]
            else:
                return word1 + 'ропрпН' + word2  # Default insertion [[1]]

        # Modified Joining - Consonant + Consonant (родро┐ро░ро┐родро▓рпН)
        elif word1.endswith('роорпН') and word2.startswith('роХ'):
            return word1[:-1] + 'роХрпН' + word2  # роорпН тЖТ роХрпН transformation [[2]]
        elif word1.endswith('ройрпН') and word2.startswith('роЯ'):
            return word1[:-1] + 'роЯрпН' + word2  # ройрпН тЖТ роЯрпН transformation [[3]]

        # Deletion (роХрпЖроЯрпБродро▓рпН)
        elif word1 == 'родрпЖроЩрпНроХрпБ':
            return 'родрпЗроЩрпН' + word2  # родрпЖроЩрпНроХрпБ тЖТ родрпЗроЩрпН deletion [[5]]
        elif word1.endswith('роХрпБ') and len(word1) >= 2:
            return word1[:-2] + 'роХрпН' + word2  # роХрпБ тЖТ роХрпН conversion [[4]]

        # Default case
        return word1 + word2

        # Default case: concatenate
        return word1 + word2

    # ... (rest of the class remains the same)

In [None]:
def apply_sandhi_rules(self, word1, word2):
        """Apply Tamil sandhi rules to combine two words."""
        # Natural Joining (роЗропро▓рпНрокрпБ рокрпБрогро░рпНроЪрпНроЪро┐)
        if self.ends_with_vowel(word1) and self.starts_with_consonant(word2):
            return word1 + word2

        # Modified Joining - Vowel + Vowel (родрпЛройрпНро▒ро▓рпН)
        elif self.ends_with_vowel(word1) and self.starts_with_vowel(word2):
            last_vowel = self.get_last_vowel(word1)
            first_vowel = self.get_first_vowel(word2)

            # Insert glide based on vowel combination
            if last_vowel == 'роЖ' and first_vowel == 'роЕ':
                return word1 + 'ро╡рпН' + word2
            elif last_vowel in ['роУ', 'роТ'] and first_vowel == 'роЕ':
                return word1 + 'ропрпН' + word2
            # Floral compound rule (рокрпВ + word тЖТ insert роЩрпН)
            elif word1 == 'рокрпВ':
                return 'рокрпВроЩрпН' + word2
            else:
                # Default insertion (ропрпН for general cases)
                return word1 + 'ропрпН' + word2

        # Modified Joining - Consonant + Consonant (родро┐ро░ро┐родро▓рпН)
        elif word1.endswith('роорпН') and word2.startswith('роХ'):
            return word1[:-1] + 'роХрпН' + word2
        elif word1.endswith('ройрпН') and word2.startswith('роЯ'):
            return word1[:-1] + 'роЯрпН' + word2

        # Deletion (роХрпЖроЯрпБродро▓рпН)
        elif word1 == 'родрпЖроЩрпНроХрпБ':
            return 'родрпЗроЩрпН' + word2
        elif word1.endswith('роХрпБ'):
            return word1[:-2] + word2

        # Default case: concatenate
        return word1 + word2

In [None]:
grammar = TamilGrammarRules()
print(grammar.apply_sandhi_rules('родрпЖроЩрпНроХрпБ', 'роХро╛ропрпН'))  # Output: 'родрпЗроЩрпНроХро╛ропрпН'
print(grammar.apply_sandhi_rules('рооро░роорпН', 'роХро┐ро│рпИ'))     # Output: 'рооро░роХрпНроХро┐ро│рпИ'
print(grammar.apply_sandhi_rules('рокрпВ', 'роХрпКроЯро┐'))       # Output: 'рокрпВроЩрпНроХрпКроЯро┐'

родрпЖроЩрпНроХрпБроХро╛ропрпН
рооро░роороХрпНроХро┐ро│рпИ
рокрпВроХрпКроЯро┐
