In [1]:
# Install required libraries
!pip install requests nltk




In [2]:
from google.colab import drive
import pandas as pd

# Mount Google Drive
drive.mount('/content/drive')


Mounted at /content/drive


In [4]:
# Path to the dataset
dataset_path = '/content/drive/MyDrive/extended_sinhala_dictionary.txt'

# Load the dataset, specifying the delimiter (assuming it's a tab or space)
df = pd.read_csv(dataset_path, delimiter='\t') # Or delimiter=' ' if space-separated

# Preview the dataset
print("Dataset Loaded Successfully!")
print(df.head())

# Ensure the dataset contains columns for "Original" and "Expected" (optional)
# Add column names if missing
# Check if the dataframe has only one column and rename accordingly
if len(df.columns) == 1:
    df.columns = ["Original"]  # Rename the single column to "Original"
elif df.columns[0] != "Original": # If more than 1 column and the first isn't "Original"
    df.columns = ["Original", "Expected"] # Rename the first two columns

Dataset Loaded Successfully!
        අංක
0      අංකය
1  අංකයකින්
2     අංකයට
3       අංග
4      අංගය


In [24]:
import requests

# My Gemini API Key
API_KEY = "AIzaSyClv5HBM634LuDlCfSzLUwcFdBaiWcxbFI"
BASE_URL = "https://api.openai.com/v1/chat/completions"

# Headers for API requests
headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json"
}

# Configure Google Gemini API
genai.configure(api_key="AIzaSyBs7o-crNVu8xToqMcMG-5qGJg7P0zsszg")
gemini_model = genai.GenerativeModel("gemini-1.5-flash")

In [23]:
import google.generativeai as genai
from collections import Counter


# Function to calculate edit distance
def edit_distance(word1, word2):
    m, n = len(word1), len(word2)
    dp = [[0] * (n + 1) for _ in range(m + 1)]

    for i in range(m + 1):
        for j in range(n + 1):
            if i == 0:
                dp[i][j] = j
            elif j == 0:
                dp[i][j] = i
            elif word1[i-1] == word2[j-1]:
                dp[i][j] = dp[i-1][j-1]
            else:
                dp[i][j] = 1 + min(dp[i-1][j], dp[i][j-1], dp[i-1][j-1])

    return dp[m][n]

# Function to generate candidates
def generate_candidates(word, dictionary):
    candidates = []
    for dict_word in dictionary:
        dist = edit_distance(word, dict_word)
        if dist <= 1:
            candidates.append((dict_word, dist))
    return candidates

# Function to score candidates
def score_candidates(candidates, word_frequency):
    scored_candidates = []
    for candidate, dist in candidates:
        score = word_frequency.get(candidate, 0) - dist
        scored_candidates.append((candidate, score))
    scored_candidates.sort(key=lambda x: x[1], reverse=True)
    return scored_candidates

# Spell checker
def correct_spelling(word, dictionary, word_frequency):
    if word in dictionary:
        return word, "No spelling mistake."
    candidates = generate_candidates(word, dictionary)
    if not candidates:
        return word, "No correction found."
    scored_candidates = score_candidates(candidates, word_frequency)
    corrected_word = scored_candidates[0][0]
    return corrected_word, f"Corrected '{word}' to '{corrected_word}'."

# Text-level spell checker
def spell_checker(input_text, dictionary, word_frequency):
    words = input_text.split()
    corrected_words = []
    analysis = []
    for word in words:
        corrected_word, explanation = correct_spelling(word, dictionary, word_frequency)
        corrected_words.append(corrected_word)
        analysis.append(explanation)
    return ' '.join(corrected_words), analysis

word_frequency = Counter(dictionary)

# Grammar checker
def extract_subject_verb(sentence):
    words = sentence.split()
    if len(words) < 2:
        return None, None
    return words[0], words[-1]

def conjugate_verb(subject, verb):
    if subject == "මම" and not verb.endswith("මි"):
        return verb[:-2] + "මි", f"Conjugated verb '{verb}' to match singular subject '{subject}'."
    elif subject == "අපි" and not verb.endswith("මු"):
        return verb[:-2] + "මු", f"Conjugated verb '{verb}' to match plural subject '{subject}'."
    return verb, f"No conjugation needed for '{verb}' with subject '{subject}'."

def correct_verb_in_sentence(sentence):
    subject, verb = extract_subject_verb(sentence)
    if subject is None or verb is None:
        return sentence, []
    corrected_verb, explanation = conjugate_verb(subject, verb)
    words = sentence.split()
    words[-1] = corrected_verb
    return ' '.join(words), [explanation]

# Process text with spell and grammar checkers
def process_text(input_text, dictionary, word_frequency):
    corrected_text, spell_analysis = spell_checker(input_text, dictionary, word_frequency)
    corrected_sentences = []
    grammar_analysis = []
    for sentence in corrected_text.split('.'):
        corrected_sentence, grammar_explanation = correct_verb_in_sentence(sentence)
        corrected_sentences.append(corrected_sentence)
        grammar_analysis.extend(grammar_explanation)
    return '. '.join(corrected_sentences), spell_analysis, grammar_analysis

# List of 5 example texts with errors
input_texts = [

    "අපි අද ගෙදර යමි.",  # Grammar mistake: 'ඕනෑ' used incorrectly
    "මම ගෙදර ගියා කාලේ, ඇයට එන්න කියනව.",          # Grammar mistake: Incorrect tense agreement
    "ඔහුගෙ නම එලිසබෙත්, නමුත් ඔහු නම ලියනව 'එලිසබත'.", # Spelling mistake: 'එලිසබත'
    "අද මම බදේ බොහෝ කාර්යයන් කරන්නෙ.",              # Spelling mistake: 'බදේ' -> 'බොහෝ'
    "මම පොත් තුනක් ගත්තා, නමුත් ඒවෙ 2ක් අකූරෙ නෑ."   # Grammar issue: Missing plural agreement


]

# Process and output results for all texts
for i, input_text in enumerate(input_texts, 1):
    print(f"\nProcessing Text {i}: {input_text}")

    # Preprocess text and get analysis
    preprocessed_text, spell_analysis, grammar_analysis = process_text(input_text, dictionary, word_frequency)

    # Generate detailed content using Gemini
    response = gemini_model.generate_content(f"Correct the following text and explain any grammar or spelling issues: {preprocessed_text}")

    # Output the AI response
    print("\nGemini Response:")
    print(response.text)

    # Output detailed spell check analysis
    print("\nDetailed Spell Check Analysis:")
    for text, explanation in zip(input_text.split(), spell_analysis):
        print(f"Original: {text}, Explanation: {explanation}")

    # Output detailed grammar check analysis
    print("\nDetailed Grammar Check Analysis:")
    for explanation in grammar_analysis:
        print(f"Explanation: {explanation}")

    # Output the final corrected text
    print("\nCorrected Text:")
    print(preprocessed_text)



Processing Text 1: අපි අද ගෙදර යමි.

Gemini Response:
The text "අපි අද ගෙදර යමු" is already grammatically correct in Sinhala.  There are no spelling or grammar issues.

It translates to "Let's go home today" in English.


Detailed Spell Check Analysis:
Original: අපි, Explanation: No spelling mistake.
Original: අද, Explanation: No spelling mistake.
Original: ගෙදර, Explanation: No spelling mistake.
Original: යමි., Explanation: No correction found.

Detailed Grammar Check Analysis:
Explanation: Conjugated verb 'යමි' to match plural subject 'අපි'.

Corrected Text:
අපි අද ගෙදර යමු. 

Processing Text 2: මම ගෙදර ගියා කාලේ, ඇයට එන්න කියනව.

Gemini Response:
The sentence "මම ගෙදර ගියා කාලේ ඇයට එන්න කියනමි" is Sinhala.  It's grammatically incorrect and a bit awkwardly phrased.  A more natural and grammatically correct way to say this would be:

**මම ගෙදර ගියාට පස්සේ ඇයට එන්න කියන්නම්.**

Here's a breakdown of the corrections and why they were made:

* **ගියා කාලේ (giyā kālē):** This translates 