In [35]:
import language_tool_python
import json
import re

# Shared LanguageTool checker configured for US English ('en-US'); the
# checker functions in this notebook call `tool.check(text)` on it.
tool = language_tool_python.LanguageTool('en-US')

In [None]:
def check_grammar_with_rationale(text):
    """Run the shared LanguageTool checker on ``text`` and describe each issue.

    For every match returned by ``tool.check(text)`` the whitespace-delimited
    token surrounding ``match.offset`` is located and reported together with
    its position in ``text.split()``.

    Args:
        text: The string to check.

    Returns:
        A dict with two keys:

        * ``"original_text"`` -- the input string, unchanged.
        * ``"corrections"`` -- one dict per match with the keys
          ``"word_index"`` (index of the token in ``text.split()``),
          ``"original_word"`` (the token containing the match offset),
          ``"message"``, ``"replacements"`` and ``"error_type"`` (taken from
          the match's ``message``, ``replacements`` and ``ruleId``).
    """
    matches = tool.check(text)
    text_length = len(text)

    corrections = []
    for match in matches:
        # Never index past the end of the text, whatever offset is reported.
        start_index = min(match.offset, text_length)

        # Expand left and right to the nearest whitespace of *any* kind.
        # Looking only for ' ' made tokens separated by newlines or tabs run
        # together and disagree with the str.split()-based index below.
        word_start = start_index
        while word_start > 0 and not text[word_start - 1].isspace():
            word_start -= 1

        word_end = start_index
        while word_end < text_length and not text[word_end].isspace():
            word_end += 1

        word = text[word_start:word_end]

        # Number of complete tokens before this one == its index in text.split().
        word_index = len(text[:word_start].split())

        corrections.append({
            "word_index": word_index,
            "original_word": word,
            "message": match.message,
            "replacements": match.replacements,
            "error_type": match.ruleId
        })

    return {
        "original_text": text,
        "corrections": corrections
    }

In [37]:
# Example text: a short narrative that switches between past tense
# ("was going", "picked") and present tense ("buy", "go", "start").
text = "Yesterday, I was going to the store, and I buy some fruits. I picked apples and oranges because I like them. Then I go home and start cooking."

# Run the grammar check; the result is a dict with "original_text" and "corrections"
grammar_output = check_grammar_with_rationale(text)

# Pretty-print the result dict as JSON with 4-space indentation
print(json.dumps(grammar_output, indent=4))

{
    "original_text": "Yesterday, I was going to the store, and I buy some fruits. I picked apples and oranges because I like them. Then I go home and start cooking.",
    "corrections": []
}


In [62]:
def check_grammar_with_replacements(text):
    """Run the shared LanguageTool checker on ``text`` and list its suggestions.

    Args:
        text: The string to check.

    Returns:
        A dict with two keys:

        * ``"original_text"`` -- the input string, unchanged.
        * ``"corrections"`` -- one dict per match with the keys
          ``"word_index"`` (index in ``text.split()`` of the word containing
          the match offset, or ``-1`` if the offset is not inside any word),
          ``"character_offset"`` (``match.offset``), ``"original_text"`` (the
          flagged slice of ``text``), ``"message"`` and ``"replacements"``
          (``["No suggestions"]`` when the match offers none).
    """
    matches = tool.check(text)

    # Real (start, end) character span of every whitespace-delimited word.
    # Taking the spans from the text itself keeps them correct when words are
    # separated by several spaces, tabs or newlines, or when the text starts
    # with whitespace; rebuilding them as "previous end + 1" drifts in all of
    # those cases.
    word_ranges = [word.span() for word in re.finditer(r'\S+', text)]

    corrections = []
    for match in matches:
        offset = match.offset
        original_text = text[offset: offset + match.errorLength]

        # Index of the word whose span contains the match offset, else -1.
        word_index = next(
            (i for i, (start, end) in enumerate(word_ranges) if start <= offset < end),
            -1,
        )

        corrections.append({
            "word_index": word_index,  # Word-based index
            "character_offset": offset,  # Character-based offset
            "original_text": original_text,
            "message": match.message,
            "replacements": match.replacements or ["No suggestions"],  # Handle empty replacements
        })

    return {
        "original_text": text,
        "corrections": corrections
    }



In [63]:
# Sample sentence with two deliberate misspellings ("quickk" and "lazi")
text = "The quickk brown fox jumps over the lazi dog."
# Alternative sample sentence to try instead:
# text = "He have a car and drive it everyday."

# Call the function to get corrections
result = check_grammar_with_replacements(text)

# Pretty-print the results as JSON; `json` is already imported in the
# notebook's first cell, so it is not imported again here.
print(json.dumps(result, indent=4))

{
    "original_text": "The quickk brown fox jumps over the lazi dog.",
    "corrections": [
        {
            "word_index": 1,
            "character_offset": 4,
            "original_text": "quickk",
            "message": "Possible spelling mistake found.",
            "replacements": [
                "quick"
            ]
        },
        {
            "word_index": 7,
            "character_offset": 36,
            "original_text": "lazi",
            "message": "Possible spelling mistake found.",
            "replacements": [
                "Nazi",
                "lazy",
                "Lazio",
                "laze",
                "AZI",
                "LAZ"
            ]
        }
    ]
}
