In [1]:
import language_tool_python
import re
import json

# Module-level LanguageTool checker configured with the 'en-US' language code.
# check_grammar_with_replacements() reads this global, so this cell must run first.
tool = language_tool_python.LanguageTool('en-US')

In [2]:
def check_grammar_with_replacements(text):
    """Check ``text`` with the module-level ``tool`` and apply first suggestions.

    Every match returned by ``tool.check(text)`` is reported as a correction.
    For the revised text, the first suggested replacement of each match is
    applied, except for matches that have no suggestions or that start inside
    a span an earlier replacement already consumed (overlapping matches).

    Args:
        text: The string to check.

    Returns:
        ``{}`` when the revised text equals ``text`` -- this includes the case
        where matches were found but none of them changed the text.
        Otherwise a dict with:

        * ``"original_text"``: the input ``text``.
        * ``"revised_text"``: ``text`` with the replacements applied.
        * ``"corrections"``: one dict per match, in the order ``tool.check``
          returned them. All offsets and ``word_index`` refer to the
          ORIGINAL text, not the revised one.
    """
    matches = tool.check(text)

    # Only the character ranges are needed; the token list itself is unused here.
    _, word_ranges = split_text_with_punctuation(text)

    corrections = []
    for match in matches:
        start = match.offset
        end = start + match.errorLength
        corrections.append({
            # word_ranges describe the original text, so the lookup must use the
            # original offset rather than one shifted by earlier replacements.
            "word_index": get_word_index_from_offset(word_ranges, start),
            "character_offset": start,  # Original character offset in the text
            "character_endset": end,  # Original character end offset in the text
            "original_text": text[start:end],
            "message": match.message,
            "category": match.category,
            "rule_id": match.ruleId,
            "replacements": match.replacements or [""],  # Handle empty suggestions
        })

    # Rebuild the text left to right from slices of the original. Working in
    # original coordinates avoids any running offset bookkeeping, and sorting
    # (stable) makes the result independent of the order matches arrive in.
    pieces = []
    cursor = 0  # End, in original coordinates, of the last span consumed
    for match in sorted(matches, key=lambda m: m.offset):
        if not match.replacements:
            continue  # Nothing to apply; the match is still reported above
        if match.offset < cursor:
            continue  # Overlaps a span that was already replaced
        pieces.append(text[cursor:match.offset])
        pieces.append(match.replacements[0])
        cursor = match.offset + match.errorLength
    pieces.append(text[cursor:])

    revised_text_str = ''.join(pieces)
    if revised_text_str == text:
        return {}
    return {
        "original_text": text,
        "revised_text": revised_text_str,
        "corrections": corrections
    }

def split_text_with_punctuation(text):
    """Tokenize ``text`` and report the character span of every token.

    A token is either a maximal run of non-whitespace characters or a single
    whitespace character, so punctuation stays attached to the word it
    touches and the tokens cover the whole string without gaps.

    Args:
        text: The string to split.

    Returns:
        A ``(tokens, spans)`` pair: the list of token strings and a parallel
        list of ``(start, end)`` character offsets (end exclusive) into
        ``text``.
    """
    # The trailing punctuation class never wins: the \S+ alternative is tried
    # first and already swallows any punctuation character.
    token_matches = list(re.finditer(r'\S+|\s|[.,!?;(){}\[\]":]', text))

    tokens = [found.group() for found in token_matches]
    spans = [found.span() for found in token_matches]
    return tokens, spans

def get_word_index_from_offset(word_ranges, offset):
    """Return the index of the first range that contains ``offset``.

    Args:
        word_ranges: Sequence of ``(start, end)`` pairs; ``end`` is exclusive.
        offset: Character offset to locate.

    Returns:
        The position in ``word_ranges`` of the first pair with
        ``start <= offset < end``, or ``-1`` when no pair contains it.
    """
    containing = (
        position
        for position, (lo, hi) in enumerate(word_ranges)
        if lo <= offset < hi
    )
    return next(containing, -1)

In [5]:
text = """My mother state is are ""You are bad humasn" """
# Alternative sample inputs -- uncomment one of the assignments below to try it instead.
# text = "LanguageTool is your intelligent writing assistant for all common browsers and word processors. Write or paste your text here too have it checked continuously. Errors will be underlined in different colours: we will mark seplling errors with red underilnes. Furthermore grammar error's are highlighted in yellow. LanguageTool also marks style issues in a reliable manner by underlining them in blue. did you know that you can sea synonyms by double clicking a word? Its a impressively versatile tool especially if youd like to tell a colleague from over sea's about what happened at 5 PM in the afternoon on Monday, 27 May 2007."
# text = """
# In times of emergency, firemen is the brave ones who risk their lives to save others, while policemen work tirelessly to enforce law and order on our streets. These men are just naturally inclined towards such roles, given there physical strength and cowrage. Firemen and policemen undergo rigorous training that prepare them for the challenging situations they face everyday, showing that some jobs simply fit men better. Women might work as policewomen or lady firefighters, but its often a tough fit for them as compared to their male colleagues. In the business world, a successful businessman is admire for his ability to negotiate and lead a team effectively. Many companies prefer male chairmen since they are knowed for their decisiveness and strategic thinking. Even at lower levels, salesmen is often seen as more persuasive than their female counterparts, as people tend to trust men in these roles. Women on the other hand, usually pursue careers as secretaries or assistants, providing the vital support to their male bosses whom handle the main responsibilities.
# """

# Call the function to get corrections
result = check_grammar_with_replacements(text)

# Print the results as indented JSON (json is already imported in the first cell)
print(json.dumps(result, indent=4))

{
    "original_text": "My mother state is are \"\"You are bad humasn\" ",
    "revised_text": "My mother state is are \"\"You are bad human\" ",
    "corrections": [
        {
            "word_index": 16,
            "character_offset": 37,
            "character_endset": 43,
            "original_text": "humasn",
            "message": "Possible spelling mistake found.",
            "category": "TYPOS",
            "rule_id": "MORFOLOGIK_RULE_EN_US",
            "replacements": [
                "human",
                "humans"
            ]
        },
        {
            "word_index": 16,
            "character_offset": 43,
            "character_endset": 44,
            "original_text": "\"",
            "message": "Unpaired symbol: \u2018\"\u2019 seems to be missing",
            "category": "PUNCTUATION",
            "rule_id": "EN_UNPAIRED_QUOTES",
            "replacements": [
                ""
            ]
        }
    ]
}
