In [2]:
import language_tool_python

def check_text(text):

    tool = language_tool_python.LanguageTool('en-US')

    matches = tool.check(text)
    issues = []
    for match in matches:
        issue = {
            'message': match.message,
            'context': match.context,
            'offset': match.offset,
            'length': match.errorLength,
            'category': match.category,
            'rule_id': match.ruleId,
            'suggestions': match.replacements
        
        }
        issues.append(issue)
    tool.close()

    return issues

def print_issue(text):
    issues = check_text(text)
 
    if not issues:
        print("no issue found")
        return 
    else:
        print(f"found {len(issues)} issues")

        for i, issue in enumerate(issues, 1):
            print(f"issue :{i}")
            print(f"context: {issue['message']}")
            if issue['suggestions']:
                print(f"suggestions: {issue['suggestions']}")
            print()
        

text = """The deadlist virus in modern history, perhaps of all time, was the 1918 Spanish Flu. It killed about 20 to 50 million people worldwide, perhaps more. The total death toll is unknown because medical records were not kept in many areas.
The pandemic hit during World War I and devastated military troops. In the United States, for instance, more servicemen were killed from the flu than from the war itself. The Spanish flu was fatal to a higher proportion of young adults than most flu viruses.
The pandemic started mildly, in the spring of 1918, but was followed by a much more severe wave in the fall of 1918. The war likely contributed to the devastating mortality numbers, as large outbreaks occurred in military forces living in close quarters. Poor nutrition and the unsanitary conditions of war camps had an effect.
A third wave occurred in the winter and spring of 1919, and a fourth, smaller wave occurred in a few areas in spring 1920. Initial symptoms of the flu were typical: sore throat, headache, and fever. The flu often progressed rapidly to cause severe pneumonia and sometimes hemorrhage in the lungs and mucus membranes. A characteristic feature of severe cases of the Spanish Flu was heliotrope cyanosis, where the patient’s face turned blue from lack of oxygen in the cells. Death usually followed within hours or days.
Modern medicine such as vaccines, antivirals, and antibiotics for secondary infections were not available at that time, so medical personnel couldn’t do much more than try to relieve symptoms.
The flu ended when it had infected enough people that those who were susceptible had either died or developed immunity.
"""
print("checking text : " )
print(text)
print("\n KET QUA NHAN DUOC :")
print_issue(text)

checking text : 
The deadlist virus in modern history, perhaps of all time, was the 1918 Spanish Flu. It killed about 20 to 50 million people worldwide, perhaps more. The total death toll is unknown because medical records were not kept in many areas.
The pandemic hit during World War I and devastated military troops. In the United States, for instance, more servicemen were killed from the flu than from the war itself. The Spanish flu was fatal to a higher proportion of young adults than most flu viruses.
The pandemic started mildly, in the spring of 1918, but was followed by a much more severe wave in the fall of 1918. The war likely contributed to the devastating mortality numbers, as large outbreaks occurred in military forces living in close quarters. Poor nutrition and the unsanitary conditions of war camps had an effect.
A third wave occurred in the winter and spring of 1919, and a fourth, smaller wave occurred in a few areas in spring 1920. Initial symptoms of the flu were typic

In [6]:
from spellchecker import SpellChecker

class SpellingChecker:
    def __init__(self):
        self.spell = SpellChecker()
    
    def check_text(self, text):
        """
        Check spelling in a text and return corrections
        """
        # Split text into words and keep track of their positions
        words = text.split()
        word_positions = {}
        
        # Store positions for each word
        for index, word in enumerate(words):
            # Remove punctuation from word for checking
            clean_word = word.strip('.,!?:;()[]{}""''')
            if clean_word not in word_positions:
                word_positions[clean_word] = []
            word_positions[clean_word].append(index)
        
        # Find misspelled words
        misspelled = self.spell.unknown([word.strip('.,!?:;()[]{}""''') for word in words])
        
        # Store corrections
        corrections = []
        for word in misspelled:
            # Get all positions for this misspelled word
            positions = word_positions.get(word, [])
            
            correction = {
                'word': word,
                'suggestions': list(self.spell.candidates(word)),
                'positions': positions  # Now storing all positions
            }
            corrections.append(correction)
            
        return corrections

    def print_corrections(self, text):
        """
        Print spelling corrections in a formatted way
        """
        try:
            corrections = self.check_text(text)
            
            if not corrections:
                print("No spelling mistakes found!")
                return
            
            print(f"Found {len(corrections)} spelling mistakes:\n")
            
            for i, correction in enumerate(corrections, 1):
                print(f"Mistake #{i}:")
                print(f"- Word: {correction['word']}")
                print(f"- Suggestions: {', '.join(correction['suggestions'][:5])}")
                # Print all positions where the word appears
                positions_str = ', '.join(str(pos + 1) for pos in correction['positions'])
                print(f"- Position(s): word #{positions_str}")
                print()
                
        except Exception as e:
            print(f"An error occurred while checking the text: {str(e)}")

# Example usage
if __name__ == "__main__":
    checker = SpellingChecker()
    
    test_text = """The deadlist virus in modern history, perhaps of all time, was the 1918 Spanish Flu. It killed about 20 to 50 million people worldwide, perhaps more. The total death toll is unknown because medical records were not kept in many areas.
The pandemic hit during World War I and devastated military troops. In the United States, for instance, more servicemen were killed from the flu than from the war itself. The Spanish flu was fatal to a higher proportion of young adults than most flu viruses.
The pandemic started mildly, in the spring of 1918, but was followed by a much more severe wave in the fall of 1918. The war likely contributed to the devastating mortality numbers, as large outbreaks occurred in military forces living in close quarters. Poor nutrition and the unsanitary conditions of war camps had an effect.
A third wave occurred in the winter and spring of 1919, and a fourth, smaller wave occurred in a few areas in spring 1920. Initial symptoms of the flu were typical: sore throat, headache, and fever. The flu often progressed rapidly to cause severe pneumonia and sometimes hemorrhage in the lungs and mucus membranes. A characteristic feature of severe cases of the Spanish Flu was heliotrope cyanosis, where the patient's face turned blue from lack of oxygen in the cells. Death usually followed within hours or days.
Modern medicine such as vaccines, antivirals, and antibiotics for secondary infections were not available at that time, so medical personnel couldn't do much more than try to relieve symptoms.
The flu ended when it had infected enough people that those who were susceptible had either died or developed immunity."""
    
    print("Checking text:")
    print(test_text)
    print("\nResults:")
    checker.print_corrections(test_text)

Checking text:
The deadlist virus in modern history, perhaps of all time, was the 1918 Spanish Flu. It killed about 20 to 50 million people worldwide, perhaps more. The total death toll is unknown because medical records were not kept in many areas.
The pandemic hit during World War I and devastated military troops. In the United States, for instance, more servicemen were killed from the flu than from the war itself. The Spanish flu was fatal to a higher proportion of young adults than most flu viruses.
The pandemic started mildly, in the spring of 1918, but was followed by a much more severe wave in the fall of 1918. The war likely contributed to the devastating mortality numbers, as large outbreaks occurred in military forces living in close quarters. Poor nutrition and the unsanitary conditions of war camps had an effect.
A third wave occurred in the winter and spring of 1919, and a fourth, smaller wave occurred in a few areas in spring 1920. Initial symptoms of the flu were typical

In [10]:

import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Error loading punkt: <urlopen error [WinError 10060] A
[nltk_data]     connection attempt failed because the connected party
[nltk_data]     did not properly respond after a period of time, or
[nltk_data]     established connection failed because connected host
[nltk_data]     has failed to respond>
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Thanh Minh\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [11]:
from textblob import TextBlob
import re

class TextBlobChecker:
    def __init__(self):
        pass
    
    def check_text(self, text):
        """
        Check text using TextBlob with enhanced error detection
        """
        blob = TextBlob(text)
        
        # Split into sentences for better analysis
        sentences = text.split('.')
        corrections = []
        
        # Process each sentence
        word_position = 1  # Keep track of global word position
        
        for sentence in sentences:
            if not sentence.strip():
                continue
                
            # Create TextBlob for this sentence
            sentence_blob = TextBlob(sentence)
            
            # Get original and corrected words
            original_words = re.findall(r'\b\w+\b', sentence)
            corrected_words = re.findall(r'\b\w+\b', str(sentence_blob.correct()))
            
            # Compare words and find differences
            for i, (orig, corr) in enumerate(zip(original_words, corrected_words)):
                if orig.lower() != corr.lower():
                    # Get word tags for context
                    tags = dict(sentence_blob.tags)
                    word_type = tags.get(orig, 'unknown')
                    
                    correction = {
                        'word': orig,
                        'suggestion': corr,
                        'position': word_position + i,
                        'word_type': word_type,
                        'confidence': self._calculate_confidence(orig, corr)
                    }
                    corrections.append(correction)
            
            word_position += len(original_words)
        
        return corrections
    
    def _calculate_confidence(self, original, correction):
        """
        Calculate a simple confidence score for the correction
        """
        # Calculate Levenshtein distance
        distance = self._levenshtein_distance(original.lower(), correction.lower())
        max_len = max(len(original), len(correction))
        
        # Convert to a confidence score (0-100)
        confidence = (1 - (distance / max_len)) * 100
        return round(confidence, 2)
    
    def _levenshtein_distance(self, s1, s2):
        """
        Calculate the Levenshtein distance between two strings
        """
        if len(s1) < len(s2):
            return self._levenshtein_distance(s2, s1)

        if len(s2) == 0:
            return len(s1)

        previous_row = range(len(s2) + 1)
        for i, c1 in enumerate(s1):
            current_row = [i + 1]
            for j, c2 in enumerate(s2):
                insertions = previous_row[j + 1] + 1
                deletions = current_row[j] + 1
                substitutions = previous_row[j] + (c1 != c2)
                current_row.append(min(insertions, deletions, substitutions))
            previous_row = current_row

        return previous_row[-1]
    
    def print_corrections(self, text):
        """
        Print corrections in a formatted way
        """
        try:
            corrections = self.check_text(text)
            
            if not corrections:
                print("No spelling or grammar issues found!")
                return
            
            print(f"Found {len(corrections)} potential issues:\n")
            
            for i, correction in enumerate(corrections, 1):
                print(f"Issue #{i}:")
                print(f"- Word: {correction['word']}")
                print(f"- Suggestion: {correction['suggestion']}")
                print(f"- Position: word #{correction['position']}")
                print(f"- Word Type: {correction['word_type']}")
                print(f"- Confidence: {correction['confidence']}%")
                print()
                
        except Exception as e:
            print(f"An error occurred while checking the text: {str(e)}")

# Example usage
if __name__ == "__main__":
    checker = TextBlobChecker()
    
    test_text = """The deadlist virus in modern history, perhaps of all time, was the 1918 Spanish Flu. 
    It killed about 20 to 50 million people worldwide, perhaps more. 
    The total death toll is unknown because medical records were not kept in many areas."""
    
    print("Checking text:")
    print(test_text)
    print("\nResults:")
    checker.print_corrections(test_text)

Checking text:
The deadlist virus in modern history, perhaps of all time, was the 1918 Spanish Flu. 
    It killed about 20 to 50 million people worldwide, perhaps more. 
    The total death toll is unknown because medical records were not kept in many areas.

Results:
An error occurred while checking the text: 
Looks like you are missing some required data for this feature.

To download the necessary data, simply run

    python -m textblob.download_corpora

or use the NLTK downloader to download the missing data: http://nltk.org/data.html
If this doesn't fix the problem, file an issue at https://github.com/sloria/TextBlob/issues.

