In [1]:
import requests
import time

def check_grammar(text, language="en-US", max_retries=3):
    """
    Check text for grammar and spelling errors using LanguageTool API
    
    Args:
        text (str): Text to check
        language (str): Language code (default: en-US)
        max_retries (int): Maximum number of retry attempts
    
    Returns:
        list: List of errors with suggestions
    """
    # List of API endpoints to try
    api_urls = [
        "https://api.languagetool.org/v2/check",
        "https://languagetool.org/api/v2/check",
        "http://localhost:8081/v2/check"  # If running local LanguageTool server
    ]
    
    params = {
        'text': text,
        'language': language,
        'enabledOnly': False
    }
    
    for url in api_urls:
        retries = 0
        while retries < max_retries:
            try:
                print(f"Trying to connect to: {url}")
                response = requests.post(url, data=params, timeout=10)
                response.raise_for_status()
                result = response.json()
                
                errors = []
                for match in result.get('matches', []):
                    error = {
                        'message': match['message'],
                        'context': match['context']['text'],
                        'suggestions': [fix['value'] for fix in match.get('replacements', [])[:3]],
                        'rule': match['rule']['description']
                    }
                    errors.append(error)
                
                return errors
                
            except requests.exceptions.ConnectionError:
                print(f"Connection error. Retrying... ({retries + 1}/{max_retries})")
                retries += 1
                time.sleep(2)  # Wait 2 seconds before retrying
                
            except requests.exceptions.Timeout:
                print(f"Request timed out. Retrying... ({retries + 1}/{max_retries})")
                retries += 1
                time.sleep(2)
                
            except requests.exceptions.RequestException as e:
                print(f"Error with {url}: {str(e)}")
                break  # Try next URL
                
        print(f"Failed to connect to {url}, trying next endpoint...")
    
    return None

def print_errors(errors):
    """
    Print errors in a readable format
    
    Args:
        errors (list): List of errors from check_grammar()
    """
    if errors is None:
        print("\nKhông thể kết nối với API. Vui lòng thử lại sau.")
        return
        
    if not errors:
        print("No errors found!")
        return
        
    print("Found the following potential errors:\n")
    for i, error in enumerate(errors, 1):
        print(f"Error {i}:")
        print(f"- Issue: {error['message']}")
        print(f"- Context: \"{error['context']}\"")
        if error['suggestions']:
            print(f"- Suggestions: {', '.join(error['suggestions'])}")
        print(f"- Rule: {error['rule']}\n")

# Example usage
if __name__ == "__main__":
    text = """i is smart"""
    print("Checking text:", text)
    print("-" * 50)
    
    errors = check_grammar(text)
    print_errors(errors)

Checking text: i is smart
--------------------------------------------------
Trying to connect to: https://api.languagetool.org/v2/check
Found the following potential errors:

Error 1:
- Issue: The personal pronoun “I” should be uppercase.
- Context: "i is smart"
- Suggestions: I
- Rule: i vs. I

Error 2:
- Issue: Did you mean “am” or “will be”?
- Context: "i is smart"
- Suggestions: am, will be
- Rule: Agreement: 'I is / you is / ... '



In [26]:
import re 
import tkinter as tk 
from tkinter.scrolledtext import ScrolledText 

import nltk
from nltk.corpus import words

nltk.download("words")

class SpellingChecker:

    def init(self):
        self.root = tk.Tk()
        self.root.geometry("600x500")

        self.text = ScrolledText(self.root, font=("Arial", 14))
        self.text.bind("<KeyRelease>", self.check)
        self.text.pack()

        self.old_spaces = 0

        self.root.mainloop()
    
    def check(self, event):
        content = self.text.get("1.0", tk.END)
        space_count = content.count(" ")

        for tag in self.text.tag_names():
            self.text.tag_delete(tag)

        if space_count != self.old_spaces:
            self.old_spaces = space_count
            for word in content.split(" "):
                if re.sub(r"[^\w]", "", word.lower()) not in words():
                    position = content.find(word)
                    self.text.tag_add(word, f"1.{position}", f"1.{position + len(word)}")
                    self.text.tag_config(word, foreground="red")

SpellingChecker()

[nltk_data] Downloading package words to
[nltk_data]     C:\Users\Acer\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!


<__main__.SpellingChecker at 0x17b6453eae0>

In [12]:
from spellchecker import SpellChecker

class SpellingChecker:
    def __init__(self):
        self.spell = SpellChecker()
    
    def check_text(self, text):
        """
        Check spelling in a text and return corrections
        """
        # Split text into words and keep track of their positions
        words = text.split()
        word_positions = {}
        
        # Store positions for each word
        for index, word in enumerate(words):
            # Remove punctuation from word for checking
            clean_word = word.strip('.,!?:;()[]{}""''')
            if clean_word not in word_positions:
                word_positions[clean_word] = []
            word_positions[clean_word].append(index)
        
        # Find misspelled words
        misspelled = self.spell.unknown([word.strip('.,!?:;()[]{}""''') for word in words])
        
        # Store corrections
        corrections = []
        for word in misspelled:
            # Get all positions for this misspelled word
            positions = word_positions.get(word, [])
            
            correction = {
                'word': word,
                'suggestions': list(self.spell.candidates(word)),
                'positions': positions  # Now storing all positions
            }
            corrections.append(correction)
            
        return corrections

    def print_corrections(self, text):
        """
        Print spelling corrections in a formatted way
        """
        try:
            corrections = self.check_text(text)
            
            if not corrections:
                print("No spelling mistakes found!")
                return
            
            print(f"Found {len(corrections)} spelling mistakes:\n")
            
            for i, correction in enumerate(corrections, 1):
                print(f"Mistake #{i}:")
                print(f"- Word: {correction['word']}")
                print(f"- Suggestions: {', '.join(correction['suggestions'][:5])}")
                # Print all positions where the word appears
                positions_str = ', '.join(str(pos + 1) for pos in correction['positions'])
                print(f"- Position(s): word #{positions_str}")
                print()
                
        except Exception as e:
            print(f"An error occurred while checking the text: {str(e)}")

# Example usage
if __name__ == "__main__":
    checker = SpellingChecker()
    
    test_text = """The deadliest virus in modern history, perhaps of all time, was the 1918 Spanish Flu. It killed about 20 to 50 million people worldwide, perhaps more. The total death toll is unknown because medical records were not kept in many areas.
The pandemic hit during World War I and devastated military troops. In the United States, for instance, more servicemen were killed from the flu than from the war itself. The Spanish flu was fatal to a higher proportion of young adults than most flu viruses.
The pandemic started mildly, in the spring of 1918, but was followed by a much more severe wave in the fall of 1918. The war likely contributed to the devastating mortality numbers, as large outbreaks occurred in military forces living in close quarters. Poor nutrition and the unsanitary conditions of war camps had an effect.
A third wave occurred in the winter and spring of 1919, and a fourth, smaller wave occurred in a few areas in spring 1920. Initial symptoms of the flu were typical: sore throat, headache, and fever. The flu often progressed rapidly to cause severe pneumonia and sometimes hemorrhage in the lungs and mucus membranes. A characteristic feature of severe cases of the Spanish Flu was heliotrope cyanosis, where the patient's face turned blue from lack of oxygen in the cells. Death usually followed within hours or days.
Modern medicine such as vaccines, antivirals, and antibiotics for secondary infections were not available at that time, so medical personnel couldn't do much more than try to relieve symptoms.
The flu ended when it had infected enough people that those who were susceptible had either died or developed immunity."""
    
    print("Checking text:")
    print(test_text)
    print("\nResults:")
    checker.print_corrections(test_text)

Checking text:
The deadliest virus in modern history, perhaps of all time, was the 1918 Spanish Flu. It killed about 20 to 50 million people worldwide, perhaps more. The total death toll is unknown because medical records were not kept in many areas.
The pandemic hit during World War I and devastated military troops. In the United States, for instance, more servicemen were killed from the flu than from the war itself. The Spanish flu was fatal to a higher proportion of young adults than most flu viruses.
The pandemic started mildly, in the spring of 1918, but was followed by a much more severe wave in the fall of 1918. The war likely contributed to the devastating mortality numbers, as large outbreaks occurred in military forces living in close quarters. Poor nutrition and the unsanitary conditions of war camps had an effect.
A third wave occurred in the winter and spring of 1919, and a fourth, smaller wave occurred in a few areas in spring 1920. Initial symptoms of the flu were typica

In [10]:

import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Error loading punkt: <urlopen error [WinError 10060] A
[nltk_data]     connection attempt failed because the connected party
[nltk_data]     did not properly respond after a period of time, or
[nltk_data]     established connection failed because connected host
[nltk_data]     has failed to respond>
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Thanh Minh\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [6]:
from flair.data import Sentence
from flair.models import SequenceTagger

# load tagger
tagger = SequenceTagger.load("flair/upos-english")

# make example sentence
sentence = Sentence("I love Berlin.")

# predict NER tags
tagger.predict(sentence)

# print sentence
print(sentence)

# print predicted NER spans
print('The following NER tags are found:')
# iterate over entities and print
for entity in sentence.get_spans('pos'):
    print(entity)


2025-01-10 15:03:44,658 SequenceTagger predicts: Dictionary with 19 tags: <unk>, NOUN, VERB, PUNCT, ADP, DET, PROPN, PRON, ADJ, ADV, CCONJ, PART, NUM, AUX, INTJ, SYM, X, <START>, <STOP>
Sentence[4]: "I love Berlin." → ["I"/PRON, "love"/VERB, "Berlin"/PROPN, "."/PUNCT]
The following NER tags are found:


In [3]:
import spacy
from spacy import displacy

# Load English language model
nlp = spacy.load("en_core_web_sm")

# Process the text
doc = nlp("Jack messed up his room")

# Print dependency information
print("\nDependency Parse:")
for token in doc:
    print(f"{token.text:12} {token.dep_:12} {token.head.text}")

# Configure visualization options
options = {
    "compact": False,
    "bg": "#ffffff",
    "color": "#000000",
    "font": "Arial",
    "arrow_spacing": 20,
    "arrow_width": 2,
    "distance": 120,
    "offset_x": 50,
    "word_spacing": 40,
}

# Generate and display syntax tree visualization
html = displacy.render(doc, style="dep", options=options)

# Save visualization to file
with open("syntax_tree.html", "w", encoding="utf-8") as f:
    f.write("""
    <html>
    <head>
        <title>Syntax Tree</title>
        <style>
            body { margin: 20px; }
            .syntax-tree { border: 1px solid #ccc; padding: 20px; }
        </style>
    </head>
    <body>
        <div class="syntax-tree">
    """)
    f.write(html)
    f.write("""
        </div>
    </body>
    </html>
    """)

# Print detailed syntactic analysis
print("\nDetailed Syntactic Analysis:")
for token in doc:
    print(f"""
Token: {token.text}
    Dependency: {token.dep_}
    Head word: {token.head.text}
    Part of speech: {token.pos_}
    Syntactic tag: {token.tag_}
    Detailed tag: {spacy.explain(token.tag_)}
    Dependency explanation: {spacy.explain(token.dep_)}
    Children: {[child.text for child in token.children]}
    """)

# Print phrase structure
print("\nPhrase Structure:")
for chunk in doc.noun_chunks:
    print(f"""
Noun Phrase: {chunk.text}
    Root text: {chunk.root.text}
    Root dep_: {chunk.root.dep_}
    Root head text: {chunk.root.head.text}
""")


Dependency Parse:
Jack         nsubj        messed
messed       ROOT         messed
up           prt          messed
his          poss         room
room         dobj         messed


TypeError: write() argument must be str, not None