In [2]:
# Install the required packages
%pip install transformers symspellpy

from transformers import T5ForConditionalGeneration, T5Tokenizer
from symspellpy import SymSpell, Verbosity
import pkg_resources

Note: you may need to restart the kernel to use updated packages.


In [3]:

# Load Pre-trained Grammar Correction Model
model_name = "vennify/t5-base-grammar-correction"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [4]:
# Load SymSpell for Spelling Correction
symspell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)
dictionary_path = pkg_resources.resource_filename(
    "symspellpy", "frequency_dictionary_en_82_765.txt"
)
symspell.load_dictionary(dictionary_path, term_index=0, count_index=1)

True

In [5]:
def correct_spelling(text):
    """Fix spelling errors."""
    words = text.split()
    corrected_words = []
    for word in words:
        suggestions = symspell.lookup(word, Verbosity.CLOSEST, max_edit_distance=2)
        corrected_word = suggestions[0].term if suggestions else word
        corrected_words.append(corrected_word)
    return " ".join(corrected_words)

In [6]:
def correct_grammar(text):
    """Fix grammar errors."""
    inputs = tokenizer(text, return_tensors="pt")
    outputs = model.generate(**inputs, max_length=len(text.split()) + 10)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [7]:
# Interactive Testing
input_text = "She go to the park evryday."
print(f"Original: {input_text}")

Original: She go to the park evryday.


In [8]:
# Fix spelling first
spelling_fixed = correct_spelling(input_text)
print(f"Spelling Fixed: {spelling_fixed}")

Spelling Fixed: the go to the park everyday


In [9]:
# Fix grammar next
grammar_fixed = correct_grammar(spelling_fixed)
print(f"Final Correction: {grammar_fixed}")

Final Correction: Go to the park everyday and go to the park every day.
