In [1]:
#Setting Up Grammar Correction Model


In [2]:
# We’ll use T5 or BART (fine-tuned on grammar correction datasets)

In [3]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Loading the model and tokenizer
model_name = "facebook/bart-large-cnn"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

def correct_grammar(text):
    inputs = tokenizer("grammar: " + text, return_tensors="pt", max_length=512, truncation=True)
    outputs = model.generate(**inputs)
    corrected_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return corrected_text

# Testing the correction function
print(correct_grammar("This sentence are incorrect."))


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

grammar: This sentence is incorrect. This sentence was originally published in The Guardian. We are happy to point out that this is not the case. We apologise for any confusion caused by the incorrect spelling of the word ‘crisp’ or ‘crust’.


In [4]:
# Adding Formal vs. Casual Writing Styles

In [5]:
# We can modify the model prompt to suggest formal or casual alternatives.


In [6]:
def rewrite_text(text, style="formal"):
    prompt = f"{style}: {text}"
    inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
    outputs = model.generate(**inputs)
    rewritten_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return rewritten_text

print(rewrite_text("Hey, what's up?", "formal"))


formal: Hey, what's up? Formal: What's up, dude? Formally: What are you up to? What's going on? What do you want to do? What are your plans for the day? What is your plan for the evening? What about the morning?


In [7]:
# Speeding Up Corrections Using FAISS

In [8]:
# We'll store & retrieve common corrections using FAISS for quick suggestions

In [12]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Loading embedding model
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# Sample corrections
corrections = [
    ("I has a apple.", "I have an apple."),
    ("She go to school.", "She goes to school."),
]

# Creating FAISS index
correction_texts = [c[0] for c in corrections]
correction_embeddings = np.array([embed_model.encode(c[0]) for c in corrections]).astype('float32')

index = faiss.IndexFlatL2(correction_embeddings.shape[1])
index.add(correction_embeddings)

def retrieve_correction(text):
    embedding = np.array([embed_model.encode(text)]).astype('float32')
    _, idx = index.search(embedding, 1)
    return corrections[idx[0][0]][1]

print(retrieve_correction("She go to school."))


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

She goes to school.
