# Step 1: Import Required Libraries

In [15]:
import nltk
import re
import string
from nltk.stem import WordNetLemmatizer
from google.colab import files

# Download required NLTK data: the 'wordnet' and 'omw-1.4' packages
# (presumably the corpora backing the WordNetLemmatizer imported above — confirm).
# Re-running is cheap: the logged output reports already-present packages as
# "already up-to-date" instead of fetching them again.
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

# Step 2: Load and Process the Text Dataset

In [16]:
print("Upload your text dataset (final.txt)")
uploaded = files.upload()

# A cancelled upload yields an empty mapping; fail with a clear message
# instead of an opaque IndexError further down.
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and choose a text file.")

# Only the first uploaded file is used as the corpus.
file_name = next(iter(uploaded))

# Read the text from the bytes returned by files.upload() rather than
# re-opening a path built from the dict key. Colab may save a re-uploaded file
# under a de-duplicated name (e.g. "final (1).txt"), in which case opening
# `file_name` could silently read an older copy of the file.
text_data = uploaded[file_name].decode("utf8").lower()

# Tokenise into runs of word characters; everything is lower-cased above, so
# the vocabulary is case-insensitive.
words = re.findall(r'\w+', text_data)

vocab = set(words)

Upload your text dataset (final.txt)


Saving final.txt to final (1).txt


# Step 3: Count Word Frequency

In [17]:
def count_word_frequency(words):
    """Tally how many times each token occurs.

    Args:
        words: Iterable of hashable tokens (strings in this notebook).

    Returns:
        dict mapping each distinct token to its number of occurrences, with
        keys in order of first appearance.
    """
    tally = {}
    for token in words:
        if token in tally:
            tally[token] += 1
        else:
            tally[token] = 1
    return tally

# Build the token -> occurrence-count table from the tokens extracted in Step 2.
word_count = count_word_frequency(words)

# Step 4: Calculate Word Probability

In [18]:
def calculate_probability(word_count):
    """Convert raw word counts into relative frequencies.

    Args:
        word_count: Mapping of word -> number of occurrences.

    Returns:
        dict mapping each word to ``count / total``, where ``total`` is the sum
        of all counts. If the total is zero (empty mapping, or every count is
        zero) each word maps to ``0.0`` rather than raising ZeroDivisionError.
    """
    total_words = sum(word_count.values())
    if total_words == 0:
        # Empty corpus: there is no meaningful distribution, so report zero
        # probability for every (possibly no) word instead of dividing by zero.
        return {word: 0.0 for word in word_count}
    return {word: count / total_words for word, count in word_count.items()}

# Relative frequency of every corpus word; used in Step 7 to rank candidate corrections.
probabilities = calculate_probability(word_count)

# Step 5: Define NLP-Based Functions

In [19]:
# Single shared lemmatizer instance, created once and reused by lemmatize_word.
lemmatizer = WordNetLemmatizer()

def lemmatize_word(word: str) -> str:
    """Lemmatize a given word using the NLTK WordNet lemmatizer.

    No part-of-speech tag is passed, so the lemmatizer's default is used
    (noun, per the NLTK documentation — confirm if verbs/adjectives matter).

    Args:
        word: The token to lemmatize.

    Returns:
        Whatever ``lemmatizer.lemmatize(word)`` returns for the token.
    """
    return lemmatizer.lemmatize(word)

def delete_letter(word):
    """Return every string obtained by removing exactly one character.

    The result has ``len(word)`` entries, ordered by the position of the
    removed character; duplicates are kept (e.g. "aa" yields ["a", "a"]).
    """
    variants = []
    for pos, _ in enumerate(word):
        variants.append(word[:pos] + word[pos + 1:])
    return variants

def swap_letters(word):
    """Return every string obtained by transposing two adjacent characters.

    The result has ``len(word) - 1`` entries (none for words shorter than two
    characters), ordered by the position of the swapped pair.
    """
    swapped = []
    for right in range(1, len(word)):
        left = right - 1
        head, tail = word[:left], word[right + 1:]
        swapped.append(head + word[right] + word[left] + tail)
    return swapped

def replace_letter(word):
    """Return every string obtained by substituting one character with a-z.

    For each position, all 26 lowercase ASCII letters are tried in
    alphabetical order, so the result has ``26 * len(word)`` entries and
    includes the original word itself (when a letter is replaced by itself).
    """
    alphabet = string.ascii_lowercase
    results = []
    for pos in range(len(word)):
        prefix, suffix = word[:pos], word[pos + 1:]
        for ch in alphabet:
            results.append(prefix + ch + suffix)
    return results

def insert_letter(word):
    """Return every string obtained by inserting one a-z letter.

    Each of the ``len(word) + 1`` gaps (including before the first and after
    the last character) receives all 26 lowercase ASCII letters in
    alphabetical order, giving ``26 * (len(word) + 1)`` entries.
    """
    alphabet = string.ascii_lowercase
    results = []
    for gap in range(len(word) + 1):
        before, after = word[:gap], word[gap:]
        for ch in alphabet:
            results.append(before + ch + after)
    return results

# Step 6: Generate Candidate Corrections

In [20]:
def generate_candidates(word):
    """Return the set of all strings one edit away from ``word``.

    An edit is a single deletion, adjacent transposition, substitution, or
    insertion, as produced by delete_letter, swap_letters, replace_letter and
    insert_letter. Duplicates across the four operations collapse in the set.
    """
    edit_ops = (delete_letter, swap_letters, replace_letter, insert_letter)
    return {variant for op in edit_ops for variant in op(word)}

def generate_candidates_level2(word):
    """Return the set of all strings reachable by applying two edits to ``word``.

    Every one-edit candidate of ``word`` is expanded again with
    generate_candidates and the results are merged into one set.
    """
    first_pass = generate_candidates(word)
    # set().union(...) with no arguments is simply an empty set.
    return set().union(*(generate_candidates(variant) for variant in first_pass))

# Step 7: Get the Best Corrections

In [21]:
def get_best_correction(word, probs, vocab, max_suggestions=3):
    """Suggest the most probable corrections for ``word``.

    Lookup order:
      1. ``word`` exactly as given, if it is in ``vocab``.
      2. ``word`` stripped of surrounding whitespace and lower-cased, if that
         form is in ``vocab`` (the notebook builds its vocabulary from
         lower-cased text, so "The" or " the " should match "the").
      3. Vocabulary words one edit away from the normalised word.
      4. Only if step 3 found nothing: vocabulary words two edits away.

    Args:
        word: The (possibly misspelled) input word.
        probs: Mapping of word -> probability; missing words score 0.
        vocab: Collection of known words.
        max_suggestions: Maximum number of suggestions to return.

    Returns:
        List of ``(candidate, probability)`` tuples, highest probability
        first. Ties are broken alphabetically so the result does not depend
        on set iteration order. The list is empty when nothing is found.
    """
    if word in vocab:
        candidates = [word]
    else:
        normalized = word.strip().lower()
        if normalized in vocab:
            candidates = [normalized]
        else:
            # Fall back to the (much larger) two-edit neighbourhood only when
            # no one-edit candidate is a known word.
            candidates = (
                generate_candidates(normalized).intersection(vocab)
                or generate_candidates_level2(normalized).intersection(vocab)
            )

    ranked = sorted(
        ((candidate, probs.get(candidate, 0)) for candidate in candidates),
        # Descending probability, then alphabetical for deterministic ties.
        key=lambda pair: (-pair[1], pair[0]),
    )
    return ranked[:max_suggestions]

# Step 8: User Input & Output Suggestions

In [23]:
# The vocabulary was built from lower-cased text, so normalise the typed word
# the same way; stray whitespace or capitals would otherwise never match.
user_input = input("\n Enter a word for autocorrection: ").strip().lower()
suggestions = get_best_correction(user_input, probabilities, vocab, max_suggestions=5)

print("\n Top suggestions:")
if not suggestions:
    # Without this the header would be followed by nothing at all.
    print("(no suggestions found)")
for suggestion in suggestions:
    # Each suggestion is a (word, probability) pair; show only the word.
    print(suggestion[0])


 Enter a word for autocorrection: athem

 Top suggestions:
them
athe
athes
