### Import Required Libraries

In [1]:
import pandas as pd
import nltk
import re
import string
from nltk.stem import WordNetLemmatizer

# Download required NLTK data.
# Presumably these corpora back the WordNetLemmatizer imported above (confirm
# against the NLTK docs). When a package is already installed, NLTK only
# reports it as up-to-date, as the captured output of this cell shows.
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\athar\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\athar\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

### Load and Process the Text Dataset

In [2]:
# Read the CSV file.
# pandas' default NA parsing converts cells such as "null", "nan" or "NA" into
# missing values, which would silently drop those dictionary words from the
# vocabulary. Treat only blank cells as missing so every word stays a string.
df = pd.read_csv("dict.csv", keep_default_na=False, na_values=[""])

In [3]:
# Preview the first 5 rows to check which columns were loaded
df.head()

Unnamed: 0,word,definition
0,abbacy,"The word ""abbacy"" refers to the office or juri..."
1,abductor,"The word ""abductor"" refers to a person or thin..."
2,abas,"The word ""abas"" does not have a widely recogni..."
3,abasement,"The word ""abasement"" refers to the action or e..."
4,abampere,"The term ""abampere"" is a unit of electric curr..."


In [4]:
# Join every entry of the 'word' column with single spaces, then lowercase the
# result so the vocabulary built from it is entirely lowercase.
text_data = df['word'].str.cat(sep=' ').lower()

In [5]:
# Extract all words using a regular expression.
# \w+ matches runs of letters, digits and underscores, so an entry containing a
# hyphen, apostrophe or space is split into several separate tokens.
words = re.findall(r'\w+', text_data)

In [6]:
# Create a set of unique words (vocabulary); get_best_correction uses it for
# membership tests and set intersections.
vocab = set(words)

### Count Word Frequency

In [7]:
# Word frequency table
def count_word_frequency(words):
    """Tally how many times each token occurs in ``words``.

    Args:
        words: Iterable of hashable tokens (the notebook passes a list of
            strings).

    Returns:
        dict mapping each distinct token to its number of occurrences, with
        keys in order of first appearance.
    """
    tally = {}
    for token in words:
        if token in tally:
            tally[token] += 1
        else:
            # First sighting of this token.
            tally[token] = 1
    return tally

# Occurrence count for every token extracted from the word column
word_count = count_word_frequency(words)

### Calculate Word Probability

In [8]:
# Relative frequency of each word
def calculate_probability(word_count):
    """Convert raw counts into relative frequencies.

    Args:
        word_count: Mapping of word -> occurrence count.

    Returns:
        dict mapping each word to ``count / total``, where ``total`` is the
        sum of all counts. An empty mapping yields an empty dict.
    """
    total_words = sum(word_count.values())
    shares = {}
    for word, count in word_count.items():
        shares[word] = count / total_words
    return shares

# Relative frequency of every token; later passed to get_best_correction to
# rank candidate corrections.
probabilities = calculate_probability(word_count)

### Define NLP-Based Functions

Here, we define a few helper functions that will generate possible corrections for misspelled words. These functions apply common spelling correction strategies such as:

1. Deleting a letter: removes one letter from the word.
2. Swapping adjacent letters: swaps two adjacent letters in the word.
3. Replacing a letter: replaces each letter, in turn, with every letter of the alphabet.
4. Inserting a new letter: inserts a new letter at every position in the word.

In [9]:
# Lemmatization
# A single WordNetLemmatizer instance is created once and shared by lemmatize_word.
lemmatizer = WordNetLemmatizer()

def lemmatize_word(word):
    """Lemmatize a given word using NLTK WordNet Lemmatizer.

    Args:
        word: The token to lemmatize.

    Returns:
        Whatever ``lemmatizer.lemmatize(word)`` returns, called with its
        default arguments; presumably the base form of ``word`` as a string
        (TODO confirm against the NLTK documentation).

    NOTE(review): none of the other cells shown in this notebook call this
    helper.
    """
    return lemmatizer.lemmatize(word)

def delete_letter(word):
    """Return every string obtained by removing exactly one character.

    The result is a list with one entry per position of ``word``, in
    left-to-right order; duplicates are kept (e.g. ``"aa"`` gives
    ``["a", "a"]``). An empty ``word`` gives an empty list.
    """
    variants = []
    for idx, _ in enumerate(word):
        variants.append(word[:idx] + word[idx + 1:])
    return variants

def swap_letters(word):
    """Return every string obtained by transposing two adjacent characters.

    The result is a list with one entry per adjacent pair, ordered by the
    position of the pair. Words shorter than two characters give an empty
    list.
    """
    variants = []
    for right in range(1, len(word)):
        left = right - 1
        # Exchange the characters at positions `left` and `right`.
        variants.append(word[:left] + word[right] + word[left] + word[right + 1:])
    return variants

def replace_letter(word):
    """Return every string obtained by substituting one character.

    Each position of ``word`` is replaced in turn by each of the 26 lowercase
    ASCII letters, so the list has ``26 * len(word)`` entries. The
    substitution that puts back the same letter is included, so ``word``
    itself appears in the result when it is lowercase ASCII.
    """
    variants = []
    for idx in range(len(word)):
        head, tail = word[:idx], word[idx + 1:]
        variants.extend(head + letter + tail for letter in string.ascii_lowercase)
    return variants

def insert_letter(word):
    """Return every string obtained by inserting one lowercase letter.

    Each of the 26 lowercase ASCII letters is inserted at every gap of
    ``word`` (before the first character, between characters, and after the
    last), so the list has ``26 * (len(word) + 1)`` entries.
    """
    variants = []
    for idx in range(len(word) + 1):
        head, tail = word[:idx], word[idx:]
        for letter in string.ascii_lowercase:
            variants.append(head + letter + tail)
    return variants

### Generate Candidate Corrections

In [10]:
# Candidate corrections one edit away
def generate_candidates(word):
    """Collect every string reachable from ``word`` with a single edit.

    Combines the results of delete_letter, swap_letters, replace_letter and
    insert_letter into one set, so duplicates produced by different edits
    collapse into a single entry.
    """
    return set().union(
        delete_letter(word),
        swap_letters(word),
        replace_letter(word),
        insert_letter(word),
    )

def generate_candidates_level2(word):
    """Collect every string reachable from ``word`` with two successive edits.

    generate_candidates is applied to ``word`` and then again to each of its
    results; the union of the second-round sets is returned. Because the
    second edit can undo the first, the set can also contain strings closer
    than two edits, including ``word`` itself.
    """
    second_round = (generate_candidates(edit) for edit in generate_candidates(word))
    return set().union(*second_round)

### Get the Best Corrections

In [11]:
def get_best_correction(word, probs, vocab, max_suggestions=3):
    """Return the most probable vocabulary words that could correct ``word``.

    The input is stripped of surrounding whitespace and lowercased before any
    lookup, because the notebook's vocabulary is built from lowercased text;
    otherwise a capital letter would count as a spelling error.

    Lookup order:
      1. If the normalized word is in ``vocab`` it is the only candidate.
      2. Otherwise, vocabulary words one edit away (generate_candidates).
      3. If none exist, vocabulary words two edits away
         (generate_candidates_level2).

    Args:
        word: The word typed by the user.
        probs: Mapping of word -> probability; missing words score 0.
        vocab: Collection of known (lowercase) words.
        max_suggestions: Maximum number of pairs to return.

    Returns:
        A list of ``(candidate, probability)`` tuples sorted by descending
        probability, with ties broken alphabetically so the result is
        reproducible across runs. The list is empty when the input is blank
        or no vocabulary word is within two edits.
    """
    normalized = word.strip().lower()
    if not normalized:
        # Nothing to correct; avoid suggesting arbitrary single letters.
        return []

    if normalized in vocab:
        candidates = [normalized]
    else:
        candidates = generate_candidates(normalized).intersection(vocab)
        if not candidates:
            # Only pay for the much larger two-edit search when needed.
            candidates = generate_candidates_level2(normalized).intersection(vocab)

    ranked = sorted(
        ((candidate, probs.get(candidate, 0)) for candidate in candidates),
        # Highest probability first; the word itself is a stable tie-breaker
        # because set iteration order is not deterministic between runs.
        key=lambda pair: (-pair[1], pair[0]),
    )
    return ranked[:max_suggestions]

### User Input & Output Suggestions

In [12]:
# Take user input (execution pauses here until a word is submitted at the prompt)
user_input = input("\n Enter a word for autocorrection: ")

In [13]:
# Echo the word exactly as it was typed, before any correction is applied
print(f"\n You entered: {user_input}")


 You entered: Prwdiction


In [14]:
# Get the best corrections: at most three (word, probability) pairs, sorted by
# descending probability.
suggestions = get_best_correction(user_input, probabilities, vocab, max_suggestions=3)

In [15]:
# Print the top suggestions, numbered from 1 (only the word of each
# (word, probability) pair is shown).
print("\n Top suggestions:")
if suggestions:
    for i, suggestion in enumerate(suggestions, start=1):
        print(f"{i}. {suggestion[0]}")
else:
    # get_best_correction returns an empty list when no vocabulary word is
    # close enough; say so instead of leaving the header with nothing under it.
    print("No suggestions found.")


 Top suggestions:
1. prediction
