# **1. Build a Frequency dictionary from the Input Corpus**

In [25]:
# Load the whole corpus into memory and split it into whitespace-separated tokens.
with open('/kaggle/input/corpus/corpus_full.txt', 'r') as file:
    corpus = file.read()
words = corpus.split()

# word_freq maps each token to the number of times it occurs in the corpus.
word_freq = {}
for word in words:
    word_freq[word] = word_freq.get(word, 0) + 1

In [26]:
def check_no_mistake(input_str):
    """Return True when ``input_str`` is already a known word.

    A word counts as known when it is a key of the module-level ``word_freq``
    frequency dictionary.

    Args:
        input_str: The word to look up.

    Returns:
        bool: True if ``input_str`` is a key of ``word_freq``, otherwise False.
    """
    # Direct hash lookup; avoids scanning every (key, value) pair of the dictionary.
    return input_str in word_freq

# **2. Handle all 4 different cases mentioned in the problem statement**
* Deletion
* Replacement
* Transposition
* Insertion 

In [27]:
def delete_char_pairs(word):
    """Build every string obtained by removing exactly one character from ``word``.

    Handles the "extra letter" kind of typo (e.g. heello -> hello).

    Args:
        word: The input string.

    Returns:
        list: The distinct shortened strings, in no particular order.
        An empty ``word`` yields an empty list.
    """
    # A set collapses duplicates such as the two ways of dropping an 'l' from "hello".
    shortened = {word[:pos] + word[pos + 1:] for pos in range(len(word))}
    return list(shortened)

In [28]:
def replace(word):
    """Find known words that differ from ``word`` by substituting one character.

    Handles the "wrong letter" kind of typo (e.g. accedent -> accident). Each
    position of ``word`` is replaced in turn by every lowercase ASCII letter,
    and only results that are keys of the module-level ``word_freq`` are kept.
    Because the original letter is among the substitutes, ``word`` itself is
    returned when it is a key of ``word_freq`` and contains a lowercase letter.

    Args:
        word: The input string.

    Returns:
        list: The distinct matching words, in no particular order.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    matches = set()
    for pos in range(len(word)):
        prefix, suffix = word[:pos], word[pos + 1:]
        for ch in alphabet:
            candidate = prefix + ch + suffix
            if candidate in word_freq:
                matches.add(candidate)
    return list(matches)

In [29]:
def transpose(word):
    """Build every string obtained by swapping two adjacent characters of ``word``.

    Handles the "letters in the wrong order" kind of typo (e.g. nomrally -> normally).

    Args:
        word: The input string.

    Returns:
        list: The distinct swapped strings, in no particular order. Words
        shorter than two characters yield an empty list.
    """
    swapped = {swap(word, left, left + 1) for left in range(len(word) - 1)}
    return list(swapped)


def swap(word, i, j):
    """Return a copy of ``word`` with the characters at indices ``i`` and ``j`` exchanged.

    Helper for ``transpose``. Strings are immutable, so the exchange is done
    on a list of characters that is joined back into a string.
    """
    chars = [*word]
    first, second = chars[i], chars[j]
    chars[i] = second
    chars[j] = first
    return ''.join(chars)


In [30]:
def insert(word):
    """Find known words obtained by inserting one letter into ``word``.

    Handles the "missing letter" kind of typo (e.g. ordnary -> ordinary).
    Every lowercase ASCII letter is tried at every insertion point, including
    before the first character and after the last one, and only results that
    are keys of the module-level ``word_freq`` are kept.

    Args:
        word: The input string.

    Returns:
        list: The distinct matching words, in no particular order.
    """
    all_chars = 'abcdefghijklmnopqrstuvwxyz'
    insertions = set()
    # len(word) + 1 insertion points: the extra one appends after the last
    # character, so a missing final letter (hell -> hello) is also covered.
    for i in range(len(word) + 1):
        head, tail = word[:i], word[i:]
        for letter in all_chars:
            updated_word = head + letter + tail
            if updated_word in word_freq:
                insertions.add(updated_word)
    return list(insertions)


In [31]:
def words_generator(word):
    """Collect the candidates produced by all four single-edit generators.

    Calls ``delete_char_pairs``, ``transpose``, ``replace`` and ``insert`` on
    ``word`` (in that order) and merges their results.

    Args:
        word: The input string.

    Returns:
        list: The distinct candidate strings, in no particular order.
    """
    candidates = set()
    for make_edits in (delete_char_pairs, transpose, replace, insert):
        candidates.update(make_edits(word))
    return list(candidates)


In [32]:
def get_correct_word(word):
    """Return the most likely intended spelling of ``word``.

    If ``check_no_mistake`` reports the word as known, it is returned
    unchanged. Otherwise the candidates from ``words_generator`` are filtered
    to keys of the module-level ``word_freq`` and the one with the highest
    frequency is returned; ties are broken by picking the alphabetically
    smallest candidate. When no candidate is a known word, the input is
    returned unchanged.

    Args:
        word: The (possibly misspelled) input word.

    Returns:
        str: The chosen correction, or ``word`` itself when it is already
        known or no known candidate exists.
    """
    if check_no_mistake(word):
        return word

    # Some generators emit arbitrary strings, so keep only real corpus words.
    candidates = [c for c in words_generator(word) if c in word_freq]
    if not candidates:
        return word

    # Composite key: highest frequency first, then alphabetical order for ties.
    # A dedicated lambda parameter avoids shadowing the ``word`` argument.
    return min(candidates, key=lambda c: (-word_freq[c], c))

# 3. User Function

In [35]:
# Read the number of words to check, then read that many words from standard input.
n = int(input())
for i in range(n):
    # Strip surrounding whitespace and lower-case the word before looking it up.
    word = input().strip().lower()
    print(f'Input:{word}')
    # Print whatever get_correct_word returns for this word.
    print(get_correct_word(word))


 5
 contan


Input:contan
contain


 seroius


Input:seroius
serious


 pureli


Input:pureli
purely


 dose


Input:dose
dose


 note


Input:note
note


# 4. Application in Real Time

In [34]:
# Demo: run the word-level corrector over every token of a misspelled sentence.
sample_str = "waling in the ground is a graet wya to exrcise and is goodd for heelth .".lower()
print(f"Input sentence: {sample_str}")

# Each corrected token is followed by a single space (including the last one).
res = ""
for word in sample_str.split():
    res += get_correct_word(word) + " "
print(f"Correct Sentence: {res}")
    

Input sentence: waling in the ground is a graet wya to exrcise and is goodd for heelth .
Correct Sentence: walking in the ground is a great way to exercise and is good for health . 
