In [None]:
This approach doesn't support Boolean operators: it treats all terms equally (so operators such as `AND` are spell-checked like ordinary words) and checks spelling against every term in the positional index, which is time-consuming and computationally expensive in our case.

The code below treats the whole query as a single term, which is incorrect.

In [None]:
import Levenshtein

# Sample positional index: maps each term to its document frequency ('df') and
# to a 'posting_list' of document id -> set of integers (presumably the token
# positions of the term in that document).
positional_index = {
    term: {'df': len(postings), 'posting_list': postings}
    for term, postings in {
        'apple': {'doc1': {1, 5, 20}, 'doc2': {3, 8, 15}},
        'banana': {'doc1': {2, 6, 10}},
        'orange': {'doc2': {7, 11, 14}, 'doc3': {13, 18, 22}},
        'grape': {'doc1': {4, 8, 13}},
        'peach': {'doc2': {1, 9, 17}},
    }.items()
}

def spell_check(query):
    """Return the index term closest to ``query`` by Levenshtein distance.

    The entire ``query`` string is compared, as one unit, with every key of
    ``positional_index``. If several terms share the smallest distance, the
    one that appears first in the index is returned. ``None`` is returned
    when the index has no terms.
    """
    return min(
        positional_index,
        key=lambda candidate: Levenshtein.distance(query, candidate),
        default=None,
    )

# Example usage: the two-word query is handed to spell_check() as one string.
query = 'appeland bana'
closest_word = spell_check(query)
print(
    f"Suggested spelling for '{query}': {closest_word}"
    if closest_word
    else f"No suggestion found for '{query}'"
)


Suggested spelling for 'appeland bana': banana


The code below treats each term separately, but it still has the complications discussed above.

In [None]:
import Levenshtein

# Sample positional index, built from (term, document frequency, postings)
# rows. Each postings dict maps a document id to a set of integers
# (presumably the token positions of the term in that document).
positional_index = {
    term: {'df': doc_freq, 'posting_list': postings}
    for term, doc_freq, postings in (
        ('apple', 2, {'doc1': {1, 5, 20}, 'doc2': {3, 8, 15}}),
        ('banana', 1, {'doc1': {2, 6, 10}}),
        ('orange', 2, {'doc2': {7, 11, 14}, 'doc3': {13, 18, 22}}),
        ('grape', 1, {'doc1': {4, 8, 13}}),
        ('peach', 1, {'doc2': {1, 9, 17}}),
    )
}

def spell_check_term(term):
    """Find the key of ``positional_index`` nearest to ``term``.

    Nearness is the Levenshtein edit distance. Ties are resolved in favour of
    the term that comes first in the index, and ``None`` is returned when the
    index is empty.
    """
    # (distance, index position, word): comparing these tuples picks the
    # smallest distance first and, among equals, the earliest index entry.
    ranked = [
        (Levenshtein.distance(term, word), position, word)
        for position, word in enumerate(positional_index)
    ]
    if not ranked:
        return None
    return min(ranked)[2]

def spell_check_query(query):
    """Correct ``query`` one whitespace-separated term at a time.

    Every term is replaced by the result of ``spell_check_term``; when that
    result is empty or ``None`` the term is kept as typed. The corrected terms
    are returned joined by single spaces.
    """
    return ' '.join(
        spell_check_term(token) or token
        for token in query.split()
    )

# Example usage: the query mixes a Boolean operator ('AND') with misspelled
# terms. spell_check_query() sends every whitespace-separated token, including
# 'AND', through spell_check_term().
query = 'appl AND bnnana orangge'
corrected_query = spell_check_query(query)
print(f"Suggested spelling for '{query}': {corrected_query}")


Suggested spelling for 'appl AND bnnana orangge': apple apple banana orange


**Don't use the code below, as it doesn't work for multiple languages.**

In [None]:
import spellchecker
from spellchecker import SpellChecker

def spell_check_query(query):
    """Spell-check every whitespace-separated term of ``query`` with pyspellchecker.

    Args:
        query: Raw query string.

    Returns:
        The corrected terms joined by single spaces. A term for which the
        checker has no suggestion is kept exactly as typed.
    """
    spell = SpellChecker()
    # correction() returns None when it finds no candidate for a term; fall
    # back to the original term so ' '.join() never receives a non-string
    # (which would raise TypeError).
    return ' '.join(spell.correction(term) or term for term in query.split())

# Example usage: a French sentence is run through spell_check_query(), which
# builds its checker with SpellChecker()'s default settings.
query = 'Mieuz vaut tard que jamais.'
corrected_query = spell_check_query(query)
print(f"Suggested spelling for '{query}': {corrected_query}")


Suggested spelling for 'Mieuz vaut tard que jamais.': minus vast hard due jamaica


In [None]:
# Install into the running kernel's environment; pinned to the version this notebook was run with.
%pip install pyspellchecker==0.8.1


Collecting pyspellchecker
  Downloading pyspellchecker-0.8.1-py3-none-any.whl.metadata (9.4 kB)
Downloading pyspellchecker-0.8.1-py3-none-any.whl (6.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m55.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pyspellchecker
Successfully installed pyspellchecker-0.8.1
Note: you may need to restart the kernel to use updated packages.


Use the code below for our project.

After careful consideration, I implemented the logic so that only English terms are spell-checked; terms in other languages are kept unchanged without checking. Supporting multiple languages is very complicated for this task, and trying to include them only degrades the results and the system.

I have used a weighted combination of several spell-checking libraries, which further improves the results.

In [None]:
from spellchecker import SpellChecker
from autocorrect import Speller
from langdetect import detect
from textblob import TextBlob

def weighted_spell_check_query(query):
    """Spell-check only the terms of ``query`` that are detected as English.

    The query is split on whitespace and each term is passed to
    ``langdetect.detect``. Terms reported as ``'en'`` are corrected with
    ``weighted_spell_check_en``; all other terms are kept unchanged.

    Args:
        query: Raw query string.

    Returns:
        The processed terms joined by single spaces.
    """
    # Local import: the exception class is only needed here, and langdetect is
    # already a dependency of this cell.
    from langdetect.lang_detect_exception import LangDetectException

    corrected_query = []
    for term in query.split():
        try:
            language = detect(term)
        except LangDetectException:
            # detect() raises for tokens with no usable features (e.g. digits
            # or punctuation only). Treat them as non-English and keep them
            # instead of aborting the whole query.
            language = None

        if language == 'en':
            # Weighted spell check using multiple libraries
            corrected_term = weighted_spell_check_en(term)
        else:
            corrected_term = term  # Retain word if it's not in English
        corrected_query.append(corrected_term)
    return ' '.join(corrected_query)

def weighted_spell_check_en(term):
    """Correct an English ``term`` by a weighted vote of three spell checkers.

    pyspellchecker (weight 0.4), autocorrect (weight 0.3) and TextBlob
    (weight 0.3) each propose a correction. The weights of identical proposals
    are added up and the proposal with the highest total is returned, so two
    libraries that agree outvote the third.

    The previous implementation multiplied the float weights by the suggested
    strings, which raises ``TypeError`` because a string cannot be multiplied
    by a float.

    Args:
        term: A single word to correct.

    Returns:
        The winning correction as a string, or ``term`` itself when no library
        produced a suggestion.
    """
    # (weight, suggestion) for each library.
    weighted_suggestions = (
        (0.4, SpellChecker().correction(term)),
        (0.3, Speller(lang='en')(term)),
        (0.3, str(TextBlob(term).correct())),
    )

    votes = {}
    for weight, suggestion in weighted_suggestions:
        # pyspellchecker returns None when it has no candidate; an empty
        # suggestion carries no information either, so neither gets a vote.
        if not suggestion:
            continue
        votes[suggestion] = votes.get(suggestion, 0.0) + weight

    if not votes:
        return term

    # max() returns the first entry with the highest total, so on a tie the
    # suggestion from the earlier library in the tuple above wins.
    return max(votes, key=votes.get)

# Example usage: every whitespace-separated term goes through detect(); only
# terms reported as 'en' are passed on to weighted_spell_check_en().
query = 'Mieuz vaut tard que jamais shhsh'
corrected_query = weighted_spell_check_query(query)
print(f"Suggested spelling for '{query}': {corrected_query}")


Suggested spelling for 'Mieuz vaut tard que jamais shhsh': Mieuz vaut tard que jamais shhsh


In [None]:
# Install into the running kernel's environment; pinned to the version this notebook was run with.
%pip install langdetect==1.0.9


Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m19.9 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: langdetect
  Building wheel for langdetect (setup.py) ... [?25ldone
[?25h  Created wheel for langdetect: filename=langdetect-1.0.9-py3-none-any.whl size=993225 sha256=59a421337447d557eff2de2899c734c0b1e8e61cb593671adafc7d33522da438
  Stored in directory: /root/.cache/pip/wheels/95/03/7d/59ea870c70ce4e5a370638b5462a7711ab78fba2f655d05106
Successfully built langdetect
Installing collected packages: langdetect
Successfully installed langdetect-1.0.9
Note: you may need to restart the kernel to use updated packages.
