In [28]:
# =================================================================
# STEP 1: INITIALIZATION
# =================================================================
import pandas as pd
import textdistance
import re

# Model Configuration
CORPUS_PATH = 'words.txt'  
Q_VAL = 2                  # Character bigram level
TOP_K = 10                 # Number of suggestions to display

In [29]:
# =================================================================
# STEP 2: VOCABULARY GENERATION
# =================================================================
def load_vocabulary(path):
    with open(path, 'r', encoding='utf-8') as f:
        data = f.read().lower()
        # Extract unique words only to keep the search space efficient
        unique_words = list(set(re.findall(r'\w+', data)))
    
    print(f"Engine Ready: {len(unique_words)} unique words in vocabulary.")
    return unique_words

vocabulary = load_vocabulary(r"C:\Users\NAVEEN RAJ\OneDrive\Desktop\PRAVEEN RAJ S\python\Natural_Processing_Language\Word_Suggestion\words.txt")

Engine Ready: 8183 unique words in vocabulary.


In [30]:
# =================================================================
# STEP 3: SIMILARITY CALCULATION & FALLBACK
# =================================================================
def get_suggestions(input_word):
    query = input_word.lower().strip()
    
    # 1. Direct Match Check
    if query in vocabulary:
        return pd.DataFrame({
            'Suggested_Word': [query],
            'Similarity_Score': [1.0],
            'Match_Percentage': ['100%']
        }, index=[1])

    # 2. Compute Similarities
    similarities = [
        1 - textdistance.Jaccard(qval=Q_VAL).distance(v, query) 
        for v in vocabulary
    ]
    
    # 3. Compile Results
    results_df = pd.DataFrame({
        'Suggested_Word': vocabulary,
        'Similarity_Score': similarities
    })
    
    # 4. Filter and Rank
    suggestions = results_df[results_df['Similarity_Score'] > 0].sort_values(
        by='Similarity_Score', ascending=False
    ).head(TOP_K).reset_index(drop=True)
    
    # 5. Add Percentage Column (e.g., 0.666 -> 66.67%)
    suggestions['Match_Percentage'] = (suggestions['Similarity_Score'] * 100).round(2).astype(str) + '%'
    
    # 6. Shift Index to start from 1 instead of 0
    suggestions.index = suggestions.index + 1

    # 7. Fallback Logic: If no match found, suggest the original input
    if suggestions.empty:
        return pd.DataFrame({
            'Suggested_Word': [query],
            'Similarity_Score': [0.0],
            'Match_Percentage': ['0%']
        }, index=[1])
    
    return suggestions

In [33]:
# =================================================================
# STEP 4: INFERENCE
# =================================================================
user_input = "hel" 

# Execute Engine
final_output = get_suggestions(user_input)

print(f"Context Engine Suggestions for '{user_input}':")
display(final_output)

Context Engine Suggestions for 'hel':


Unnamed: 0,Suggested_Word,Similarity_Score,Match_Percentage
1,held,0.666667,66.67%
2,help,0.666667,66.67%
3,heel,0.666667,66.67%
4,he,0.5,50.0%
5,wheel,0.5,50.0%
6,helps,0.5,50.0%
7,heels,0.5,50.0%
8,helen,0.5,50.0%
9,shelf,0.5,50.0%
10,helped,0.4,40.0%
