# Imports

In [25]:
import spacy
import numpy as np
from tabulate import tabulate

# Loading Model and Phrases

In [26]:
# spaCy pipeline shared by the notebook; semantic_similarity() runs both of its
# arguments through this object.
nlp = spacy.load("en_core_web_md")

# Reference vocabulary: correct_phrase() scores the user's input against every
# entry in this list and returns the best-scoring ones.
correct_phrases = [
    "machine learning",
    "deep learning",
    "reinforcement learning",
    "natural language processing",
    "computer vision",
    "support vector machine",
    "decision tree classifier",
    "neural networks",
    "convolutional neural network",
    "recurrent neural network",
    "gradient descent algorithm",
    "unsupervised learning",
    "supervised learning",
    "transfer learning",
    "generative adversarial networks",
    "transformers in NLP",
    "bert model",
    "ai in healthcare",
    "self-driving cars",
    "object detection",
    "face recognition"
]

# K-gram

In [27]:
def jaccard_similarity(str1, str2, n=3):
    """Return the Jaccard similarity of the character n-gram sets of two strings.

    Each string is broken into the set of its overlapping substrings of
    length ``n``. A non-empty string that is shorter than ``n`` contributes
    itself as its single gram, so that two identical short strings
    (e.g. ``"ai"`` vs ``"ai"`` with ``n=3``) score 1.0 instead of 0.

    Args:
        str1: First string.
        str2: Second string.
        n: Gram length; must be at least 1.

    Returns:
        ``|A ∩ B| / |A ∪ B|`` as a float in ``[0.0, 1.0]``; ``0.0`` when both
        gram sets are empty (i.e. both strings are empty).

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")

    def _ngrams(text):
        # Too short to slice even one full gram: fall back to the whole string.
        if len(text) < n:
            return {text} if text else set()
        return {text[i:i + n] for i in range(len(text) - n + 1)}

    grams1 = _ngrams(str1)
    grams2 = _ngrams(str2)
    union = grams1 | grams2
    if not union:
        return 0.0
    return len(grams1 & grams2) / len(union)

# Levenshtein Distance

In [28]:
def levenshtein_distance(s1, s2):
    """Return the Levenshtein (edit) distance between two strings.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn ``s1`` into ``s2``.

    Only two rows of the dynamic-programming table are kept at a time, using
    plain Python lists, so memory is proportional to the shorter string and
    the result is a built-in ``int``.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        The edit distance as a non-negative ``int``.
    """
    # The distance is symmetric, so put the shorter string on the inner axis
    # to keep the row buffers as small as possible.
    if len(s1) < len(s2):
        s1, s2 = s2, s1

    # Row 0: turning the empty prefix of s1 into s2[:j] costs j insertions.
    previous = list(range(len(s2) + 1))
    for i, ch1 in enumerate(s1, start=1):
        # Column 0: turning s1[:i] into the empty string costs i deletions.
        current = [i]
        for j, ch2 in enumerate(s2, start=1):
            cost = 0 if ch1 == ch2 else 1
            current.append(min(
                previous[j] + 1,         # deletion
                current[j - 1] + 1,      # insertion
                previous[j - 1] + cost,  # substitution / match
            ))
        previous = current
    return previous[-1]

# Semantic Similarity

In [29]:
def semantic_similarity(q1, q2):
    """Score how similar two texts are according to the loaded spaCy pipeline.

    Both strings are processed with the module-level ``nlp`` object, and the
    result for ``q1`` is compared against the result for ``q2`` through its
    ``similarity`` method.

    Args:
        q1: First text.
        q2: Second text.

    Returns:
        Whatever ``similarity`` returns for the two processed texts.
    """
    first_doc = nlp(q1)
    second_doc = nlp(q2)
    return first_doc.similarity(second_doc)

# Sound Match

In [30]:
def simple_sound_match(w1, w2):
    """Crude phonetic check: do the two strings start with the same letter?

    The comparison ignores letter case, since case does not change how a word
    sounds. An empty string has no first letter and therefore never matches
    (instead of raising ``IndexError``).

    Args:
        w1: First string.
        w2: Second string.

    Returns:
        ``1.0`` if both strings are non-empty and share the same first
        character (case-insensitively), otherwise ``0.0``.
    """
    if not w1 or not w2:
        return 0.0
    return 1.0 if w1[0].lower() == w2[0].lower() else 0.0

# Correcting the Phrase

In [31]:
def correct_phrase(user_input, top_k=3):
    """Rank the known phrases by how well they match a possibly misspelled input.

    For every entry in ``correct_phrases`` four signals are computed and
    blended into one score:

    * ``jaccard``  - character n-gram overlap (weight 0.40)
    * ``noise``    - ``1 / (1 + edit distance)`` (weight 0.40)
    * ``semantic`` - similarity from ``semantic_similarity`` (weight 0.15)
    * ``sound``    - first-letter match (weight 0.05)

    The input is stripped of surrounding whitespace, and the character-level
    signals (jaccard, noise, sound) are computed on lower-cased text so that
    capitalisation in either the input or the reference phrase does not count
    as a spelling difference. The semantic signal receives the stripped input
    and the phrase with their original casing.

    Args:
        user_input: The phrase typed by the user.
        top_k: Maximum number of suggestions to return.

    Returns:
        A list of at most ``top_k`` dicts, best match first, each with the
        keys ``phrase``, ``total``, ``jaccard``, ``noise``, ``semantic`` and
        ``sound``. An empty or whitespace-only input yields an empty list.
    """
    query = user_input.strip()
    if not query:
        # Nothing to compare against; avoids scoring (and indexing into) "".
        return []
    query_lower = query.lower()

    scored = []
    for phrase in correct_phrases:
        phrase_lower = phrase.lower()

        jaccard = jaccard_similarity(query_lower, phrase_lower)
        edit_dist = levenshtein_distance(query_lower, phrase_lower)
        # Map the unbounded distance onto (0, 1]; 1.0 means an exact match.
        noise_score = 1 / (1 + edit_dist)
        semantic = semantic_similarity(query, phrase)
        sound = simple_sound_match(query_lower, phrase_lower)

        total = 0.4 * jaccard + 0.4 * noise_score + 0.15 * semantic + 0.05 * sound

        scored.append({
            "phrase": phrase,
            "total": total,
            "jaccard": jaccard,
            "noise": noise_score,
            "semantic": semantic,
            "sound": sound
        })

    return sorted(scored, key=lambda entry: entry["total"], reverse=True)[:top_k]

# Execution

In [32]:
# Ask for a phrase and rank the known phrases against it.
user_input = input("Enter your phrase: ")
top_results = correct_phrase(user_input)

# Column titles, in the same order as the values of each row below.
headers = ["Suggested Phrase", "Final Score", "Jaccard (k-gram)", "Noise (edit)", "Context (semantic)", "Sound"]

# One pre-formatted row per suggestion: scores to 4 decimals, sound flag to 2.
table_data = [
    [
        result["phrase"],
        f"{result['total']:.4f}",
        f"{result['jaccard']:.4f}",
        f"{result['noise']:.4f}",
        f"{result['semantic']:.4f}",
        f"{result['sound']:.2f}",
    ]
    for result in top_results
]

print(f"Input: {user_input}")
print("\nTop Suggestions:\n")
print(tabulate(table_data, headers=headers, tablefmt="fancy_grid"))

Input: machin lernng

Top Suggestions:

╒════════════════════════╤═══════════════╤════════════════════╤════════════════╤══════════════════════╤═════════╕
│ Suggested Phrase       │   Final Score │   Jaccard (k-gram) │   Noise (edit) │   Context (semantic) │   Sound │
╞════════════════════════╪═══════════════╪════════════════════╪════════════════╪══════════════════════╪═════════╡
│ machine learning       │        0.2374 │             0.25   │         0.25   │              -0.0838 │       1 │
├────────────────────────┼───────────────┼────────────────────┼────────────────┼──────────────────────┼─────────┤
│ support vector machine │        0.0681 │             0.1481 │         0.0526 │              -0.0816 │       0 │
├────────────────────────┼───────────────┼────────────────────┼────────────────┼──────────────────────┼─────────┤
│ deep learning          │        0.0523 │             0.0476 │         0.1111 │              -0.0745 │       0 │
╘════════════════════════╧═══════════════╧══════