In [6]:
import re

# Sample text used as the reference corpus: build_vocabulary() below turns
# its distinct lowercase words into the set of known-good spellings.
corpus = """
Artificial intelligence (AI) is intelligence demonstrated by machines,
as opposed to the natural intelligence displayed by animals including humans.
Leading AI textbooks define the field as the study of intelligent agents,
which refers to any system that perceives its environment and takes actions
that maximize its chance of successfully achieving its goals.
"""

def build_vocabulary(text):
    """Return the set of distinct lowercase word tokens found in ``text``.

    The text is lowercased first, then every maximal run of ``\\w``
    characters (letters, digits, underscore) is taken as one token, so
    punctuation and whitespace act purely as separators.

    Args:
        text: The raw string to tokenize.

    Returns:
        A ``set`` of unique lowercase tokens (empty if no tokens are found).
    """
    return set(re.findall(r'\b\w+\b', text.lower()))

# Derive the known-word set from the sample corpus.
vocabulary = build_vocabulary(corpus)

# Show the raw set and its size; a set has no defined order, so the printed
# word order is not meaningful.
print(vocabulary)
print(f"\nVocabulary size: {len(vocabulary)} words")

{'define', 'study', 'agents', 'any', 'machines', 'environment', 'including', 'textbooks', 'intelligent', 'demonstrated', 'perceives', 'artificial', 'chance', 'that', 'actions', 'by', 'its', 'animals', 'opposed', 'displayed', 'natural', 'takes', 'ai', 'goals', 'is', 'maximize', 'achieving', 'the', 'of', 'humans', 'refers', 'field', 'successfully', 'to', 'leading', 'system', 'and', 'which', 'as', 'intelligence'}

Vocabulary size: 40 words


In [7]:
def check_spelling(sentence, vocabulary):
    """Report, word by word, whether each word of a sentence is in the vocabulary.

    The sentence is lowercased and split into ``\\w+`` tokens. For every
    token a one-line verdict is printed to stdout; unknown tokens are
    wrapped in square brackets in the returned string.

    Args:
        sentence: The text to check.
        vocabulary: Container of known lowercase words (membership is
            tested with ``in``).

    Returns:
        The tokens joined by single spaces, with each unknown token
        rendered as ``[token]``. Original casing and punctuation are not
        preserved.
    """
    annotated = []

    for token in re.findall(r'\b\w+\b', sentence.lower()):
        # Handle the error case first, then fall through to the normal case.
        if token not in vocabulary:
            print(f"'{token}' is a spelling error!")
            annotated.append(f"[{token}]")
            continue

        print(f"'{token}' is spelled correctly.")
        annotated.append(token)

    return " ".join(annotated)

# Demo input containing three deliberately misspelled words.
test_sentence = "Artificil intellijence is demonstrated by macheens"
# check_spelling prints a verdict per word and returns the bracket-annotated sentence.
result = check_spelling(test_sentence, vocabulary)

print("\nOriginal Sentence:", test_sentence)
print("Checked Sentence:", result)

'artificil' is a spelling error!
'intellijence' is a spelling error!
'is' is spelled correctly.
'demonstrated' is spelled correctly.
'by' is spelled correctly.
'macheens' is a spelling error!

Original Sentence: Artificil intellijence is demonstrated by macheens
Checked Sentence: [artificil] [intellijence] is demonstrated by [macheens]


In [8]:
import re

# NOTE(review): this corpus is a verbatim repeat of the one assigned in the
# earlier cell; re-running this cell rebinds `corpus` to the same text.
corpus = """
Artificial intelligence (AI) is intelligence demonstrated by machines,
as opposed to the natural intelligence displayed by animals including humans.
Leading AI textbooks define the field as the study of intelligent agents,
which refers to any system that perceives its environment and takes actions
that maximize its chance of successfully achieving its goals.
"""

# NOTE(review): this redefines build_vocabulary with the same behavior as the
# definition in the earlier cell; the later definition shadows the earlier one.
def build_vocabulary(text):
    """Collect the unique lowercase ``\\w+`` tokens of ``text`` into a set.

    Args:
        text: The raw string to tokenize.

    Returns:
        A ``set`` of distinct lowercase tokens; empty when ``text`` has none.
    """
    lowered = text.lower()
    return set(re.findall(r'\b\w+\b', lowered))

# Rebuild the known-word set from the corpus defined in this cell.
vocabulary = build_vocabulary(corpus)

def levenshtein_distance(s1, s2):
    """Compute the Levenshtein edit distance between two sequences.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn one sequence into the other.
    Only two rows of the dynamic-programming table are kept at a time.

    Args:
        s1: First sequence (typically a string).
        s2: Second sequence (typically a string).

    Returns:
        The edit distance as a non-negative integer; the result is
        symmetric in its arguments.
    """
    # Put the longer sequence on the outer loop so rows are sized by the
    # shorter one.
    if len(s1) >= len(s2):
        longer, shorter = s1, s2
    else:
        longer, shorter = s2, s1

    # Turning anything into the empty sequence costs one deletion per element.
    if len(shorter) == 0:
        return len(longer)

    # Row 0: distance from the empty prefix to each prefix of `shorter`.
    prev = list(range(len(shorter) + 1))
    for row, a in enumerate(longer, start=1):
        cur = [row]  # column 0: distance from a prefix of `longer` to ""
        for col, b in enumerate(shorter, start=1):
            via_insert = prev[col] + 1
            via_delete = cur[col - 1] + 1
            via_substitute = prev[col - 1] + (a != b)  # free when elements match
            cur.append(min(via_insert, via_delete, via_substitute))
        prev = cur

    return prev[-1]

def spelling_corrector(sentence, vocabulary, max_distance=None):
    """Replace out-of-vocabulary words with their closest vocabulary word.

    The sentence is lowercased and split into ``\\w+`` tokens. Tokens found
    in ``vocabulary`` are kept; every other token is replaced by the
    vocabulary word with the smallest Levenshtein distance to it.

    Ties between equally distant candidates are broken alphabetically, so
    the result is reproducible. (Picking the first candidate encountered
    while iterating a ``set`` would depend on string hash randomization and
    could change between interpreter/kernel restarts.)

    Args:
        sentence: The text to correct.
        vocabulary: Iterable container of known lowercase words.
        max_distance: Optional upper bound on the edit distance of an
            accepted correction. When the closest word is farther away than
            this, the token is left unchanged. ``None`` (the default)
            always accepts the closest word.

    Returns:
        The corrected tokens joined by single spaces. Original casing and
        punctuation are not preserved. If ``vocabulary`` is empty, unknown
        tokens are returned unchanged.
    """
    corrected_sentence = []
    words = re.findall(r'\b\w+\b', sentence.lower())

    for word in words:
        if word in vocabulary:
            corrected_sentence.append(word)
            continue

        # Compare (distance, candidate) pairs: the smallest distance wins and
        # the candidate's spelling is the deterministic tie-breaker.
        # `default=None` covers an empty vocabulary.
        best = min(
            (
                (levenshtein_distance(word, vocab_word), vocab_word)
                for vocab_word in vocabulary
            ),
            default=None,
        )

        if best is None:
            corrected_sentence.append(word)
            continue

        distance, best_correction = best
        if max_distance is not None and distance > max_distance:
            # Nothing in the vocabulary is plausibly the intended word.
            corrected_sentence.append(word)
        else:
            corrected_sentence.append(best_correction)

    return " ".join(corrected_sentence)

# Same misspelled demo sentence as in the earlier check_spelling cell.
test_sentence = "Artificil intellijence is demonstrated by macheens"
# Each unknown word is replaced by its nearest vocabulary word.
result = spelling_corrector(test_sentence, vocabulary)

print("Original Sentence:", test_sentence)
print("Corrected Sentence:", result)

Original Sentence: Artificil intellijence is demonstrated by macheens
Corrected Sentence: artificial intelligence is demonstrated by machines
