In [26]:
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

# Sample user input: one English sentence (a pangram) containing capitals,
# stopwords ("the", "over") and trailing punctuation.
# NOTE(review): presumably passed to preprocess_input in a later cell; it is
# not consumed anywhere in this cell.
user_input = "The quick brown fox jumps over the lazy dog."

# Preprocess the input by tokenizing and removing stopwords
def preprocess_input(text):
    """Tokenize ``text`` and keep only its meaningful words.

    The text is lower-cased, split with NLTK's ``word_tokenize``, and then
    filtered so that only purely alphabetic tokens that are not English
    stopwords remain.

    Args:
        text: The raw input string.

    Returns:
        A list of lower-case alphabetic tokens, in their original order,
        with English stopwords removed.

    Note:
        Requires the NLTK tokenizer models and the ``stopwords`` corpus to
        be available locally.
    """
    # Build the stopword set once per call. stopwords.words() returns a list,
    # so evaluating it inside the comprehension would repeat the lookup for
    # every token and make each membership test linear.
    english_stopwords = set(stopwords.words('english'))

    tokens = word_tokenize(text.lower())

    # token.isalpha() already rejects punctuation, digits and mixed tokens,
    # so no separate regex clean-up is needed: a token made only of letters
    # contains nothing that r'\W+' could match.
    return [
        token
        for token in tokens
        if token.isalpha() and token not in english_stopwords
    ]




In [2]:
pip install fuzzywuzzy

Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0
Note: you may need to restart the kernel to use updated packages.


In [5]:
# Assuming you have the Trie class defined like this (from previous steps):
class TrieNode:
    """One node of a character trie."""

    def __init__(self):
        # Outgoing edges: next character -> child TrieNode.
        self.children = dict()
        # Set to True when an inserted word ends exactly at this node.
        self.is_end_of_word = False

class Trie:
    """Character trie supporting word insertion and exact-word lookup."""

    def __init__(self):
        # The root represents the empty prefix.
        self.root = TrieNode()

    def insert(self, word):
        """Add ``word`` to the trie, creating any missing nodes on the way."""
        current = self.root
        for letter in word:
            child = current.children.get(letter)
            if child is None:
                child = current.children[letter] = TrieNode()
            current = child
        current.is_end_of_word = True

    def search(self, word):
        """Return the node where ``word`` ends, or ``None`` if it is absent.

        A path that exists only as a prefix of longer words (its final node
        is not flagged as a word end) also yields ``None``.
        """
        current = self.root
        for letter in word:
            current = current.children.get(letter)
            if current is None:
                return None
        if current.is_end_of_word:
            return current
        return None

# Build the module-level trie that the StringMatcher below is meant to wrap,
# and load it with a small fixed vocabulary.
trie = Trie()
sample_words = ["example", "test", "string", "matching", "run", "she", "he", "quickly"]
# Note: the loop variable `word` stays defined at notebook scope afterwards.
for word in sample_words:
    trie.insert(word)

# Now, we can define the StringMatcher
from fuzzywuzzy import fuzz

class StringMatcher:
    """Match a word against the vocabulary stored in a trie.

    An exact trie hit wins; otherwise every stored word is scored against the
    input and the best-scoring word above a threshold is returned.

    Args:
        trie: Object exposing ``search(word)`` and a ``root`` node whose
            nodes carry ``children`` (dict) and ``is_end_of_word`` (bool).
        threshold: A fuzzy candidate is accepted only when its score is
            strictly greater than this value. Defaults to 70.
        scorer: Optional callable ``scorer(input_word, candidate)`` returning
            a comparable score. When omitted, ``fuzz.ratio`` is used.
    """

    def __init__(self, trie, threshold=70, scorer=None):
        self.trie = trie
        self.threshold = threshold
        self.scorer = scorer

    def match_word(self, input_word):
        """Return the best match for ``input_word``, or ``None``.

        Returns ``input_word`` itself on an exact trie hit. Otherwise returns
        the stored word with the highest score above ``self.threshold``; on a
        tie the word that comes first in trie traversal order wins.
        """
        node = self.trie.search(input_word)
        if node:
            return input_word  # Exact match found

        # Resolve the scorer lazily so fuzz is only needed for the default.
        score = self.scorer if self.scorer is not None else fuzz.ratio

        # Single pass instead of collect-then-sort: the strict ">" keeps the
        # first word among equal scores, the same winner a stable descending
        # sort would put first.
        best_word = None
        best_score = self.threshold
        for word in self.get_all_words():
            similarity = score(input_word, word)
            if similarity > best_score:
                best_word, best_score = word, similarity
        return best_word

    def get_all_words(self):
        """Return every word stored in the trie, in pre-order traversal order."""
        words = []
        self.dfs(self.trie.root, "", words)
        return words

    def dfs(self, node, prefix, words):
        """Append to ``words`` every word reachable from ``node``.

        ``prefix`` is the string spelled by the path leading to ``node``.
        The traversal is pre-order with children visited in dict order. It
        uses an explicit stack, so very long words cannot exhaust Python's
        recursion limit.
        """
        stack = [(node, prefix)]
        while stack:
            current, current_prefix = stack.pop()
            if current.is_end_of_word:
                words.append(current_prefix)
            # Push children in reverse so they are popped in dict order.
            for char, child_node in reversed(list(current.children.items())):
                stack.append((child_node, current_prefix + char))


