# Building a search engine prototype with Trie and Hashing

We start by defining the Trie (for prefix-based search/autocomplete) and a SearchIndexer class that also maintains a Hash Map for quick keyword-to-URL lookups.

**🔷 Theory:**

A **Trie is a tree-like data structure that stores strings character by character.**

It's perfect for prefix-based queries, e.g., "pro" matches "program", "programming", etc.

A **Hash Map is used to quickly retrieve documents/URLs** associated with each keyword.



In [None]:
# --------- trie.py (inline) ----------------

class TrieNode:
    """One node of the prefix tree; each node stands for a single character."""

    def __init__(self):
        # References collected for the prefix that ends at this node.
        self.suggestions: set = set()
        # Flag marking that a complete inserted word terminates here.
        self.is_end_of_word: bool = False
        # Child nodes, keyed by the next character.
        self.children: dict = dict()

class Trie:
    """Case-insensitive prefix tree that maps prefixes to sets of references.

    Every node on the path of an inserted word records the word's reference,
    so a prefix lookup is a single walk down the tree with no subtree scan.
    """

    def __init__(self):
        self.root = TrieNode()

    def insert(self, word, reference):
        """Insert ``word`` and attach ``reference`` to each of its prefixes.

        Args:
            word: String to index; it is lower-cased before insertion.
            reference: Hashable value returned by ``search`` for any
                non-empty prefix of ``word``.
        """
        node = self.root
        for char in word.lower():
            child = node.children.get(char)
            if child is None:
                child = node.children[char] = TrieNode()
            node = child
            # Recorded after descending, so the root itself never holds
            # suggestions.
            node.suggestions.add(reference)
        node.is_end_of_word = True

    def search(self, prefix):
        """Return the references of all words starting with ``prefix``.

        Args:
            prefix: Prefix to look up; it is lower-cased before matching.

        Returns:
            A new set of references. It is empty when no inserted word
            starts with ``prefix`` and also for the empty prefix, because
            the root node never stores suggestions.
        """
        node = self.root
        for char in prefix.lower():
            node = node.children.get(char)
            if node is None:
                return set()
        # Return a copy: handing out the node's own set would let a caller
        # that mutates the result silently corrupt the index.
        return set(node.suggestions)

# --------- indexer.py (inline) ------------------

# This class acts as the search engine's brain: it stores all the keywords,
# maps them to URLs, and connects the Trie and the Hash Map.

class SearchIndexer:
    """Index that pairs a Trie (prefix lookup) with a dict (keyword -> URLs).

    Keywords are normalised to lower case on the way in and on lookup. The
    Trie already matches case-insensitively, so the dict must use the same
    normal form; otherwise "Python" and "python" would become two separate
    entries that a single prefix query could not both resolve.
    """

    def __init__(self):
        self.trie = Trie()
        # Lower-cased keyword -> list of URLs in insertion order.
        self.keyword_map = {}

    def add_document(self, keyword, url):
        """Register ``url`` under ``keyword``.

        Args:
            keyword: Keyword to index; stored in lower case.
            url: URL appended to the keyword's list. Duplicates are kept.
        """
        key = keyword.lower()
        self.trie.insert(key, key)  # use the normalised keyword as reference
        self.keyword_map.setdefault(key, []).append(url)

    def get_urls_by_keyword(self, keyword):
        """Return the URLs stored for ``keyword``, ignoring case.

        Returns:
            A new list of URLs, or an empty list when the keyword is unknown.
            Mutating the result does not affect the index.
        """
        return list(self.keyword_map.get(keyword.lower(), []))

    def autocomplete(self, prefix):
        """Return the keywords that start with ``prefix`` via the Trie."""
        return self.trie.search(prefix)

# --------- search.py (inline) ----------

# When a user types a prefix query, the search engine should return:
# - Matching keywords (from the Trie)
# - Corresponding URLs (from the Hash Map)

def search_engine(indexer, query):
    """Resolve a prefix query to matching keywords and their URLs.

    Args:
        indexer: Object providing ``autocomplete(prefix)`` and
            ``get_urls_by_keyword(keyword)``.
        query: Prefix typed by the user.

    Returns:
        A list of ``(keyword, urls)`` tuples ordered by keyword, or the
        string ``"No results for '<query>'"`` when nothing matches.
    """
    suggestions = indexer.autocomplete(query)
    if not suggestions:
        return f"No results for '{query}'"

    # Suggestions arrive as a set, whose iteration order for strings varies
    # between interpreter runs; sort so the same query lists results in the
    # same order every time.
    return [
        (keyword, indexer.get_urls_by_keyword(keyword))
        for keyword in sorted(suggestions)
    ]


In [None]:
# Simulating sample_data.txt: one "<keyword> <url>" pair per line.
sample_data = """
python https://python.org
python https://docs.python.org
programming https://geeksforgeeks.org
program https://example.com/programming
java https://oracle.com/java
javascript https://developer.mozilla.org/javascript
"""

# Create indexer and load data
indexer = SearchIndexer()
for line in sample_data.splitlines():
    # Skip blank lines (including the ones framing the triple-quoted block)
    # so an empty line in the data does not abort loading.
    if not line.strip():
        continue
    # A non-blank line must hold exactly two whitespace-separated fields;
    # anything else still raises ValueError on unpacking.
    keyword, url = line.split()
    indexer.add_document(keyword, url)


In [None]:
# Simple interactive search: prompt until the user types 'exit'.
while True:
    try:
        query = input("\nEnter search query (or 'exit'): ").strip()
    except (EOFError, KeyboardInterrupt):
        # input() raises EOFError when stdin is closed and KeyboardInterrupt
        # when the user interrupts; end the session instead of crashing.
        break
    if query.lower() == 'exit':
        break
    results = search_engine(indexer, query)
    print("Results:")
    # search_engine returns a plain message string when nothing matched,
    # otherwise a list of (keyword, urls) pairs.
    if isinstance(results, str):
        print(results)
    else:
        for keyword, urls in results:
            print(f"🔑 {keyword}:")
            for url in urls:
                print(f"    - {url}")



Enter search query (or 'exit'): Python
Results:
🔑 python:
    - https://python.org
    - https://docs.python.org

Enter search query (or 'exit'): exit


**Search Box UI**

In [None]:
import ipywidgets as widgets
from IPython.display import display

# Text input in which the user types the query; on_submit reads its value.
search_box = widgets.Text(placeholder='Enter your query...')
# Output widget that on_submit clears and prints the results into.
output = widgets.Output()

def on_submit(sender):
    """Run the search for the text in ``search_box`` and show it in ``output``.

    Args:
        sender: Value supplied by the widget callback machinery; not used,
            the query is read from ``search_box`` directly.
    """
    typed_text = search_box.value
    output.clear_output()
    with output:
        outcome = search_engine(indexer, typed_text)
        print("Results:")
        # A string outcome is the "no results" message; print it and stop.
        if isinstance(outcome, str):
            print(outcome)
            return
        for term, links in outcome:
            print(f"🔑 {term}:")
            for link in links:
                print(f"    - {link}")

# Text.on_submit is deprecated in ipywidgets 8. The documented replacement is
# to turn off continuous updates and observe the committed value, which is
# sent when the user presses Enter or the box loses focus.
# on_submit ignores its argument, so it serves unchanged as the observer.
search_box.continuous_update = False
search_box.observe(on_submit, names='value')
display(search_box, output)


Text(value='', placeholder='Enter your query...')

Output()