In [3]:
import pandas as pd
import timeit
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-MiniLM-L6-v2')

In [3]:
class Node:
    def __init__(self):
        self.children = {}
        self.end_word = False

class Trie:
    def __init__(self):
        self.root = Node()

    def insert(self, word):
        node = self.root
        for char in word:
            if char not in node.children:
                node.children[char] = Node()
            node = node.children[char]
        node.end_word = True

    def search(self, word):
        node = self.root
        for char in word:
            if char not in node.children:
                return False
            node = node.children[char]
        return node.end_word

    def autocomplete(self, prefix):
        words = []
        node = self.root
        for char in prefix:
            if char not in node.children:
                return words
            node = node.children[char]

        def dfs(current_node, path):
            if current_node.end_word:
                words.append("".join(path))
            for c, child in current_node.children.items():
                dfs(child, path + [c])

        dfs(node, list(prefix))
        return words

In [5]:

data = pd.read_csv(r"C:\Users\Joseph Dania\Desktop\Ai_search_engine\raw_code.csv")
function_calls = data["Function Calls"].dropna().astype(str)

unique_functions = set() 
for funcs in function_calls:
    for f in funcs.split(","):
        f = f.strip()
        if f:
            unique_functions.add(f)
            
words = list(unique_functions)
print(f"Loaded {len(words)} unique functions.")

trie = Trie()
for word in words:
    trie.insert(word)
print("Trie populated successfully.")

Loaded 3279 unique functions.
Trie populated successfully.


In [6]:

print("Generating embeddings")
embeddings = model.encode(words)

dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.array(embeddings))

print(f"FAISS Index built with {index.ntotal} vectors.")

def search_ai(query, top_k=5):
    query_vector = model.encode([query])

    distances, indices = index.search(query_vector, top_k)

    results = []
    for idx in indices[0]:
        results.append(words[idx])
        
    return results

Generating embeddings
FAISS Index built with 3279 vectors.


In [7]:


query = "calculate"


trie_results = trie.autocomplete(query)
print(f"Trie Results (Exact Prefix): {trie_results}")

ai_results = search_ai(query)
print(f"AI Results (Semantic): {ai_results}")

Trie Results (Exact Prefix): ['calculate_current_element', 'calculate_hypothesis_value', 'calculate_variance', 'calculate_each_score', 'calculate_fft', 'calculate_signal_power', 'calculate_probabilities', 'calculate_prime_numbers', 'calculate_pi', 'calculate_mean']
AI Results (Semantic): ['_calculate', 'compute', 'Formula', 'multiply', 'calculate_pi']


In [8]:
search_term = "get"

print("Timing Trie Autocomplete:")
%timeit trie.autocomplete(search_term)

print("Timing AI Search:")
%timeit search_ai(search_term)

Timing Trie Autocomplete:
667 μs ± 108 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
Timing AI Search:
28.9 ms ± 12 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [9]:

query = "find someone"
trie_results = trie.autocomplete(query)
print(f"Trie Results (Exact Prefix): {trie_results}")

ai_results = search_ai(query)
print(f"AI Results (Semantic): {ai_results}")

Trie Results (Exact Prefix): []
AI Results (Semantic): ['self.find', 'job.find', 'search', 'self.right.search', 'self.search']
