In [3]:
import numpy as np

# Knowledge Base (Documents)
docs = [
    "Python is a programming language used for web development, data analysis, and AI.",
    "Machine learning is a subset of AI that allows computers to learn from data.",
    "Pandas is a Python library for data manipulation and analysis.",
    "NumPy is used for numerical computations in Python."
]

# Characters trimmed from both ends of every whitespace-separated token, so
# that "Python", "python." and "Python?" all map to the same vocabulary word.
_PUNCTUATION = ".,;:!?\"'()[]{}"


def _tokenize(text):
    """Return the lowercase word tokens of *text* without edge punctuation.

    The text is lowercased and split on whitespace; each piece then has any
    leading/trailing characters from ``_PUNCTUATION`` removed. Pieces that
    consist only of punctuation become empty and are dropped.
    """
    trimmed = (piece.strip(_PUNCTUATION) for piece in text.lower().split())
    return [token for token in trimmed if token]


# Create Vocabulary
# Sorted so the word -> column mapping is the same on every run; iterating a
# plain set of strings can yield a different order between interpreter runs.
vocab = sorted({token for doc in docs for token in _tokenize(doc)})
vocab_index = {word: i for i, word in enumerate(vocab)}


# Convert Docs to Vectors
def doc_to_vector(doc):
    """Return the bag-of-words count vector of *doc* over ``vocab``.

    Args:
        doc: Text to vectorize. It is tokenized with the same ``_tokenize``
            helper used to build the vocabulary, so casing and surrounding
            punctuation do not prevent a match.

    Returns:
        A 1-D NumPy array of length ``len(vocab)`` whose entry at
        ``vocab_index[word]`` is the number of times ``word`` occurs in
        *doc*. Tokens that are not in the vocabulary are ignored.
    """
    vec = np.zeros(len(vocab))
    for token in _tokenize(doc):
        column = vocab_index.get(token)
        if column is not None:
            vec[column] += 1
    return vec

# Document-term matrix: one row per entry of `docs`, in the same order.
X = np.array(list(map(doc_to_vector, docs)))

# User Question
question = "Which library is used for numerical computation in Python?"
# The query is vectorized with the same function as the documents so both
# live in the same vocabulary space.
q_vec = doc_to_vector(question)

# Compute Cosine Similarity
def cosine_sim(a, b):
    """Return the cosine similarity of vectors *a* and *b*.

    The dot product is divided by the product of the two Euclidean norms.
    A small constant (1e-10) is added to that product so the denominator is
    never zero; an all-zero vector therefore yields 0.0 instead of a
    division-by-zero result.
    """
    numerator = np.dot(a, b)
    denominator = np.linalg.norm(a) * np.linalg.norm(b) + 1e-10
    return numerator / denominator

# Score every document row against the query vector.
similarities = list(map(lambda doc_vec: cosine_sim(q_vec, doc_vec), X))

# Retrieve top document
top_idx = np.argmax(similarities)  # first index of the highest score
top_doc = docs[top_idx]

# Conditional Answering (simple rules)
# Ordered (keyword, canned answer) pairs; the first keyword found in the
# lowercased top document decides the answer.
_ANSWER_RULES = (
    ("pandas", "You should use Pandas for data analysis in Python."),
    ("numpy", "You can use NumPy for numerical computations in Python."),
    ("machine learning", "Machine learning involves algorithms to learn from data."),
)
_top_doc_lower = top_doc.lower()
answer = next(
    (reply for keyword, reply in _ANSWER_RULES if keyword in _top_doc_lower),
    top_doc,  # fallback: echo the retrieved document itself
)

# Display
for label, value in (
    ("User Question:", question),
    ("Top Relevant Document:", top_doc),
    ("Answer:", answer),
):
    print(label, value)


User Question: Which library is used for numerical computation in Python?
Top Relevant Document: NumPy is used for numerical computations in Python.
Answer: You can use NumPy for numerical computations in Python.
