In [None]:
# Cell 1: Title & Introduction (Markdown)
"""
# Task 3: Dynamic Chatbot with Expanding Knowledge Base

This notebook demonstrates how to build a chatbot that dynamically expands its knowledge base using:

- **ChromaDB** for vector storage and retrieval,
- **Sentence Transformers** for text embeddings,
- The **Mistral** model, served through **Ollama**, for generating conversational responses.

---

The chatbot retrieves relevant knowledge from the vector database to provide context-aware responses.
"""

: 

In [None]:
import chromadb
from sentence_transformers import SentenceTransformer
import ollama

In [None]:
# Open the on-disk Chroma store located under ./data/chromadb/.
chroma_client = chromadb.PersistentClient(path="./data/chromadb/")

# Fetch the collection if it already exists, otherwise create it.
# get_or_create_collection avoids catching a "collection not found" exception,
# whose class has changed between chromadb releases.
collection = chroma_client.get_or_create_collection(name="chatbot_knowledge")

# Load the embedding model
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

print("Database and embedding model ready!")

In [None]:
def add_knowledge(text, source):
    """
    Add (or replace) a knowledge entry in the vector database.

    The text is embedded with the module-level ``embedding_model`` and written to
    the module-level ``collection``. The raw text is kept in the entry's metadata
    under the ``"text"`` key, alongside its ``"source"``.

    The write is an upsert: calling this again with the same ``source`` replaces
    the stored entry, so the seeding cell can be re-run against the persistent
    store without duplicate-ID problems.

    :param text: The knowledge text to store.
    :param source: A unique ID or source identifier; used as the entry ID.
    """
    vector = embedding_model.encode(text).tolist()
    collection.upsert(
        ids=[source],
        embeddings=[vector],
        metadatas=[{"text": text, "source": source}],
    )
    print(f"Added knowledge from source '{source}'.")

def retrieve_knowledge(query, top_k=3):
    """
    Retrieve the most relevant knowledge entries for a query.

    The query is embedded with the module-level ``embedding_model`` and matched
    against the module-level ``collection``. Entries written by ``add_knowledge``
    keep their text in metadata under the ``"text"`` key, so that is read first;
    a plain document string is used as a fallback for entries that have one.

    :param query: The user query string.
    :param top_k: Number of top results to retrieve.
    :return: List of knowledge texts, or ``["No relevant knowledge found."]``
             when no match carries any text.
    """
    query_vector = embedding_model.encode(query).tolist()
    results = collection.query(query_embeddings=[query_vector], n_results=top_k) or {}

    # Query results are nested one list per query embedding; only one was sent.
    metadata_rows = (results.get("metadatas") or [[]])[0] or []
    document_rows = (results.get("documents") or [[]])[0] or []

    texts = []
    for position in range(max(len(metadata_rows), len(document_rows))):
        metadata = metadata_rows[position] if position < len(metadata_rows) else None
        document = document_rows[position] if position < len(document_rows) else None
        # Prefer the text stored in metadata; fall back to the document string.
        text = (metadata or {}).get("text") or document
        if text:
            texts.append(text)

    if texts:
        return texts
    return ["No relevant knowledge found."]

In [None]:
# Starter facts for the knowledge base, listed as (text, source ID) pairs.
seed_entries = [
    (
        "Artificial Intelligence (AI) is intelligence demonstrated by machines, as opposed to natural intelligence displayed by humans and animals.",
        "knowledge_1",
    ),
    (
        "Machine learning is a subset of AI focused on building systems that learn from data to improve performance on tasks without explicit programming.",
        "knowledge_2",
    ),
    (
        "ChromaDB is an open-source vector database designed to store and query high-dimensional embeddings efficiently.",
        "knowledge_3",
    ),
]

# Store each fact in order.
for entry_text, entry_id in seed_entries:
    add_knowledge(entry_text, entry_id)

In [None]:
# Quick retrieval check: look up a sample question and list what comes back.
query = "What is AI?"
retrieved_docs = retrieve_knowledge(query)

# Build the whole report first, then emit it in one print call.
report_lines = [f"Query: {query}\nRetrieved Knowledge:"]
report_lines.extend(
    f"{idx}. {doc}" for idx, doc in enumerate(retrieved_docs, start=1)
)
print("\n".join(report_lines))

In [None]:
def chat_with_bot(user_input):
    """
    Produce a chatbot reply for a user query.

    Knowledge texts returned by ``retrieve_knowledge`` are joined with spaces into
    a context string, which is placed in a single user message together with the
    query and sent to the "mistral" model through ``ollama.chat``.

    :param user_input: User query string.
    :return: The content of the message in the model's response.
    """
    snippets = retrieve_knowledge(user_input)
    context = " ".join(snippets)

    messages = [
        {
            "role": "user",
            "content": f"User: {user_input}\nContext: {context}\nChatbot:",
        }
    ]

    reply = ollama.chat(model="mistral", messages=messages)
    return reply["message"]["content"]

In [None]:
# End-to-end check: send one sample question through the chatbot.
test_input = "Explain artificial intelligence."
print(f"User: {test_input}")

# The question is echoed before the model call, so it shows even if the call is slow.
bot_reply = chat_with_bot(test_input)
print("Bot:", bot_reply)