# In [None]:

import os
from dotenv import load_dotenv
from llama_index.core import VectorStoreIndex, ServiceContext
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.llms import OpenAI
from llama_index.llms.huggingface import HuggingFaceLLM
import chromadb
from IPython.display import Markdown, display

# ✅ Load environment variables
# python-dotenv's load_dotenv() reads key/value pairs from a `.env` file (if
# one is found) into os.environ, so the OPENAI_API_KEY lookup below can see it.
load_dotenv()

# ✅ Check API keys (OpenAI only needed if using OpenAI)
# Any key found by load_dotenv() is already in os.environ, so nothing has to be
# copied. Do NOT write `os.environ[...] = os.getenv(...)`: when the key is
# missing getenv() returns None and os.environ rejects non-str values with a
# TypeError, which would abort the notebook even for HuggingFace-only runs.
if not os.getenv("OPENAI_API_KEY"):
    print("⚠️ OPENAI_API_KEY is not set; OpenAI-backed models will fail.")

# ✅ Connect to existing ChromaDB
# NOTE: the path is relative to the current working directory of the kernel,
# not to this file.
chroma_client = chromadb.PersistentClient(path="../data/vector_db")
chroma_collection = chroma_client.get_or_create_collection("biodiversity_docs")

# get_or_create_collection() silently creates a new, empty collection when the
# name is not found (e.g. wrong path or ingestion never ran). Surface that here
# instead of letting every query come back empty later on.
if chroma_collection.count() == 0:
    print(
        "⚠️ Collection 'biodiversity_docs' is empty — check the vector_db path "
        "and that documents have been ingested."
    )

# Wrap the Chroma collection so LlamaIndex can use it as a vector store.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# ✅ Choose LLM backend (OpenAI or HuggingFace)
USE_OPENAI = True  # Change to False if you want to try HuggingFace
HF_MODEL_NAME = "tiiuae/falcon-7b-instruct"

# NOTE(review): with llama-index >= 0.10 the OpenAI class is usually imported
# from `llama_index.llms.openai` — confirm the import at the top of the file.
if USE_OPENAI:
    llm = OpenAI(model="gpt-3.5-turbo", temperature=0)
else:
    # Pass the tokenizer explicitly: HuggingFaceLLM's `tokenizer_name` has its
    # own default checkpoint and does not follow `model_name`, so omitting it
    # pairs the Falcon weights with a different model's tokenizer.
    llm = HuggingFaceLLM(
        model_name=HF_MODEL_NAME,
        tokenizer_name=HF_MODEL_NAME,
        context_window=2048,
    )

# ✅ Build service context with selected LLM
# Only the LLM is overridden here; every other component keeps the library
# default.
# NOTE(review): the embedding model must match the one used to populate the
# Chroma collection — confirm the default is the right one.
service_context = ServiceContext.from_defaults(llm=llm)

# ✅ Build the index from existing Chroma store
# No documents are (re)ingested here: the index is a view over the vectors
# already stored in `vector_store`.
index = VectorStoreIndex.from_vector_store(
    vector_store, service_context=service_context
)

# ✅ Set up retriever and query engine
# Retrieve the 3 most similar chunks for each query.
retriever = VectorIndexRetriever(index=index, similarity_top_k=3)

# Pass the service context explicitly: from_args() builds its own response
# synthesizer and, when given no LLM/service context, falls back to the global
# default LLM — which would ignore the backend selected via USE_OPENAI.
query_engine = RetrieverQueryEngine.from_args(
    retriever=retriever, service_context=service_context
)

# 🔍 Ask a question
query = "What are key biodiversity risks investors should monitor?"
response = query_engine.query(query)

# 📘 Pretty display
print("\n✨ Query Completed")
answer_markdown = f"### 📘 Answer:\n{response.response}"
display(Markdown(answer_markdown))

# List each retrieved source node: its file name (or "Unknown" when the
# metadata has no "file_name") plus a single-line preview built from the
# first 300 characters of the node text.
print("\n📎 Sources:")
for rank, hit in enumerate(response.source_nodes, start=1):
    source_name = hit.metadata.get("file_name", "Unknown")
    head = hit.node.text[:300]
    snippet = head.strip().replace("\n", " ")
    display(Markdown(f"**{rank}. {source_name}**\n> {snippet}..."))
