In [0]:
%pip install langchain
%pip install databricks-vectorsearch
%pip install --upgrade langgraph langchain_core databricks-langchain mlflow-skinny[databricks]
%restart_python

In [0]:
from databricks.vector_search.client import VectorSearchClient
from langchain.chat_models import ChatDatabricks
from langchain.schema import HumanMessage


2. Connect to your Vector Search index

In [0]:
# Create the Vector Search client with default arguments (no workspace URL or
# token is passed explicitly here).
vsc = VectorSearchClient()

# Handle to the existing index; the retrieval step later calls
# `index.similarity_search(...)` on this object.
index = vsc.get_index(
    index_name="genai.default.vec_index",  # fully qualified index name
    endpoint_name="genai_vec_search_shankho"
)


3. Define a function to embed the question (using the same embedding model that was used for indexing)

In [0]:
from mlflow.deployments import get_deploy_client

# MLflow deployments client for the "databricks" target; used by
# get_question_embedding() to call the embedding endpoint.
embedding_client = get_deploy_client("databricks")
# Name of the serving endpoint queried for embeddings.
# NOTE(review): the section heading says this must be the same model that was
# used when the index was built — confirm against the indexing job.
embedding_endpoint = "databricks-bge-large-en"

def get_question_embedding(question: str):
    """Embed a single question with the configured embedding endpoint.

    The question is sent as a one-element batch to `embedding_endpoint` through
    `embedding_client`.

    Args:
        question: Text to embed.

    Returns:
        The `embedding` field of the first entry in the response's `data` list.
    """
    payload = {"input": [question]}
    prediction = embedding_client.predict(endpoint=embedding_endpoint, inputs=payload)
    first_entry = prediction["data"][0]
    return first_entry["embedding"]


4. Perform vector search

In [0]:
def _extract_column_values(response, column: str) -> list:
    """Collect the non-null values of `column` from a similarity-search response."""
    if not response:
        return []

    # Documented response shape: column names live under manifest.columns and
    # each hit is a positional list under result.data_array.
    manifest_columns = (response.get("manifest") or {}).get("columns") or []
    column_names = [col.get("name") for col in manifest_columns]
    if column in column_names:
        position = column_names.index(column)
        rows = (response.get("result") or {}).get("data_array") or []
        return [
            row[position]
            for row in rows
            if len(row) > position and row[position] is not None
        ]

    # Fallback: dict-per-row shape under "results", as the previous
    # implementation expected.
    legacy_rows = response.get("results") or []
    return [row[column] for row in legacy_rows if isinstance(row, dict) and column in row]


def retrieve_relevant_chunks(question: str, k: int = 4):
    """Return the text of the `k` index entries most similar to `question`.

    The question is embedded with `get_question_embedding`, the module-level
    `index` is queried for the nearest entries, and the `content` column of
    each hit is returned in the order the index returned them.

    Args:
        question: Natural-language query to search for.
        k: Maximum number of results to request from the index.

    Returns:
        A list of `content` values; empty when the search yields no rows.
    """
    # 1. Embed the question
    query_vector = get_question_embedding(question)

    # 2. Perform vector similarity search
    response = index.similarity_search(
        query_vector=query_vector,
        columns=["chunk_id", "content", "source"],
        num_results=k,
    )

    # 3. Rows come back as positional lists, so the "content" position is
    #    resolved from the manifest rather than by dict key.
    return _extract_column_values(response, "content")


5. Call the LLM with retrieved context

In [0]:
# Chat model handle used by generate_answer(); the endpoint name selects which
# served model receives the prompt.
llm = ChatDatabricks(endpoint="databricks-llama-4-maverick")  # Or any other model you use

def generate_answer(question: str):
    """Answer `question` using chunks retrieved from the vector index as context.

    Retrieves the relevant chunks, joins them with blank lines into one context
    string, embeds that and the question into the prompt, and sends it to the
    module-level `llm` as a single human message.

    Args:
        question: The user's question.

    Returns:
        The `content` attribute of the model's reply.
    """
    context_chunks = retrieve_relevant_chunks(question)
    context = "\n\n".join(context_chunks)

    prompt = f"""You are a helpful assistant. Use the following context to answer the question.
    
                    Context:
                    {context}

                    Question: {question}
                    Answer:"""

    # Use the Runnable `invoke` API; calling the chat model object directly
    # (`llm([...])`) is deprecated and not available in current langchain_core.
    response = llm.invoke([HumanMessage(content=prompt)])
    return response.content


Example Usage:

In [0]:
# End-to-end run: retrieve context for the question, ask the LLM, print its reply.
# NOTE(review): the question text appears to contain typos ("tanswer", "Queston");
# it is sent to the embedding model and the LLM as-is — confirm the intended wording.
question = "What is tanswer for Queston The course will equip you to provide care while waiting for?"
answer = generate_answer(question)
print(answer)