# 1. Configurar Ambiente

# 1.1 Variables

In [0]:
# Notebook parameters, read from the Databricks widgets of this notebook/job.
_get_widget = dbutils.widgets.get

# Unity Catalog location used for the index, the volume and the registered model.
catalog = _get_widget("catalog")
schema = _get_widget("schema")

# Identifier of the language model handed to dspy.LM.
llm = _get_widget("llm")

# Vector Search index name (without catalog/schema prefix) and its endpoint.
vs_index = _get_widget("index")
vs_endpoint = _get_widget("vs_endpoint")

# Volume that holds the MLflow artifacts, experiment name and registered model name.
volume = _get_widget("volume")
exp_name = _get_widget("exp_name")
model_name = _get_widget("model_name")

# 1.2 DSPy

In [0]:
import dspy

# Build the DSPy language-model client from the `llm` widget value and
# register it as the LM that DSPy uses.
lm = dspy.LM(llm)
dspy.configure(lm=lm)

# 1.3 MLflow

In [0]:
import mlflow  # Storing artifacts in a volume requires MLflow 2.15.0 or above

# Unity Catalog volume path where the experiment stores its artifacts
# (can be a managed or external volume).
ARTIFACT_PATH = f"dbfs:/Volumes/{catalog}/{schema}/{volume}/DSPy RAG Artifacts/"

# Track runs in the Databricks workspace and register models in Unity Catalog.
mlflow.set_tracking_uri("databricks")
mlflow.set_registry_uri("databricks-uc")

# Trace DSPy calls, including compile and evaluation runs.
mlflow.dspy.autolog(log_compiles=True, log_evals=True)

# Create the experiment explicitly the first time so that its artifact location
# points at the volume. This must happen before any set_experiment() call for
# the same name: set_experiment() creates a missing experiment without the
# custom artifact location, and the location cannot be changed afterwards.
if mlflow.get_experiment_by_name(exp_name) is None:
    mlflow.create_experiment(name=exp_name, artifact_location=ARTIFACT_PATH)
mlflow.set_experiment(exp_name)

# 2. Configurar RAG

# 2.1 Definir Signature

In [0]:
# DSPy signature for the recommendation step: two input fields (question,
# context) and one output field (recommendation).
# NOTE(review): DSPy is expected to use the class docstring and the `desc`
# strings below when it builds the prompt, so treat them as runtime text and
# confirm the effect on model output before rewording them.
class RAG_SIGNATURE(dspy.Signature):
  """Receive user's request, search for context in index, and recommend a cooking recipe."""
  # The user's request.
  question: str = dspy.InputField(desc="Customer question on cooking recipe.")
  # Retrieved material the answer should be based on.
  context: str = dspy.InputField(desc="Vector Search Index of cooking recipes.")
  # The generated answer.
  recommendation: str = dspy.OutputField(desc="Recommended cooking recipe.")

# 2.2 Definir Lógica de RAG

In [0]:
class Recommendation(dspy.Module):
    """DSPy module that retrieves recipes from Vector Search and recommends one.

    The module queries a Databricks Vector Search index for the three most
    similar recipes and passes the raw search response, together with the
    question, to a ``dspy.Predict`` built on ``RAG_SIGNATURE``.
    """

    def __init__(self, index: str = None, endpoint: str = None, for_mosaic_agent: bool = True, lm: str = "databricks/databricks-llama-4-maverick"):
        """Configure the LM, the predictor and the Vector Search connection.

        Args:
            index: Fully qualified index name. When ``None`` it is built from
                the notebook globals ``catalog``, ``schema`` and ``vs_index``.
            endpoint: Vector Search endpoint name. When ``None`` the notebook
                global ``vs_endpoint`` is used.
            for_mosaic_agent: Currently unused; kept so existing callers that
                pass it keep working.
            lm: Model identifier passed to ``dspy.LM``. Note that this
                reconfigures DSPy globally via ``dspy.configure``.
        """
        super().__init__()
        dspy.configure(lm=dspy.LM(lm))
        self.response_generator = dspy.Predict(RAG_SIGNATURE)
        self.vs_index = index if index is not None else f"{catalog}.{schema}.{vs_index}"
        self.vs_endpoint = endpoint if endpoint is not None else vs_endpoint
        # Creates both self.client and self.retriever.
        self.initialize_agent()

    def initialize_agent(self):
        """(Re)create the Vector Search client and the index handle.

        The index handle is rebuilt together with the client. Previously only
        the client was recreated on each request while searches kept using the
        handle obtained from the first client, so the refresh had no effect.
        """
        from databricks.vector_search.client import VectorSearchClient
        from databricks.vector_search.utils import CredentialStrategy

        # Vector Search client using the model-serving user credential strategy.
        self.client = VectorSearchClient(
            credential_strategy=CredentialStrategy.MODEL_SERVING_USER_CREDENTIALS,
            disable_notice=True,
        )
        self.retriever = self.client.get_index(
            endpoint_name=self.vs_endpoint,
            index_name=self.vs_index,
        )

    def documentation_vector_search(self, text):
        """Return the raw similarity-search response for ``text``.

        Requests the ``title``, ``ingredients`` and ``directions`` columns of
        the three closest matches.
        """
        return self.retriever.similarity_search(
            query_text=text,
            columns=["title", "ingredients", "directions"],
            num_results=3,
            disable_notice=True,
        )

    def forward(self, question):
        """Retrieve context for ``question`` and return the DSPy prediction."""
        # Rebuilt on every call, presumably so that the credentials of the
        # calling user are picked up at request time -- TODO confirm.
        self.initialize_agent()
        context = self.documentation_vector_search(question)
        return self.response_generator(context=context, question=question)

# 2.3 Usar formato ResponsesAgent sobre el módulo RAG

In [0]:
from mlflow.types.responses import (
    ResponsesAgentRequest,
    ResponsesAgentResponse,
)
from mlflow.pyfunc import ResponsesAgent
import uuid
import dspy
from dspy.retrievers.databricks_rm import DatabricksRM

# ResponsesAgent wrapper around your DSPy RAG
class RecommendationResponsesAgent(ResponsesAgent):
    """ResponsesAgent wrapper around the DSPy ``Recommendation`` module.

    For each request it extracts the latest user message, retrieves passages
    with ``DatabricksRM`` and asks the wrapped module's ``response_generator``
    for a recommendation, which is returned as a single text output item.
    """

    def __init__(self, dspy_rag):
        """Store the DSPy RAG module (an object exposing ``vs_index`` and
        ``response_generator``)."""
        super().__init__()
        self.dspy_rag = dspy_rag

    def initialize_agent(self):
        """Create the ``DatabricksRM`` retriever for the wrapped module's index.

        Ensure DATABRICKS_HOST and DATABRICKS_TOKEN are set in the environment,
        or pass them explicitly if available in the ``self.dspy_rag`` config.
        """
        self.retriever_tool = DatabricksRM(
            databricks_index_name=self.dspy_rag.vs_index,
            docs_id_column_name="title",
            text_column_name="ingredients",
            k=3,  # Adjust the number of documents to retrieve as needed
        )

    def load_context(self, context):
        """Recreate the DSPy RAG module in the prediction process.

        Reads ``index``, ``endpoint`` and ``lm`` from ``context.model_config``
        and assumes the ``Recommendation`` class is available in scope.
        """
        self.dspy_rag = Recommendation(
            index=context.model_config["index"],
            endpoint=context.model_config["endpoint"],
            lm=context.model_config["lm"],
        )

    @staticmethod
    def _content_to_text(content):
        """Flatten a message ``content`` value into plain text.

        A string is returned unchanged. A list of content parts is reduced to
        the ``text`` of each part (dicts or objects), joined by newlines. Any
        other value is returned as-is.
        """
        if not isinstance(content, (list, tuple)):
            return content
        texts = []
        for part in content:
            if isinstance(part, str):
                text = part
            elif isinstance(part, dict):
                text = part.get("text")
            else:
                text = getattr(part, "text", None)
            if text:
                texts.append(text)
        return "\n".join(texts)

    @staticmethod
    def _format_context(retrieved_results) -> str:
        """Turn the retriever output into a single context string.

        ``DatabricksRM`` is expected to return either a list of passages or a
        prediction-like object exposing ``docs`` (and ``doc_ids``). Iterating
        the latter directly would yield its field names, not the passages, so
        ``docs`` is read explicitly when present.
        """
        docs = getattr(retrieved_results, "docs", None)
        if docs is None:
            return "\n\n".join(str(doc) for doc in retrieved_results)
        doc_ids = getattr(retrieved_results, "doc_ids", None) or []
        if len(doc_ids) == len(docs):
            # The id column is the recipe title; keep it next to its text so
            # the model can name the recipe it recommends.
            return "\n\n".join(f"{doc_id}: {doc}" for doc_id, doc in zip(doc_ids, docs))
        return "\n\n".join(str(doc) for doc in docs)

    def _extract_question(self, request: ResponsesAgentRequest) -> str:
        """
        Extracts the user question from the ResponsesAgentRequest input.

        Returns the content of the last item whose role is ``user``.

        Raises:
            ValueError: If the request contains no user message.
        """
        messages = [m.model_dump() if hasattr(m, "model_dump") else m for m in request.input]
        user_messages = [m for m in messages if m.get("role") == "user"]
        if not user_messages:
            raise ValueError("No user message found in request.input")
        return self._content_to_text(user_messages[-1]["content"])

    def predict(self, request: ResponsesAgentRequest) -> ResponsesAgentResponse:
        """Answer the latest user message of ``request`` with a recommendation."""
        # The retriever is rebuilt on every request.
        self.initialize_agent()

        # 1. Convert ResponsesAgent input -> DSPy input
        question = self._extract_question(request)

        # 2. Use DatabricksRM to get context and flatten it into one string
        retrieved_results = self.retriever_tool(question, query_type="text")
        retrieved_context = self._format_context(retrieved_results)

        # 3. Call the existing DSPy predictor with the retrieved context
        dspy_response = self.dspy_rag.response_generator(
            context=retrieved_context, question=question
        ).recommendation

        if hasattr(dspy_response, "answer"):
            answer_text = dspy_response.answer
        else:
            answer_text = str(dspy_response)

        output_item = self.create_text_output_item(
            text=answer_text,
            id=str(uuid.uuid4()),
        )

        # custom_inputs of the request are echoed back as custom_outputs.
        return ResponsesAgentResponse(
            output=[output_item],
            custom_outputs=request.custom_inputs,
        )

# 3. Probar agente

In [0]:
import contextlib
import io

# Smoke-test the agent inside an MLflow run and store the last prompt with it.
with mlflow.start_run(run_name="recommender") as run:
    # Instantiate the DSPy RAG module as usual
    rag = Recommendation(index=f"{catalog}.{schema}.{vs_index}", endpoint=vs_endpoint)

    # Wrap it as a ResponsesAgent
    agent = RecommendationResponsesAgent(dspy_rag=rag)
    request = {
        "input": [
            {
                "role": "user",
                "content": "What can I cook with chicken and rice?"
            }
        ]
    }

    print(agent.predict(request))

    # mlflow.log_param takes (key, value); the previous call passed the run id
    # as the key and "generated_prompt" as the value. inspect_history() writes
    # the prompt to stdout, so capture that output and fall back to its return
    # value if it provides one.
    history_buffer = io.StringIO()
    with contextlib.redirect_stdout(history_buffer):
        history_result = rag.inspect_history(n=1)
    generated_prompt = history_buffer.getvalue()
    if not generated_prompt and history_result is not None:
        generated_prompt = str(history_result)

    # Stored as a text artifact because prompts can exceed the length limit
    # of MLflow params.
    mlflow.log_text(generated_prompt, "generated_prompt.txt")

# 4. Guardar agente

In [0]:
from mlflow.models import ModelSignature
from mlflow.types.schema import ColSpec, Schema
from mlflow.models.resources import DatabricksVectorSearchIndex

# String-in / string-out signature objects. They are built here but not passed
# to the log_model call below.
input_schema = Schema([ColSpec("string")])
output_schema = Schema([ColSpec("string")])
signature = ModelSignature(inputs=input_schema, outputs=output_schema)

# Example request, logged with the model and reused for the validation call.
input_example = {
    "input": [
        {"role": "user", "content": "What can I cook with chicken and rice?"},
    ],
}

# Databricks resources the model depends on: the Vector Search index.
resources = [DatabricksVectorSearchIndex(index_name=f"{catalog}.{schema}.{vs_index}")]

with mlflow.start_run(run_name="Log Model"):
    from mlflow.models.auth_policy import AuthPolicy, SystemAuthPolicy, UserAuthPolicy

    # System policy: resources accessed with system credentials
    system_policy = SystemAuthPolicy(resources=resources)

    # User policy: API scopes for on-behalf-of-user access
    user_policy = UserAuthPolicy(
        api_scopes=[
            "vectorsearch.vector-search-endpoints",
            "vectorsearch.vector-search-indexes",
        ]
    )

    # Agent instance to log: the DSPy RAG module wrapped as a ResponsesAgent.
    rag_module = Recommendation(index=f"{catalog}.{schema}.{vs_index}", endpoint=vs_endpoint)
    agent_model = RecommendationResponsesAgent(dspy_rag=rag_module)

    # Log and register the model. The resources are supplied through the
    # system auth policy rather than the `resources` argument. `model_config`
    # carries the values that load_context reads back at serving time.
    model_info = mlflow.pyfunc.log_model(
        python_model=agent_model,
        auth_policy=AuthPolicy(
            system_auth_policy=system_policy,
            user_auth_policy=user_policy,
        ),
        registered_model_name=f"{catalog}.{schema}.{model_name}",
        input_example=input_example,
        pip_requirements="requirements.txt",
        model_config={
            "index": f"{catalog}.{schema}.{vs_index}",
            "endpoint": vs_endpoint,
            "lm": "databricks/databricks-llama-4-maverick",
        },
    )

    # Validate the logged model by running the example through it in a
    # uv-managed environment.
    mlflow.models.predict(
        model_uri=model_info.model_uri,
        input_data=input_example,
        env_manager="uv",
    )

In [0]:
from databricks import agents

# Deploy the model version registered by the logging cell and print the
# endpoint it can be queried on.
deploy_args = {
    "model_name": f"{catalog}.{schema}.{model_name}",
    "model_version": model_info.registered_model_version,
    "scale_to_zero": True,  # optional
}
deployment = agents.deploy(**deploy_args)

print("Serving endpoint:", deployment.query_endpoint)