In [1]:
# load config
from pathlib import Path
import mlflow
from maud.agent.config import parse_config
import os

# Resolve the agent's files relative to the parent of the current working
# directory (the notebook is expected to be run from a subfolder of the project).
root_dir = Path.cwd().parent
langgraph_dir = root_dir / 'implementations' / 'agents' / 'langgraph'
config_path = langgraph_dir / 'config.yaml'
agent_path = langgraph_dir / 'agent.py'

# Wrap the config file in an MLflow ModelConfig (as the development config),
# then parse it into the project's config object used by the cells below.
mlflow_config = mlflow.models.ModelConfig(development_config=config_path)
maud_config = parse_config(mlflow_config)



In [2]:
from maud.agent.retrievers import get_vector_retriever
# Build the vector retriever from the parsed config, then smoke-test it with a
# single query; the bare last expression displays the returned documents.
retriever = get_vector_retriever(maud_config)
retriever_probe = "What is Databricks?"
retriever.invoke(retriever_probe)

[NOTICE] Using a notebook authentication token. Recommended for development only. For improved performance, please use Service Principal based authentication. To disable this message, pass disable_notice=True to VectorSearchClient().


[Document(metadata={'img_path': '', 'filename': 'D8200-ARM.pdf', 'type': 'text', 'id': 60129542171.0}, page_content='A Peabody-Barnes 651 N. Main St. Mansfield , OH 44902\nA UBM Engineering Ltd. Lower William St. Northam Southampton England 509 2DN'),
 Document(metadata={'img_path': '', 'filename': 'D8200-ARM.pdf', 'type': 'text', 'id': 34359738423.0}, page_content='MATL:\nSCREW PIN DROP FORGED STEEL'),
 Document(metadata={'img_path': '', 'filename': '01-0571-0011_Rev06_07-07.pdf', 'type': 'text', 'id': 3.0}, page_content='.................................................................................................................................................................. 2. 2.1, 2 = . 3.0, 1 = OPERATION .................................................................................................................................................................. 2. 3.0, 2 = . 3.1, 1 = MAINTENANCE CHECKLIST ...................................................................

In [3]:
from databricks_langchain import ChatDatabricks
# Create the chat model against the serving endpoint named in the config, then
# smoke-test it with a single prompt; the bare last expression shows the reply.
chat_endpoint = maud_config.model.endpoint_name
model = ChatDatabricks(endpoint=chat_endpoint)
model_probe = "What is Databricks?"
model.invoke(model_probe)

AIMessage(content="Databricks is a cloud-based data engineering platform that provides a fast, easy, and collaborative Apache Spark-based platform for data engineering, data science, and data analytics. It was founded by the original creators of Apache Spark and is designed to simplify the process of working with big data and machine learning.\n\nDatabricks offers a range of features and tools that enable users to:\n\n1. **Process large-scale data**: Databricks provides a scalable and performant platform for processing large datasets, including support for batch and real-time data processing.\n2. **Build and deploy machine learning models**: Databricks offers a range of machine learning libraries and frameworks, including TensorFlow, PyTorch, and scikit-learn, making it easy to build, train, and deploy machine learning models.\n3. **Collaborate on data projects**: Databricks provides a collaborative environment for data engineers, data scientists, and data analysts to work together on 

In [4]:
from langgraph.graph.state import CompiledStateGraph
from langgraph.graph import StateGraph, END, START
from maud.agent.states import get_state
from maud.agent.nodes import make_query_vector_database, make_generation

In [6]:
from typing import TypedDict, Annotated, List, Union
from operator import add

class GraphState(TypedDict):
    """State schema passed to ``StateGraph`` for the agent graph in this notebook."""

    # Conversation history. The ``operator.add`` metadata is the reducer, so
    # node updates are concatenated onto the existing list instead of replacing it.
    # NOTE(review): the notebook invokes the graph with HumanMessage objects, so
    # entries are not always dict[str, str] -- confirm the intended element type.
    messages: Annotated[List[dict[str, str]], add]

    # No reducer annotation on the remaining keys.
    generated_question: List[dict[str, str]]
    context: List[str]

# Use the GraphState class defined above as the graph's state schema.
state = GraphState

# Build the node callables from the retriever and chat model created in earlier cells.
query_vector_database = make_query_vector_database(retriever)
generation = make_generation(model)

workflow = StateGraph(state)
workflow.add_node("query_vector_database", query_vector_database)
workflow.add_node("generation", generation)

# NOTE(review): "query_vector_database" is registered but no edge leads to it,
# so with this wiring only "generation" runs and the retriever is never queried.
# Presumably the intended flow is START -> query_vector_database -> generation;
# confirm which state keys the retrieval node reads before wiring it in.
workflow.add_edge(START, "generation")

# Declare the terminal edge explicitly rather than relying on "generation"
# simply having no outgoing edges.
workflow.add_edge("generation", END)

app = workflow.compile()

In [9]:
from langchain_core.messages import HumanMessage
# Run the compiled graph once, seeding the state with a single human message;
# the bare last expression displays the final state returned by the graph.
initial_state = {"messages": [HumanMessage(content="What is SQL?")]}
app.invoke(initial_state)

{'messages': [HumanMessage(content='What is SQL?', additional_kwargs={}, response_metadata={}),
  {'role': 'assistant',
   'content': "So SQL, or Structured Query Language, is a programming language designed for managing and manipulating data stored in relational database management systems (RDBMS). It's a standard language for accessing, managing, and modifying data in relational databases, and is widely used for storing, updating, and retrieving data in a variety of applications, from simple websites to complex enterprise systems."}]}