In [None]:
import os
from dotenv import load_dotenv

load_dotenv()
from langchain_openai import AzureChatOpenAI
# Chat model used by the chain further down. The endpoint, deployment name and
# API version are all read from environment variables; a missing variable
# raises KeyError here.
llm = AzureChatOpenAI(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
    api_version=os.environ["AZURE_OPENAI_API_VERSION"],
)

In [None]:
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_community.document_loaders import NotebookLoader, WebBaseLoader
from pathlib import Path
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain_openai import AzureOpenAIEmbeddings

# Embedding client used to vectorise the document chunks. It is configured from
# its own set of environment variables (version, key, endpoint, deployment and
# model), separate from the ones used for the chat model.
embeddings = AzureOpenAIEmbeddings(
    openai_api_type="azure",
    openai_api_version=os.environ["OPENAI_API_EMBEDDING_VERSION"],
    openai_api_key=os.environ["OPENAI_API_EMBEDDING_KEY"],
    azure_endpoint=os.environ["AZURE_OPENAI_EMBEDDING_ENDPOINT"],
    deployment=os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"],
    model=os.environ["AZURE_OPENAI_EMBEDDING_MODEL"],
    validate_base_url=True,
)

# Root folder that is searched for notebooks. The location can be overridden
# with the LANGCHAIN_ACADEMY_DIR environment variable; when it is not set, the
# original hard-coded location is used so existing setups keep working.
base_directory = Path(os.environ.get("LANGCHAIN_ACADEMY_DIR", "D:/langchain-academy"))

# Lazy, recursive search for *.ipynb files below base_directory. This is a
# generator: nothing is scanned until it is iterated, and it can be consumed
# only once.
notebook_files = base_directory.rglob("*.ipynb")

# Disabled: load every notebook found above with NotebookLoader and collect the
# first document of each one. Kept for reference only; nothing in this cell
# consumes `notebook_files` while this stays commented out.
# documents = []
# for notebook in notebook_files:
#     loader = NotebookLoader(
#         notebook,
#         include_outputs=True,
#         max_output_length=20,
#         remove_newline=True)
#     document = loader.load()
#     documents.append(document[0])
    
# Docs to index
urls = [
    "https://langchain-ai.github.io/langgraph/concepts/agentic_concepts/",
    "https://langchain-ai.github.io/langgraph/concepts/multi_agent/",
    "https://langchain-ai.github.io/langgraph/tutorials/introduction/",
]

# Load
# One loader is built and loaded per URL, in list order; `docs` holds one list
# of documents per URL.
docs = [loader.load() for loader in map(WebBaseLoader, urls)]
# Flatten the per-URL lists into a single list, keeping the same order.
docs_list = [document for url_docs in docs for document in url_docs]

# Split
# The splitter is created through the tiktoken-based constructor with
# chunk_size=500 and no overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)

# Index
# Embed every chunk and keep the vectors in an in-memory store; `ids` are the
# identifiers returned by the store for the added chunks.
vector_store = InMemoryVectorStore(embeddings)
ids = vector_store.add_documents(documents=doc_splits)

# Number of chunks that were indexed. This has to be the last expression of the
# cell: Jupyter only renders the value of the final expression, so the count
# was silently discarded while it sat in the middle of the cell.
len(doc_splits)

In [None]:
# Wrap the vector store as a retriever; the search is configured with k=10.
retriever = vector_store.as_retriever(
    search_kwargs=dict(k=10),
)

In [None]:
### Generate

from langchain import hub
from langchain_core.output_parsers import StrOutputParser

# Prompt
# Fetch the "rlm/rag-prompt" template through the LangChain hub helper. The
# chain built later in this cell feeds it the "context" and "question" inputs.
prompt = hub.pull("rlm/rag-prompt")

# LLM
# Nothing is created here: the chain below reuses the `llm` object that was
# built earlier in the notebook.

# Post-processing
def format_docs(docs):
    """Merge the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in iteration order, separated by a blank
        line (two newline characters). An empty iterable yields ``""``.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

# Retrieve
# Look up supporting chunks and collapse them into a single context string.
retrieval_query = "What are the different multi agent architectures?"
results = retriever.invoke(retrieval_query)
relevant_docs = format_docs(results)

# Chain
# Prompt template -> chat model -> plain string output.
rag_chain = prompt | llm | StrOutputParser()

# Run
# NOTE(review): the question sent to the model is not the same text as the
# retrieval query above - confirm that this is intended.
design_question = (
    "What multi agent architecture would be appropriate to build a nutritional agent? "
    "Generate an agent design accordingly? "
    "Try generating plan in UML syntax"
)
generation = rag_chain.invoke({"context": relevant_docs, "question": design_question})
print(generation)