# Settings

In [3]:
import os

from dotenv import load_dotenv

In [4]:
# Load variables from a local .env file into the process environment.
# Presumably this is where the Azure settings read via os.getenv in later
# cells come from — confirm the .env file defines them.
load_dotenv()

True

# Preview

## LLM

In [7]:
from langchain_openai import AzureChatOpenAI

In [8]:
# Chat model served by an Azure OpenAI deployment. The deployment name and the
# API version are looked up in the environment; a missing variable yields None
# here rather than raising. The endpoint and API key are not passed explicitly.
llm = AzureChatOpenAI(
    azure_deployment=os.environ.get("DEPLOYMENT_NAME_LLM"),
    openai_api_version=os.environ.get("AZURE_OPENAI_API_VERSION"),
)

In [9]:
# Display the client's repr to check which endpoint, deployment and API
# version were picked up.
# NOTE(review): the saved output exposes the Azure endpoint and deployment
# name — consider clearing it before sharing or committing the notebook.
llm

AzureChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x7c441bbbc820>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x7c441bbbdf30>, openai_api_key=SecretStr('**********'), openai_proxy='', azure_endpoint='https://qsgc-nlp.openai.azure.com/', deployment_name='QSGC-gpt35', openai_api_version='2024-02-01', openai_api_type='azure')

## Additional Imports

In [15]:
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import AzureOpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter


# Indexing

## Load

In [16]:
# Load the blog post, parsing only the elements whose CSS class marks the
# post title, header, or body; the rest of the page's HTML is ignored.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header"),
        ),
    },
)

# Run the loader; later cells index the result as docs[0].
docs = loader.load()

In [17]:
# Size of the loaded post's text, in characters.
len(docs[0].page_content)

43131

In [18]:
# Preview the first 500 characters; print() renders the newlines instead of
# showing the escaped string repr.
print(docs[0].page_content[:500])



      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
Agent System Overview#
In


## Split

In [19]:
# Splitter configuration: chunks of up to 1,000 units with a 200-unit overlap
# between neighbours (presumably characters, the splitter's default measure).
# add_start_index=True records each chunk's offset in its metadata under
# "start_index".
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1_000,
    chunk_overlap=200,
    add_start_index=True,
)

# Apply the splitter to the loaded documents.
all_splits = text_splitter.split_documents(docs)

In [21]:
# Container type and number of chunks produced by the splitter.
type(all_splits), len(all_splits)

(list, 66)

In [22]:
# Inspect the first three chunks.
all_splits[:3]

[Document(page_content='LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\n\n

In [23]:
# Length of the first chunk's text; expected to stay within the splitter's
# chunk_size.
len(all_splits[0].page_content)

969

In [24]:
# Metadata of a sample chunk: the source URL plus the start_index offset
# enabled by add_start_index=True in the splitter.
all_splits[10].metadata

{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',
 'start_index': 7056}

## Store

In [25]:
# Instantiate the embedding model; the Azure deployment name and API version
# are looked up in the environment (None if a variable is unset).
embeddings = AzureOpenAIEmbeddings(
    openai_api_version=os.environ.get("AZURE_OPENAI_API_VERSION"),
    azure_deployment=os.environ.get("DEPLOYMENT_NAME_EMBEDDING"),
)

# Instantiate the vector store from the chunks, using the embedding model above.
# NOTE(review): no persist_directory or collection_name is passed — presumably
# the index lives in memory and every chunk is re-embedded on each run; confirm.
vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings)

# Show the store's repr as the cell output.
vectorstore

<langchain_chroma.vectorstores.Chroma at 0x7c43f747caf0>