In [None]:
import getpass
import os

# Credentials for the OpenAI and Activeloop services used later in this notebook.
# A value already present in the environment is kept as-is; a missing (or empty)
# one is requested interactively with hidden input, so no secret is ever written
# into the notebook source or overwritten by a placeholder string.
for _secret_name in ("OPENAI_API_KEY", "ACTIVELOOP_TOKEN"):
    if not os.environ.get(_secret_name):
        os.environ[_secret_name] = getpass.getpass(f"Enter {_secret_name}: ")

In [None]:
!pip install -q langchain==0.0.346 openai==1.3.7 tiktoken==0.5.2 cohere==4.37 deeplake==3.8.11 langchainhub==0.1.14

In [None]:
# Fetch the source web page and load it as LangChain documents
from langchain.document_loaders import WebBaseLoader

loader = WebBaseLoader(
    "https://lilianweng.github.io/posts/2023-06-23-agent/"
)
data = loader.load()

# Show how many documents were loaded
len(data)

1

In [None]:
# Break the loaded documents into chunks (chunk_size=500, no overlap)
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
)
all_splits = text_splitter.split_documents(data)

# Show how many chunks were produced
len(all_splits)

130

In [None]:
# Embed the chunks with OpenAI embeddings and store them in a Deep Lake dataset
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import DeepLake

# NOTE(review): with overwrite=False an already-existing dataset is presumably
# reused, so re-running this cell may append the same chunks again — confirm.
vectorstore = DeepLake.from_documents(
    all_splits,
    embedding=OpenAIEmbeddings(),
    dataset_path="hub://genai360/langsmith_intro",
    overwrite=False,
)

Your Deep Lake dataset has been successfully created!


Creating 130 embeddings in 1 batches of size 130:: 100%|██████████| 1/1 [00:05<00:00,  5.81s/it]

Dataset(path='hub://genai360/langsmith_intro', tensors=['text', 'metadata', 'embedding', 'id'])

  tensor      htype       shape      dtype  compression
  -------    -------     -------    -------  ------- 
   text       text      (130, 1)      str     None   
 metadata     json      (130, 1)      str     None   
 embedding  embedding  (130, 1536)  float32   None   
    id        text      (130, 1)      str     None   





In [None]:
# Pull the RAG prompt template from the LangChain Hub and display it
from langchain import hub

prompt = hub.pull(
    "rlm/rag-prompt:50442af1"
)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

In [None]:
# Chat model used to generate the answers (temperature 0)
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(
    temperature=0,
    model_name="gpt-3.5-turbo",
)

In [None]:
# Build the RetrievalQA chain from the LLM, the vector-store retriever,
# and the prompt pulled from the hub
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type_kwargs=dict(prompt=prompt),
    retriever=vectorstore.as_retriever(),
)

In [None]:
# Run the chain on a sample question and display the "result" entry of its output
question = "What are the approaches to Task Decomposition?"
result = qa_chain(dict(query=question))
result["result"]

'The approaches to task decomposition include using LLM with simple prompting, task-specific instructions, and human inputs.'

# Environment

In [None]:
!pip list

Package                          Version
-------------------------------- ---------------------
absl-py                          1.4.0
aioboto3                         12.1.0
aiobotocore                      2.8.0
aiohttp                          3.9.1
aioitertools                     0.11.0
aiosignal                        1.3.1
alabaster                        0.7.13
albumentations                   1.3.1
altair                           4.2.2
anyio                            3.7.1
appdirs                          1.4.4
argon2-cffi                      23.1.0
argon2-cffi-bindings             21.2.0
array-record                     0.5.0
arviz                            0.15.1
astropy                          5.3.4
astunparse                       1.6.3
async-timeout                    4.0.3
atpublic                         4.0
attrs                            23.1.0
audioread                        3.0.1
autograd                         1.6.2
Babel                            2.13.1
b