In [4]:
from dotenv import load_dotenv
# Load settings from a local .env file into the environment (python-dotenv).
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
load_dotenv()

True

[Source notebook](https://github.com/pinecone-io/examples/blob/master/generation/langchain/handbook/08-langchain-retrieval-agent.ipynb)

# Embed "Blocks"

In [5]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader
from langchain.docstore.document import Document

In [3]:
# Each source file is paired with the ID of the block it belongs to.
files = [
    {"path": "./Behaviorism.txt", "id": "bw456"},
    {"path": "./Cognitivism.txt", "id": "cg789"},
]
docs = []
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
for entry in files:
    block_id = entry["id"]
    pieces = text_splitter.split_documents(TextLoader(entry["path"]).load())
    # Print the first chunk of every file as a quick sanity check.
    print(pieces[0], type(pieces[0]))
    # Re-wrap each chunk so its metadata carries the block ID, the source
    # path reported by the loader, and the chunk's position within the file.
    docs.extend(
        Document(
            page_content=piece.page_content,
            metadata={
                "blockId": block_id,
                "source": piece.metadata["source"],
                "chunkIndex": position,
            },
        )
        for position, piece in enumerate(pieces)
    )

Created a chunk of size 506, which is longer than the specified 500
Created a chunk of size 793, which is longer than the specified 500


page_content='Behaviorism' metadata={'source': './Behaviorism.txt'} <class 'langchain.schema.Document'>
page_content='Cognitivism\nCognitivism is a learning theory that focusses on how information is received, organized, stored and retrieved by the mind. It uses the mind as an information processer, like a computer. Therefore, cognitivism looks beyond observable behaviour, viewing learning as internal mental processes. In this view, learners are actively involved in the way they process information. Knowledge, memory, thinking, and problem solving are areas for development.' metadata={'source': './Cognitivism.txt'} <class 'langchain.schema.Document'>


In [6]:
# Embedding function that is handed to the Chroma vector store further down.
embeddings = OpenAIEmbeddings()

In [5]:
# db = Chroma.from_documents(docs, embeddings, persist_directory="./chroma_db")
# db.persist()
# db = None

In [7]:
import os

# Reuse the persisted index when ./chroma_db already holds one; otherwise build
# it from `docs` and persist it. Without this guard a fresh checkout (or a
# Restart & Run All without the directory) silently opens an empty store and
# every search returns nothing.
CHROMA_DIR = "./chroma_db"
if os.path.isdir(CHROMA_DIR) and os.listdir(CHROMA_DIR):
    db = Chroma(persist_directory=CHROMA_DIR, embedding_function=embeddings)
else:
    db = Chroma.from_documents(docs, embeddings, persist_directory=CHROMA_DIR)
    db.persist()

In [8]:
# Sanity-check the index with a plain similarity search (no metadata filter).
query = "What is behaviorism"
db.similarity_search(query)

[Document(page_content='Behaviorism', metadata={'blockId': 'bw456', 'source': './Behaviorism.txt', 'chunkIndex': 0}),
 Document(page_content='Behaviorism is a theory of learning; it’s based on the idea that our behavior is learned as a result of interaction with our external environment. Interaction with our external environment can include an interaction with a certain person, a certain object, or with certain surroundings.', metadata={'blockId': 'bw456', 'source': './Behaviorism.txt', 'chunkIndex': 2}),
 Document(page_content='Key Terms\nBehaviorism: A theory and school of thought in psychology which states that all types of human behavior can be learned through two key types of conditioning: classical conditioning and operant conditioning. The terms behaviorism and behavioral perspective can also be used interchangeably.\n\nStimulus: Anything that occurs in the environment that elicits a response from an individual.', metadata={'blockId': 'bw456', 'source': './Behaviorism.txt', 'chu

# Self Query Retriever
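A self-query retriever lets the LLM turn a question that mentions a block ID into a metadata filter (for example `blockId == "bw456"`), so that only the chunks belonging to that block are retrieved.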

In [26]:
from langchain.llms import OpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo

# Metadata attributes the retriever is told about; both are declared as strings.
metadata_field_info = [
    AttributeInfo(name=attr_name, description=attr_description, type="string")
    for attr_name, attr_description in (
        ("blockId", "The ID of the block this chunk is in"),
        ("source", "The path to the file of the source document"),
    )
]
document_content_description = "Info of a block"
# Completion model at temperature 0, used to build the self-query retriever.
llm = OpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm,
    db,
    document_content_description,
    metadata_field_info,
    verbose=True,
)

# Initializing the Conversational Agent
Our conversational agent needs three things to initialize: a chat LLM, conversational memory, and a `RetrievalQAWithSourcesChain` built on the self-query retriever.

In [28]:
from langchain.chat_models import ChatOpenAI
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.chains import RetrievalQAWithSourcesChain

# Windowed conversation memory (k=5), exposed under the "chat_history" key
# as message objects.
conversational_memory = ConversationBufferWindowMemory(
    k=5,
    memory_key="chat_history",
    return_messages=True,
)
# Chat-completion model; note this rebinds `llm`.
llm = ChatOpenAI(temperature=0.7, model_name="gpt-3.5-turbo-0613")
# "stuff"-type QA-with-sources chain over the self-query retriever; it is
# configured to return the retrieved source documents as well.
qa = RetrievalQAWithSourcesChain.from_chain_type(
    llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

But this isn't yet ready for our conversational agent. For that we need to convert this retrieval chain into a tool. We do that like so:

In [29]:
from langchain.agents import Tool
from pydantic import BaseModel, Field


# Argument schema for the "Blocks Info" tool: a single string field, `question`.
class BlockQaInput(BaseModel):
    question: str = Field()

def blockQA(question):
    """Run the retrieval QA chain on ``question`` and return its result unchanged."""
    chain_inputs = {"question": question}
    return qa(chain_inputs)

# The only tool offered to the agent: block-level question answering.
tools = [
    Tool(
        name="Blocks Info",
        description="use this tool to answer questions about a single block",
        func=blockQA,
        args_schema=BlockQaInput,
    ),
]

# `STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION` is DOPE 🔥🔥🔥

In [30]:
from langchain.agents import initialize_agent
from langchain.agents.agent_types import AgentType

from langchain.prompts import MessagesPlaceholder

# Structured-chat agent over the block QA tool.
# The memory object alone is not enough: the structured-chat prompt only
# declares `input` and `agent_scratchpad`, so the stored history would never
# be shown to the model. `memory_prompts` adds a placeholder for it and
# `input_variables` declares it; the name must match the memory's
# memory_key ("chat_history").
agent = initialize_agent(
    agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
    early_stopping_method='generate',
    memory=conversational_memory,
    agent_kwargs={
        "memory_prompts": [MessagesPlaceholder(variable_name="chat_history")],
        "input_variables": ["input", "agent_scratchpad", "chat_history"],
    },
)

In [36]:
# Prompt passed to the agent: fixed instructions followed by the user query.
# Blocks are referred to as @block:<id>, using the IDs stored in the chunk metadata.
QUERY="""\
Instructions:
Create a task for a single block and then combine results from tasks to answer the query. Use new lines in the final response to make it presentable.

User Query:
How is @block:bw456 different from @block:cg789 in their ideas on learning?
"""

In [38]:
# Run the agent on the prompt; the result holds the final answer under "output".
res = agent(QUERY)



[1m> Entering new  chain...[0m
[32;1m[1;3mThought: To answer this query, I need to create tasks for both @block:bw456 and @block:cg789 and gather information about their ideas on learning. Then I can combine the results to provide a comprehensive response.

Action: 
```
{
  "action": "Blocks Info",
  "action_input": {
    "question": "What are the ideas on learning in @block:bw456?"
  }
}
```

[0mquery='ideas on learning' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='blockId', value='bw456') limit=None

Observation: [36;1m[1;3m{'question': 'What are the ideas on learning in @block:bw456?', 'answer': 'The ideas on learning in @block:bw456 are based on behaviorism. Behaviorism is a theory of learning that emphasizes the role of our external environment in shaping our behavior. It suggests that our behavior is learned through a process called conditioning, which can lead to the formation of habits. Behaviorism presents two main types of conditioning: classical co

In [39]:
# Show only the agent's final answer text.
print(res["output"])

The ideas on learning in @block:bw456 are based on behaviorism, which emphasizes the role of the external environment in shaping behavior. Behaviorism suggests that behavior is learned through conditioning, either classical or operant. Behaviorists believe that anyone can be trained to perform tasks through the right conditioning.

On the other hand, the ideas on learning in @block:cg789 are based on cognitive learning theory. According to @block:cg789, learning is a process of organizing information into conceptualized models. Instructions should be organized and presented in a meaningful way. Retention and recall are important for building schemas in the brain, and memory is supported by organizing learning material. Cognitivism views learning as internal mental processes and recognizes that learners are actively involved in processing information.

