In [None]:
# Download the sample product catalog CSV.
!wget https://raw.githubusercontent.com/ksm26/LangChain-for-LLM-Application-Development/main/OutdoorClothingCatalog_1000.csv
# Subversion is installed for the `svn checkout` of a single repository sub-folder below.
!apt install subversion
# NOTE(review): `docsre` is not referenced anywhere else in the visible notebook
# (the checkout below targets `docs`) — possibly a typo; confirm before relying on it.
!mkdir docsre
# Check out only the cs229_lectures folder into ./docs; later cells read docs/MachineLearning-Lecture0N.pdf.
!svn checkout https://github.com/ksm26/LangChain-Chat-with-Your-Data/trunk/docs/cs229_lectures docs
# Install the Python packages used by the notebook (versions are not pinned).
!pip install sentence_transformers openai langchain langchain_experimental tiktoken docarray wikipedia google-search-results pypdf yt_dlp pydub chromadb lark

In [None]:
import os
import pandas as pd
import numpy as np
from datetime import date
import langchain
import openai
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from langchain.chat_models import ChatOpenAI
from langchain.chains import (
    ConversationChain,
    LLMChain,
    SimpleSequentialChain,
    SequentialChain,
    RetrievalQA,
    ConversationalRetrievalChain
)
from langchain.chains.router import MultiPromptChain
from langchain.chains.router.llm_router import LLMRouterChain, RouterOutputParser
from langchain.memory import (
    ConversationBufferMemory,
    ConversationBufferWindowMemory,
    ConversationSummaryBufferMemory,
    ConversationTokenBufferMemory
)
from langchain.evaluation.qa import QAGenerateChain, QAEvalChain
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.indexes import VectorstoreIndexCreator
from IPython.display import display, Markdown
from langchain_experimental.agents.agent_toolkits import create_python_agent
from langchain_experimental.tools.python.tool import PythonREPLTool
from langchain.agents import load_tools, initialize_agent, AgentType, tool
from langchain.python import PythonREPL
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import OpenAIWhisperParser
from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.text_splitter import TokenTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers import ContextualCompressionRetriever, SVMRetriever, TFIDFRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

# Credentials are read from the environment. `setdefault` keeps the original
# empty-string placeholder as a fallback, but no longer overwrites a key that
# was already exported before the kernel started. Never paste a real key here.
os.environ.setdefault('OPENAI_API_KEY', '')
os.environ.setdefault('SERPAPI_API_KEY', '')
openai.api_key = os.environ['OPENAI_API_KEY']

# 1) LangChain DataBase

## 1.1 Data Loader

In [None]:
# Fetch the audio of one YouTube video into save_dir and parse it with
# OpenAIWhisperParser, then preview the first 200 characters of the result.
url = "https://www.youtube.com/watch?v=jGwO_UgTS7I"
save_dir = "docs/youtube/"
audio_source = YoutubeAudioLoader([url], save_dir)
loader = GenericLoader(audio_source, OpenAIWhisperParser())
docs = loader.load()
docs[0].page_content[:200]

In [None]:
# Load a single web page and print a 500-character window of its extracted text.
loader = WebBaseLoader(
    "https://github.com/basecamp/handbook/blob/master/37signals-is-you.md"
)
docs = loader.load()
page_text = docs[0].page_content
print(page_text[2000:2500])

## 1.2 Data Splitter

[Langchain Documentation - Split](https://python.langchain.com/docs/modules/data_connection/document_transformers/text_splitters/character_text_splitter)

* **CharacterTextSplitter** is the simplest method. It splits on a single separator (by default "\n\n") and measures chunk length by number of characters.

* **RecursiveCharacterTextSplitter** is the recommended one for generic text. It is parameterized by a list of characters. It tries to split on them in order until the chunks are small enough. The default list is ["\n\n", "\n", " ", ""]. This has the effect of trying to keep all paragraphs (and then sentences, and then words) together as long as possible, as those would generically seem to be the strongest semantically related pieces of text.

In [None]:
chunk_size = 450
chunk_overlap = 0

# Splits on one fixed separator (a single space here); chunk length is counted in characters.
c_splitter = CharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
    separator=' '
)

# Tries the separators in order: paragraph break, line break, sentence end,
# space, then single characters, moving to the next one only while a piece is
# still longer than chunk_size.
# The sentence separator is a regex lookbehind, so it is written as a raw
# string (no invalid "\." escape) and is_separator_regex=True is required for
# the splitter to interpret it as a pattern instead of literal text.
r_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
    separators=["\n\n", "\n", r"(?<=\. )", " ", ""],
    is_separator_regex=True
)

In [None]:
some_text = """\
When writing documents, writers will use document structure to group content. \
This can convey to the reader, which idea's are related. For example, closely related ideas \
are in sentances. Similar ideas are in paragraphs. Paragraphs form a document. \n\n  \
Paragraphs are often delimited with a carriage return or two carriage returns. \
Carriage returns are the "backslash n" you see embedded in this string. \
Sentences have a period at the end, but also, have a space.\
and words are separated by space.\
"""


def _print_chunks(splitter):
    """Print every chunk `splitter` produces for `some_text`, each followed by a ### marker."""
    for chunk in splitter.split_text(some_text):
        print(chunk, '\n###\n')


# Character splitter first, then a 200-character rule, then the recursive splitter.
_print_chunks(c_splitter)
print("#" * 200)
_print_chunks(r_splitter)

When writing documents, writers will use document structure to group content. This can convey to the reader, which idea's are related. For example, closely related ideas are in sentances. Similar ideas are in paragraphs. Paragraphs form a document. 

 Paragraphs are often delimited with a carriage return or two carriage returns. Carriage returns are the "backslash n" you see embedded in this string. Sentences have a period at the end, but also, 
###

have a space.and words are separated by space. 
###

########################################################################################################################################################################################################
When writing documents, writers will use document structure to group content. This can convey to the reader, which idea's are related. For example, closely related ideas are in sentances. Similar ideas are in paragraphs. Paragraphs form a document. 
###

Paragraphs are often delimited with 

In [None]:
# Same recursive splitter with a smaller budget (150 characters) so the
# sentence-level separator actually comes into play.
# The sentence separator is a regex: it is written as a raw string to avoid the
# invalid "\." escape, uses a lookbehind so the period stays at the end of the
# sentence it belongs to, and needs is_separator_regex=True to be treated as a
# pattern rather than literal text.
r_splitter = RecursiveCharacterTextSplitter(
    chunk_size=150,
    chunk_overlap=0,
    separators=["\n\n", "\n", r"(?<=\. )", " ", ""],
    is_separator_regex=True
)
r_splitter.split_text(some_text)

["When writing documents, writers will use document structure to group content. This can convey to the reader, which idea's are related. For example,",
 'closely related ideas are in sentances. Similar ideas are in paragraphs. Paragraphs form a document.',
 'Paragraphs are often delimited with a carriage return or two carriage returns. Carriage returns are the "backslash n" you see embedded in this',
 'string. Sentences have a period at the end, but also, have a space.and words are separated by space.']

## 1.3 Token splitting

We can also split on token count explicitly, if we want. This can be useful because LLM context windows are measured in tokens. A token is often ~4 characters.

In [None]:
# One token per chunk, so the printed list shows how the text is tokenized.
text1 = "foo bar bazzyfoo"
text_splitter = TokenTextSplitter(
    chunk_overlap=0,
    chunk_size=1,
    encoding_name="gpt2",
)
token_chunks = text_splitter.split_text(text1)
print(token_chunks)

['foo', ' bar', ' b', 'az', 'zy', 'foo']


## 1.4 Vectorstores and Embeddings

### 1.4.1 Similarity Search

In [None]:
import torch  # used only to pick a device; presumably already present as a sentence_transformers dependency

loaders = [
    # Duplicate documents on purpose - messy data
    PyPDFLoader("docs/MachineLearning-Lecture01.pdf"),
    PyPDFLoader("docs/MachineLearning-Lecture01.pdf"),
    PyPDFLoader("docs/MachineLearning-Lecture02.pdf"),
    PyPDFLoader("docs/MachineLearning-Lecture03.pdf")
]
# embedding_model = OpenAIEmbeddings()

# Use the GPU when one is visible, otherwise fall back to CPU so the cell
# still runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
embedding_model = HuggingFaceEmbeddings(
    model_name='BAAI/bge-base-en-v1.5',
    model_kwargs={"device": device},
    encode_kwargs={"device": device, "batch_size": 100}
)

# Load every PDF page into one flat list of documents.
docs = []
for loader in loaders:
    docs.extend(loader.load())

# The sentence separator is a regex lookbehind: raw string (no invalid "\."
# escape) plus is_separator_regex=True so it is applied as a pattern.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=150,
    separators=["\n\n", "\n", r"(?<=\. )", " ", ""],
    is_separator_regex=True
)
splits = text_splitter.split_documents(docs)
print('split', len(splits))

# NOTE(review): re-running this cell presumably adds the same chunks to the
# persisted collection again — confirm, or clear ./chroma/ before a re-run.
persist_directory = './chroma/'
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=embedding_model,
    persist_directory=persist_directory
)
print('vectordb', vectordb._collection.count())

split 209
vectordb 209


In [None]:
# Show the text of the chunk at index 4 to inspect what the splitter produced.
splits[4].page_content

"So in this class, we've tried to convey to you a broad set of principl es and tools that will \nbe useful for doing many, many things. And ev ery time I teach this class, I can actually \nvery confidently say that af ter December, no matter what yo u're going to do after this \nDecember when you've sort of completed this  class, you'll find the things you learn in \nthis class very useful, and these things will be useful pretty much no matter what you end \nup doing later in your life.  \nSo I have more logistics to go over later, but let's say a few more words about machine \nlearning. I feel that machine learning grew out of  early work in AI, early work in artificial \nintelligence. And over the last — I wanna say last 15 or last 20 years or so, it's been viewed as a sort of growing new capability for computers. And in particular, it turns out \nthat there are many programs or there are many applications that you can't program by \nhand.  \nFor example, if you want to get a compute

In [None]:
# Show the chunk at index 5, which follows the chunk at index 4 in `splits`.
splits[5].page_content

"turns out to be extremely difficult to sit dow n and write a program to  fly a helicopter.  \nBut in contrast, if you want to do things like to get software to fl y a helicopter or have \nsoftware recognize handwritten digits, one very  successful approach is to use a learning \nalgorithm and have a computer learn by its elf how to, say, recognize your handwriting. \nAnd in fact, handwritten digit recognition, this is pretty much the only approach that \nworks well. It uses applications that are hard to program by hand.  \nLearning algorithms has also made I guess sign ificant inroads in what's sometimes called \ndatabase mining. So, for example, with the growth of IT and computers, increasingly \nmany hospitals are keeping around medical reco rds of what sort of patients, what \nproblems they had, what their prognoses was,  what the outcome was. And taking all of \nthese medical records, which started to be digitized only about maybe 15 years, applying \nlearning algorithms to them c

In [None]:
# Similarity search: fetch the 3 nearest chunks, report how many came back
# and show the best match, then persist the store.
question = "is there an email i can ask for help"
docs = vectordb.similarity_search(question, k=3)
best_match = docs[0]
print(len(docs), best_match.page_content, sep='\n')
vectordb.persist()

3
cs229-qa@cs.stanford.edu. This goes to an acc ount that's read by all the TAs and me. So 
rather than sending us email individually, if you send email to this account, it will 
actually let us get back to you maximally quickly with answers to your questions.  
If you're asking questions about homework probl ems, please say in the subject line which 
assignment and which question the email refers to, since that will also help us to route 
your question to the appropriate TA or to me  appropriately and get the response back to 
you quickly.  
Let's see. Skipping ahead — let's see — for homework, one midterm, one open and term 
project. Notice on the honor code. So one thi ng that I think will help you to succeed and 
do well in this class and even help you to enjoy this cla ss more is if you form a study 
group.  
So start looking around where you' re sitting now or at the end of class today, mingle a 
little bit and get to know your classmates. I strongly encourage you to form study g

### 1.4.2 MMR

In [None]:
# Compare the first two hits of a plain similarity search with the first two
# hits of a max-marginal-relevance search for the same question.
question = "what did they say about matlab?"

docs_ss = vectordb.similarity_search(question, k=3)
for rank in (1, 2):
    print(f'doc{rank} without mmr: ', docs_ss[rank - 1].page_content[:100])

docs_mmr = vectordb.max_marginal_relevance_search(question, k=3)
print()
for rank in (1, 2):
    print(f'doc{rank} with mmr: ', docs_mmr[rank - 1].page_content[:100])

doc1 without mmr:  those homeworks will be done in either MATLA B or in Octave, which is sort of — I 
know some people 
doc2 without mmr:  those homeworks will be done in either MATLA B or in Octave, which is sort of — I 
know some people 

doc1 with mmr:  those homeworks will be done in either MATLA B or in Octave, which is sort of — I 
know some people 
doc2 with mmr:  algorithm then? So what’s different? How come  I was making all that noise earlier about 
least squa


### 1.4.3 MetaData Search

In [None]:
# Restrict the similarity search to chunks whose `source` metadata is the
# third lecture PDF, then print the metadata of each hit.
question = "what did they say about regression in the third lecture?"
lecture03_filter = {"source":"docs/MachineLearning-Lecture03.pdf"}
docs = vectordb.similarity_search(question, k=3, filter=lecture03_filter)
for hit in docs:
    print(hit.metadata)

{'page': 0, 'source': 'docs/MachineLearning-Lecture03.pdf'}
{'page': 14, 'source': 'docs/MachineLearning-Lecture03.pdf'}
{'page': 4, 'source': 'docs/MachineLearning-Lecture03.pdf'}


In [None]:
# metadata_field_info = [
#     AttributeInfo(
#         name="source",
#         description="The lecture the chunk is from, should be one of `docs/MachineLearning-Lecture01.pdf`, `docs/MachineLearning-Lecture02.pdf`, or `docs/MachineLearning-Lecture03.pdf`",
#         type="string",
#     ),
#     AttributeInfo(
#         name="page",
#         description="The page from the lecture",
#         type="integer",
#     ),
# ]

# document_content_description = "Lecture notes"
# llm = OpenAI(temperature=0)
# retriever = SelfQueryRetriever.from_llm(
#     llm,
#     vectordb,
#     document_content_description,
#     metadata_field_info,
#     verbose=True
# )

# question = "what did they say about regression in the third lecture?"
# docs = retriever.get_relevant_documents(question)
# for d in docs:
#     print(d.metadata)

### 1.4.4 Contextual Compression Retriever

[Contextual Compression Model](https://medium.com/@baptisteloquette.entr/training-a-contextual-compression-model-2490f974eecf) | [LangChain Contextual Method](https://linuxhint.com/get-data-contextual-compression-langchain/)

A Contextual Compression Retriever is designed to improve the answers returned from vector store document similarity searches by better taking into account the context from the query. It wraps another retriever, and uses a Document Compressor (cross encoder) as an intermediate step after the initial similarity search that removes information irrelevant to the initial query from the retrieved documents. This reduces the amount of distraction a subsequent chain has to deal with when parsing the retrieved documents and making its final judgements.

In [None]:
def pretty_print_docs(docs):
    """Print each document's text under a "Document N:" heading.

    Consecutive documents are separated by a 100-dash rule on its own line.
    `docs` is any iterable of objects exposing a `page_content` string.
    """
    rule = "\n" + "-" * 100 + "\n"
    sections = []
    for position, doc in enumerate(docs, start=1):
        sections.append(f"Document {position}:\n\n" + doc.page_content)
    print(rule.join(sections))

# Wrap an MMR retriever over the vector store with an LLM-based extractor,
# then print what the combined retriever returns for the question.
llm = OpenAI(temperature=0)
compressor = LLMChainExtractor.from_llm(llm)
mmr_retriever = vectordb.as_retriever(search_type="mmr")
compression_retriever = ContextualCompressionRetriever(
    base_retriever=mmr_retriever,
    base_compressor=compressor,
)

question = "what did they say about matlab?"
compressed_docs = compression_retriever.get_relevant_documents(question)
pretty_print_docs(compressed_docs)

Document 1:

"MATLAB is I guess part of the programming language that makes it very easy to write codes using matrices, to write code for numerical routines, to move data around, to plot data. And it's sort of an extremely easy to learn tool to use for implementing a lot of learning algorithms."
----------------------------------------------------------------------------------------------------
Document 2:

"And the student said, "Oh, it was the MATLAB." So for those of you that don't know MATLAB yet, I hope you do learn it. It's not hard, and we'll actually have a short MATLAB tutorial in one of the discussion sections for those of you that don't know it."


### 1.4.5 Reranking

In [None]:
# NOTE(review): CohereRerank presumably needs the `cohere` package and a Cohere
# API key, neither of which is set up in the install/config cells — confirm.
from langchain.retrievers.document_compressors import CohereRerank
llm = OpenAI(temperature=0)
compressor = CohereRerank()
# The original passed `retriever`, which is never assigned at notebook level
# (its only assignment is in the commented-out self-query cell), so this cell
# raised NameError. Rerank the results of the vector store's own retriever.
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=vectordb.as_retriever()
)
compressed_docs = compression_retriever.get_relevant_documents(question)

### 1.4.6 Retrieval with TfIdf / SVM

In [None]:
# Load one lecture, join all of its pages into a single string and split that
# string into overlapping plain-text chunks.
loader = PyPDFLoader("docs/MachineLearning-Lecture01.pdf")
pages = loader.load()
all_page_text = [p.page_content for p in pages]
joined_page_text = " ".join(all_page_text)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
splits = text_splitter.split_text(joined_page_text)
print('pages', len(pages))
print('splits', len(splits))

# retrieval
# The original passed `embedding`, which is first assigned in the later QnA
# cell, so a top-to-bottom run raised NameError here. Use the embedding model
# created when the vector store was built.
svm_retriever = SVMRetriever.from_texts(splits, embedding_model)
tfidf_retriever = TFIDFRetriever.from_texts(splits)

pages 22
splits 45


In [None]:
# Query the SVM retriever and display its top-ranked document.
question = "What are major topics for this class?"
docs_svm=svm_retriever.get_relevant_documents(question)
docs_svm[0]

Document(page_content="let me just check what questions you have righ t now. So if there are no questions, I'll just \nclose with two reminders, which are after class today or as you start to talk with other \npeople in this class, I just encourage you again to start to form project partners, to try to \nfind project partners to do your project with. And also, this is a good time to start forming \nstudy groups, so either talk to your friends  or post in the newsgroup, but we just \nencourage you to try to star t to do both of those today, okay? Form study groups, and try \nto find two other project partners.  \nSo thank you. I'm looking forward to teaching this class, and I'll see you in a couple of \ndays.   [End of Audio]  \nDuration: 69 minutes", metadata={})

In [None]:
# not recommended
# Query the TF-IDF retriever and display its top-ranked document.
question = "what did they say about matlab?"
docs_tfidf = tfidf_retriever.get_relevant_documents(question)
docs_tfidf[0]

Document(page_content="Saxena and Min Sun here did, wh ich is given an image like this, right? This is actually a \npicture taken of the Stanford campus. You can apply that sort of cl ustering algorithm and \ngroup the picture into regions. Let me actually blow that up so that you can see it more \nclearly. Okay. So in the middle, you see the lines sort of groupi ng the image together, \ngrouping the image into [inaudible] regions.  \nAnd what Ashutosh and Min did was they then  applied the learning algorithm to say can \nwe take this clustering and us e it to build a 3D model of the world? And so using the \nclustering, they then had a lear ning algorithm try to learn what the 3D structure of the \nworld looks like so that they could come up with a 3D model that you can sort of fly \nthrough, okay? Although many people used to th ink it's not possible to take a single \nimage and build a 3D model, but using a lear ning algorithm and that sort of clustering \nalgorithm is the first ste

## 1.5 QnA

In [None]:
persist_directory = 'chroma/'
# Reopen the persisted store with the SAME embedding model that built it.
# The store was created with the HuggingFace `embedding_model`; querying it
# through OpenAIEmbeddings would embed questions in a different vector space
# (and dimension) than the stored chunks.
embedding = embedding_model
vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embedding
)
# Sanity check: number of chunks found in the reopened collection.
print(vectordb._collection.count())

# Question-answering chain that retrieves context from the vector store.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever()
)

209


In [None]:
# Ask the QA chain one question; the answer text is stored under the "result" key.
question = "What are major topics for this class?"
result = qa_chain({"query": question})
print(result["result"])

 This class will cover topics such as statistics, algebra, and machine learning.


In [None]:
# Build prompt and chain
# The template has two placeholders, {context} and {question}. Its text is sent
# to the model as-is, so it must not be edited casually.
template = """\
Use the following pieces of context to answer the question at the end. \
If you don't know the answer, just say that you don't know, don't try to make up an answer. \
Use three sentences maximum. Keep the answer as concise as possible. \
Always say "thanks for asking!" at the end of the answer. \n \
{context} \n
Question: {question} \n
Helpful Answer:
"""

QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# Rebuild the QA chain with the custom prompt; return_source_documents=True
# makes the retrieved chunks available under result["source_documents"].
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
    # chain_type="map_reduce",
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

# Print the answer followed by the first source chunk returned with it.
question = "Is probability a class topic?"
result = qa_chain({"query": question})
print(result["result"])
print(result["source_documents"][0])

Yes, probability is a class topic. We'll assume familiarity with basic probability and statistics, and most undergraduate statistics classes like Stat 116 taught here at Stanford will be more than enough. Thanks for asking!
page_content="of this class will not be very program ming intensive, although we will do some \nprogramming, mostly in either MATLAB or Octa ve. I'll say a bit more about that later.  \nI also assume familiarity with basic proba bility and statistics. So most undergraduate \nstatistics class, like Stat 116 taught here at Stanford, will be more than enough. I'm gonna \nassume all of you know what ra ndom variables are, that all of you know what expectation \nis, what a variance or a random variable is. And in case of some of you, it's been a while \nsince you've seen some of this material. At some of the discussion sections, we'll actually \ngo over some of the prerequisites, sort of as  a refresher course under prerequisite class. \nI'll say a bit more about that la

## 1.6 Chatbot

In [None]:
import param
import panel as pn

# initialize database and retriever chain
def load_db(file, chain_type, k, llm_name="gpt-3.5-turbo"):
    """Build a conversational retrieval chain over a single PDF.

    The PDF is loaded, split into 1000-character chunks with a 150-character
    overlap, embedded with OpenAIEmbeddings and indexed in an in-memory
    DocArray store.

    Args:
        file: Path of the PDF, passed to PyPDFLoader.
        chain_type: Forwarded to ConversationalRetrievalChain.from_llm.
        k: Number of chunks the similarity retriever returns per query.
        llm_name: Chat model name given to ChatOpenAI. The original passed the
            notebook-level `llm` object as `model_name`, which is a model
            instance rather than a name; the name is now an explicit,
            defaulted parameter so existing three-argument calls still work.

    Returns:
        The chain built by ConversationalRetrievalChain.from_llm, configured
        to also return source documents and the generated question.
    """
    loader = PyPDFLoader(file)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    docs = text_splitter.split_documents(documents)
    embeddings = OpenAIEmbeddings()
    db = DocArrayInMemorySearch.from_documents(docs, embeddings)
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})

    # create a chatbot chain. Memory is managed externally.
    qa = ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(model_name=llm_name, temperature=0),
        chain_type=chain_type,
        retriever=retriever,
        return_source_documents=True,
        return_generated_question=True,
    )
    return qa

# 2) LangChain LLM

Set `langchain.debug = True` to see more detail about what each chain does.

In [None]:
# Chat model used by the prompt examples below; temperature is set to 0.
chat = ChatOpenAI(temperature=0)

## 2.1 Prompt

In [None]:
# Prompt template with two placeholders, {style} and {text}; the text to
# translate is fenced in triple backticks inside the prompt.
template_string = """\
Translate the text \
that is delimited by triple backticks \
into a style that is {style}. \
text: ```{text}```
"""

prompt_template = ChatPromptTemplate.from_template(template_string)
# Show which input variables the template expects.
print(f'input{prompt_template.messages[0].prompt.input_variables}')

# Target style, substituted for {style}.
customer_style = """Singaporean english in an exagerate tone"""

# Source text, substituted for {text}.
customer_email = """\
Arrr, I be fuming that me blender lid \
flew off and splattered me kitchen walls \
with smoothie! And to make matters worse, \
the warranty don't cover the cost of \
cleaning up me kitchen. I need yer help \
right now, matey!
"""

# Fill in the template; the result is a list of messages.
customer_messages = prompt_template.format_messages(
    style=customer_style, text=customer_email
)
print(type(customer_messages[0]))
print('final prompt: ', customer_messages[0])

# Send the formatted messages to the chat model and print the reply text.
customer_response = chat(customer_messages)
print('response: ', customer_response.content)

input['style', 'text']
<class 'langchain.schema.messages.HumanMessage'>
final prompt:  content="Translate the text that is delimited by triple backticks into a style that is Singaporean english in an exagerate tone. text: ```Arrr, I be fuming that me blender lid flew off and splattered me kitchen walls with smoothie! And to make matters worse, the warranty don't cover the cost of cleaning up me kitchen. I need yer help right now, matey!\n```\n" additional_kwargs={} example=False
response:  Arrr, I be fumin' like a volcano lah! Me blender lid just decided to fly off like a rocket and splatter me kitchen walls with smoothie, can you believe it? Wah, siao liao! And to make matters worse, the warranty don't even cover the cost of cleaning up me kitchen, can you imagine? So jialat lah! I really need yer help right now, matey! Don't play play, hurry up and come and save me from this mess!


In [None]:
# output parser
# Review text that is substituted into the extraction prompt below.
customer_review = """\
This leaf blower is pretty amazing.  It has four settings:\
candle blower, gentle breeze, windy city, and tornado. \
It arrived in two days, just in time for my wife's \
anniversary present. \
I think my wife liked it so much she was speechless. \
So far I've been the only one using it, and I've been \
using it every other morning to clear the leaves on our lawn. \
It's slightly more expensive than the other leaf blowers \
out there, but I think it's worth it for the extra features.
"""

# Extraction prompt: asks for three fields and a JSON answer; {text} is the
# only placeholder.
review_template = """\
For the following text, extract the following information:

gift: Was the item purchased as a gift for someone else? \
Answer True if yes, False if not or unknown.

delivery_days: How many days did it take for the product \
to arrive? If this information is not found, output -1.

price_value: Extract any sentences about the value or price,\
and output them as a comma separated Python list.

Format the output as JSON with the following keys:
gift
delivery_days
price_value

text: {text}
"""

# Format the prompt, call the model and show the raw reply and its Python type.
prompt_template = ChatPromptTemplate.from_template(review_template)
messages = prompt_template.format_messages(text=customer_review)
chat = ChatOpenAI(temperature=0.0)
response = chat(messages)
print(response.content)
print('type: ', type(response.content))

# parse to python dictionary
# One ResponseSchema per field the parser should expect.
gift_schema = ResponseSchema(
    name="gift",
    description="\
    Was the item purchased as a gift for someone else? Answer True if yes, False if not or unknown.\
    "
)

delivery_days_schema = ResponseSchema(
    name="delivery_days",
    description="\
    How many days did it take for the product o arrive? \
    If this information is not found, output -1.\
    "
)

price_value_schema = ResponseSchema(
    name="price_value",
    description="Extract any sentences about the value or price, \
    and output them as a comma separated Python list.\
    "
)

response_schemas = [
    gift_schema, delivery_days_schema, price_value_schema
]

# NOTE(review): format_instructions is only printed here; it is not inserted
# into the prompt that produced `response`, so parsing relies on the model
# having answered in a form the parser accepts.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()
print('\nGuide', format_instructions)
output_dict = output_parser.parse(response.content)
print('\nOutput Parser', output_dict)
print(output_dict.get('delivery_days'))

{
  "gift": false,
  "delivery_days": 2,
  "price_value": ["It's slightly more expensive than the other leaf blowers out there, but I think it's worth it for the extra features."]
}
type:  <class 'str'>

Guide The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"gift": string  //     Was the item purchased as a gift for someone else? Answer True if yes, False if not or unknown.    
	"delivery_days": string  //     How many days did it take for the product o arrive?     If this information is not found, output -1.    
	"price_value": string  // Extract any sentences about the value or price,     and output them as a comma separated Python list.    
}
```

Output Parser {'gift': False, 'delivery_days': 2, 'price_value': ["It's slightly more expensive than the other leaf blowers out there, but I think it's worth it for the extra features."]}
2


## 2.2 Memory

### 2.2.1 Conversation Buffer Memory

In [None]:
# Conversation chain backed by a buffer memory; verbose=True prints the
# formatted prompt on every call.
chat = ChatOpenAI(temperature=0.0)
memory = ConversationBufferMemory()
conversation = ConversationChain(memory=memory, llm=chat, verbose=True)

In [None]:
# Three turns through the same chain. Only the value of the last call is
# displayed as the cell output.
conversation.predict(input="Hi, my name is Kean")
conversation.predict(input="What is 1+1?")
conversation.predict(input="What is my name?")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:

Human: Hi, my name is Kean
AI:[0m

[1m> Finished chain.[0m


[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Hi, my name is Kean
AI: Hello Kean! It's nice to meet you. How can I assist you today?
Human: What is 1+1?
AI:[0m

[1m> Finished chain.[0m


[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe follow

'Your name is Kean.'

In [None]:
# NOTE: the return value of load_memory_variables is discarded here (it is not
# the last expression of the cell); only memory.buffer is printed.
memory.load_memory_variables({})
print(memory.buffer)

Human: Hi, my name is Kean
AI: Hello Kean! It's nice to meet you. How can I assist you today?
Human: What is 1+1?
AI: 1+1 is equal to 2.
Human: What is my name?
AI: Your name is Kean.


In [None]:
# Window memory with k=1: save two exchanges, then display what the memory returns.
memory = ConversationBufferWindowMemory(k=1)
for human_turn, ai_turn in (
    ("Hi", "What's up"),
    ("Not much, just hanging", "Cool"),
):
    memory.save_context({"input": human_turn}, {"output": ai_turn})
memory.load_memory_variables({})

{'history': 'Human: Not much, just hanging\nAI: Cool'}

### 2.2.2 Conversation Token Buffer Memory

In [None]:
# Token-limited memory (max_token_limit=30): save three exchanges, then
# display what the memory returns.
memory = ConversationTokenBufferMemory(max_token_limit=30, llm=chat)
for human_turn, ai_turn in (
    ("AI is what?!", "Amazing!"),
    ("Backpropagation is what?", "Beautiful!"),
    ("Chatbots are what?", "Charming!"),
):
    memory.save_context({"input": human_turn}, {"output": ai_turn})
memory.load_memory_variables({})

{'history': 'AI: Beautiful!\nHuman: Chatbots are what?\nAI: Charming!'}

### 2.2.3 Conversation Summary Memory
Combines a running summary with a buffer of recent messages: the most recent turns are kept verbatim as long as they fit within `max_token_limit`, and older turns are folded into the summary.

In [None]:
# Summary-buffer memory with a 100-token limit, attached to a verbose chain.
memory = ConversationSummaryBufferMemory(llm=chat, max_token_limit=100)
conversation = ConversationChain(
    llm=chat, memory = memory, verbose=True
)

# create a long string
schedule = "\
There is a meeting at 8am with your product team. \
You will need your powerpoint presentation prepared. \
9am-12pm have time to work on your LangChain \
project which will go quickly because Langchain is such a powerful tool. \
At Noon, lunch at the italian resturant with a customer who is driving \
from over an hour away to meet you to understand the latest in AI. \
Be sure to bring your laptop to show the latest LLM demo."

# Save three exchanges; the last one carries the long schedule text.
memory.save_context({"input": "Hello"}, {"output": "What's up"})
memory.save_context({"input": "Not much, just hanging"}, {"output": "Cool"})
memory.save_context({"input": "What is on the schedule today?"}, {"output": f"{schedule}"})

# Print what the memory currently holds, then ask a follow-up through the chain.
print(memory.load_memory_variables({}))
conversation.predict(input="What would be a good demo to show?")

{'history': 'System: The human greets the AI and the AI responds with a casual greeting. The human mentions that they are not doing much and the AI responds with a casual remark.\nHuman: What is on the schedule today?\nAI: There is a meeting at 8am with your product team. You will need your powerpoint presentation prepared. 9am-12pm have time to work on your LangChain project which will go quickly because Langchain is such a powerful tool. At Noon, lunch at the italian resturant with a customer who is driving from over an hour away to meet you to understand the latest in AI. Be sure to bring your laptop to show the latest LLM demo.'}


[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
System: The hu

"A good demo to show would be the Language Model API. It showcases the capabilities of our AI technology by generating coherent and contextually relevant text based on user input. It's a great way to demonstrate the power and potential of our language processing capabilities."

## 2.3 Chain

In [None]:
# Load the product/review table (columns shown below: Product, Review) that the
# chain examples in this section draw their inputs from.
df = pd.read_csv(
    'https://raw.githubusercontent.com/ksm26/LangChain-for-LLM-Application-Development/main/Data.csv'
)
# Chat model used by the chain demos, created with temperature=0.9.
llm = ChatOpenAI(temperature=0.9)
df.head()

Unnamed: 0,Product,Review
0,Queen Size Sheet Set,I ordered a king size set. My only criticism w...
1,Waterproof Phone Pouch,"I loved the waterproof sac, although the openi..."
2,Luxury Air Mattress,This mattress had a small hole in the top of i...
3,Pillows Insert,This is the best throw pillow fillers on Amazo...
4,Milk Frother Handheld\n,I loved this product. But they only seem to l...


### 2.3.1 Sequential Chain

In [None]:
# Simplest case: a single LLMChain whose prompt has one {product} placeholder.
prompt = ChatPromptTemplate.from_template(
    template="What is the best name to describe a company that makes {product}?"
)
chain = LLMChain(prompt=prompt, llm=llm)
chain.run("Queen Size Sheet Set")

'RegalRest'

In [None]:
# Step 1: product -> company name
first_prompt = ChatPromptTemplate.from_template(
    template="What is the best name to describe a company that makes {product}?"
)
chain_one = LLMChain(prompt=first_prompt, llm=llm)

# Step 2: company name -> short description
second_prompt = ChatPromptTemplate.from_template(
    template="Write a 20 words description for the following company:{company_name}"
)
chain_two = LLMChain(prompt=second_prompt, llm=llm)

# SimpleSequentialChain runs the steps in order, feeding each step's single
# output to the next step as its single input.
overall_simple_chain = SimpleSequentialChain(
    verbose=True,
    chains=[chain_one, chain_two],
)
overall_simple_chain.run("Queen Size Sheet Set")



[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3mRegalRest[0m
[33;1m[1;3mRegalRest is a luxury mattress company providing comfortable and premium quality mattresses for a restful sleep experience.[0m

[1m> Finished chain.[0m


'RegalRest is a luxury mattress company providing comfortable and premium quality mattresses for a restful sleep experience.'

In [None]:
# Step 1: Review -> English_Review
first_prompt = ChatPromptTemplate.from_template(
    "Translate the following review to english:\n\n{Review}"
)
chain_one = LLMChain(prompt=first_prompt, llm=llm, output_key="English_Review")

# Step 2: English_Review -> summary
second_prompt = ChatPromptTemplate.from_template(
    "Can you summarize the following review in 1 sentence:\n\n{English_Review}"
)
chain_two = LLMChain(prompt=second_prompt, llm=llm, output_key="summary")

# Step 3: Review -> language
third_prompt = ChatPromptTemplate.from_template(
    "What language is the following review:"
    "\n\n{Review}"
)
chain_three = LLMChain(prompt=third_prompt, llm=llm, output_key="language")

# Step 4: summary + language -> followup_message
fourth_prompt = ChatPromptTemplate.from_template(
    "Write a follow up response to the following summary in the specified language:"
    "\n\nSummary: {summary}"
    "\n\nLanguage: {language}"
)
chain_four = LLMChain(prompt=fourth_prompt, llm=llm, output_key="followup_message")

# The steps are wired together by matching each chain's output_key to the
# placeholders of later prompts. Only `Review` is supplied by the caller;
# `language` is an intermediate value and is not listed in the returned outputs.
overall_chain = SequentialChain(
    verbose=True,
    input_variables=["Review"],
    output_variables=["English_Review", "summary", "followup_message"],
    chains=[chain_one, chain_two, chain_three, chain_four],
)
overall_chain(df['Review'][5])



[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


{'Review': "Je trouve le goût médiocre. La mousse ne tient pas, c'est bizarre. J'achète les mêmes dans le commerce et le goût est bien meilleur...\nVieux lot ou contrefaçon !?",
 'English_Review': "I find the taste mediocre. The foam doesn't hold, it's weird. I buy the same ones in stores and the taste is much better...\nOld batch or counterfeit!?",
 'summary': 'The reviewer is disappointed with the taste and foam quality of the product, comparing it negatively to the ones bought in stores, questioning if it is an old batch or a counterfeit item.',
 'followup_message': "Réponse de suivi :\n\nCher(e) responsable des ventes,\n\nNous vous remercions d'avoir partagé votre expérience avec notre produit. Nous sommes désolés d'apprendre que vous avez été déçu(e) par le goût et la qualité de la mousse. Nous comprenons votre déception et nous tenons à nous excuser pour toute gêne occasionnée.\n\nLa qualité de nos produits est d'une importance capitale pour nous, et nous prenons vos commentaires

### 2.3.2 Router Chain

In [None]:
# Subject-specific prompt templates for the router demo. Each takes a single
# {input} placeholder.
# A trailing backslash joins the next line with NO separator, so every continued
# line must end with a space before the backslash; otherwise words fuse together
# in the prompt sent to the model (e.g. "conciseand", "admitthat").
physics_template = """\
You are a very smart physics professor. \
You are great at answering questions about physics in a concise \
and easy to understand manner. \
When you don't know the answer to a question you admit \
that you don't know.
Here is a question:

{input}
"""

math_template = """\
You are a very good mathematician. \
You are great at answering math questions. \
You are so good because you are able to break down \
hard problems into their component parts,
answer the component parts, and then put them together \
to answer the broader question.

Here is a question:
{input}
"""

history_template = """\
You are a very good historian. \
You have an excellent knowledge of and understanding of people, \
events and contexts from a range of historical periods. \
You have the ability to think, reflect, debate, discuss and \
evaluate the past. You have a respect for historical evidence \
and the ability to make use of it to support your explanations \
and judgements.

Here is a question:
{input}
"""

computerscience_template = """\
You are a successful computer scientist. \
You have a passion for creativity, collaboration, \
forward-thinking, confidence, strong problem-solving capabilities, \
understanding of theories and algorithms, and excellent communication \
skills. You are great at answering coding questions. \
You are so good because you know how to solve a problem by \
describing the solution in imperative steps \
that a machine can easily interpret and you know how to \
choose a solution that has a good balance between \
time complexity and space complexity.

Here is a question:
{input}"""

# One entry per routable destination: its name, the description that is listed
# for the router, and the prompt template used to answer.
prompt_infos = [
    {
        "name": "physics",
        "description": "Good for answering questions about physics",
        "prompt_template": physics_template
    },
    {
        "name": "math",
        "description": "Good for answering math questions",
        "prompt_template": math_template
    },
    {
        "name": "History",
        "description": "Good for answering history questions",
        "prompt_template": history_template
    },
    {
        "name": "computer science",
        "description": "Good for answering computer science questions",
        "prompt_template": computerscience_template
    }
]

# Build one LLMChain per destination, keyed by destination name.
destination_chains = {}
for p_info in prompt_infos:
    name = p_info["name"]
    prompt_template = p_info["prompt_template"]
    prompt = ChatPromptTemplate.from_template(template=prompt_template)
    chain = LLMChain(llm=llm, prompt=prompt)
    destination_chains[name] = chain

# default chain is used if model can't decide which one to use
destinations = [f"{p['name']}: {p['description']}" for p in prompt_infos]
destinations_str = "\n".join(destinations)
default_prompt = ChatPromptTemplate.from_template("{input}")
default_chain = LLMChain(llm=llm, prompt=default_prompt)

# Print only names/prompts. Printing the chain objects themselves dumps the full
# ChatOpenAI repr, which includes `openai_api_key` in plain text.
print('destination_chains', list(destination_chains))
print('\ndestinations_str', destinations_str)
print('\ndefault_prompt', default_prompt)
print('\ndefault_chain', type(default_chain).__name__)

destination_chains {'physics': LLMChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, prompt=ChatPromptTemplate(input_variables=['input'], output_parser=None, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], output_parser=None, partial_variables={}, template="You are a very smart physics professor. You are great at answering questions about physics in a conciseand easy to understand manner. When you don't know the answer to a question you admitthat you don't know.\nHere is a question:\n\n{input}\n", template_format='f-string', validate_template=True), additional_kwargs={})]), llm=ChatOpenAI(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo', temperature=0.9, model_kwargs={}, openai_api_key='<REDACTED>', op

In [None]:
# "DEFAULT" would turn into None in destination chain

# Router prompt. It is formatted twice: first by str.format() below (filling
# {destinations}), then by PromptTemplate at run time (filling {input}). That is
# why literal braces are written as {{{{ / }}}} and the input placeholder as
# {{input}}.
# Continued lines end with a space before the backslash so that words are not
# fused together, and the literal backslashes in the JSON sketch are written as
# `\\` (a bare `\ ` is an invalid escape sequence in a non-raw string).
MULTI_PROMPT_ROUTER_TEMPLATE = """\
Given a raw text input to a \
language model select the model prompt best suited for the input. \
You will be given the names of the available prompts and a \
description of what the prompt is best suited for. \
You may also revise the original input if you think that revising \
it will ultimately lead to a better response from the language model.

<< FORMATTING >>
Return a markdown code snippet with a JSON object formatted to look like:
```json
{{{{
    "destination": string \\ name of the prompt to use or "DEFAULT"
    "next_inputs": string \\ a potentially modified version of the original input
}}}}
```

REMEMBER: "destination" MUST be one of the candidate prompt \
names specified below OR it can be "DEFAULT" if the input is not \
well suited for any of the candidate prompts.
REMEMBER: "next_inputs" can just be the original input \
if you don't think any modifications are needed.

<< CANDIDATE PROMPTS >>
{destinations}

<< INPUT >>
{{input}}

<< OUTPUT (remember to include the ```json)>>"""

# First formatting pass: insert the "name: description" list of destinations.
router_template = MULTI_PROMPT_ROUTER_TEMPLATE.format(
    destinations=destinations_str
)
# Second pass happens at run time; RouterOutputParser parses the model's reply.
router_prompt = PromptTemplate(
    template=router_template,
    input_variables=["input"],
    output_parser=RouterOutputParser(),
)

# The router picks a destination; MultiPromptChain dispatches to the matching
# destination chain, with `default_chain` as the fallback.
router_chain = LLMRouterChain.from_llm(llm, router_prompt)
chain = MultiPromptChain(
    router_chain=router_chain,
    destination_chains=destination_chains,
    default_chain=default_chain,
    verbose=True)

chain.run("What is black body radiation?")



[1m> Entering new MultiPromptChain chain...[0m
physics: {'input': 'What is black body radiation?'}
[1m> Finished chain.[0m


'Black body radiation refers to the electromagnetic radiation emitted by an object that absorbs all incident radiation. It is called "black body" because an idealized object that perfectly absorbs all radiation without reflecting or transmitting any appears black.\n\nWhen an object is heated, it emits radiation at different wavelengths or frequencies, which depend on its temperature. Black body radiation follows a specific pattern called Planck\'s law, which describes the intensity of the radiation at different wavelengths. As the temperature increases, the peak intensity shifts towards shorter wavelengths, corresponding to more energetic photons.\n\nOne important aspect of black body radiation is its color. At low temperatures, the radiation is primarily in the infrared range and not visible to the human eye. As the temperature increases, the object starts to glow red, then orange, and eventually becomes white-hot as the radiation spans the visible spectrum.\n\nBlack body radiation ha

In [None]:
# None of the candidate prompts fits this question: the trace below shows the
# routed destination as `None`, which is the case the default chain exists for.
chain.run("Why does the chicken cross the road")



[1m> Entering new MultiPromptChain chain...[0m
None: {'input': 'Why does the chicken cross the road'}
[1m> Finished chain.[0m


"There are numerous reasons people have come up with to explain why the chicken crosses the road. Here are a few popular humorous explanations:\n\n- To get to the other side: This is the classic punchline to the joke and is often used to highlight the simplicity of the question and subvert expectations.\n- To show it's not a chicken: This is a play on words, suggesting that the chicken is crossing the road not because it is a chicken, but to prove it is not cowardly.\n- Because its GPS told it to: This is a modern interpretation of the joke, implying that the chicken is being guided by technology like a GPS to navigate its way to the other side.\n- To escape a bad situation: This is a more serious interpretation, suggesting that the chicken is crossing the road to avoid danger, such as a predator or some other threat.\n- To find food or explore: This interpretation is based on the natural behavior of chickens, as they often wander and forage for food. The chicken may be crossing the ro

## 2.4 QnA

[Medium Blog - 4 Ways to Do Question Answering in LangChain](https://towardsdatascience.com/4-ways-of-question-answering-in-langchain-188c6707cc5a)

In [None]:
# Make sure LangChain's global debug flag is off before the QnA cells.
langchain.debug = False

In [None]:
import torch
from langchain.embeddings import HuggingFaceBgeEmbeddings

# Run the embedding model on the GPU when one is available and fall back to the
# CPU otherwise, so the cell also works on machines without CUDA.
embedding_device = 'cuda' if torch.cuda.is_available() else 'cpu'
embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-base-en",
    model_kwargs={'device': embedding_device},
    encode_kwargs={'normalize_embeddings': True}
)
file = 'OutdoorClothingCatalog_1000.csv'
llm = ChatOpenAI(temperature=0.0)
loader = CSVLoader(file_path=file)
docs = loader.load()
# NOTE(review): `embeddings` is not passed to the index creator, so `index` is
# built with whatever embedding VectorstoreIndexCreator defaults to; only `db`
# below uses the BGE embeddings defined above.
index = VectorstoreIndexCreator(vectorstore_cls=DocArrayInMemorySearch).from_loaders([loader])
# embeddings = OpenAIEmbeddings()
db = DocArrayInMemorySearch.from_documents(docs, embeddings)
# Show the raw catalog for reference.
pd.read_csv(file)

Unnamed: 0.1,Unnamed: 0,name,description
0,0,Women's Campside Oxfords,This ultracomfortable lace-to-toe Oxford boast...
1,1,"Recycled Waterhog Dog Mat, Chevron Weave",Protect your floors from spills and splashing ...
2,2,Infant and Toddler Girls' Coastal Chill Swimsu...,"She'll love the bright colors, ruffles and exc..."
3,3,"Refresh Swimwear, V-Neck Tankini Contrasts",Whether you're going for a swim or heading out...
4,4,EcoFlex 3L Storm Pants,Our new TEK O2 technology makes our four-seaso...
...,...,...,...
995,995,"Men's Classic Denim, Standard Fit",Crafted from premium denim that will last wash...
996,996,CozyPrint Sweater Fleece Pullover,The ultimate sweater fleece - made from superi...
997,997,Women's NRS Endurance Spray Paddling Pants,These comfortable and affordable splash paddli...
998,998,Women's Stop Flies Hoodie,This great-looking hoodie uses No Fly Zone Tec...


In [None]:
# Ask a question through the index: the trace below shows a RetrievalQA chain
# stuffing the retrieved catalog rows into the prompt as context. The answer is
# rendered as markdown.
query = (
    "Please list all your shirts with sun protection "
    "in a table in markdown and summarize each one."
)
response = index.query(question=query, llm=OpenAI(), verbose=True)
display(Markdown(response))

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "Please list all your shirts with sun protection in a table in markdown and summarize each one."
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Please list all your shirts with sun protection in a table in markdown and summarize each one.",
  "context": ": 618\nname: Men's Tropical Plaid Short-Sleeve Shirt\ndescription: Our lightest hot-weather shirt is rated UPF 50+ for superior protection from the sun's UV rays. With a traditional fit that is relaxed through the chest, sleeve, and waist, this fabric is made of 100% polyester and is wrinkle-resistant. With front and back cape venting that lets in cool breezes and two front bellows pockets, th



| Name | Description |
| --- | --- |
| Men's Tropical Plaid Short-Sleeve Shirt | UPF 50+ rated sun protection; 100% polyester fabric; front and back cape venting; two front bellows pockets. |
| Men's Plaid Tropic Shirt, Short-Sleeve | UPF 50+ rated sun protection; 52% polyester and 48% nylon fabric; SunSmart technology blocks 98% of the sun's harmful UV rays; front and back cape venting; two front bellows pockets. |
| Men's TropicVibe Shirt, Short-Sleeve | UPF 50+ rated sun protection; 71% nylon and 29% polyester shell; 100% polyester knit mesh lining; wrinkle resistant; front and back cape venting; two front bellows pockets. |
| Sun Shield Shirt by | UPF 50+ rated sun protection; 78% nylon and 22% Lycra Xtra Life fiber; wicks moisture for quick-drying comfort; abrasion resistant; fits comfortably over your favorite swimsuit. |

In [None]:
# Query the vector store directly (no LLM involved): fetch the k=4 documents
# most similar to the query and print them.
query = "Please suggest a shirt with sunblocking"
docs = db.similarity_search(query, k=4)
print('result: ', docs)

result:  [Document(page_content=': 255\nname: Sun Shield Shirt by\ndescription: "Block the sun, not the fun – our high-performance sun shirt is guaranteed to protect from harmful UV rays. \n\nSize & Fit: Slightly Fitted: Softly shapes the body. Falls at hip.\n\nFabric & Care: 78% nylon, 22% Lycra Xtra Life fiber. UPF 50+ rated – the highest rated sun protection possible. Handwash, line dry.\n\nAdditional Features: Wicks moisture for quick-drying comfort. Fits comfortably over your favorite swimsuit. Abrasion resistant for season after season of wear. Imported.\n\nSun Protection That Won\'t Wear Off\nOur high-performance fabric provides SPF 50+ sun protection, blocking 98% of the sun\'s harmful rays. This fabric is recommended by The Skin Cancer Foundation as an effective UV protectant.', metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 255}), Document(page_content=": 374\nname: Men's Plaid Tropic Shirt, Short-Sleeve\ndescription: Our Ultracomfortable sun protection is rate

In [None]:
# Embed a single sentence and inspect the vector: its length (768 in the output
# below) and its first five components.
embed = embeddings.embed_query("Hi my name is Harrison")
print(len(embed))
print(embed[:5])

# Same similarity search as in the previous cell, this time without an explicit k.
query = "Please suggest a shirt with sunblocking"
docs = db.similarity_search(query)
print('result: ', docs)

768
[-0.02024184912443161, 0.036355745047330856, -0.024344725534319878, -0.010754036717116833, 0.046086233109235764]
result:  [Document(page_content=': 255\nname: Sun Shield Shirt by\ndescription: "Block the sun, not the fun – our high-performance sun shirt is guaranteed to protect from harmful UV rays. \n\nSize & Fit: Slightly Fitted: Softly shapes the body. Falls at hip.\n\nFabric & Care: 78% nylon, 22% Lycra Xtra Life fiber. UPF 50+ rated – the highest rated sun protection possible. Handwash, line dry.\n\nAdditional Features: Wicks moisture for quick-drying comfort. Fits comfortably over your favorite swimsuit. Abrasion resistant for season after season of wear. Imported.\n\nSun Protection That Won\'t Wear Off\nOur high-performance fabric provides SPF 50+ sun protection, blocking 98% of the sun\'s harmful rays. This fabric is recommended by The Skin Cancer Foundation as an effective UV protectant.', metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 255}), Document(page_c

In [None]:
# chain_type options for combining retrieved documents:
# 1. stuff: feed all documents to the LLM in a single prompt
# 2. map_reduce: feed each chunk separately and combine the answers at the end
# 3. refine: feed each chunk together with the answer built from previous chunks
# 4. map_rerank: same as map_reduce but get a score for each output

retriever = db.as_retriever()
llm = ChatOpenAI(temperature=0.0)

# NOTE(review): the variable is named `qa_stuff` but the chain is built with
# chain_type="map_rerank"; the recorded output below is
# "This document does not answer the question" — confirm which type is intended.
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="map_rerank",
    retriever=retriever,
    # verbose=True
)

query = "Please list all your shirts with summary of each of them in a table in markdown"
# query = 'Recommend me the best shirt for winter'
response = qa_stuff.run(query)
display(Markdown(response))



This document does not answer the question

## 2.5 Evaluation

In [None]:
# Evaluation setup: reload the catalog, rebuild the vector index and create the
# QA chain whose answers will be graded in the following cells.
file = 'OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=file)
data = loader.load()
index = VectorstoreIndexCreator(vectorstore_cls=DocArrayInMemorySearch).from_loaders([loader])
llm = ChatOpenAI(temperature = 0.0)

# "stuff" chain over the index's retriever; retrieved documents are joined with
# the custom separator "<<<<>>>>>" inside the prompt.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=index.vectorstore.as_retriever(),
    verbose=True,
    chain_type_kwargs = {"document_separator": "<<<<>>>>>"}
)

In [None]:
# Hand-written query/answer pairs used as ground truth for evaluation.
examples = [
    {
        "query": "Do the Cozy Comfort Pullover Set have side pockets?",
        "answer": "Yes"
    },
    {
        "query": "What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?",
        "answer": "The DownTek collection"
    }
]

# Let an LLM generate additional question/answer pairs, one per document, from
# the first five catalog documents.
example_gen_chain = QAGenerateChain.from_llm(ChatOpenAI(), verbose=True)
new_examples = example_gen_chain.apply_and_parse([{"doc": t} for t in data[:5]])
print('new_examples', new_examples)





[1m> Entering new QAGenerateChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are a teacher coming up with questions to ask on a quiz. 
Given the following document, please generate a question and answer based on that document.

Example Format:
<Begin Document>
...
<End Document>
QUESTION: question here
ANSWER: answer here

These questions should be detailed and be based explicitly on information in the document. Begin!

<Begin Document>
page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish 

In [None]:
# Merge the generated examples into the hand-written ones. Only examples that
# are not already present are appended, so re-running this cell does not
# duplicate entries in `examples`.
examples += [example for example in new_examples if example not in examples]
# Sanity-check the QA chain on the second hand-written example.
query = examples[1]["query"]
print('query: ', query)
qa.run(query)

query:  What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


'The Ultra-Lofty 850 Stretch Down Hooded Jacket is from the DownTek collection.'

In [None]:
# Grade the QA chain with an LLM. The chain is rebuilt with verbose=False so
# the per-example traces do not flood the output.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=index.vectorstore.as_retriever(),
    chain_type="stuff",
    chain_type_kwargs={"document_separator": "<<<<>>>>>"},
    verbose=False,
)

# One prediction per example, then one grade per (example, prediction) pair.
predictions = qa.apply(examples)
eval_chain = QAEvalChain.from_llm(llm, verbose=False)
graded_outputs = eval_chain.evaluate(examples, predictions)

# Report question, reference answer, predicted answer and grade side by side.
for i in range(len(examples)):
    prediction = predictions[i]
    grade = graded_outputs[i]
    print(f"Example {i}:")
    print("Question: " + prediction['query'])
    print("Real Answer: " + prediction['answer'])
    print("Predicted Answer: " + prediction['result'])
    print("Predicted Grade: " + grade['results'])
    print()

Example 0:
Question: Do the Cozy Comfort Pullover Set have side pockets?
Real Answer: Yes
Predicted Answer: Yes, the Cozy Comfort Pullover Set does have side pockets.
Predicted Grade: CORRECT

Example 1:
Question: What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?
Real Answer: The DownTek collection
Predicted Answer: The Ultra-Lofty 850 Stretch Down Hooded Jacket is from the DownTek collection.
Predicted Grade: CORRECT

Example 2:
Question: What is the weight of the Women's Campside Oxfords?
Real Answer: The weight of the Women's Campside Oxfords is approximately 1 lb. 1 oz. per pair.
Predicted Answer: The weight of the Women's Campside Oxfords is approximately 1 lb. 1 oz. per pair.
Predicted Grade: CORRECT

Example 3:
Question: What are the dimensions of the small size of the Recycled Waterhog Dog Mat, Chevron Weave?
Real Answer: The dimensions of the small size of the Recycled Waterhog Dog Mat, Chevron Weave are 18" x 28".
Predicted Answer: The dimensions of the 

## 2.6 Agent

### 2.6.1 External Agent

In [None]:
# Agent with three external tools: a calculator (llm-math), Wikipedia lookup
# and web search (serpapi).
llm = ChatOpenAI(temperature=0)
tools = load_tools(["llm-math", "wikipedia", "serpapi"], llm=llm)
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    handle_parsing_errors=True,
)

In [None]:
# Arithmetic question: the trace below shows the agent choosing the Calculator tool.
agent("What is the 25% of 300?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI can use the calculator tool to find the answer to this math question.

Thought: I will use the calculator tool to calculate 25% of 300.
Action:
```
{
  "action": "Calculator",
  "action_input": "25% of 300"
}
```
[0m
Observation: [36;1m[1;3mAnswer: 75.0[0m
Thought:[32;1m[1;3mThe answer is 75.0.
Final Answer: 75.0[0m

[1m> Finished chain.[0m


{'input': 'What is the 25% of 300?', 'output': '75.0'}

In [None]:
# Factual question answered via the Wikipedia tool (see the trace below).
# The pieces are joined with explicit spaces; the previous backslash
# continuation fused "(CMU)" and "what" into "(CMU)what".
question = (
    "Tom M. Mitchell is an American computer scientist "
    "and the Founders University Professor at Carnegie Mellon University (CMU) "
    "what book did he write?"
)
agent(question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I can use Wikipedia to find out what book Tom M. Mitchell wrote.
Action:
```
{
  "action": "Wikipedia",
  "action_input": "Tom M. Mitchell"
}
```[0m



  lis = BeautifulSoup(html).find_all('li')



Observation: [33;1m[1;3mPage: Tom M. Mitchell
Summary: Tom Michael Mitchell (born August 9, 1951) is an American computer scientist and the Founders University Professor at Carnegie Mellon University (CMU). He is a founder and former Chair of the Machine Learning Department at CMU. Mitchell is known for his contributions to the advancement of machine learning, artificial intelligence, and cognitive neuroscience and is the author of the textbook Machine Learning. He is a member of the United States National Academy of Engineering since 2010. He is also a Fellow of the American Academy of Arts and Sciences, the American Association for the Advancement of Science and a Fellow and past President of the Association for the Advancement of Artificial Intelligence. In October 2018, Mitchell was appointed as the Interim Dean of the School of Computer Science at Carnegie Mellon.

Page: Tom Mitchell (Australian footballer)
Summary: Thomas Mitchell (born 31 May 1993) is a professional Australia

In [None]:
# Keep LangChain's global debug flag off for the next agent run.
langchain.debug = False

In [None]:
# Current-events question: the trace below shows the agent using the Search tool.
question = "Who is the oscar award winner in 2023"
result = agent(question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find out who won the Oscar award in 2023.
Action:
```
{
  "action": "Search",
  "action_input": "Oscar award winner 2023"
}
```[0m
Observation: [38;5;200m[1;3mThe 95th Academy Awards ceremony, presented by the Academy of Motion Picture Arts and Sciences, took place on March 12, 2023, at the Dolby Theatre in Hollywood, Los Angeles. During the gala, the AMPAS presented Academy Awards in 23 categories honoring films released in 2022.[0m
Thought:[32;1m[1;3mI need to find the specific category and winner for the Oscar award in 2023.
Action:
```
{
  "action": "Search",
  "action_input": "Oscar award winners 2023"
}
```
[0m
Observation: [38;5;200m[1;3mThe 95th Academy Awards ceremony, presented by the Academy of Motion Picture Arts and Sciences, took place on March 12, 2023, at the Dolby Theatre in Hollywood, Los Angeles. During the gala, the AMPAS presented Academy Awards in 23 categories honoring films 

### 2.6.2 Python Agent

In [None]:
# Python agent demo with LangChain's global debug tracing enabled for this cell
# only. The try/finally guarantees the flag is switched off again even if the
# agent run raises, so later cells are not flooded with debug output.
langchain.debug = True
try:
    # Agent that answers by writing and executing Python in a REPL tool.
    agent = create_python_agent(
        llm, tool=PythonREPLTool(), verbose=True
    )

    customer_list = [
        ["Harrison", "Chase"],
        ["Lang", "Chain"],
        ["Dolly", "Too"],
        ["Elle", "Elem"],
        ["Geoff","Fusion"],
        ["Trance","Former"],
        ["Jen","Ayai"]
    ]

    agent.run(f"""\
Sort these customers by last name and then first name \
and print the output: {customer_list} \
""")
finally:
    langchain.debug = False

[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor] Entering Chain run with input:
[0m{
  "input": "Sort these customers by last name and then first name and print the output: [['Harrison', 'Chase'], ['Lang', 'Chain'], ['Dolly', 'Too'], ['Elle', 'Elem'], ['Geoff', 'Fusion'], ['Trance', 'Former'], ['Jen', 'Ayai']] "
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor > 2:chain:LLMChain] Entering Chain run with input:
[0m{
  "input": "Sort these customers by last name and then first name and print the output: [['Harrison', 'Chase'], ['Lang', 'Chain'], ['Dolly', 'Too'], ['Elle', 'Elem'], ['Geoff', 'Fusion'], ['Trance', 'Former'], ['Jen', 'Ayai']] ",
  "agent_scratchpad": "",
  "stop": [
    "\nObservation:",
    "\n\tObservation:"
  ]
}
[32;1m[1;3m[llm/start][0m [1m[1:chain:AgentExecutor > 2:chain:LLMChain > 3:llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: You are an agent designed to write and execute python code to answer questions.\nY



[36;1m[1;3m[llm/end][0m [1m[1:chain:AgentExecutor > 2:chain:LLMChain > 3:llm:ChatOpenAI] [4.75s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "To sort the customers by last name and then first name, I can use the `sorted()` function and provide a custom key function that returns a tuple of the last name and first name. This will ensure that the sorting is done in the desired order.\nAction: Python_REPL\nAction Input: sorted([['Harrison', 'Chase'], ['Lang', 'Chain'], ['Dolly', 'Too'], ['Elle', 'Elem'], ['Geoff', 'Fusion'], ['Trance', 'Former'], ['Jen', 'Ayai']], key=lambda x: (x[1], x[0]))",
        "generation_info": {
          "finish_reason": "stop"
        },
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": "To sort the customers by last name and t

"[['Jen', 'Ayai'], ['Harrison', 'Chase'], ['Lang', 'Chain'], ['Elle', 'Elem'], ['Geoff', 'Fusion'], ['Trance', 'Former'], ['Dolly', 'Too']]"

In [None]:
# Custom agent tool that returns today's date as a string. The `text` argument
# is accepted but not used.
# NOTE(review): the `@tool` decorator presumably exposes this docstring to the
# agent as the tool description, so its wording (typos included) is runtime
# text — confirm before editing it.
# NOTE(review): the name `time` would shadow the stdlib `time` module if that is
# imported in this notebook; the recorded agent trace refers to the tool by this name.
@tool
def time(text: str) -> str:
    """Returns todays date, use this for any \
    questions related to knowing todays date. \
    The input should always be an empty string, \
    and this function will always return todays \
    date - any date mathmatics should occur \
    outside this function."""
    return str(date.today())

# Rebuild the agent with the custom `time` tool added to the previously loaded
# `tools` list.
agent = initialize_agent(
    tools + [time],
    llm,
    agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    handle_parsing_errors=True,
    verbose=True
)

# Best-effort call: report a failure instead of aborting the notebook. Only
# `Exception` is caught, so KeyboardInterrupt/SystemExit still propagate, and
# the actual error is included in the message instead of being hidden.
try:
    result = agent("whats the date today?")
except Exception as exc:
    print(f"exception on external access: {exc!r}")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mQuestion: What's the date today?
Thought: I can use the `time` tool to get the current date.
Action:
```
{
  "action": "time",
  "action_input": ""
}
```
[0m
Observation: [36;1m[1;3m2023-07-22[0m
Thought:[32;1m[1;3mI now know the final answer.
Final Answer: The date today is 2023-07-22.[0m

[1m> Finished chain.[0m
