<a href="https://colab.research.google.com/github/Nid989/LLMs-Overview/blob/main/langchain/langchain_for_llm_application_development/langchain_for_llm_application_development_part_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
!pip install openai python-dotenv langchain tiktoken pip install langchain[docarray] wikipedia DateTime

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
import os
from dotenv import load_dotenv, find_dotenv
import openai

# Load variables from the local dotenv file into the process environment,
# then hand the API key to the OpenAI client.
load_dotenv(find_dotenv(filename="./.env.txt"))

try:
    openai.api_key = os.environ["OPENAI_API_KEY"]
except KeyError:
    # Same exception type as before, but with a message that says how to fix it.
    raise KeyError(
        "OPENAI_API_KEY is not set; add it to ./.env.txt or export it before running this notebook"
    ) from None

In [4]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch

In [5]:
# Point a CSV loader at the product catalog and read it into a list of documents.
loader = CSVLoader(file_path="./OutdoorClothingCatalog_1000.csv")
data = loader.load()

In [6]:
# Build an in-memory vector store index over everything the loader yields.
index_creator = VectorstoreIndexCreator(vectorstore_cls=DocArrayInMemorySearch)
index = index_creator.from_loaders([loader])

In [7]:
# Chat model used by the retrieval QA chain (temperature 0).
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.0)

In [8]:
# Retrieval QA chain over the catalog index, using the "stuff" chain type with
# a custom separator placed between documents.
catalog_retriever = index.vectorstore.as_retriever()
stuff_chain_kwargs = {"document_separator": "<<<<>>>>"}

qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=catalog_retriever,
    chain_type="stuff",
    chain_type_kwargs=stuff_chain_kwargs,
    verbose=True,
)

In [9]:
# Look at two catalog rows to come up with hand-written test questions.
tuple(data[row] for row in (10, 11))

(Document(page_content=": 10\nname: Cozy Comfort Pullover Set, Stripe\ndescription: Perfect for lounging, this striped knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\n\nSize & Fit\n- Pants are Favorite Fit: Sits lower on the waist.\n- Relaxed Fit: Our most generous fit sits farthest from the body.\n\nFabric & Care\n- In the softest blend of 63% polyester, 35% rayon and 2% spandex.\n\nAdditional Features\n- Relaxed fit top with raglan sleeves and rounded hem.\n- Pull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg.\n\nImported.", metadata={'source': './OutdoorClothingCatalog_1000.csv', 'row': 10}),
 Document(page_content=': 11\nname: Ultra-Lofty 850 Stretch Down Hooded Jacket\ndescription: This technical stretch down jacket from our DownTek collection is sure to keep you warm and comfortable with its full-stretch construction providing exc

In [10]:
# Hand-written question/answer pairs, based on catalog rows 10 and 11.
examples = [
    dict(
        query="Do the Cozy Comfort Pullover Set have side pockets?",
        answer="Yes",
    ),
    dict(
        query="What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?",
        answer="The DownTek collection",
    ),
]

In [11]:
# LLM-generated examples; i.e. utilizing language models to generate test-examples
from langchain.evaluation.qa import QAGenerateChain

In [12]:
# QAGenerateChain prompts an LLM to write a question-answer pair for each
# document it is given.
generator_llm = ChatOpenAI(model="gpt-3.5-turbo")
example_gen_chain = QAGenerateChain.from_llm(generator_llm)

In [13]:
# Only the first five catalog documents are sent to the generator.
generation_inputs = [{"doc": document} for document in data[:5]]
new_examples = example_gen_chain.apply_and_parse(generation_inputs)

In [14]:
# Unwrap the generated pairs: an entry that nests its question/answer dict
# under "qa_pairs" is replaced by that dict, while an entry that is already
# flat is kept as-is, so re-running this cell does not raise KeyError.
new_examples = [example.get("qa_pairs", example) for example in new_examples]

In [15]:
# Combine the hard-coded and LLM-generated examples. Only examples that are not
# already in the list are appended, so re-running this cell adds no duplicates.
examples += [example for example in new_examples if example not in examples]

In [16]:
# Sample run: answer the first example's question and print the chain's
# response next to the reference answer.
sample = examples[0]
sample_response = qa.run(sample["query"])
print("qa retrieval chain generated response: ", sample_response, "\noriginal answer:", sample["answer"])



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
qa retrieval chain generated response:  Yes, the Cozy Comfort Pullover Set does have side pockets. 
original answer: Yes


`Manual Evaluation`

In [17]:
# The run above does not show what actually happens inside the chain, e.g.:
#   - what prompt is really sent to the LLM?
#   - which documents were retrieved?
#   - for a more complex, multi-step chain, what are the intermediate results?
# Setting langchain's debug flag makes the next run print those details.

import langchain
langchain.debug = True

In [18]:
# Same sample run as before; it is repeated here while the debug flag is on.
first_example = examples[0]
debug_response = qa.run(first_example["query"])
print("qa retrieval chain generated response: ", debug_response, "\noriginal answer:", first_example["answer"])

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "Do the Cozy Comfort Pullover Set have side pockets?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Do the Cozy Comfort Pullover Set have side pockets?",
  "context": ": 10\nname: Cozy Comfort Pullover Set, Stripe\ndescription: Perfect for lounging, this striped knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\n\nSize & Fit\n- Pants are Favorite Fit: Sits lower on the waist.\n- Relaxed Fit: Our most generous fit sits farthest from the body.\n\nFabric & Care\n- In the softest blend of 63% polyester, 35% rayon and 2% spandex.\n\nAdditional 

In [19]:
# Turn debug mode off again so the following cells print only the regular chain output.
langchain.debug = False

`LLM assisted evaluation`

In [20]:
# Run the QA chain over every example; the report cell further down reads each
# prediction's "query", "answer" and "result" keys.
predictions = qa.apply(examples)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [21]:
from langchain.evaluation.qa import QAEvalChain

In [22]:
# Grader model (temperature 0) wrapped in a QA evaluation chain.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.0)
eval_chain = QAEvalChain.from_llm(llm=llm)

In [23]:
# Let the evaluation chain grade each prediction against its example; the
# report cell below reads the grade from each entry's "results" key.
graded_outputs = eval_chain.evaluate(examples, predictions)

In [24]:
# Print a per-example report: question, reference answer, predicted answer and grade.
for idx, _ in enumerate(examples):
    print(f"Examples {idx}: ")
    prediction = predictions[idx]
    print("Question: " + prediction["query"])
    print("Real Answer: " + prediction["answer"])
    print("Predicted Answer: " + prediction["result"])
    grade = graded_outputs[idx]
    print("Predicted Grade: " + grade["results"])
    print()

Examples 0: 
Question: Do the Cozy Comfort Pullover Set have side pockets?
Real Answer: Yes
Predicted Answer: Yes, the Cozy Comfort Pullover Set does have side pockets.
Predicted Grade: CORRECT

Examples 1: 
Question: What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?
Real Answer: The DownTek collection
Predicted Answer: The Ultra-Lofty 850 Stretch Down Hooded Jacket is from the DownTek collection.
Predicted Grade: CORRECT

Examples 2: 
Question: What is the description of the Women's Campside Oxfords?
Real Answer: The Women's Campside Oxfords are an ultracomfortable lace-to-toe Oxford made of super-soft canvas with thick cushioning and quality construction for a broken-in feel from the first time they are worn.
Predicted Answer: The description of the Women's Campside Oxfords is: "This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on."
Predicted Grade

`LangChain: Agents`

`Outline`


*   `Using built in LangChain tools: DuckDuckGo search and Wikipedia`
*   `Defining your own tools`



In [25]:
from langchain.agents.agent_toolkits import create_python_agent
from langchain.agents import load_tools, initialize_agent
from langchain.agents import AgentType
from langchain.tools.python.tool import PythonREPLTool
from langchain.python import PythonREPL
from langchain.chat_models import ChatOpenAI

In [26]:
# temperature=0.0 because the LLM serves as the agent's reasoning engine,
# connecting it to external data sources and tools.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.0)

In [27]:
# Built-in tools handed to the agent:
#   - "llm-math": itself a chain, pairing the language model with a calculator
#     to solve math problems.
#   - "wikipedia": an API connection to Wikipedia that runs search queries and
#     returns the resulting documents.
builtin_tool_names = ["llm-math", "wikipedia"]
tools = load_tools(builtin_tool_names, llm=llm)

`CHAT_ZERO_SHOT_REACT_DESCRIPTION`

`important things to note`

*   `CHAT: this is an agent optimized to work with chat models`
*   `REACT: this is a prompting technique designed to get the best reasoning performance out of language models`



In [28]:
# handle_parsing_errors=True helps when the model emits text that cannot be
# parsed: the malformed text is passed back to the model with a request to
# correct it.
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    handle_parsing_errors=True,
)

In [29]:
# llm-math example: a percentage question for the calculator tool.
math_question = "What is 25% of 300?"
agent(math_question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI can use the calculator tool to find the answer to this question.

Action:
```json
{
  "action": "Calculator",
  "action_input": "25% * 300"
}
```[0m
Observation: [36;1m[1;3mAnswer: 75.0[0m
Thought:[32;1m[1;3mThe answer is 75.0.
Final Answer: 75.0[0m

[1m> Finished chain.[0m


{'input': 'What is 25% of 300?', 'output': '75.0'}

In [30]:
# Wikipedia example: a question the agent can answer by looking up a page.
question = (
    "Tom M. Mitchell is an American computer scientist "
    "and the Founders University Professor at Carnegie Mellon University (CMU) "
    "what book did he write?"
)
result = agent(question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I can use Wikipedia to find out what book Tom M. Mitchell wrote.
Action:
```json
{
  "action": "Wikipedia",
  "action_input": "Tom M. Mitchell"
}
```[0m
Observation: [33;1m[1;3mPage: Tom M. Mitchell
Summary: Tom Michael Mitchell (born August 9, 1951) is an American computer scientist and the Founders University Professor at Carnegie Mellon University (CMU). He is a founder and former Chair of the Machine Learning Department at CMU. Mitchell is known for his contributions to the advancement of machine learning, artificial intelligence, and cognitive neuroscience and is the author of the textbook Machine Learning. He is a member of the United States National Academy of Engineering since 2010. He is also a Fellow of the American Academy of Arts and Sciences, the American Association for the Advancement of Science and a Fellow and past President of the Association for the Advancement of Artificial Intelligence. In Oct

`Python Agent`

`It is common for coding copilots, and even ChatGPT, to write code and then execute it.`

In [31]:
# REPL: a way to interact with code, think of it as a jupyter-notebook.
python_repl = PythonREPLTool()
agent = create_python_agent(llm, tool=python_repl, verbose=True)

  warn_deprecated(


In [32]:
# [first name, last name] pairs for the agent to sort.
customer_list = [
    [first_name, last_name]
    for first_name, last_name in (
        ("Harrison", "Chase"),
        ("Lang", "Chain"),
        ("Dolly", "Too"),
        ("Elle", "Elem"),
        ("Geoff", "Fusion"),
        ("Trance", "Former"),
        ("Jen", "Ayai"),
    )
]

In [33]:
# Ask the Python agent to sort the customers; the list is embedded in the prompt as text.
sort_request = (
    "Sort these customers by last name and then first name "
    f"and print the output: {customer_list}"
)
agent.run(sort_request)



[1m> Entering new AgentExecutor chain...[0m


  warn_deprecated(


[32;1m[1;3mI can use the `sorted()` function to sort the list of customers. I will need to provide a key function that specifies the sorting order based on last name and then first name.
Action: Python_REPL
Action Input: sorted([['Harrison', 'Chase'], ['Lang', 'Chain'], ['Dolly', 'Too'], ['Elle', 'Elem'], ['Geoff', 'Fusion'], ['Trance', 'Former'], ['Jen', 'Ayai']], key=lambda x: (x[1], x[0]))[0m
Observation: [36;1m[1;3m[0m
Thought:[32;1m[1;3mThe customers have been sorted by last name and then first name.
Final Answer: [['Jen', 'Ayai'], ['Harrison', 'Chase'], ['Lang', 'Chain'], ['Elle', 'Elem'], ['Geoff', 'Fusion'], ['Trance', 'Former'], ['Dolly', 'Too']][0m

[1m> Finished chain.[0m


"[['Jen', 'Ayai'], ['Harrison', 'Chase'], ['Lang', 'Chain'], ['Elle', 'Elem'], ['Geoff', 'Fusion'], ['Trance', 'Former'], ['Dolly', 'Too']]"

In [34]:
# Detailed output: repeat the sort request with langchain's debug flag on.
import langchain

langchain.debug = True
try:
    agent.run(f"Sort these customers by last name and then first name and print the output: {customer_list}")
finally:
    # Always restore the flag, even if the run raises, so later cells do not
    # keep printing debug traces.
    langchain.debug = False

[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor] Entering Chain run with input:
[0m{
  "input": "Sort these customers by last name and then first name and print the output: [['Harrison', 'Chase'], ['Lang', 'Chain'], ['Dolly', 'Too'], ['Elle', 'Elem'], ['Geoff', 'Fusion'], ['Trance', 'Former'], ['Jen', 'Ayai']]"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor > 2:chain:LLMChain] Entering Chain run with input:
[0m{
  "input": "Sort these customers by last name and then first name and print the output: [['Harrison', 'Chase'], ['Lang', 'Chain'], ['Dolly', 'Too'], ['Elle', 'Elem'], ['Geoff', 'Fusion'], ['Trance', 'Former'], ['Jen', 'Ayai']]",
  "agent_scratchpad": "",
  "stop": [
    "\nObservation:",
    "\n\tObservation:"
  ]
}
[32;1m[1;3m[llm/start][0m [1m[1:chain:AgentExecutor > 2:chain:LLMChain > 3:llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: You are an agent designed to write and execute python code to answer questions.\nYou

  warn_deprecated(


[36;1m[1;3m[llm/end][0m [1m[1:chain:AgentExecutor > 5:chain:LLMChain > 6:llm:ChatOpenAI] [4.98s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "The customers have been sorted by last name and then first name.\nFinal Answer: [['Jen', 'Ayai'], ['Harrison', 'Chase'], ['Lang', 'Chain'], ['Elle', 'Elem'], ['Geoff', 'Fusion'], ['Trance', 'Former'], ['Dolly', 'Too']]",
        "generation_info": {
          "finish_reason": "stop"
        },
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": "The customers have been sorted by last name and then first name.\nFinal Answer: [['Jen', 'Ayai'], ['Harrison', 'Chase'], ['Lang', 'Chain'], ['Elle', 'Elem'], ['Geoff', 'Fusion'], ['Trance', 'Former'], ['Dolly', 'Too']]",
            "additional_kwargs": {}
          }
     

`Custom tool`

`Define your own tool`

In [35]:
# `tool` is a decorator: applied to any function, it turns that function
# into a tool that langchain can use.
from langchain.agents import tool
from datetime import date

In [36]:
# The docstring must be specified: it is what the agent uses to decide when
# to call this tool, so its text is kept exactly as written.
@tool
def time(text: str) -> str:
    """Returns todays date, use this for any \
    questions related to knowing todays date. \
    The input should always be an empty string, \
    and this function will always return todays \
    date - any date mathmatics should occur \
    outside this function."""
    # `text` is not used; per the docstring the input is expected to be an empty string.
    today = date.today()
    return str(today)

In [37]:
# Same chat ReAct agent as before, now with the custom `time` tool added to
# the built-in tools.
agent = initialize_agent(
    [*tools, time],
    llm,
    agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    handle_parsing_errors=True,
)

In [38]:
# Ask a question that the custom `time` tool is meant to answer.
date_question = "whats the date today?"
result = agent(date_question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mQuestion: What's the date today?
Thought: I can use the `time` tool to get the current date.
Action:
```
{
  "action": "time",
  "action_input": ""
}
```
[0m
Observation: [38;5;200m[1;3m2023-10-21[0m
Thought:[32;1m[1;3mCould not parse LLM output: I now know the final answer.[0m
Observation: Invalid or incomplete response
Thought:[32;1m[1;3mI made a mistake in my previous response. I apologize for the confusion. Let me correct it.

Question: What's the date today?
Thought: I can use the `time` tool to get the current date.
Action:
```
{
  "action": "time",
  "action_input": ""
}
```

[0m
Observation: [38;5;200m[1;3m2023-10-21[0m
Thought:[32;1m[1;3mI now know the final answer.
Final Answer: The date today is 2023-10-21.[0m

[1m> Finished chain.[0m
