# Tagging and Extraction Using OpenAI functions

In [70]:
import os
import openai

from dotenv import load_dotenv, find_dotenv
# Locate a .env file and load the variables it defines into the process environment.
_ = load_dotenv(find_dotenv()) # read local .env file
# Indexing os.environ (rather than using .get) raises KeyError right here if the key is not set.
openai.api_key = os.environ['OPENAI_API_KEY']

In [71]:
from typing import List
from pydantic import BaseModel, Field
from langchain.utils.openai_functions import convert_pydantic_to_openai_function

In [72]:
# Schema for the tagging function. The class docstring and every Field description are
# copied into the OpenAI function definition (see the converted dict in the next cell),
# so they act as instructions to the model and must be edited with that in mind.
class Tagging(BaseModel):
    """Tag the piece of text with particular info."""
    # Plain string: the three allowed values are only suggested by the description, not enforced.
    sentiment: str = Field(description="sentiment of text, should be `pos`, `neg`, or `neutral`")
    # Plain string as well: the ISO 639-1 format is requested in the description only.
    language: str = Field(description="language of text (should be ISO 639-1 code)")

In [73]:
convert_pydantic_to_openai_function(Tagging)

{'name': 'Tagging',
 'description': 'Tag the piece of text with particular info.',
 'parameters': {'title': 'Tagging',
  'description': 'Tag the piece of text with particular info.',
  'type': 'object',
  'properties': {'sentiment': {'title': 'Sentiment',
    'description': 'sentiment of text, should be `pos`, `neg`, or `neutral`',
    'type': 'string'},
   'language': {'title': 'Language',
    'description': 'language of text (should be ISO 639-1 code)',
    'type': 'string'}},
  'required': ['sentiment', 'language']}}

In [74]:
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI

In [75]:
model = ChatOpenAI(temperature=0)

In [76]:
tagging_functions = [convert_pydantic_to_openai_function(Tagging)]

In [77]:
prompt = ChatPromptTemplate.from_messages([
    ("system", "Think carefully, and then tag the text as instructed"),
    ("user", "{input}")
])

In [78]:
# Attach the Tagging function schema to every call made through this model.
# `function_call={"name": "Tagging"}` names the function the model should call, so the
# reply carries structured arguments instead of free text (note the empty `content`
# in the AIMessage outputs below).
model_with_functions = model.bind(
    functions=tagging_functions,
    function_call={"name": "Tagging"}
)

In [79]:
tagging_chain = prompt | model_with_functions

In [80]:
tagging_chain.invoke({"input": "I love langchain"})

AIMessage(content='', additional_kwargs={'function_call': {'name': 'Tagging', 'arguments': '{"sentiment":"pos","language":"en"}'}})

In [81]:
tagging_chain.invoke({"input": "non mi piace questo cibo"})

AIMessage(content='', additional_kwargs={'function_call': {'name': 'Tagging', 'arguments': '{"sentiment":"neg","language":"it"}'}})

In [82]:
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser

In [83]:
tagging_chain = prompt | model_with_functions | JsonOutputFunctionsParser()

In [84]:
tagging_chain.invoke({"input": "non mi piace questo cibo"})

{'sentiment': 'neg', 'language': 'it'}

In [85]:
tagging_chain.invoke({"input": "j'aime animaux"})

{'sentiment': 'pos', 'language': 'fr'}

In [86]:
tagging_chain.invoke({"input": "mujhe bhindi nahi pasand"})

{'sentiment': 'neg', 'language': 'hi'}

## Extraction

Extraction is similar to tagging, but it is used to extract multiple pieces of information.

In [87]:
from typing import Optional
# Schema for one extracted person. The docstring and Field descriptions are copied into
# the OpenAI function definition, so their wording is intentionally left unchanged.
class Person(BaseModel):
    """Information about a person."""
    name: str = Field(description="person's name")
    # Explicit default: pydantic v1 treats a bare Optional[...] field as optional, while
    # pydantic v2 treats it as required. default=None keeps `age` out of the schema's
    # `required` list under both, so a person mentioned without an age can still be extracted.
    age: Optional[int] = Field(default=None, description="person's age")

In [88]:
# Top-level schema passed to the model: a single `people` array whose items follow the
# Person schema. The docstring and description below end up in the function definition.
class Information(BaseModel):
    """Information to extract."""
    # Required list; it appears under 'required' in the converted function schema.
    people: List[Person] = Field(description="List of info about people")

In [89]:
convert_pydantic_to_openai_function(Information)

{'name': 'Information',
 'description': 'Information to extract.',
 'parameters': {'title': 'Information',
  'description': 'Information to extract.',
  'type': 'object',
  'properties': {'people': {'title': 'People',
    'description': 'List of info about people',
    'type': 'array',
    'items': {'title': 'Person',
     'description': 'Information about a person.',
     'type': 'object',
     'properties': {'name': {'title': 'Name',
       'description': "person's name",
       'type': 'string'},
      'age': {'title': 'Age',
       'description': "person's age",
       'type': 'integer'}},
     'required': ['name']}}},
  'required': ['people']}}

In [90]:
# Convert the Information schema into an OpenAI function definition (a one-element list).
extraction_functions = [convert_pydantic_to_openai_function(Information)]
# Bind it to the shared `model` and name "Information" as the function to call.
extraction_model = model.bind(functions=extraction_functions, function_call={"name": "Information"})

In [91]:
extraction_model.invoke("Joe is 30, his mom is Martha")

AIMessage(content='', additional_kwargs={'function_call': {'name': 'Information', 'arguments': '{"people":[{"name":"Joe","age":30},{"name":"Martha"}]}'}})

In [92]:
# NOTE: this rebinds the name `prompt`. Chains composed earlier (e.g. the
# sentiment/language `tagging_chain`) keep the prompt object they were built with;
# every chain composed from here on uses this extraction prompt until `prompt`
# is rebound again.
prompt = ChatPromptTemplate.from_messages([
    ("system", "Extract the relevant information, if not explicitly provided do not guess. Extract partial info"),
    ("human", "{input}")
])

In [93]:
extraction_chain = prompt | extraction_model

In [94]:
extraction_chain.invoke({"input": "Joe is 30, his mom is Martha"})

AIMessage(content='', additional_kwargs={'function_call': {'name': 'Information', 'arguments': '{"people":[{"name":"Joe","age":30},{"name":"Martha"}]}'}})

In [95]:
extraction_chain = prompt | extraction_model | JsonOutputFunctionsParser()

In [96]:
extraction_chain.invoke({"input": "Joe is 30, his mom is Martha"})

{'people': [{'name': 'Joe', 'age': 30}, {'name': 'Martha'}]}

In [97]:
from langchain.output_parsers.openai_functions import JsonKeyOutputFunctionsParser

In [98]:
extraction_chain = prompt | extraction_model | JsonKeyOutputFunctionsParser(key_name="people")

In [99]:
extraction_chain.invoke({"input": "Joe is 30, his mom is Martha"})

[{'name': 'Joe', 'age': 30}, {'name': 'Martha'}]

## Doing it for real

We can apply tagging to a larger body of text.

For example, let's load this blog post and extract tag information from a subset of the text.

In [100]:
from langchain.document_loaders import WebBaseLoader
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
documents = loader.load()

In [101]:
doc = documents[0]

In [102]:
# Work with only the first 10,000 characters of the page
# (presumably to keep the prompt within the model's context limit; TODO confirm).
page_content = doc.page_content[:10000]

In [103]:
print(page_content[:1000])







LLM Powered Autonomous Agents | Lil'Log







































Lil'Log

















|






Posts




Archive




Search




Tags




FAQ









      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


 


Table of Contents



Agent System Overview

Component One: Planning

Task Decomposition

Self-Reflection


Component Two: Memory

Types of Memory

Maximum Inner Product Search (MIPS)


Component Three: Tool Use

Case Studies

Scientific Discovery Agent

Generative Agents Simulation

Proof-of-Concept Examples


Challenges

Citation

References





Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.


In [104]:
# Schema for summarising a passage. As with the other schemas in this notebook, the
# docstring and Field descriptions are what the model sees as instructions.
class Overview(BaseModel):
    """Overview of a section of text."""
    summary: str = Field(description="Provide a concise summary of the content.")
    # Free-form string: no language code format is requested here.
    language: str = Field(description="Provide the language that the content is written in.")
    # A single string, not a list: the saved output shows one comma-separated string.
    keywords: str = Field(description="Provide keywords related to the content.")

In [105]:
# Expose the Overview schema as the single function available to the model.
overview_tagging_function = [
    convert_pydantic_to_openai_function(Overview)
]
# Name "Overview" as the function the model should call.
tagging_model = model.bind(
    functions=overview_tagging_function,
    function_call={"name":"Overview"}
)
# NOTE(review): two pieces of hidden state here.
# 1. `prompt` is whatever was bound most recently; running top to bottom that is the
#    "Extract the relevant information..." prompt from the Extraction section,
#    not the "tag the text" prompt used by the first tagging chain.
# 2. `tagging_chain` is rebound, replacing the sentiment/language chain of the same name.
tagging_chain = prompt | tagging_model | JsonOutputFunctionsParser()

In [106]:
tagging_chain.invoke({"input": page_content})

{'summary': 'The article discusses building autonomous agents powered by LLM (large language model) as the core controller. It covers components such as planning, memory, and tool use, along with examples and challenges in implementing LLM-powered agents.',
 'language': 'English',
 'keywords': 'LLM, autonomous agents, planning, memory, tool use, proof-of-concepts, challenges'}

In [107]:
# Schema for one paper reference. No Field descriptions are given, so the function
# definition carries only the field names, their types, and the class docstring.
class Paper(BaseModel):
    """Information about papers mentioned."""
    title: str
    # Explicit None default: a bare Optional[str] is optional under pydantic v1 but
    # required under pydantic v2. Papers are often cited without a clearly stated
    # author, so the field must stay optional regardless of the installed version.
    author: Optional[str] = None


# Top-level schema for paper extraction: a required list of Paper entries.
# The docstring below becomes the description of the "Info" function.
class Info(BaseModel):
    """Information to extract"""
    # The "papers" key is what JsonKeyOutputFunctionsParser(key_name="papers") pulls out later.
    papers: List[Paper]

In [108]:
# Expose the Info schema (a list of papers) as the single function available to the model.
paper_extraction_function = [
    convert_pydantic_to_openai_function(Info)
]
# NOTE(review): `extraction_model` and `extraction_chain` are rebound here, replacing the
# people-extraction versions defined in the Extraction section.
extraction_model = model.bind(
    functions=paper_extraction_function, 
    function_call={"name":"Info"}
)
# `prompt` is still the generic "Extract the relevant information..." prompt at this point;
# the saved output of the next cell shows it returning the blog post itself as a "paper".
extraction_chain = prompt | extraction_model | JsonKeyOutputFunctionsParser(key_name="papers")

In [109]:
extraction_chain.invoke({"input": page_content})

[{'title': 'LLM Powered Autonomous Agents', 'author': 'Lilian Weng'}]

In [115]:
# System prompt for paper extraction. It tells the model to skip the article's own
# title, to return an empty list when no papers are mentioned, and not to invent details.
template = """An article will be passed to you. Extract from it all papers that are mentioned by this article followed by its author. 

Do not extract the name of the article itself. If no papers are mentioned that's fine - you don't need to extract any! Just return an empty list.

Do not make up or guess ANY extra information. Only extract what exactly is in the text."""

# Rebinds `prompt`. Chains composed earlier keep the prompt they were built with, so
# `extraction_chain` has to be recomposed before it will use this template.
prompt = ChatPromptTemplate.from_messages([
    ("system", template),
    ("human", "{input}")
])

In [116]:
# Recompose the chain so that it uses the object currently bound to `prompt`;
# the model binding (Info function) and the "papers" key parser are the same as before.
extraction_chain = prompt | extraction_model | JsonKeyOutputFunctionsParser(key_name="papers")

In [112]:
# NOTE(review): this cell's execution count (112) is lower than that of the prompt and
# chain cells above it (115, 116), so the saved output may predate the current prompt.
# Re-run top to bottom on a fresh kernel to confirm the result.
extraction_chain.invoke({"input": page_content})

[{'title': 'Chain of thought (CoT; Wei et al. 2022)'},
 {'title': 'Tree of Thoughts (Yao et al. 2023)'},
 {'title': 'LLM+P (Liu et al. 2023)'},
 {'title': 'ReAct (Yao et al. 2023)'},
 {'title': 'Reflexion (Shinn & Labash 2023)'},
 {'title': 'Chain of Hindsight (CoH; Liu et al. 2023)'},
 {'title': 'Algorithm Distillation (AD; Laskin et al. 2023)'}]

In [118]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0)

In [119]:
splits = text_splitter.split_text(doc.page_content)

In [120]:
len(splits)

15

In [132]:
def flatten(matrix):
    """Concatenate the rows of ``matrix`` into one flat list.

    Only a single level of nesting is removed. The order of the rows, and of
    the items within each row, is preserved. An empty ``matrix`` yields ``[]``.
    """
    return [item for row in matrix for item in row]

In [133]:
flatten([[1, 2], [3, 4]])

[1, 2, 3, 4]

In [134]:
print(splits[0])

LLM Powered Autonomous Agents | Lil'Log







































Lil'Log

















|






Posts




Archive




Search




Tags




FAQ









      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


 


Table of Contents



Agent System Overview

Component One: Planning

Task Decomposition

Self-Reflection


Component Two: Memory

Types of Memory

Maximum Inner Product Search (MIPS)


Component Three: Tool Use

Case Studies

Scientific Discovery Agent

Generative Agents Simulation

Proof-of-Concept Examples


Challenges

Citation

References





Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
Agent 

In [135]:
from langchain.schema.runnable import RunnableLambda

In [136]:
# Turn one long string into a list of {"input": chunk} dicts, one per chunk produced by
# `text_splitter`, so that each chunk can fill the prompt's {input} variable.
prep = RunnableLambda(
    lambda text: [
        {"input": chunk}
        for chunk in text_splitter.split_text(text)
    ]
)

In [137]:
prep.invoke("hi")

[{'input': 'hi'}]

In [138]:
# Three stages: split the text into chunk inputs (`prep`), run the paper-extraction chain
# on every chunk (`.map()`), then merge the per-chunk lists into one list (`flatten`).
chain = prep | extraction_chain.map() | flatten

In [140]:
chain.invoke(page_content)

[{'title': 'Prompt Engineering'},
 {'title': 'Chain of thought', 'author': 'Wei et al. 2022'},
 {'title': 'Tree of Thoughts', 'author': 'Yao et al. 2023'},
 {'title': 'LLM+P', 'author': 'Liu et al. 2023'},
 {'title': 'ReAct', 'author': 'Yao et al. 2023'},
 {'title': 'Reflexion', 'author': 'Shinn & Labash 2023'},
 {'title': 'Chain of Hindsight (CoH; Liu et al. 2023)'},
 {'title': 'Algorithm Distillation (AD; Laskin et al. 2023)'},
 {'title': 'Duan et al. 2017'}]

## Testing on another example article

In [142]:
loader1 = WebBaseLoader("https://www.teachermagazine.com/in_en/articles/using-worked-examples-in-the-literature-classroom")
documents1 = loader1.load()

In [144]:
doc1 = documents1[0]

In [146]:
page_content1 = doc1.page_content[:10000]

In [147]:
print(page_content1[:1000])








































Using worked examples in the literature classroom - Teacher Magazine


























skip to main content




Toggle navigation













 Australia - English
 Southeast Asia - English
 Asia Tenggara - Bahasa
 India - English










Awards


Articles 



                                    All Articles
                                    



                                    Short Articles
                                    



                                    Long Reads
                                    



                                    Reader Submission
                                    



                                    Early Years
                                    



                                    Leadership
                                    




Infographics


Podcasts


Video









×
Search Teacher Magazine 





Keyword(s) 





Search



















Using worked examples in the literatu

In [149]:
tagging_chain.invoke({"input": page_content1})

{'summary': 'The article discusses the use of worked examples in the literature classroom to support learners in developing critical analysis skills. It emphasizes the importance of providing cognitive support to students and guiding them through step-by-step learning experiences.',
 'language': 'English',
 'keywords': 'literature classroom, worked examples, cognitive support, critical analysis skills, teaching practices'}

In [150]:
chain.invoke(page_content1)

[{'title': 'Using worked examples in the literature classroom',
  'author': 'Ima Kazmi'},
 {'title': 'Principles of Instruction', 'author': 'Barak Rosenshine'},
 {'title': 'Commentary on Principles of Instruction',
  'author': 'Tom Sherrington'},
 {'title': 'Using Worked Examples in Teaching Literary Analysis',
  'author': 'Ima Kazmi'},
 {'title': 'Learning from Worked Examples: How to Design Examples for Learning and Assessment',
  'author': 'Renkl et al.'},
 {'title': 'The Role of Worked Examples in Cognitive Skill Acquisition',
  'author': 'Cerbin'},
 {'title': 'Cognitive Load Theory and Instructional Design: Recent Developments',
  'author': 'Chen et al.'},
 {'title': 'Worked examples', 'author': 'Cerbin, W. '},
 {'title': 'The effect of worked examples on learning solution steps and knowledge transfer',
  'author': 'Chen, O., Retnowati, E., Chan, B. K. Y., & Kalyuga, S. '},
 {'title': 'Why and how educators use exemplars',
  'author': 'Hawe, E., Dixon, H., and Hamilton, R. '},
 {'