In [None]:
#!pip install -U langchain_openai
#!pip install langchain
#!pip install -U langchain-community

In [1]:
import openai
from dotenv import load_dotenv
import os

load_dotenv()
os.environ["OPENAI_API_KEY"] = os.getenv("openai_api_key")

In [3]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

from langchain.output_parsers import ResponseSchema, StructuredOutputParser, CommaSeparatedListOutputParser
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser

model = ChatOpenAI(model="gpt-4o-mini")


Output parsers have an associated method 
.get_format_instructions() that returns a string containing instructions for how the output of a language model should be formattd.  These format instructions are also used an inputs in the prompt..


In [4]:
output_parser = CommaSeparatedListOutputParser()
csv_format_ins = output_parser.get_format_instructions()
csv_format_ins

'Your response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`'

Example 1:  Prompt that returns 5 examples of a given subject, formatted as CSV

In [5]:
prompt_template = """
List {number} examples of {subject}.  Format instructions:  {format_instructions}
"""

prompt_1 = ChatPromptTemplate.from_template(template = prompt_template)

In [6]:
prompt_1.messages[0].prompt.input_variables

['format_instructions', 'number', 'subject']

In [10]:
chain = prompt_1 | model | output_parser

subject_input = "Holidays"
number_input = 8

response = chain.invoke({"format_instructions": csv_format_ins, "number": number_input, "subject": subject_input})
print(response)

['Christmas', 'Thanksgiving', 'Halloween', 'Easter', "New Year's Day", 'Independence Day', "Valentine's Day", 'Labor Day']


In [9]:
response[1]

'Abraham Lincoln'

Example 2:  Model that users OpenAI fuction calling (by binding the model to a defined function) and JsonOutputFunctionsParser() to tag a document with keywords.

In [11]:
from langchain.document_loaders import WebBaseLoader
loader = WebBaseLoader("https://finance.yahoo.com/news/us-economy-adds-303000-jobs-unemployment-falls-to-38-in-march-as-labor-market-continues-to-impress-123226886.html")
documents = loader.load()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [12]:
doc = documents[0]
doc.page_content

'\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nUS economy adds 303,000 jobs, unemployment falls to 3.8% in March as labor market continues to impress   \n\n\n\n              News  Today\'s news   US   Politics   World   Tech  Reviews and deals  Audio  Computing  Gaming  Health  Home  Phones  Science  TVs     Climate change   Health   Science   2024 election   Originals  The 360     Newsletters    Life  Health  COVID-19  Fall allergies  Health news  Mental health  Relax  Sexual health  Studies  The Unwind     Parenting  Family health  So mini ways     Style and beauty  It Figures  Unapologetically     Horoscopes   Shopping  Buying guides     Food   Travel   Autos   Gift ideas   Buying guides    Entertainment  Celebrity   TV   Movies   Music   How to Watch   Interviews   Videos     Finance  My Portfolio   News  Latest News  Stock Market  Originals  The Morning

Create the schema for a function call for "Keyword Tagging".  This function returns the arguments "summary" and "keywords", based on the document input by the user (in this case loaded from a website).

In [14]:
prompt_2 = ChatPromptTemplate.from_messages([
    ("system", "Extract the relevant information, if not explicitly provided do not guess. Extract partial info"),
    ("human", "{input}")
])

In [15]:
function_tag_with_keywords = [{'name': 'Keyword_Tag',
                                'description': 'Extract keywords from document.',
                                'parameters': {  'properties':  
                                                 {
                                                    'summary': {'description': 'Provide a concise summary of the content.', 'type': 'string'},
                                                    'keywords': {'description': 'Provide keywords related to the content.', 'type': 'string'}
                                                 },
                                                 'required': ['summary', 'keywords'],
                                                 'type': 'object'
                                              }
                                }]

We can use the OpenAI function calling capacility from Langchin by binding the function to the model using model.bind()

We will also use the JsonOutputFunctionsParser() for the output.

In [16]:
model_tagging = model.bind(
    functions = function_tag_with_keywords,
    function_call = {"name":"Keyword_Tag"}
)

tagging_chain = prompt_2 | model_tagging | JsonOutputFunctionsParser()

In [18]:
response = tagging_chain.invoke({"input": doc.page_content})

In [23]:
response

{'summary': "In March 2024, the US economy added 303,000 jobs, surpassing expectations of 214,000. The unemployment rate fell to 3.8% from 3.9% in February. Wages increased by 4.1% year-over-year, marking the lowest annual gain since June 2021. The labor force participation rate rose to 62.7%. Major job gains were seen in healthcare (72,000 jobs) and government (71,000 jobs). This labor market strength may impact the Federal Reserve's decision on interest rate cuts.",
 'keywords': 'US economy, jobs added, unemployment rate, Bureau of Labor Statistics, labor market, wages, Federal Reserve, interest rates, healthcare jobs, government jobs, labor force participation'}

In [24]:
response['keywords']

'US economy, jobs added, unemployment rate, Bureau of Labor Statistics, labor market, wages, Federal Reserve, interest rates, healthcare jobs, government jobs, labor force participation'

Example 3: (adapted from Langchain course on deeplearning.ai)

Uses StructuredOutputParser to create structre from response schema.

In [33]:
customer_review = """\
This leaf blower is pretty amazing.  It has four settings: candle blower, gentle breeze, windy city, and tornado. 
It arrived in two days, just in time for my wife's anniversary present. 
I think my wife liked it so much she was speechless. So far I've been the only one using it, and I've been 
using it every other morning to clear the leaves on our lawn. 
It's slightly more expensive than the other leaf blowers out there,
but I think it's worth it for the extra features.
"""


In [34]:
gift_schema = ResponseSchema(name="gift",
                             description="Was the item purchased\
                             as a gift for someone else? \
                             Answer True if yes,\
                             False if not or unknown.")

delivery_days_schema = ResponseSchema(name="delivery_days",
                                      description="How many days\
                                      did it take for the product\
                                      to arrive? If this \
                                      information is not found,\
                                      output -1.")

price_value_schema = ResponseSchema(name="price_value",
                                    description="Extract any\
                                    sentences about the value or \
                                    price, and output them as a \
                                    comma separated Python list.")

response_schemas = [gift_schema, 
                    delivery_days_schema,
                    price_value_schema]

In [35]:
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()
format_instructions

'The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":\n\n```json\n{\n\t"gift": string  // Was the item purchased                             as a gift for someone else?                              Answer True if yes,                             False if not or unknown.\n\t"delivery_days": string  // How many days                                      did it take for the product                                      to arrive? If this                                       information is not found,                                      output -1.\n\t"price_value": string  // Extract any                                    sentences about the value or                                     price, and output them as a                                     comma separated Python list.\n}\n```'

In [36]:
review_template = """\
For the following text, extract the following information:

gift: Was the item purchased as a gift for someone else? \
Answer True if yes, False if not or unknown.

delivery_days: How many days did it take for the product\
to arrive? If this information is not found, output -1.

price_value: Extract any sentences about the value or price,\
and output them as a comma separated Python list.

text: {text}

{format_instructions}
"""

prompt_3 = ChatPromptTemplate.from_template(template=review_template)


In [37]:
prompt_3.messages[0].prompt.input_variables

['format_instructions', 'text']

In [38]:
chain = prompt_3 | model |output_parser

response = chain.invoke({"format_instructions": format_instructions, "text": customer_review})

response

{'gift': 'True',
 'delivery_days': '2',
 'price_value': "It's slightly more expensive than the other leaf blowers out there, but I think it's worth it for the extra features."}

In [28]:
type(response)

dict

In [29]:
response['delivery_days']

'2'