In [1]:
!pip install langchain==0.1.19
!pip install langchain-openai==0.1.6
!pip install langchain-community==0.0.38

Collecting langchain==0.1.19
  Downloading langchain-0.1.19-py3-none-any.whl.metadata (13 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain==0.1.19)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting langchain-community<0.1,>=0.0.38 (from langchain==0.1.19)
  Downloading langchain_community-0.0.38-py3-none-any.whl.metadata (8.7 kB)
Collecting langchain-core<0.2.0,>=0.1.52 (from langchain==0.1.19)
  Downloading langchain_core-0.1.52-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain-text-splitters<0.1,>=0.0.1 (from langchain==0.1.19)
  Downloading langchain_text_splitters-0.0.2-py3-none-any.whl.metadata (2.2 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain==0.1.19)
  Downloading langsmith-0.1.137-py3-none-any.whl.metadata (13 kB)
Collecting tenacity<9.0.0,>=8.1.0 (from langchain==0.1.19)
  Downloading tenacity-8.5.0-py3-none-any.whl.metadata (1.2 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain==

In [2]:
from google.colab import userdata
# Read the API credentials from Colab's secret store (userdata).
# NOTE(review): the OpenAI key is looked up under 'OPEN_API_KEY' (no "AI") - confirm this
# matches the name the secret was saved under.
OPENAI_API_KEY = userdata.get('OPEN_API_KEY')
HUGGINGFACEHUB_API_TOKEN = userdata.get('HF_TOKEN')

In [4]:
import os

# Export both credentials as environment variables (same names and same order as before).
# ChatOpenAI is created further down without an explicit key, so it presumably reads
# OPENAI_API_KEY from here - TODO confirm.
os.environ.update({
    'HUGGINGFACEHUB_API_TOKEN': HUGGINGFACEHUB_API_TOKEN,
    'OPENAI_API_KEY': OPENAI_API_KEY,
})

In [5]:
from langchain_openai import ChatOpenAI

# Chat model shared by the chains built below; temperature is set to 0.
chatgpt = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
)

In [6]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field

# Schema of the structured answer the LLM is asked to return.
# NOTE: documented with comments rather than a class docstring on purpose - pydantic copies a
# model docstring into the generated JSON schema, which would alter the format instructions.
class QueryResponse(BaseModel):
    # Short summary of the topic.
    description: str = Field(
        description="A brief description of the topic asked by the user",
    )
    # Advantages, requested as three bullet points inside a single string.
    pros: str = Field(
        description="3 bullet points showing the pros of the topic asked by the user",
    )
    # Disadvantages, requested as three bullet points inside a single string.
    cons: str = Field(
        description="3 bullet points showing the cons of the topic asked by the user",
    )
    # One-line wrap-up.
    conclusion: str = Field(
        description="One line conclusion of the topic asked by the user",
    )

# Build a parser bound to the QueryResponse schema; its format instructions are
# injected into the prompt template in a later cell.
parser = PydanticOutputParser(pydantic_object=QueryResponse)
# Bare expression so the notebook displays the parser's repr.
parser

PydanticOutputParser(pydantic_object=<class '__main__.QueryResponse'>)

In [7]:
# Show the format instructions LangChain generates from the QueryResponse schema.
# print() renders the embedded newlines as line breaks instead of showing the escaped repr.
print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"description": {"title": "Description", "description": "A brief description of the topic asked by the user", "type": "string"}, "pros": {"title": "Pros", "description": "3 bullet points showing the pros of the topic asked by the user", "type": "string"}, "cons": {"title": "Cons", "description": "3 bullet points showing the cons of the topic asked by the user", "type": "string"}, "conclusion": {"title": "Conclusion", "description": "One line conclusion of the topic asked by the user", "type": "string"}}, "required": ["descriptio

In [8]:
# Prompt skeleton: {format_instructions} is pre-filled from the parser below,
# while {query} is supplied on every call.
prompt_txt = """
             Answer the user query and generate the response based on the following formatting instructions

             Format Instructions:
             {format_instructions}

             Query:
             {query}
            """

# Bind the parser's formatting instructions once, leaving `query` as the only input variable.
prompt = PromptTemplate(
    input_variables=["query"],
    partial_variables=dict(format_instructions=parser.get_format_instructions()),
    template=prompt_txt,
)

In [9]:
# Display the assembled PromptTemplate, including the pre-filled format instructions.
prompt

PromptTemplate(input_variables=['query'], partial_variables={'format_instructions': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"description": {"title": "Description", "description": "A brief description of the topic asked by the user", "type": "string"}, "pros": {"title": "Pros", "description": "3 bullet points showing the pros of the topic asked by the user", "type": "string"}, "cons": {"title": "Cons", "description": "3 bullet points showing the cons of the topic asked by the user", "type": "string"}, "conclusion": {"title": "Conclusion", "description": "One l

In [10]:
# LCEL pipeline: fill the prompt, send it to the chat model, then let the parser
# turn the model's reply into the structured response.
chain = prompt | chatgpt | parser

In [11]:
# Display the composed chain.
chain

PromptTemplate(input_variables=['query'], partial_variables={'format_instructions': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"description": {"title": "Description", "description": "A brief description of the topic asked by the user", "type": "string"}, "pros": {"title": "Pros", "description": "3 bullet points showing the pros of the topic asked by the user", "type": "string"}, "cons": {"title": "Cons", "description": "3 bullet points showing the cons of the topic asked by the user", "type": "string"}, "conclusion": {"title": "Conclusion", "description": "One l

In [12]:
# Run the chain on a single question; the prompt's `query` variable is supplied here.
question = "Tell me about Commercial Cricket"
response = chain.invoke({"query": question})

In [13]:
# Display the parsed result (a QueryResponse instance, as the output shows).
response

QueryResponse(description='Commercial Cricket refers to the business aspect of the sport of cricket, where various organizations, teams, players, and sponsors are involved in generating revenue through matches, tournaments, endorsements, and other related activities.', pros='1. Increased financial opportunities for players and teams. 2. Growth of the sport through better infrastructure and facilities. 3. Exposure to international markets and audiences.', cons='1. Risk of commercialization overshadowing the spirit of the game. 2. Pressure on players to perform for financial gains. 3. Disparities in financial resources leading to unequal competition.', conclusion='Commercial Cricket has brought both benefits and challenges to the sport, shaping its modern landscape.')

In [14]:
# Individual fields of the parsed result are available as attributes.
response.description

'Commercial Cricket refers to the business aspect of the sport of cricket, where various organizations, teams, players, and sponsors are involved in generating revenue through matches, tournaments, endorsements, and other related activities.'

In [15]:
# Convert the parsed result into a regular dictionary.
response.dict()

{'description': 'Commercial Cricket refers to the business aspect of the sport of cricket, where various organizations, teams, players, and sponsors are involved in generating revenue through matches, tournaments, endorsements, and other related activities.',
 'pros': '1. Increased financial opportunities for players and teams. 2. Growth of the sport through better infrastructure and facilities. 3. Exposure to international markets and audiences.',
 'cons': '1. Risk of commercialization overshadowing the spirit of the game. 2. Pressure on players to perform for financial gains. 3. Disparities in financial resources leading to unequal competition.',
 'conclusion': 'Commercial Cricket has brought both benefits and challenges to the sport, shaping its modern landscape.'}

In [17]:
# Print each field of the parsed answer under its own heading.
for field_name, field_text in response.dict().items():
    print(f"{field_name}:\n{field_text}\n")

description:
Commercial Cricket refers to the business aspect of the sport of cricket, where various organizations, teams, players, and sponsors are involved in generating revenue through matches, tournaments, endorsements, and other related activities.

pros:
1. Increased financial opportunities for players and teams. 2. Growth of the sport through better infrastructure and facilities. 3. Exposure to international markets and audiences.

cons:
1. Risk of commercialization overshadowing the spirit of the game. 2. Pressure on players to perform for financial gains. 3. Disparities in financial resources leading to unequal competition.

conclusion:
Commercial Cricket has brought both benefits and challenges to the sport, shaping its modern landscape.



In [19]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field

# Schema of the structured answer for the JSON-parser example.
# This rebinds QueryResponse with the same four fields as the definition earlier in the notebook.
# NOTE: comments are used instead of a class docstring on purpose - pydantic copies a model
# docstring into the generated JSON schema, which would alter the format instructions.
class QueryResponse(BaseModel):
    # Short summary of the topic.
    description: str = Field(
        description="A brief description of the topic asked by the user",
    )
    # Advantages, requested as three bullet points inside a single string.
    pros: str = Field(
        description="3 bullet points showing the pros of the topic asked by the user",
    )
    # Disadvantages, requested as three bullet points inside a single string.
    cons: str = Field(
        description="3 bullet points showing the cons of the topic asked by the user",
    )
    # One-line wrap-up.
    conclusion: str = Field(
        description="One line conclusion of the topic asked by the user",
    )

# Set up a JSON parser that takes its schema from QueryResponse.
# This rebinds `parser`, replacing the PydanticOutputParser created earlier.
parser = JsonOutputParser(pydantic_object=QueryResponse)
# Bare expression so the notebook displays the parser's repr.
parser

JsonOutputParser(pydantic_object=<class '__main__.QueryResponse'>)

In [20]:
# Prompt skeleton: {format_instructions} is pre-filled from the JSON parser below,
# while {query} is supplied on every call.
prompt_txt = """
             Answer the user query and generate the response based on the following formatting instructions

             Format Instructions:
             {format_instructions}

             Query:
             {query}
            """

# Bind the parser's formatting instructions once, leaving `query` as the only input variable.
prompt = PromptTemplate(
    input_variables=["query"],
    partial_variables=dict(format_instructions=parser.get_format_instructions()),
    template=prompt_txt,
)

# Bare expression so the notebook displays the finished template.
prompt

PromptTemplate(input_variables=['query'], partial_variables={'format_instructions': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"description": {"title": "Description", "description": "A brief description of the topic asked by the user", "type": "string"}, "pros": {"title": "Pros", "description": "3 bullet points showing the pros of the topic asked by the user", "type": "string"}, "cons": {"title": "Cons", "description": "3 bullet points showing the cons of the topic asked by the user", "type": "string"}, "conclusion": {"title": "Conclusion", "description": "One l

In [21]:
# LCEL pipeline: fill the prompt, send it to the chat model, then let the JSON parser
# decode the model's reply.
chain = prompt | chatgpt | parser

# Bare expression so the notebook displays the composed chain.
chain

PromptTemplate(input_variables=['query'], partial_variables={'format_instructions': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"description": {"title": "Description", "description": "A brief description of the topic asked by the user", "type": "string"}, "pros": {"title": "Pros", "description": "3 bullet points showing the pros of the topic asked by the user", "type": "string"}, "cons": {"title": "Cons", "description": "3 bullet points showing the cons of the topic asked by the user", "type": "string"}, "conclusion": {"title": "Conclusion", "description": "One l

In [22]:
# Topics to run through the chain in one go.
topic_queries = [
    "Tell me about commercial real estate",
    "Tell me about Generative AI",
]

# Wrap every topic in the input mapping the prompt expects (its only free variable is `query`).
topic_queries_formatted = [{"query": text} for text in topic_queries]

# Bare expression so the notebook displays the prepared inputs.
topic_queries_formatted

[{'query': 'Tell me about commercial real estate'},
 {'query': 'Tell me about Generative AI'}]

In [23]:
# Run the chain over the whole list of inputs: .map() wraps the chain so that a single
# invoke() call processes every element and yields one result per input.
responses = chain.map().invoke(topic_queries_formatted)

In [24]:
# Inspect the first result together with its type (a plain dict with this parser, as the output shows).
responses[0], type(responses[0])

({'description': 'Commercial real estate refers to properties used for business purposes, such as office buildings, retail spaces, and industrial facilities. It involves leasing, buying, or selling properties for commercial use.',
  'pros': '1. Potential for high returns on investment. 2. Diversification of investment portfolio. 3. Long-term leases provide stable income streams.',
  'cons': '1. Market fluctuations can impact property values. 2. High upfront costs for purchasing commercial properties. 3. Economic downturns can lead to higher vacancy rates.',
  'conclusion': 'Commercial real estate can be a lucrative investment option but requires careful research and management to mitigate risks.'},
 dict)

In [25]:
import pandas as pd

# Tabulate the results: one row per response, one column per key of the response dicts.
df = pd.DataFrame(responses)
# Bare expression so the notebook renders the frame.
df

Unnamed: 0,description,pros,cons,conclusion
0,Commercial real estate refers to properties us...,1. Potential for high returns on investment. 2...,1. Market fluctuations can impact property val...,Commercial real estate can be a lucrative inve...
1,Generative AI refers to a type of artificial i...,1. Can be used to generate creative content wi...,1. May produce biased or inappropriate content...,Generative AI has the potential to revolutioni...


In [26]:
# Pretty-print every response field by field, with a divider after each response.
# NOTE: the loop variable deliberately keeps the name `response`, so (as before) it rebinds
# the `response` variable assigned in an earlier cell.
for response in responses:
    for field_name, field_text in response.items():
        print(f"{field_name}:\n{field_text}\n")
    print('-----')

description:
Commercial real estate refers to properties used for business purposes, such as office buildings, retail spaces, and industrial facilities. It involves leasing, buying, or selling properties for commercial use.

pros:
1. Potential for high returns on investment. 2. Diversification of investment portfolio. 3. Long-term leases provide stable income streams.

cons:
1. Market fluctuations can impact property values. 2. High upfront costs for purchasing commercial properties. 3. Economic downturns can lead to higher vacancy rates.

conclusion:
Commercial real estate can be a lucrative investment option but requires careful research and management to mitigate risks.

-----
description:
Generative AI refers to a type of artificial intelligence that is capable of creating new content, such as images, text, or music, based on patterns and data it has been trained on.

pros:
1. Can be used to generate creative content without human intervention. 2. Can assist in tasks such as image 

### CommaSeparatedListOutputParser

This output parser can be used when you want to return a list of comma-separated items.

In [27]:
from langchain_core.output_parsers import CommaSeparatedListOutputParser
from langchain_core.prompts import PromptTemplate

# Parser for replies that are formatted as comma-separated values.
output_parser = CommaSeparatedListOutputParser()

# Instructions telling the model how to format its reply; the bare expression displays them.
format_instructions = output_parser.get_format_instructions()
format_instructions

'Your response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`'

In [28]:
# Fetch the parser's formatting instructions (the same call as in the previous cell).
format_instructions = output_parser.get_format_instructions()

# A query intended to make the language model produce a comma-separated list.
# Its only placeholder, {format_instructions}, is filled in when the chain is invoked.
prompt_txt = """
             Create a list of 5 different ways in which Generative AI can be used

             Output format instructions:
             {format_instructions}
             """

prompt = PromptTemplate.from_template(prompt_txt)

# Bare expression so the notebook displays the finished template.
prompt

PromptTemplate(input_variables=['format_instructions'], template='\n             Create a list of 5 different ways in which Generative AI can be used\n\n             Output format instructions:\n             {format_instructions}\n             ')

In [29]:
# Compose prompt -> chat model -> comma-separated-list parser into a single runnable.
llm_chain = prompt | chatgpt | output_parser

# Invoke it, passing the format instructions as the prompt's only input.
response = llm_chain.invoke({"format_instructions": format_instructions})

# Bare expression so the notebook displays the parsed result.
response

['Art generation',
 'Music composition',
 'Text generation',
 'Video game design',
 'Product design']

In [30]:
# Check the type of the parsed response (a plain Python list, as the output shows).
type(response)

list