In [None]:
!pip install openai
!pip install langchain

In [None]:
from langchain.llms import OpenAI

# General-purpose LLM wrapper reused by the prompt demos in later cells.
llm = OpenAI(temperature=0.5) # optional extra argument: openai_api_key=OPENAI_API_KEY
# Send one raw prompt string; the returned value is displayed as the cell result.
llm("我饿了")

PromptTemplate: 给通用型LLMs的输入

In [None]:
from langchain import PromptTemplate

# A template with no placeholders: formatting it needs no arguments.
prompt = PromptTemplate(
    template="请给我讲个笑话",
    input_variables=[],
)
prompt.format()

带参数的PromptTemplate

In [None]:
# Same idea with one placeholder, filled in when the prompt is formatted.
prompt = PromptTemplate(
    template="请给我讲个{adjective}的笑话.",
    input_variables=["adjective"],
)
prompt.format(adjective="有趣")

In [None]:
# Two placeholders: the joke's subject ({content}) and its tone ({adjective}).
multiple_input_prompt = PromptTemplate(
    template="请给我讲个关于{content}的{adjective}的笑话.",
    input_variables=["adjective", "content"],
)
multiple_input_prompt.format(content="猪八戒", adjective="有趣")

In [None]:
# Fill both template variables, send the rendered prompt to the LLM and print the reply.
print(llm(multiple_input_prompt.format(adjective="灰色幽默", content="猪八戒")))

few_shot_prompt_template: 给定examples，让llm理解examples的语义并按照同样的格式输出

In [None]:
# Baseline: send the bare sentence without any examples, for comparison with
# the few-shot prompt built in the following cells.
print(llm("我很开心"))

In [None]:
# Few-shot demonstrations: each entry pairs a human statement ("query") with
# the reply the AI should imitate ("answer").
examples = [
    {"query": query, "answer": answer}
    for query, answer in (
        ("我很难过", "不，你不难过"),
        ("我很矮", "不，你不矮"),
        ("我很高", "不，你不高"),
        ("我很怕黑", "不，你不怕黑"),
    )
]

In [None]:
from langchain import FewShotPromptTemplate

# How a single demonstration from `examples` is rendered inside the prompt.
example_template = "\nHuman: {query}\nAI: {answer}\n"
example_prompt = PromptTemplate(
    template=example_template,
    input_variables=["query", "answer"],
)

# `prefix` goes before the rendered examples; `suffix` goes after them and
# carries the new query, leaving the AI turn open for the model to complete.
prefix = "以下是我跟AI的对话，在这有一些例子:"
suffix = "\nHuman: {query}\nAI: "

few_shot_prompt_template = FewShotPromptTemplate(
    prefix=prefix,
    examples=examples,
    example_prompt=example_prompt,
    example_separator="\n\n",
    suffix=suffix,
    input_variables=["query"],
)

# Show the fully assembled prompt text before sending anything to the LLM.
print(few_shot_prompt_template.format(query="我很开心"))

In [None]:
# Same query as the baseline cell, now sent with the few-shot examples prepended.
print(llm(few_shot_prompt_template.format(query="我很开心")))

example_selector: token有限，为避免最终的prompts超出token限制，对example进行部分选择

In [None]:
from langchain.prompts.example_selector import LengthBasedExampleSelector

# Picks a subset of `examples` so the assembled prompt stays within a length budget.
# NOTE(review): the unit of `max_length` depends on the selector's length
# function (presumably whitespace-separated words) - confirm against the docs.
example_selector = LengthBasedExampleSelector(
    max_length=10,
    examples=examples,
    example_prompt=example_prompt,
)

In [None]:
# Rebind `few_shot_prompt_template`: examples now come from `example_selector`
# rather than the full `examples` list, and are joined by a single newline.
few_shot_prompt_template = FewShotPromptTemplate(
    prefix=prefix,
    example_selector=example_selector,
    example_prompt=example_prompt,
    example_separator="\n",
    suffix=suffix,
    input_variables=["query"],
)
print(few_shot_prompt_template.format(query="我很开心"))

ChatOpenAI: 用于聊天的LLM，对比通用型LLMs的区别:
1. 请求时可以传递多条消息: list[message]
2. 每条消息可以有: role, content, name
   
Role包含: AI, User, System

对应4种Message: ChatMessage(可自定义role)，SystemMessage，AIMessage，HumanMessage

对应4种PromptTemplate: ChatPromptTemplate，SystemMessagePromptTemplate，AIMessagePromptTemplate，HumanMessagePromptTemplate

In [None]:
from langchain.chat_models import ChatOpenAI
# Chat-model wrapper used by the message-based cells that follow; temperature is set to 0.
chat = ChatOpenAI(temperature=0)

In [None]:
# AIMessage and ChatMessage are imported alongside the two types used below,
# but this cell itself only builds a system message and a human message.
from langchain.schema import AIMessage, HumanMessage, SystemMessage, ChatMessage

# Return a JSON-format string, split by each parameter, with no description or explanation
message = [
    SystemMessage(content="返回json格式,按照每项参数拆分,不要说明和解释信息"),
    HumanMessage(content="告诉我特斯拉model Y汽车的尺寸参数"),
]
# Send the whole message list in a single request and print the returned value.
print(chat(message))

In [None]:
from langchain.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate
)

# Parameterised versions of the system / human messages from the previous cell.
system_template = "返回{format},按照每项参数拆分,不要说明和解释信息"
human_template = "告诉我特斯拉{car}汽车的尺寸参数"

system_template_prompt = SystemMessagePromptTemplate.from_template(system_template)
human_template_prompt = HumanMessagePromptTemplate.from_template(human_template)

# Combine both message templates into one chat prompt.
chat_prompt = ChatPromptTemplate.from_messages(
    [
        system_template_prompt,
        human_template_prompt,
    ]
)

# Two renderings that differ only in how the requested format is worded.
# Note: `message` is rebound here from the list built in the previous cell.
message = chat_prompt.format_prompt(car="model Y", format="json字符串格式")
message2 = chat_prompt.format_prompt(car="model Y", format="json格式字符串")

print(message)
print(message2)

In [None]:
# Convert each formatted prompt into a message list, send it to the chat model
# and print the two replies for comparison.
print(chat(message.to_messages()))
print(chat(message2.to_messages()))

输出解析器(Output Parsers): 语言模型的输出为文本，很多时候需要获取比文本更加结构化的信息，这就是输出解析器的作用
比如上面的: “返回json格式字符串,按照每项参数拆分,不要说明和解释信息” 这个提示会让LLM返回json数据

CommaSeparatedListOutputParser: 逗号分割，列表输出解析器

In [52]:
from langchain.output_parsers import CommaSeparatedListOutputParser

output_parser = CommaSeparatedListOutputParser()
# Instruction text that tells the LLM which output format it should return.
format_instructions = output_parser.get_format_instructions()

# `format_instructions` is pre-filled via partial_variables, so only
# `subject` has to be supplied when the prompt is formatted.
prompt = PromptTemplate(
    input_variables=["subject"],
    partial_variables={"format_instructions": format_instructions},
    template="列举3种主要的： {subject}.\n{format_instructions}",
)
   

In [53]:
# Show the parser's instructions, a blank gap, then the raw (unformatted) template.
# A newline separator between the three items reproduces three separate prints.
print(format_instructions, "\n", prompt.template, sep="\n")

Your response should be a list of comma separated values, eg: `foo, bar, baz`


列举3种主要的： {subject}.
{format_instructions}


In [None]:
# Rebind `llm` with temperature 0 (the earlier instance used 0.5).
llm = OpenAI(temperature=0)
# Only `subject` is needed; the format instructions were pre-filled on the template.
input_prompt = prompt.format(subject="花")
print(input_prompt)

In [None]:
# Keep the raw reply in `output` so the next cell can hand it to the parser.
output = llm(input_prompt)
print(output)

In [None]:
# Parse the raw reply with the comma-separated-list parser; the parsed value
# is displayed as the cell result.
output_parser.parse(output)

StructuredOutputParser: json格式输出解析器

In [61]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

# One schema entry per field the model is asked to return.
response_schemas = [
    ResponseSchema(
        name="answer",
        description="answer to the human's question",
    ),
    ResponseSchema(
        name="source",
        description="source used to answer the human's question, should be a website.",
    ),
]

# Note: rebinds `output_parser` and `format_instructions` from the
# comma-separated-list section above.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()
print(format_instructions)

The output should be a markdown code snippet formatted in the following schema:

```json
{
	"answer": string  // answer to the human's question
	"source": string  // source used to answer the human's question, should be a website.
}
```


In [63]:
# The format instructions are pre-filled; only `question` is left for the caller.
prompt = PromptTemplate(
    input_variables=["question"],
    partial_variables={"format_instructions": format_instructions},
    template="answer the users question as best as possible.\n{format_instructions}\n{question}",
)
print(prompt.template)

answer the users question as best as possible.
{format_instructions}
{question}


In [65]:
# Render the prompt for one question, convert it to plain text and send it to the LLM.
input_prompt = prompt.format_prompt(question="牛奶的成分是什么？")
output = llm(input_prompt.to_string())

# Raw reply text; it is parsed in the next cell.
print(output)



```json
{
	"answer": "牛奶的主要成分是水、脂肪、蛋白质、糖类和矿物质。",
	"source": "https://baike.baidu.com/item/%E7%89%9B%E5%A5%B6/814"
}
```


In [71]:
# Parse the LLM reply with the structured parser and keep the result:
# the original discarded the return value and then printed an undefined
# `json_data`, which raises NameError on a fresh kernel.
json_data = output_parser.parse(output)
print(type(json_data), ": ", json_data)

<class 'dict'> :  {'answer': '牛奶的主要成分是水、脂肪、蛋白质、糖类和矿物质。', 'source': 'https://baike.baidu.com/item/%E7%89%9B%E5%A5%B6/814'}
