In [None]:
from dotenv import dotenv_values

# Load the credentials for the OpenAI-compatible endpoint from the local .env file.
env_config = dotenv_values(".env")

# Fail fast with an actionable message: a missing file or entry would otherwise
# surface as a bare KeyError('API_KEY') with no hint about where to fix it.
_missing_keys = [key for key in ("API_KEY", "BASE_URL") if key not in env_config]
if _missing_keys:
    raise KeyError(f"Missing entries in .env: {', '.join(_missing_keys)}")

API_KEY = env_config["API_KEY"]
BASE_URL = env_config["BASE_URL"]

In [None]:
from langchain.prompts.prompt import PromptTemplate

# Prompt that asks the model to rewrite a follow-up question as a standalone
# question. It has two placeholders: {chat_history} and {question}.
# The text is sent to the model verbatim, so keep its wording and line breaks.
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
# Build the reusable template object from the raw text above.
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

In [None]:
from langchain.llms import OpenAI

## langchain 六个模块
- Model 
    - LLM model
    - chat model
    - emb model
- prompt
    - input
    - output parser prompt
- chain
    - 自己用 LCEL（LangChain Expression Language）定义
        -  prompt | chat | output_parser
    -  import LLMChain
    - xxxxx
- Retrieval
    - database
    - retriever
- memory
- agent

LangChain 的价值在于：它帮我们写好了连接这些模块的胶水代码。

## langchain model

In [None]:

# 普通 LLM model 
from langchain.llms import OpenAI, HuggingFaceHub, HuggingFaceTextGenInference

# Plain text-completion LLM, configured with the key and base URL loaded from .env.
llm = OpenAI(api_key=API_KEY, base_url=BASE_URL, model_name="gpt-3.5-turbo-instruct")

# Use the Runnable `.invoke` API rather than calling the model object directly
# (the direct-call form is deprecated). As the cell's last expression, the
# completion text is displayed as the cell output.
llm.invoke("大模型是什么？")


# # For other models, a model hosted on the Hugging Face Hub can be configured here.
# # SECURITY: an access token used to be hard-coded on the next lines. Never commit
# # tokens; read them from .env instead, and revoke the token that was exposed.
# llm = HuggingFaceHub(repo_id ="NYTK/text-generation-news-gpt2-small-hungarian",
#                       huggingfacehub_api_token=env_config["HUGGINGFACEHUB_API_TOKEN"],
#                       model_kwargs={"temperature": 0.7})
# llm("大模型是什么?")


# # # Using a locally served model
# llm = HuggingFaceTextGenInference(
#                 inference_server_url="http://localhost:8010/",
#                 max_new_tokens=512,
#                 top_k=10,
#                 top_p=0.95,
#                 typical_p=0.95,
#                 temperature=0.01,
#                 repetition_penalty=1.03,
#             )
# print(llm("What is Deep Learning?"))

In [None]:
# 使用 chat 模型
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings

# Chat model client, configured with the credentials loaded from .env.
chat = ChatOpenAI(api_key=API_KEY, base_url=BASE_URL, model_name="gpt-3.5-turbo")

# The reply is a message object; its text is read from `.content`.
ans = chat.invoke("帮我写一个计算机相关的笑话，不要超过 100 个字")
print(ans.content)

# Embedding model client. API_KEY / BASE_URL hold the same values that were
# previously looked up again through env_config.get(...).
emb_model = OpenAIEmbeddings(api_key=API_KEY, base_url=BASE_URL)

# emb_content = emb_model.embed_query("hello ")
# print(emb_content)

In [None]:
# Send a list of prompts in one call (here a single-element list); the result
# is the cell's last expression, so it is displayed as the cell output.
chat.batch(["帮我写一个和猫相关的笑话，不超过100字。"])

## langchain prompt

In [None]:
from langchain.prompts import PromptTemplate

template = "请帮我写一个关于{topic}的笑话"

# Option 1: call the constructor and list the input variables explicitly.
prompt = PromptTemplate(template=template, input_variables=["topic"])

# Option 2: build from the template text and pre-fill {format} through
# partial_variables, so only {topic} has to be supplied to format().
# This rebinds `prompt`; option 1 above is kept for comparison only.
format_hint = {"format": "仅仅输出一个笑话，不要输出任何其他内容"}
prompt = PromptTemplate.from_template(
    "请帮我写一个关于{topic}的笑话。输出遵循以下格式: {format}",
    partial_variables=format_hint,
)

# Render the final prompt text and show it.
pp = prompt.format(topic="猫")
print(pp)


In [None]:
from langchain.prompts import PromptTemplate

# Minimal template with a single {topic} placeholder.
prompt_test = PromptTemplate.from_template("写一个{topic}相关的笑话")
# The rendered string is neither stored nor displayed here (this is not the
# cell's last expression).
prompt_test.format(topic="dog")


# A longer hand-written prompt; it is only defined here, not used in this cell.
prompt_test_verbose = """

帮我写一个很长很长的笑话帮我写一个很长很长的笑话
"""


# Aside: with Copilot, doubling your productivity is no exaggeration.
# Pylance

In [None]:
from langchain.prompts import PromptTemplate, FewShotPromptTemplate
# Pool of worked examples (word -> antonym) for the few-shot prompt.
examples = [
    {"word": "高", "antonym": "矮"},
    {"word": "胖", "antonym": "瘦"},
]

# How one example is rendered inside the final prompt.
example_template ="""
词语: {word}
反义词: {antonym}\n
"""
example_prompt = PromptTemplate(
    template=example_template,
    input_variables=["word", "antonym"],
)

# prefix + rendered examples + suffix make up the prompt.
# Only the first example is passed in (examples[:1]).
few_shot_prompt = FewShotPromptTemplate(
    prefix="给出输入词语的反义词",
    examples=examples[:1],
    example_prompt=example_prompt,
    example_separator="\n",
    suffix="词语: {input}\n反义词:",
    input_variables=["input"],
)

print(few_shot_prompt.format(input="美丽"))



In [None]:
# Inspect which placeholders the per-example template expects.
example_prompt.input_variables

In [None]:

# Hand-rolled counterpart of the few-shot prompt, using plain str.format.
# Every placeholder has to be named and supplied by hand, which is what stops
# scaling once there are many variables.
few_test = """
给出输入词语的反义词

## example
词语: {word} 
反义词: {antonym}\n

## input
词语: {word1} 
反义词: {antonym2}\n

# 假设有 10 个变量，
"""
# One value per placeholder in the template above.
few_test_values = {"word": "高", "antonym": "矮", "word1": "好", "antonym2": "坏"}
few_test.format(**few_test_values)




## langchain chain

In [None]:
# prompt, model, output parser


from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel

# output 
# Schema that the model's reply is parsed into: two required string fields.
# NOTE(review): documented with comments rather than a class docstring on
# purpose. Pydantic may copy a class docstring into the generated schema, which
# would change the format instructions sent to the model. Confirm before adding one.
class Output(BaseModel):
    query: str
    punchline: str

# Desired output shape:
# {
#     "query": "帮我写一个和猫相关的笑话。",
#     "punchline": "punchline"
# }

# Parser that turns the model's reply into an `Output` instance.
output_parser = PydanticOutputParser(pydantic_object=Output)

# The parser supplies the formatting instructions that are pre-filled into
# the {format} slot; only {topic} stays open.
format_instructions = output_parser.get_format_instructions()
prompt = PromptTemplate.from_template(
    template="help me write a joke about {topic}。 \n{format}",
    partial_variables={"format": format_instructions},
)
chat = ChatOpenAI(base_url=BASE_URL, api_key=API_KEY, model_name="gpt-3.5-turbo", temperature=0)

# Pipeline: prompt -> chat model -> output parser.
chain = prompt | chat | output_parser

ans = chain.invoke({"topic": "cat"})


In [None]:
# Display the value most recently bound to `ans`.
ans

In [None]:
# Same prompt, sent to the plain completion model without an output parser,
# so `tmp` holds the raw text reply.
llm = OpenAI(api_key=API_KEY, base_url=BASE_URL, model_name="gpt-3.5-turbo-instruct")

# `.invoke` replaces the deprecated direct call `llm(...)`; the returned text is the same.
tmp = llm.invoke(prompt.format(topic="cat"))

In [None]:
# Show the fully rendered prompt text for topic="cat".
print(prompt.format(topic="cat"))

In [None]:
# tmp = tmp + "你好啊"

In [None]:
# import json
# json.loads(tmp)

In [None]:
from langchain.chains import LLMChain
# Step 1: ask for a name only, so the reply can be fed straight into step 2.
name_prompt = PromptTemplate.from_template(
    "帮我的{topic}起一个名字。输出一个名字就可以，不要加其他内容。")
chain1 = LLMChain(llm=llm, prompt=name_prompt)
# chain1.run("猫")

# Step 2: write a poem about whatever it receives as {topic}.
second_prompt = PromptTemplate.from_template("帮我写一个关于{topic}的诗歌")
chain2 = LLMChain(llm=llm, prompt=second_prompt)
# chain2.run("猫")

from langchain.chains import SimpleSequentialChain

# Simple chain where the outputs of one step feed directly into next.
chain = SimpleSequentialChain(chains=[chain1, chain2], verbose=True)
chain.run("猫")

In [None]:
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI

prompt = PromptTemplate.from_template("帮我写一个关于{topic}的笑话")
chat = ChatOpenAI(api_key=API_KEY, base_url=BASE_URL, model_name="gpt-3.5-turbo")

# Two-stage pipeline: the rendered prompt goes straight to the chat model,
# with no output parser, so the result is the model's message object.
chain = prompt | chat

joke_inputs = {"topic": "apple"}
ans = chain.invoke(joke_inputs)
print(ans)

In [None]:
# github copilot

## langchain indexes (retrieval)

In [None]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

import os

# Load the source text and cut it into chunks for embedding.
loader = TextLoader(file_path="./data/text.txt")
documents = loader.load()

text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

query = "RAG的本质是什么？"

# In-memory variant (nothing written to disk):
# database = Chroma.from_documents(texts, emb_model)
# retrieval_docs = database.similarity_search(query, k=1)
# print(retrieval_docs)

# Persisted variant. Calling from_documents() with the same persist_directory
# on every run adds the same chunks to the stored collection again, so search
# results fill up with duplicates. Build the store only when it does not exist
# yet and load it from disk otherwise.
# Delete the directory to force a rebuild after ./data/text.txt changes.
CHROMA_DIR = "./chroma_db"
if os.path.isdir(CHROMA_DIR) and os.listdir(CHROMA_DIR):
    # load from disk
    db2 = Chroma(persist_directory=CHROMA_DIR, embedding_function=emb_model)
else:
    # save to disk
    db2 = Chroma.from_documents(texts, emb_model, persist_directory=CHROMA_DIR)

docs = db2.similarity_search(query)
print(docs)
# print(docs[0].page_content)

In [None]:
from langchain.vectorstores import FAISS

# Build a FAISS vector store from the same chunks and embedding model.
db = FAISS.from_documents(texts, emb_model)

query = "RAG的本质是什么？"
# k=1: ask for a single best-matching chunk.
retrieval_docs = db.similarity_search(query, k=1)

print(retrieval_docs)

# Saving the FAISS index to disk and loading it back:
# db.save_local("faiss_index")
# new_db = FAISS.load_local("faiss_index", emb_model)

In [None]:
# MMR retriever that returns a single document.
# `score_threshold` was removed from search_kwargs: it only takes effect with
# search_type="similarity_score_threshold" and has no effect under "mmr", so it
# suggested a filter that was never applied.
retriever = db.as_retriever(search_type="mmr", search_kwargs={"k": 1})

# The number of results is controlled by search_kwargs above. A `k=` keyword
# passed to get_relevant_documents() is not forwarded to the vector store,
# so it is left out.
retriever.get_relevant_documents("RAG的本质是什么")

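search type 有很多，`as_retriever` 支持以下几种：
- `similarity`：相似度（默认）
- `mmr`：MaxMarginalRelevance，同时兼顾相似度与多样性
- `similarity_score_threshold`：相似度 + 分数阈值（需要在 `search_kwargs` 中提供 `score_threshold`）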

MMR 根据文档与查询的相似度以及结果之间的多样性进行选择：先找到与查询余弦相似度最高的文档，再迭代地添加文档，同时对与已选文档过于相似的候选进行惩罚。

In [None]:
from langchain.chains import RetrievalQA

# Retriever with the vector store's default search settings.
retriever = db.as_retriever()

# "stuff" chain type; return_source_documents=True also puts the retrieved
# documents into the result dict alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,
)

query = "RAG的本质是什么?"
qa_inputs = {"query": query}
result = qa(qa_inputs)
# Only the generated answer is printed.
print(result["result"])

In [None]:
# Baseline for comparison: the same question sent to the bare LLM, without retrieval.
# `.invoke` replaces the deprecated direct call `llm(...)`.
print(llm.invoke("RAG的本质是什么?"))

## LangChain memory

In [None]:
from langchain.chains import ConversationChain

# No `memory` argument is given, so the chain falls back to its default memory.
# verbose=True is set, presumably to print the prompt of every turn; confirm in the output.
conversation = ConversationChain(llm=llm, verbose=True)

# Three turns on the same chain: the last question can only be answered from
# what the first two turns stated. Only the final call is the cell's last
# expression, so only its return value is displayed.
conversation.predict(input="所有的北极熊都是白色的")
conversation.predict(input="bob是一只北极熊")
conversation.predict(input="bob是什么颜色的?")

In [None]:
from langchain.memory import ChatMessageHistory

# Standalone message history, filled by hand with one user and one AI message.
history = ChatMessageHistory()

history.add_user_message("hi!")

history.add_ai_message("whats up?")

In [None]:
# Display the history object to inspect the stored messages.
history

In [None]:
from langchain.memory import ConversationBufferWindowMemory, ConversationBufferMemory, ConversationSummaryMemory

from langchain.chains import ConversationChain
from langchain.llms import OpenAI

# Conversation backed by a windowed buffer memory.
# NOTE(review): k=5 presumably limits the memory to the 5 most recent exchanges; confirm.
conversation = ConversationChain(llm=llm, memory=ConversationBufferWindowMemory(k=5))
conversation.run("What is the capital of Canada?")
# Output: "The capital of Canada is Ottawa."
conversation.run("What is the capital of France?")
# Output: "The capital of France is Paris."
# Last expression of the cell: shows what the memory currently holds.
conversation.memory.buffer
# Output

In [None]:
# Print the memory buffer as plain text instead of its repr.
print(conversation.memory.buffer)

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)
from langchain.schema import SystemMessage
from langchain.chains import LLMChain

# The three parts of the chat prompt, in the order they are sent.
# 1) the persistent system prompt
system_message = SystemMessage(
    content="You are a chatbot having a conversation with a human."
)
# 2) the slot where the remembered messages are inserted
history_slot = MessagesPlaceholder(variable_name="chat_history")
# 3) the slot where the current human input is injected
human_turn = HumanMessagePromptTemplate.from_template("{human_input}")

prompt = ChatPromptTemplate.from_messages([system_message, history_slot, human_turn])

# memory_key matches the placeholder's variable_name above, and
# return_messages=True makes the memory hand back message objects.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

chat_llm_chain = LLMChain(llm=chat, prompt=prompt, memory=memory, verbose=True)
chat_llm_chain.predict(human_input="Hi there my friend")


## langchain agents

In [None]:
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType

# Built-in tools: Wikipedia lookup and an LLM-backed calculator.
tool_names = ["wikipedia", "llm-math"]
tools = load_tools(tool_names, llm=llm)

# Zero-shot ReAct agent; verbose=True is set to trace its intermediate steps.
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

# The question combines a fact lookup with an age calculation.
agent.run("马斯克（elon musk）的生日是哪天? 到2023年他多少岁了?")

In [None]:
## 自定义 tool

from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.tools.render import format_tool_to_openai_function
from langchain.agents.format_scratchpad import format_to_openai_function_messages
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
from langchain.agents import AgentExecutor

from langchain.agents import tool

# Custom tool: reports how many characters a word has.
# NOTE: under @tool the docstring is more than documentation. It becomes the
# tool description shown to the model, so changing it changes runtime behaviour.
@tool
def get_word_length(word: str) -> int:
    """Returns the length of a word."""
    return len(word)

tools = [get_word_length]

# Prompt layout: system message, the user's input, then the scratchpad slot
# that receives the messages built from the agent's intermediate steps.
system_text = "You are very powerful assistant, but don't know current events"
prompt = ChatPromptTemplate.from_messages([
    ("system", system_text),
    ("user", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])

# Attach the tool schemas to the chat model as OpenAI functions.
openai_functions = [format_tool_to_openai_function(t) for t in tools]
llm_with_tools = chat.bind(functions=openai_functions)

# Maps the executor's state dict onto the two prompt variables.
agent_inputs = {
    "input": lambda x: x["input"],
    "agent_scratchpad": lambda x: format_to_openai_function_messages(
        x["intermediate_steps"]
    ),
}

# Pipeline: state -> prompt -> model with functions -> output parser.
agent = agent_inputs | prompt | llm_with_tools | OpenAIFunctionsAgentOutputParser()

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
agent_executor.invoke({"input": "How many letters in the word educa"})

In [None]:
# Display the function schema generated from the tool, i.e. what is sent to the model.
format_tool_to_openai_function(get_word_length)

In [None]:
from langchain.agents import get_all_tool_names
# get_all_tool_names()

In [None]:
from langchain.schema import HumanMessage
from langchain.agents import tool

# `get_word_length` is already defined and wrapped by @tool in the custom-tool
# agent cell earlier in this notebook. Defining it a second time here silently
# shadowed that definition, so the existing tool is reused instead.
tools = [get_word_length]

# Call the chat model directly with the function schema attached (no agent loop).
# The result is the cell's last expression, so the model's reply is displayed.
chat.predict_messages([HumanMessage(content="How many letters in the word educa")],
                      functions=[format_tool_to_openai_function(get_word_length)])

In [None]:
# # 推荐阅读 openai 怎么调用 function
# import openai
# import json
# from enum import Enum

# class BaseTool(Enum):
#     Bookkeeping = "record_price"
#     RecordingTask = "record_task"

# # record_price是用来给Function Calling调用的函数，
# # 这个函数接收两个必填的参数，category类目（string类型），count 数量（int类型）
# def record_price(category, count):
#     print(category, count)
#     print("调用获取实时物价的 API")
#     return count*12

# def funtion_call_conversation(message):
#     messages = [
#       {"role": "user", "content": "今天买了一斤肉，花了多少钱"}
#     ]
#     response = openai.ChatCompletion.create(
#     model="gpt-3.5-turbo-0613",
#     messages = message,
#     temperature=0,
#     functions=[
#       {
#         "name": BaseTool.Bookkeeping.value,
#         "description": "返回物品的数量",
#         "parameters": {
#           "type": "object",
#           "properties": {
#             "category": {"type": "string","description": "类目",},
#             "count": {"type": "integer", "description": "数量"},
#           },
#           "required": ["category","count"],
#         },
#       }
#     ],
#     function_call="auto",
#     )
#     message = response["choices"][0]["message"]
#     print(message)
#     if(message.get("function_call")):
#         function_name = message["function_call"]["name"]
#         if function_name == BaseTool.Bookkeeping.value:
#             arguments = json.loads(message["function_call"]["arguments"])
#             price = record_price(arguments.get('category'), arguments.get('count'))
#             messages.append(message)
#             messages.append({"role": "function", "name": BaseTool.Bookkeeping.value, "content": price})
#             print(messages)
#             role_function_conversation(messages)

# def role_function_conversation(message):
#     response = openai.ChatCompletion.create(
#     model="gpt-3.5-turbo-0613",
#     messages = message,
#     temperature=0,
#     # name字段表示要调用的函数名，description表示函数描述（让 LLM 读得懂的函数说明）
#     # parameters是一个符合JSON Schema格式的对象，用来描述这个函数的入参信息
#     functions=[
#       {
#         "name": BaseTool.Bookkeeping.value,
#         "description": "返回物品的数量",
#         "parameters": {
#           "type": "object",
#           "properties": {
#             "category": {"type": "string","description": "类目",},
#             "count": {"type": "integer", "description": "数量"},
#           },
#           "required": ["category","count"],
#         },
#       }
#     ],
#     function_call="auto",
#     )
#     message = response["choices"][0]["message"].get("content")
#     print(message)
    

# if __name__ == "__main__":
#     funtion_call_conversation()