# 1.单链结构

## 1.1 最简单的消息链

In [4]:
import os
from langchain_core.output_parsers import StrOutputParser, CommaSeparatedListOutputParser
from langchain_community.llms import Tongyi
from langchain_core.prompts import ChatPromptTemplate

from dotenv import load_dotenv
load_dotenv()

import os
os.environ['http_proxy'] = os.getenv("http_proxy")
os.environ['https_proxy'] = os.getenv("https_proxy")

os.environ["LANGCHAIN_TRACING_V2"] = os.getenv("LANGCHAIN_TRACING_V2")
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")

os.environ["DASHSCOPE_API_KEY"] = os.getenv("DASHSCOPE_API_KEY")
# 1. 初始化模型
model = Tongyi(model="qwen-turbo", temperature=0.7)

# 2. prompt
prompt_template = ChatPromptTemplate.from_messages([
    ('system', '请以{language}来回答问题'),
    ('user', "{text}")
])

# 3. analysis
parser = StrOutputParser()

# 4. chain
chain = prompt_template | model | parser

# 5. run
print(chain.invoke({'language': 'english', 'text': '常用的颜色包括哪些？'}))

Common colors include red, blue, green, yellow, purple, orange, black, white, and gray. Additionally, there are a lot of variations and shades within these colors such as pink (a shade of red), teal (a mixture of blue and green), and so on.


## 1.2 RAG检索链

In [5]:
# 1. Load data
import os
from langchain.document_loaders import TextLoader, DirectoryLoader

home_path = os.getcwd()
data_path = os.path.join(home_path, 'data')
text_loader_kwargs ={'autodetect_encoding': True}
loader = DirectoryLoader(data_path, glob="**/*.txt", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs, show_progress=True)
docs = loader.load()

# 2. Split text
from langchain_community.embeddings import DashScopeEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_documents(docs)

print(len(documents))
print("="*200)
for doc in documents:
    print(len(doc.page_content))

# 3. Embedding
db = Chroma.from_documents(documents, DashScopeEmbeddings())

  0%|          | 0/8 [00:00<?, ?it/s]

100%|██████████| 8/8 [00:00<00:00, 64.27it/s]


1134
440
998
543
14
996
239
876
489
993
492
999
323
801
982
612
995
433
842
995
558
987
757
873
991
709
760
951
424
918
992
994
619
495
894
977
961
890
727
979
466
831
632
896
611
970
71
932
995
383
646
557
916
793
997
790
774
670
696
804
974
506
972
997
654
723
951
999
904
686
728
742
850
862
887
967
790
776
758
837
518
573
691
770
940
965
941
920
994
510
837
749
875
683
814
999
915
353
773
673
917
566
873
636
839
406
890
760
942
553
997
287
823
953
973
272
888
974
963
936
964
989
807
936
993
972
846
612
581
996
283
966
968
989
986
927
895
992
966
968
520
968
999
983
644
933
447
939
704
878
888
983
987
979
881
935
973
874
936
949
876
878
850
893
932
985
908
861
957
864
984
857
994
975
845
831
442
996
354
94
995
555
96
995
451
971
558
990
483
579
991
757
90
995
707
999
509
721
928
992
494
805
951
717
730
421
715
938
986
911
976
938
593
995
407
884
970
888
442
969
994
351
915
906
978
669
505
985
663
623
783
722
828
961
913
999
993
799
788
895
725
194
998
896
979
976
981
847
993
591
995


In [3]:
from langchain.prompts import ChatPromptTemplate
from langchain.schema import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.llms import Tongyi

# 4. Retriever
retriever = db.as_retriever()

# 5. Prompt
template = """Answer the question based only on the following context:
{context}

Question:
{question}
"""

prompt = ChatPromptTemplate.from_template(template)

# 6. LLM
model = Tongyi(model="qwen-turbo", temperature=0.7)

def format_docs(docs):
    return "\n\n".join([d.page_content for d in docs])

# 7. Chain
chain = (
    {"context":retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

# 8. run

chain.invoke("What is prompt?")

"A prompt in the context provided is a specific instruction or context given to a Pre-trained Language Model (PLM) to guide its generation of text or to perform a specific task, such as classification or query rewriting. It can consist of a task description, demonstrations, and can be constructed in various ways including zero-shot, few-shot, or chain-of-thought prompting styles to influence the model's output according to the input provided."

# 2.多链结构

## 2.1 把RAG改造为多链结构

In [54]:
# from langchain.vectorstores.base import BaseRetriever
from langchain.prompts import ChatPromptTemplate
# from langchain.output_parsers import StrOutputParser
from langchain.schema import StrOutputParser
from langchain_core.runnables import RunnableMap
from operator import itemgetter

# 定义 Retriever
retriever = db.as_retriever()

# 模板定义
template = """Answer the question based only on the following context:
{context}

Question:
{question}
"""
prompt = ChatPromptTemplate.from_template(template)

# 模型定义
from langchain_community.llms import Tongyi  # 替换为实际 Tongyi 模型的导入路径
model = Tongyi(model="qwen-turbo", temperature=0.7)

# 示例问题
question = "What is prompt?"

# 工具函数
from langchain.load import dumps, loads

def format_docs(docs):
    """将文档内容格式化为字符串"""
    return "\n\n".join([d.page_content for d in docs])

def get_unique_union(documents: list[list]):
    """对嵌套文档列表进行去重并合并"""
    # 展平列表并将每个文档序列化为字符串
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # 获取唯一文档
    unique_docs = list(set(flattened_docs))
    # 反序列化为文档对象
    return [loads(doc) for doc in unique_docs]

# 构建 First Chain
first_chain = (
    retriever
    # | format_docs  # 格式化检索结果
    | StrOutputParser()  # 将输出解析为字符串
)

# first_chain_output = first_chain.invoke(question)
# print("First Chain Output:", first_chain_output)
# print("="*200)
# print(type(first_chain_output))

# 调整模型输入格式
class AdjustModelInput:
    """将输入调整为 Tongyi 模型所需的格式"""
    def __call__(self, inputs):
        # 打印输入调试信息
        print("Input to AdjustModelInput:", inputs)
        
        # 确保模型输入是一个包含数组的结构
        adjusted_input = {"texts": [inputs]}
        
        # 打印调整后的输入格式
        print("Adjusted Model Input:", adjusted_input)
        return adjusted_input

# context = "prompt is ……"

second_chain = (
    {
        "context": first_chain,
        "question": lambda x: question,
    }
    | prompt
    # | AdjustModelInput()  # 调整输入格式
    # | model
    # | StrOutputParser()
)


# 执行 Second Chain
# result = second_chain.invoke({"question": question, "context": context})
result = second_chain.invoke({"question": question})
print(result)


ValueError: status_code: 400 
 code: InvalidParameter 
 message: input.texts should be array

In [48]:
from langchain.prompts import ChatPromptTemplate

# 定义 Prompt 模板
template = """Answer the question based only on the following context:
{context}

Question:
{question}
"""
prompt = ChatPromptTemplate.from_template(template)

# 定义 Context 和 Question
context = "Prompt is a specific instruction provided to guide the model's behavior."
question = "What is a prompt?"

# 构建链条
second_chain = (
    {
        "context": lambda x:first_chain
        "context": lambda x:context,  # 提供上下文
        "question": lambda x:question,  # 提供问题
    }
    | prompt  # 模板生成
)

# 执行链条
try:
    result = second_chain.invoke({"context": context, "question": question})
    print("Result:", result)
except Exception as e:
    print("Error:", e)


Result: messages=[HumanMessage(content="Answer the question based only on the following context:\nPrompt is a specific instruction provided to guide the model's behavior.\n\nQuestion:\nWhat is a prompt?\n", additional_kwargs={}, response_metadata={})]


In [19]:
retriever = db.as_retriever()

template = """Answer the question based only on the following context:
{context}

Question:
{question}
"""


model = Tongyi(model="qwen-turbo", temperature=0.7)

prompt = ChatPromptTemplate.from_template(template)

question = "What is prompt?"

from langchain.load import dumps, loads



def format_docs(docs):
    return "\n\n".join([d.page_content for d in docs])

def get_unique_union(documents: list[list]):
    """ Unique union of retrieved docs """
    # Flatten list of lists, and convert each Document to string
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # Get unique documents
    unique_docs = list(set(flattened_docs))
    # Return
    return [loads(doc) for doc in unique_docs]


In [20]:
from operator import itemgetter
from langchain_core.runnables import RunnableMap
first_chain = (
    retriever 
    | format_docs
    | StrOutputParser()
    | get_unique_union
    | format_docs
    )

# first_chain.invoke("What did the president say about Ketanji Brown Jackson")

second_chain = (
    {'context': first_chain, 'question': itemgetter("question")}
    | prompt
    | model
    | StrOutputParser()
)

second_chain.invoke({"question": question})
# second_chain = (
#     RunnableMap({
#         "context": first_chain,
#         "question": lambda x: x  # 这里我们简单地返回固定的 question
#     })
#     | prompt
#     | model
#     | StrOutputParser()
# )
second_chain.invoke({"question": question})
# itemgetter("question")

# {'context': first_chain, 'question': itemgetter("question")}
# second_chain.invoke(question)

KeyError: "Input to ChatPromptTemplate is missing variables {'language', 'text'}.  Expected: ['language', 'text'] Received: ['question']\nNote: if you intended {language} to be part of the string and not a variable, please escape it with double curly braces like: '{{language}}'.\nFor troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/INVALID_PROMPT_INPUT"

## 2.2 更加高级的链结构
- 来自于【🔥 从零开始学习 RAG｜2️⃣ 优化原始提问】 https://www.bilibili.com/video/BV1QE421L7qe/?share_source=copy_web&vd_source=f65342ebbf0b68f07ba0fc6772b947f7

In [None]:
from langchain.prompts import ChatPromptTemplate

# Multi Query: Different Perspectives
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
# from langchain_openai import ChatOpenAI
from langchain_community.chat_models import ChatTongyi

generate_queries = (
    prompt_perspectives 
    | ChatTongyi(model='qwen-turbo')
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

In [None]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """ Unique union of retrieved docs """
    # Flatten list of lists, and convert each Document to string
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # Get unique documents
    unique_docs = list(set(flattened_docs))
    # Return
    return [loads(doc) for doc in unique_docs]

# Retrieve
question = "What is task decomposition for LLM agents?"
retrieval_chain = generate_queries | retriever.map() | get_unique_union
docs = retrieval_chain.invoke({"question":question})

In [None]:
from operator import itemgetter
# from langchain_openai import ChatOpenAI
from langchain_community.chat_models import ChatTongyi
from langchain_core.runnables import RunnablePassthrough

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# llm = ChatOpenAI(temperature=0)
llm = ChatTongyi(temperature=0)

final_rag_chain = (
    {"context": retrieval_chain, 
     "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})