In [1]:
from dotenv import load_dotenv
load_dotenv()

import os
os.environ['http_proxy'] = os.getenv("http_proxy")
os.environ['https_proxy'] = os.getenv("https_proxy")

os.environ["LANGCHAIN_TRACING_V2"] = os.getenv("LANGCHAIN_TRACING_V2")
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")

os.environ["DASHSCOPE_API_KEY"] = os.getenv("DASHSCOPE_API_KEY")

# 1.合并操作符

In [3]:
from langchain_community.chat_models import ChatTongyi
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser


model = ChatTongyi()
prompt = ChatPromptTemplate.from_template("Tell me a joke about {topic}")
output_parser = StrOutputParser()
chain = prompt | model | output_parser

chain.invoke({"topic": "chickens"})

"Sure! Here's a joke about chickens:\n\nWhy don't chickens tell secrets in the rain?\n\nBecause the rain wets their feathers and their stories get all clucked up! \n\nI hope you found that amusing! Do you want to hear another one?"

# 2. RunnableLambda

- 使用RunnableLambda包裹后的函数，能够放置在chain当中,只需要在第一个函数添加即可，后边会自动添加。

In [4]:
from langchain_core.runnables import RunnableLambda

def add_five(x):
    return x+5

def multiply_by_two(x):
    return x*2

chain = RunnableLambda(add_five) | multiply_by_two

chain.invoke(5)

20

# 3. itemgetter

In [5]:
from operator import itemgetter

itemgetter(1)("ABCDE")

itemgetter(1, 3, 5)('ABCDEFG')

soldier = dict(rank='captain', name='dotterbart')
itemgetter('rank') (soldier)

'captain'

In [6]:
def length_function(text):
    return len(text)

def _multiplt_length_function(text1, text2):
    return length_function(text1) * length_function(text2)

def multiple_length_function(_dict):
    return _multiplt_length_function(_dict["text1"], _dict["text2"])

prompt = ChatPromptTemplate.from_template("what is {a} + {b}")
model = ChatTongyi()

chain = (
    {
        "a": itemgetter("foo") | RunnableLambda(length_function),
        "b": {"text1": itemgetter("foo"), "text2": itemgetter("bar")} 
        | RunnableLambda(multiple_length_function),
    }
    | prompt
    | model
)

chain.invoke({"foo": "bar", "bar": "gah"})

AIMessage(content='3 + 9 equals 12.', additional_kwargs={}, response_metadata={'model_name': 'qwen-turbo', 'finish_reason': 'stop', 'request_id': '0a174a9e-0231-943c-8669-c4b76be78758', 'token_usage': {'input_tokens': 15, 'output_tokens': 9, 'total_tokens': 24}}, id='run-72279218-2c9b-4f25-9e6b-27bc569791f9-0')

# 4. RunnablePassthrough
- 功能相当于占位符，能够将参数直接转递到后边的流程中。

## 4.1 一个变量的情况

In [7]:
prompt = ChatPromptTemplate.from_template("tell me a joke about {topic}")
from langchain_core.runnables import RunnablePassthrough
model = ChatTongyi()

runnable = (
    {"topic": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

runnable.invoke("bears")

'Sure! Here\'s a bear joke for you:\n\nWhy don\'t bears like coffee?\n\nBecause they don\'t like caffeinated! (get it? "caffeinated" sounds like "caffein-ated", which sounds like "irritated") \n\nIf you prefer a different type of joke, let me know!'

## 4.2 两个变量的情况

In [8]:
from operator import itemgetter

prompt = ChatPromptTemplate.from_template(
    "tell me a joke about {foo}, in {language} language"
)

runnable = (
    {"foo" : itemgetter("foo"), "language": itemgetter("language")}
    | prompt
    | model
    | StrOutputParser()
)


# runnable.invoke(("熊猫", "zh"))
runnable.invoke({"foo":"熊猫", "language":"zh"})

'当然可以，这里有一个关于熊猫的笑话：\n\n为什么熊猫总是拿着手机拍照？\n\n因为它想成为“朋友圈”里最萌的那个！ \n\n希望你喜欢这个笑话！'

# 5. RunnableParallel

In [9]:
prompt = ChatPromptTemplate.from_template(
    "Tell me a joke about {topic}"
)

chain = prompt | model | output_parser
chain.batch(["ice cream", "spaghetti", "dumplings"])

["Sure! Here's a joke about ice cream:\n\nWhy don't ice creams ever get lost?\n\nBecause they always know the scoop!",
 "Sure! Here's a spaghetti-themed joke for you:\n\nWhy did the spaghetti go to the gym?\n\nTo get better at faceplanting into plates! \n\n(Alternatively, you could say: To get al dente!) \n\nHope you found that amusing! 😄",
 'Sure! Here\'s a dumpling-themed joke for you:\n\nWhy did the dumpling go to space?\n\nBecause it wanted to see what\'s in the wrapper! \n\n(Play on words: "What\'s in the wrapper" sounds like "what\'s up there.")']

In [10]:
from langchain_core.runnables import RunnableParallel

chain1 = ChatPromptTemplate.from_template("tele me a joke about {topic}") | model
chain2 = ChatPromptTemplate.from_template("write a short poem about {topic}") | model

combined = RunnableParallel(joke=chain1, poem=chain2)
combined.invoke({"topic": "群峦"})

{'joke': AIMessage(content='Sure, here\'s a playful joke incorporating "群峦" (cluster of peaks or mountain ranges):\n\nWhy did the mountain range refuse to go on a diet?\n\nBecause it didn’t want to lose its 群峦 (natural beautiful peaks)!\n\nThis joke plays with the idea that the beauty of a mountain range lies in its peaks, so losing weight (or in this case, mass) would mean losing those distinctive features. Hope you enjoyed it!', additional_kwargs={}, response_metadata={'model_name': 'qwen-turbo', 'finish_reason': 'stop', 'request_id': '4556f179-201c-9f49-a374-b7ef1d1da9b4', 'token_usage': {'input_tokens': 17, 'output_tokens': 91, 'total_tokens': 108}}, id='run-5e84accd-6479-4d49-8178-dc3a8e255c86-0'),
 'poem': AIMessage(content="In misty veils, the群峦 (clustered mountains) rise,\nGuardians of secrets, beneath azure skies.\nTheir peaks in whispers, to the clouds do talk,\nIn silent symphonies, nature's grand walk.\n\nThrough verdant paths where seldom footsteps stray,\nEchos of time in

# 6.Retriever 配合

In [11]:
from langchain_community.embeddings import DashScopeEmbeddings
from langchain_community.vectorstores import FAISS

from langchain_community.embeddings import DashScopeEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma


vectorstore = Chroma.from_texts(
    ["harrison worked at kensho"], embedding=DashScopeEmbeddings()
)

retriever = vectorstore.as_retriever()

template = """Answer the user's question using only the provided context:
{context}

Question: 
{question}
"""

prompt = ChatPromptTemplate.from_template(template)
chain = (
    {"context":retriever, "question": RunnableParallel()}
    | prompt
    | model
    | StrOutputParser()
)

chain.invoke("What did harrison work at?")

Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


'It is stated that Harrison worked at Kensho.'

# 7. 练习案例

In [12]:
from langchain.document_loaders import TextLoader, DirectoryLoader
from langchain_community.embeddings import DashScopeEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

home_path = os.getcwd()
data_path = os.path.join(home_path, 'data')
text_loader_kwargs ={'autodetect_encoding': True}
loader = DirectoryLoader(data_path, glob="**/*.txt", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs, show_progress=True)
docs = loader.load()

100%|██████████| 8/8 [00:00<00:00, 73.37it/s]


In [13]:
from langchain_community.embeddings import DashScopeEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma


text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=200)
# text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
# documents = text_splitter.split_documents(docs)
documents = text_splitter.split_documents(docs)
print(len(documents))
for doc in documents:
    print(len(doc.page_content))
db = Chroma.from_documents(documents, DashScopeEmbeddings())

2622
440
497
495
490
438
14
499
497
436
497
496
199
489
498
499
494
381
495
493
499
215
458
404
397
490
292
173
438
497
499
499
329
496
497
235
493
494
493
456
13
491
285
429
236
498
217
499
369
197
492
497
47
494
364
390
459
494
285
320
424
494
330
353
490
284
419
498
493
340
13
497
300
495
490
469
469
477
379
446
476
333
489
16
496
411
495
279
488
466
498
490
209
499
329
497
492
295
411
393
498
465
71
494
494
337
499
490
496
292
12
496
327
497
244
10
492
349
254
253
325
213
378
469
468
488
378
395
396
273
499
279
306
300
364
307
442
260
245
495
476
330
366
299
396
257
462
243
214
473
236
433
423
438
333
489
277
486
199
482
244
387
354
421
491
406
486
385
260
360
476
410
379
309
466
428
329
435
401
474
243
184
435
495
386
497
268
196
498
436
496
497
363
486
456
402
474
466
495
490
487
424
74
494
422
499
340
473
452
465
282
498
335
299
496
217
181
392
340
353
497
444
23
498
349
495
218
491
464
169
494
346
464
357
498
360
406
496
498
283
182
499
261
460
485
458
209
494
497
490
350
486
1

In [None]:
# retriever = db.as_retriever()

question1 = "what is the large language model?"
retrieved_docs = db.similarity_search(question1)

class Classification(BaseModel):
    classification : str = Field(..., 
                        description= description_text,
                        enum=classification_list,)
    context: str = Field(
        ...,
        description="原文内容",
    )
    title: str = Field(
        ...,
        description="文章来源"
    )
class DocumentList(BaseModel):
    Documents: List[Classification]

# 设置解析器
parser = PydanticOutputParser(pydantic_object=DocumentList)

template = """
据提供的文档参考信息，检查是否存在与问题相关的内容，参考信息列表为：\n{context}\n如果发现有关信息原文，请以如下模式将输出包裹在 `json` 格式中\n{format_instructions},\n如果没有找到相关信息，则回答：“没有相关信息”。\n注意，确保完整引用原文内容及出处。
Question: {question}
"""


tagging_prompt = ChatPromptTemplate.from_messages([
    ("system", template),
    ("human", "{question}"),
]).partial(format_instructions=parser.get_format_instructions())


    def format_docs(docs):
        content = ""
        for d in docs:
            source = d.metadata["source"]
            context = d.page_content
            # 从路径中提取文件名（带后缀）
            filename_with_extension = source.split('\\')[-1]  # 使用split分割路径并取最后一个元素
            # 分离文件名和扩展名
            name, _ = filename_with_extension.rsplit('.', 1)  # 确保只分割一次
            # 格式化输出
            title = f"《{name}》"
            content += f"[原文内容]:{context}\n[文章来源]{title}.\n\n"
        return content


    # LLM
    # llm = ChatTongyi(model="qwen-max")
    llm = ChatTongyi(temperature=0)


    chain = (
        { "context":retriever | format_docs, 'question': RunnablePassthrough()}
        | tagging_prompt
        | llm
        | parser
    )

    result = chain.invoke(question)

    # output the result
    for document in result.Documents:
        entry = {
            "Field": field,
            "Classification": getattr(document, 'classification', None),
            "Title": getattr(document, 'title', None),
            "Context": getattr(document, 'context', None),
        }
        result_dict.append(entry)


result = retriever.invoke(question1)
prompt = ChatPromptTemplate.from_template(template)
chain = (
    {"context":retriever, "question": RunnableParallel()}
    | prompt
    | model
    | StrOutputParser()
)

chain.invoke("What did harrison work at?")

NameError: name 'query' is not defined

In [None]:
k