In [None]:
import { load } from "dotenv";
// Load environment variables via dotenv's `load()` (presumably from a local
// `.env` file — confirm against the dotenv module in use).
const env = await load();

// Declare a Node-style `process` object so the following cells can read
// settings as `process.env.*`.
const process = { env };
// Bare expression at the end of the cell; presumably here so the notebook
// echoes the loaded values. NOTE(review): this may print secrets such as
// OPENAI_API_KEY into the saved cell output — confirm that is intended.
process.env;


In [None]:
import { TextLoader } from "langchain/document_loaders/fs/text";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { OpenAIEmbeddings } from "@langchain/openai";

// Load the knowledge base from a plain-text file.
const loader = new TextLoader("documents/chatbot.txt");
const docs = await loader.load();
// Split the text into chunks (the full document exceeds the LLM context limit).
// chunkSize / chunkOverlap are presumably measured in characters — TODO confirm.
const splitter = new RecursiveCharacterTextSplitter({
  chunkSize: 100,
  chunkOverlap: 20,
});

const splitDocs = await splitter.splitDocuments(docs);

// Build the vector store.
// The embedding model name, base URL and API key all come from the
// environment loaded in the first cell.
const embeddings = new OpenAIEmbeddings({
  model: process.env.EMBEDDING_MODEL_NAME,
  batchSize: 20,
  configuration: {
    baseURL: process.env.BASE_URL,
    apiKey: process.env.OPENAI_API_KEY,
  },
});

// In-memory store backed by the embeddings above; the split chunks are added to it.
const vectorstore = new MemoryVectorStore(embeddings);
await vectorstore.addDocuments(splitDocs);


In [None]:
import { RunnableSequence } from "@langchain/core/runnables";
import { ChatPromptTemplate } from "@langchain/core/prompts";
import { StringOutputParser } from "@langchain/core/output_parsers";
import { ChatOpenAI } from "@langchain/openai";

// Build the retriever on top of the vector store.
// The argument 2 is presumably the number of documents returned per query —
// TODO confirm against the `asRetriever` signature.
const retriever = vectorstore.asRetriever(2);
// Manual check of the retriever on its own (the query string is sample data):
// const res = await retriever.invoke("原文中，谁提出了宏原子的假设？并详细介绍给我宏原子假设的理论")
/**
 * Flattens the retrieved context documents into one plain-text string.
 *
 * The parameter is typed structurally instead of as `Document`: none of the
 * visible cells imports a `Document` type, so the bare name would resolve to
 * whatever global `Document` is ambient (e.g. the DOM one), which has no
 * `pageContent` property. Any object exposing a `pageContent` string is
 * accepted.
 *
 * @param documents - Items whose `pageContent` holds the chunk text.
 * @returns The `pageContent` values joined with newlines; an empty string
 *   when `documents` is empty.
 */
const convertDocsToString = (
  documents: ReadonlyArray<{ pageContent: string }>,
): string => documents.map(({ pageContent }) => pageContent).join("\n");

// Retrieval sub-chain: take `question` off the input object, pass it to the
// retriever, then collapse the returned documents into a single string.
const contextRetrieverChain = RunnableSequence.from([
  ({ question }) => question,
  retriever,
  convertDocsToString,
]);

// Manual check of the sub-chain on its own (the query string is sample data):
// await contextRetrieverChain.invoke({
//   question: "球形闪电",
// });

// Build the prompt.
// The template text is sent to the model as-is; it has two placeholders,
// {context} and {question}, which the chain fills in before the model call.
const TEMPLATE = `
你是一个熟知内部知识库的机器人，你在回答时会引用知识库，并擅长通过自己的总结归纳，组织语言给出答案。
并且回答时仅根据知识库，尽可能回答用户问题，如果知识库中没有相关内容，你可以回答“原文中没有相关内容”，不要回答知识库以外的内容。

以下是知识库中跟用户回答相关的内容：
{context}

现在，你需要基于知识库，回答以下问题：
{question}`;

const prompt = ChatPromptTemplate.fromTemplate(TEMPLATE);

// Chat model client. The model name, base URL and API key are read from the
// environment loaded in the first cell (same BASE_URL / OPENAI_API_KEY as the
// embeddings client).
const llm = new ChatOpenAI({
  model: process.env.MODEL_NAME,
  configuration: {
    baseURL: process.env.BASE_URL,
    apiKey: process.env.OPENAI_API_KEY,
  },
});

// Full RAG chain: build a { context, question } object from the input, feed
// it to the prompt, send the prompt to the chat model, and run the reply
// through StringOutputParser.
//
// Earlier drafts (previously kept here as commented-out code) built the same
// first step with RunnableParallel.from or with a single async function, and
// used a stub getContext() for debugging.
const ragChain = RunnableSequence.from([
  {
    // Run the retrieval sub-chain for the incoming question; the returned
    // promise resolves to the context text.
    context: (input) =>
      contextRetrieverChain.invoke({ question: input.question }),
    // Forward the question unchanged so the prompt can reference it.
    question: ({ question }) => question,
  },
  prompt,
  llm,
  new StringOutputParser(),
]);


In [None]:
// Run the whole chain once with a sample question; as the last expression in
// the cell, its result is presumably what the notebook displays.
await ragChain.invoke({
  question: "书中有哪些人物",
});