In [None]:
import { load } from "dotenv";
// Load configuration values via dotenv's `load()` (presumably from a local
// `.env` file — confirm against the dotenv module in use).
const env = await load();

// Wrap the loaded values in a Node-style `process.env` object so the cells
// below can read settings as `process.env.X`. This `process` is a plain local
// object and shadows any runtime-provided global of the same name.
const process = { env };
// Bare expression with no effect on program state.
// NOTE(review): as the last expression of a notebook cell this likely echoes
// every loaded value (API keys included) into the cell output — consider
// removing it before sharing the notebook.
process.env;


In [None]:
import { TextLoader } from "langchain/document_loaders/fs/text";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { OpenAIEmbeddings } from "@langchain/openai";

// Load the knowledge base from a plain-text file.
const loader = new TextLoader("documents/chatbot.txt");
const docs = await loader.load();
// Split the text into chunks (the full document exceeds the LLM context limit).
// chunkSize / chunkOverlap are presumably measured in characters (the
// splitter's default length function) — confirm if a custom one is configured.
const splitter = new RecursiveCharacterTextSplitter({
  chunkSize: 100,
  chunkOverlap: 20,
});

// Chunked documents that get embedded and indexed further down.
const splitDocs = await splitter.splitDocuments(docs);

// Build the vector store.
// Embedding client for an OpenAI-compatible endpoint; the model name, base URL
// and API key are all read from the loaded environment values.
const embeddings = new OpenAIEmbeddings({
  model: process.env.EMBEDDING_MODEL_NAME,
  batchSize: 20,
  configuration: {
    baseURL: process.env.BASE_URL,
    apiKey: process.env.OPENAI_API_KEY,
  },
});
// Alternative configuration for an Azure OpenAI deployment (kept for reference):
// const embeddings = new OpenAIEmbeddings({
//   configuration: {
//     baseURL: `https://${process.env.AZURE_OPENAI_API_INSTANCE_NAME}.openai.azure.com/openai/deployments/${process.env.AZURE_EMBEDDING_MODEL_NAME}`,
//     apiKey: process.env.AZURE_OPENAI_API_KEY,
//     defaultQuery: {
//       "api-version": process.env.AZURE_OPENAI_API_VERSION,
//     },
//   },
// });

// Create the in-memory store and embed + index every chunk in one step.
const vectorstore = await MemoryVectorStore.fromDocuments(
  splitDocs,
  embeddings
);


In [None]:
import { ChatOpenAI } from "@langchain/openai";
import {
  ChatPromptTemplate,
  MessagesPlaceholder,
} from "@langchain/core/prompts";
import { StringOutputParser } from "@langchain/core/output_parsers";
import { AIMessage } from "@langchain/core/messages";
import {
  RunnableWithMessageHistory,
  RunnableSequence,
  RunnablePassthrough,
} from "@langchain/core/runnables";
import { JSONChatHistory } from "./node/history/index.ts";
import { BufferMemory } from "langchain/memory";
import { ConversationChain } from "langchain/chains";

// Model used only for question rewriting; a low temperature (0.2) keeps the
// rewritten question close to the user's wording.
const rephraseModel = new ChatOpenAI({
  model: process.env.MODEL_NAME,
  temperature: 0.2,
  configuration: {
    baseURL: process.env.BASE_URL,
    apiKey: process.env.OPENAI_API_KEY,
  },
});

// Prompt: system instruction, then the prior conversation ("history"), then a
// request to restate the follow-up "question" as a standalone question.
const rephrasePrompt = ChatPromptTemplate.fromMessages([
  [
    "system",
    `给定以下对话和一个后续问题，请将后续问题重述为一个独立的问题。请注意，重述的问题应该包含足够的信息，使得没有看过对话历史的人也能理解。
    
    `,
  ],
  new MessagesPlaceholder("history"),
  ["human", "将这个问题重述为一个独立的问题：{question}"],
]);

// prompt -> model -> plain string: yields the standalone question text.
const rephraseChain = RunnableSequence.from([
  rephrasePrompt,
  rephraseModel,
  new StringOutputParser(),
]);

// const historyMessages = [
//   new AIMessage("这本书是《三体前传：球状闪电》"),
// ];

// const question = "你觉得我的名字怎么样？";

// await rephraseChain.invoke({
//   history: historyMessages,
//   question,
// });

// Chat model that produces the final answer. It targets the same
// OpenAI-compatible endpoint as the other clients (model name, base URL and
// API key come from the loaded environment values); no temperature is set
// here, so the library default applies.
const chatModel = new ChatOpenAI({
  model: process.env.MODEL_NAME,
  configuration: {
    baseURL: process.env.BASE_URL,
    apiKey: process.env.OPENAI_API_KEY,
  },
});

// Answer prompt: system instructions plus the retrieved {context}, followed by
// the prior conversation turns, followed by the user's {question}.
//
// Conversation history is injected exactly once, as real chat messages, via
// the MessagesPlaceholder below. It is intentionally NOT also interpolated
// into the system text: formatting a message array into a string template
// would repeat the history in stringified form.
const chatPrompt = ChatPromptTemplate.fromMessages([
  [
    "system",
    `你是一个熟知内部知识库的机器人，你在回答时会引用知识库，并擅长通过自己的总结归纳，组织语言给出答案。
并且回答时仅根据知识库和历史记录，尽可能回答用户问题，如果知识库中没有相关内容，你可以回答“原文中没有相关内容”，不要回答知识库以外的内容。
 以下是原文中跟用户回答相关的内容：
    {context}
`,
  ],
  new MessagesPlaceholder("history"),
  [
    "human",
    `现在，你需要基于原文，回答以下问题：
    {question}`,
  ],
]);

// Build the retriever: a similarity search over the vector store that returns
// the top 2 matching chunks for each query.
const retriever = vectorstore.asRetriever({ k: 2 });
// const res = await retriever.invoke("原文中，谁提出了宏原子的假设？并详细介绍给我宏原子假设的理论")

// console.log(res)

/**
 * Flattens the retrieved context documents into one plain-text string.
 *
 * The parameter is typed structurally (anything carrying a `pageContent`
 * string) rather than with an unimported `Document` name, which would resolve
 * to the DOM global — or to nothing — instead of the retriever's document type.
 *
 * @param documents - Retrieved documents, in the order they should appear.
 * @returns Each document's `pageContent`, joined by a newline; an empty string
 *   when `documents` is empty.
 */
const convertDocsToString = (
  documents: ReadonlyArray<{ pageContent: string }>
): string => {
  // Debug output: shows which chunks were retrieved for the current question.
  console.log("documents", documents);
  return documents.map((document) => document.pageContent).join("\n");
};

// Retrieval sub-chain: pick the question out of the chain input, fetch the
// matching chunks, then flatten them into a single context string.
const contextRetrieverChain = RunnableSequence.from([
  ({ question }) => question,
  retriever,
  convertDocsToString,
]);

// Full RAG pipeline, step by step:
//   1. replace `question` with its standalone rephrasing,
//   2. add `context` retrieved for that rephrased question,
//   3. log the assembled input (including the retrieved context) and pass it on,
//   4. format the answer prompt, call the chat model, return plain text.
const ragChain = RunnablePassthrough.assign({ question: rephraseChain })
  .pipe(RunnablePassthrough.assign({ context: contextRetrieverChain }))
  .pipe(
    new RunnablePassthrough({
      func: (input) => {
        console.log("input", input);
      },
    })
  )
  .pipe(chatPrompt)
  .pipe(chatModel)
  .pipe(new StringOutputParser());

// Wrap the RAG chain with per-session conversation history.
// - getMessageHistory: builds a JSONChatHistory for the given session id
//   (project-local class from ./node/history; how and where it persists
//   messages is not visible here — see that module).
// - inputMessagesKey: the input field ("question") holding the user's message.
// - historyMessagesKey: the key ("history") under which prior messages are
//   supplied to the chain; it matches the MessagesPlaceholder("history")
//   entries in the prompts.
const chainWithHistory = new RunnableWithMessageHistory({
  runnable: ragChain,
  getMessageHistory: (sessionId) => new JSONChatHistory({ sessionId }),
  inputMessagesKey: "question",
  historyMessagesKey: "history",
});

// const memory = new BufferMemory({ chatHistory: history });

// const chainWithHistory = new ConversationChain({
//   memory,
//   llm: ragChain,
// });


In [None]:
// Ask one question through the history-aware chain. `configurable.sessionId`
// selects which conversation history is used, so repeated calls with the same
// id share one conversation.
await chainWithHistory.invoke(
  {
    question: "三体的作者是谁",
  },
  {
    configurable: { sessionId: "test-history" },
  }
);
