# 构建 embedding 模型，并将文本向量存储到向量数据库

导入文本并分块

In [None]:
import { TextLoader } from "langchain/document_loaders/fs/text"
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"

// Load the source text file into documents.
const loader = new TextLoader("data/kong.txt");
const docs = await loader.load();

// Chunking configuration handed to the recursive splitter.
const splitterConfig = { chunkSize: 100, chunkOverlap: 20 };
const splitter = new RecursiveCharacterTextSplitter(splitterConfig);

// Split the loaded documents into chunks and print them for inspection.
const splitDocs = await splitter.splitDocuments(docs);
console.log(splitDocs);

导入环境变量

In [None]:
import { load } from "dotenv"
// Load the key/value pairs defined in the local ".env.local" file.
const env = await load({
  envPath: ".env.local",
});

// Local `process.env` shim so the cells below can read settings as `process.env.NAME`.
const process = { env };

// Options object holding the API key, sampling temperature, model name and base URL.
// NOTE(review): presumably meant for a chat model client; it is not consumed in the visible cells.
const chatOptions = {
  openAIApiKey: process.env.OPENAI_API_KEY,
  temperature: 1.5,
  model: "deepseek-chat",
  configuration: {
    baseURL: "https://api.deepseek.com",
  },
  // NOTE(review): Azure-named option set to the same DeepSeek URL; confirm it is actually required.
  azureOpenAIBasePath: "https://api.deepseek.com",
};

// Log only the variable names: printing the values would write the API keys
// into the notebook's saved cell output.
console.log(Object.keys(process.env));

创建 embedding 模型，并填充向量数据库

模型需要使用 OpenAI 的 API key；本地测试使用 MemoryVectorStore 内存数据库即可。

In [None]:
import { OpenAIEmbeddings } from "@langchain/openai";
// Embedding client constructed with its default options.
const embeddings = new OpenAIEmbeddings();

// Smoke-test the embedding model on the text of the first chunk.
const [firstChunk] = splitDocs;
const res = await embeddings.embedQuery(firstChunk.pageContent);
console.log(res);

In [None]:
import { AlibabaTongyiEmbeddings } from "@langchain/community/embeddings/alibaba_tongyi";

// Constructor options: API key read from the loaded env plus the embedding model name.
const tongyiOptions = {
  apiKey: process.env.Tongyi_API_KEY,
  modelName: "text-embedding-v2",
};

const embeddings = new AlibabaTongyiEmbeddings(tongyiOptions);

// Dump the constructed client for inspection.
// NOTE(review): the printed object may include the API key — confirm before sharing the cell output.
console.log(embeddings);

In [None]:
// List the property names found on the embedding client's `parameters` object.
const parameterNames = Object.keys(embeddings.parameters);
console.log(parameterNames);

In [None]:
// Sample sentence used to exercise the embedding model.
const sampleQuery =
  "What would be a good company name a company that makes colorful socks?";

// Embed the sample and print the result wrapped in an object keyed `res`.
const res = await embeddings.embedQuery(sampleQuery);
console.log({ res });

创建内存向量数据库

In [5]:
import { MemoryVectorStore } from "langchain/vectorstores/memory"

// Create the in-memory vector store from the embedding client and add all
// split chunks to it in one step via the static factory.
const vectorstore = await MemoryVectorStore.fromDocuments(splitDocs, embeddings);

创建召回检索（retriever）

In [6]:
// Numeric argument passed to asRetriever.
// NOTE(review): presumably the number of documents returned per query — confirm against the API docs.
const retrieverArg = 2;
const retriever = vectorstore.asRetriever(retrieverArg);

测试召回检索

In [None]:
// Run a sample question through the retriever and print what it returns.
const question = "茴香豆是做什么用的？";
const res = await retriever.invoke(question);
console.log(res);