## 本地数据库方式

In [None]:
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import ZhipuAIEmbeddings
from langchain_milvus import Milvus

# 文件导入
loader = WebBaseLoader("https://zh.d2l.ai/")
docs = loader.load()

# 文本切分
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=150
)
splits = text_splitter.split_documents(docs)
print(len(splits))

# 文本嵌入
embed = ZhipuAIEmbeddings(model="embedding-2", api_key="5713143e8fdc4b4a8b284cf97092e70f.qEK71mGIlavzO1Io")

# 向量库创建 - 添加 index_params to specify supported index type
vectorstore = Milvus.from_documents(
    documents=splits,  # You should use the splits, not the original docs
    embedding=embed,
    connection_args={
        "uri": "./milvus_demo.db",
    },
    index_params={
        "metric_type": "L2",  # or "IP" depending on your needs
        "index_type": "IVF_FLAT",  # Use a supported index type
        "params": {"nlist": 128}  # IVF_FLAT parameter
    },
    drop_old=True,
)


# 检索
question = "图像识别"
docs = vectorstore.similarity_search(question,k=3)
print(len(docs))
print(docs[0].page_content)

## 远端数据库方式

In [None]:
!docker port milvus-standalone 19530/tcp

In [None]:
from pymilvus import connections, db
 
conn = connections.connect(host="129.201.70.31", port=19530)
database = db.create_database("sample_db")

In [None]:
测试

In [None]:
from pymilvus import (
    db,
    MilvusClient,
    FieldSchema, CollectionSchema, DataType,
    Collection,
)

#1.创建Milvus客户端
fmt = "\n=== {:30} ===\n"
# 1. connect to Milvus 数据库必须选存在，可通过可视化的管理界面创建数据库
print(fmt.format("1. start connecting to Milvus"))
milvusclient = MilvusClient(uri="http://120.79.252.32:19530", db_name="default")

In [None]:
from pymilvus import MilvusClient

client = MilvusClient(
    uri="http://localhost:19530"
)

client.list_databases()


# client.describe_database(
#     db_name="default"
# )



In [None]:
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

loader = WebBaseLoader(
    web_paths=(
        "https://lilianweng.github.io/posts/2023-06-23-agent/",
        "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    ),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)

docs = text_splitter.split_documents(documents)

docs[1]


In [None]:
from langchain_community.vectorstores import Milvus
from langchain_community.embeddings import ZhipuAIEmbeddings

embed = ZhipuAIEmbeddings(model="embedding-2",api_key="5713143e8fdc4b4a8b284cf97092e70f.qEK71mGIlavzO1Io")
vector = Milvus.from_documents(
     documents=documents, # 设置保存的文档
     embedding=embed, # 设置 embedding model
     collection_name="book2", # 设置 集合名称
     drop_old=True,
     connection_args={"host": "120.79.252.32", "port": "19530", "db_name":"sample_db"},# Milvus连接配置
)


In [None]:
query = "What is self-reflection of an AI Agent?"
vector.similarity_search(query, k=1)

In [13]:
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import ZhipuAIEmbeddings
from langchain_community.vectorstores import Milvus
# 文件导入
loader = WebBaseLoader("https://zh.d2l.ai/")
docs = loader.load()

# 文本切分
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1500,
    chunk_overlap = 150
)
splits = text_splitter.split_documents(docs)
print(len(splits))

# 文本嵌入
embed = ZhipuAIEmbeddings(model="embedding-2",api_key="5713143e8fdc4b4a8b284cf97092e70f.qEK71mGIlavzO1Io")

# 测试
# text_1 = "今天天气不错"

# query_result = embeddings.embed_query(text_1)
# print(query_result)

# 路径设置
# persist_directory = './langchain-milvus'

# 向量库创建
connection_args = {
    "host": "129.201.70.31",
    "port": "19530",
}
vectordb = Milvus.from_documents(
    documents=docs,
    embedding=embed,
    collection_name="book1",
    drop_old=False,
    connection_args=connection_args,
)
# print(vectordb._collection.count())

# 检索
question = "图像识别"
docs = vectordb.similarity_search(question,k=3)
print(len(docs))
print(docs[0].page_content)

18
2








《动手学深度学习》 — 动手学深度学习 2.0.0 documentation























《动手学深度学习》






search








      Quick search
      


code


Show Source








                  MXNet
              


                  PyTorch
              


                  Jupyter 记事本
              


                  课程
              


                  GitHub
              


                  English
              










Table Of Contents


前言
安装
符号


1. 引言
2. 预备知识
2.1. 数据操作
2.2. 数据预处理
2.3. 线性代数
2.4. 微积分
2.5. 自动微分
2.6. 概率
2.7. 查阅文档


3. 线性神经网络
3.1. 线性回归
3.2. 线性回归的从零开始实现
3.3. 线性回归的简洁实现
3.4. softmax回归
3.5. 图像分类数据集
3.6. softmax回归的从零开始实现
3.7. softmax回归的简洁实现


4. 多层感知机
4.1. 多层感知机
4.2. 多层感知机的从零开始实现
4.3. 多层感知机的简洁实现
4.4. 模型选择、欠拟合和过拟合
4.5. 权重衰减
4.6. 暂退法（Dropout）
4.7. 前向传播、反向传播和计算图
4.8. 数值稳定性和模型初始化
4.9. 环境和分布偏移
4.10. 实战Kaggle比赛：预测房价


5. 深度学习计算
5.1. 层和块
5.2. 参数管理
5.3. 延后初始化
5.4. 自定义层
5.5. 读写文件
5.6. GPU


6. 卷积神经网络
6.1. 从全连接层到卷积
6.2. 图像卷积
6.3. 填充和步幅
6.4. 多输入多输出通道
6.5. 汇聚层
6.6. 卷积神经网络（LeNet）


7. 现

In [12]:
# 