In [None]:
import os
import numpy as np
import faiss
import ollama
from tqdm import tqdm
import pickle

In [None]:
# Step 2: configuration constants
# Ollama model used to embed the question text before the similarity search.
EMBEDDING_MODEL = "nomic-embed-text"
# Directory holding the saved FAISS index file and its metadata pickle.
VECTOR_STORE_DIR = "./vector_stores"
# Base file name (without extension) of the vector store to load.
INDEX_NAME = "my_documents"
# Default Ollama chat model used to generate the answer.
MODEL_NAME = "llama3.2:latest" 

# Step 3: load the vector store
def load_vector_store(index_name):
    """Load a FAISS index and its pickled chunk/metadata sidecar from disk.

    Reads ``<VECTOR_STORE_DIR>/<index_name>.faiss`` with ``faiss.read_index``
    and ``<VECTOR_STORE_DIR>/<index_name>_metadata.pkl`` with ``pickle``. The
    pickle must be subscriptable with the keys ``'chunks'`` and ``'metadata'``.

    Args:
        index_name: Base name of the store, without any file extension.

    Returns:
        ``(index, chunks, metadata)`` on success. If anything raises while
        loading, the error is printed and ``(None, None, None)`` is returned
        instead of propagating the exception.
    """
    index_path = os.path.join(VECTOR_STORE_DIR, f"{index_name}.faiss")
    metadata_path = os.path.join(VECTOR_STORE_DIR, f"{index_name}_metadata.pkl")

    try:
        index = faiss.read_index(index_path)

        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only point VECTOR_STORE_DIR at stores you created yourself.
        with open(metadata_path, "rb") as f:
            data = pickle.load(f)
        chunks = data['chunks']
        metadata = data['metadata']

        print(f"✅ 成功加载向量存储 | 索引大小: {index.ntotal}")

        # Search results are used as positions into chunks/metadata, so the
        # three collections must be the same size. Warn (rather than fail) so
        # a store that loaded before this check still loads now.
        if not (index.ntotal == len(chunks) == len(metadata)):
            print(
                f"⚠️ 索引与元数据数量不一致 | 索引: {index.ntotal}, "
                f"文本块: {len(chunks)}, 元数据: {len(metadata)}"
            )

        return index, chunks, metadata
    except Exception as e:
        # Best-effort load: report the problem and hand back empty handles.
        print(f"❌ 加载失败: {str(e)}")
        return None, None, None

# Load the vector store named by INDEX_NAME; all three values are None if loading failed.
index, chunks, metadata = load_vector_store(INDEX_NAME)

# Step 4: define the query function
def query_documents(question, index, chunks, metadata, model_name=MODEL_NAME, k=3):
    """Answer ``question`` with the chat model, using retrieved chunks as context.

    The question is embedded with ``EMBEDDING_MODEL``, the nearest vectors are
    looked up in ``index``, and the matching chunks (each labelled with its
    source file name and page) are placed in the prompt sent to ``model_name``.

    Args:
        question: The question text.
        index: Object whose ``search(query, k)`` returns ``(distances, ids)``,
            e.g. the FAISS index returned by ``load_vector_store``.
        chunks: Chunk texts, indexable by the ids returned from ``index``.
        metadata: Per-chunk mappings, indexable by the same ids; each must
            provide the keys ``'filename'`` and ``'page'``.
        model_name: Ollama chat model used to generate the answer.
        k: Number of nearest neighbours to request.

    Returns:
        ``(answer, sources)`` where ``answer`` is the model's reply text and
        ``sources`` is the list of metadata entries for the chunks that were
        used. If any ``Exception`` is raised, or retrieval yields no usable
        chunk, a failure message string and an empty list are returned.
    """
    try:
        # Embed the question; FAISS expects a 2-D float32 array (one row per query).
        embedding_response = ollama.embeddings(model=EMBEDDING_MODEL, prompt=question)
        query_embedding = np.array([embedding_response['embedding']], dtype=np.float32)

        # Search for the most similar stored vectors.
        _, indices = index.search(query_embedding, k)

        # FAISS pads the id row with -1 when fewer than k neighbours exist.
        # Indexing a list with -1 would silently return the LAST chunk, so
        # those placeholder ids must be dropped before the lookup.
        hit_ids = [int(i) for i in indices[0] if i >= 0]
        if not hit_ids:
            # Nothing to ground the answer on; do not query the model blind.
            return "查询失败: 未检索到相关内容", []

        # Collect the matching text chunks and their metadata.
        context_chunks = [chunks[i] for i in hit_ids]
        context_metadata = [metadata[i] for i in hit_ids]

        # Build the context section, one labelled entry per retrieved chunk.
        context = "\n\n".join([
            f"来源: {meta['filename']} 第{meta['page']}页\n内容: {chunk}"
            for chunk, meta in zip(context_chunks, context_metadata)
        ])

        # Build the prompt.
        prompt = f"""
        基于以下上下文信息回答问题：
        {context}
        
        问题: {question}
        回答:
        """

        # Ask the chat model for the answer.
        chat_response = ollama.chat(
            model=model_name,
            messages=[{'role': 'user', 'content': prompt}]
        )

        return chat_response['message']['content'], context_metadata

    except Exception as e:
        # Best-effort: report the failure as the answer text instead of raising.
        return f"查询失败: {str(e)}", []

# Step 5: interactive document query
# NOTE(review): the banner tells the user to type 'exit', but no input loop is
# visible in this part of the notebook; the following cells ask fixed questions.
# Confirm whether an interactive loop was intended.
print("🚀 文档助手已启动! 输入 'exit' 退出")

In [None]:
# Ask for a definition of Time-LLM, then show the answer and its cited pages.
question = "what is time llm"
answer, sources = query_documents(question, index, chunks, metadata)

print(f"🤖 助手回答:\n{answer}\n")
print("📚 来源:")
# Number the sources from 1 for display.
for rank, source in enumerate(sources, start=1):
    print(f"{rank}. {source['filename']} - 第{source['page']}页")

In [None]:
# Ask how Time-LLM differs from TabPFN, then show the answer and its cited pages.
question = "time llm vs tab pfn, what is the difference"
answer, sources = query_documents(question, index, chunks, metadata)

print(f"🤖 助手回答:\n{answer}\n")
print("📚 来源:")
# Number the sources from 1 for display.
for rank, source in enumerate(sources, start=1):
    print(f"{rank}. {source['filename']} - 第{source['page']}页")

In [None]:
# Ask how to compare Time-LLM and TabPFN results, then show the answer and its cited pages.
question = "how to compare results from times llm and tabpfn"
answer, sources = query_documents(question, index, chunks, metadata)

print(f"🤖 助手回答:\n{answer}\n")
print("📚 来源:")
# Number the sources from 1 for display.
for rank, source in enumerate(sources, start=1):
    print(f"{rank}. {source['filename']} - 第{source['page']}页")

In [None]:
# Ask how Time-LLM differs from the multi-wavelet number-embedding approach,
# then show the answer and its cited pages.
question = "whats the difference between time llm and LLMs- Multi-Wavelet Number Embeddings"
answer, sources = query_documents(question, index, chunks, metadata)

print(f"🤖 Answer:\n{answer}\n")
print("📚 来源:")
# Number the sources from 1 for display.
for rank, source in enumerate(sources, start=1):
    print(f"{rank}. {source['filename']} - 第{source['page']}页")