In [3]:
import sys
import os

# Make the project root importable so `src.*` packages resolve from this notebook.
# The root is taken to be three directories above the current working directory
# (i.e. stepping out of src/langchainAgenticAi/retrieval).
project_root = os.path.abspath(os.path.join(os.getcwd(), "../../.."))
# Guard the append so re-running this cell does not pile up duplicate entries.
if project_root not in sys.path:
    sys.path.append(project_root)

In [4]:
import os
import logging
from src.langchainAgenticAi.utils.base_retrieval import build_index, split_documents, recall_documents
import requests

# Logging setup: a module-scoped logger, with the root logger configured at INFO level.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

In [5]:
def cloud_retrieve(query, cloud_urls, timeout=10):
    """
    Fetch documents from a list of cloud URLs and run retrieval over them.

    Each URL is fetched with an HTTP GET; the response body of every
    successful (status 200) request is collected as one document. The
    collected documents are then passed through ``split_documents``,
    ``build_index`` and ``recall_documents``.

    Args:
        query: The query forwarded to ``recall_documents``.
        cloud_urls: Iterable of URLs to fetch. A single URL string is
            accepted and treated as a one-element list; ``None`` is treated
            as empty.
        timeout: Per-request timeout in seconds passed to ``requests.get``.
            Without it a stalled server would block the call indefinitely.

    Returns:
        Whatever ``recall_documents`` returns, or ``[]`` when no document
        could be fetched.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(cloud_urls, str):
        cloud_urls = [cloud_urls]

    documents = []
    for url in cloud_urls or []:
        try:
            # The response body is assumed to be the document content itself.
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as e:
            # Best effort: one failing URL must not abort the remaining fetches.
            logger.error(f"Error fetching from {url}: {str(e)}")
            continue

        if response.status_code == 200:
            documents.append(response.text)
        else:
            logger.warning(f"Failed to fetch from {url}: {response.status_code}")

    if not documents:
        logger.error("No documents retrieved from cloud URLs.")
        return []

    # Split the fetched documents into chunks.
    # NOTE(review): raw strings are passed here — confirm split_documents accepts plain text.
    split_docs = split_documents(documents)

    # Build the vector index over the chunks.
    index = build_index(split_docs)

    # Run the retrieval for the query against the fresh index.
    return recall_documents(query, index)


In [6]:
def call_cloud_model(results):
    """
    Send the retrieved documents to a cloud chat model (via OpenRouter) and
    return the generated text.

    Args:
        results: Iterable of objects exposing a ``page_content`` attribute;
            their contents are joined with newlines to form the model input.

    Returns:
        The model's reply with surrounding whitespace stripped, or a fixed
        failure message when the API key is missing or the model call fails.
    """
    # NOTE(review): ChatOpenAI and SecretStr are not imported in the cells
    # visible here — confirm an earlier cell brings them into scope.
    failure_message = "⚠️ 云端模型调用失败，请检查网络或 API 配置。"

    # Read the OpenRouter API key from the environment.
    api_key = os.getenv("OPENROUTER_API_KEY")
    if not api_key:
        # Without a key the request cannot succeed; report it like any other failure.
        print("❌ 云端模型调用失败: 未设置环境变量 OPENROUTER_API_KEY")
        return failure_message

    # Configure the cloud model (GPT-4o mini routed through OpenRouter).
    model = ChatOpenAI(
        model="openai/gpt-4o-mini",
        base_url="https://openrouter.ai/api/v1",  # OpenRouter API base URL
        api_key=SecretStr(api_key),
    )

    # Concatenate the retrieved documents into a single input string.
    input_text = "\n".join(doc.page_content for doc in results)

    try:
        response = model.invoke(input_text)
        # invoke() returns a message object, not an OpenAI-style dict, so the
        # generated text lives on `.content` rather than under ['choices'].
        content = response.content
        if not isinstance(content, str):
            # Content may be a list of parts for some models; fall back to its string form.
            content = str(content)
        return content.strip()
    except Exception as e:
        print(f"❌ 云端模型调用失败: {e}")
        return failure_message