In [18]:
import os
import sys
from pathlib import Path

root_path = Path.cwd().parent.parent
print(root_path)
sys.path.append(str(root_path))

from src.settings import settings

/mnt/ssd2/steins/wenkai/project/doc-reading-agent-demo


In [19]:
from src.POC import *

In [20]:
file_path = os.path.join(root_path, "demo_data", "test.pdf")

In [21]:
from pdf2image import convert_from_path
from PIL import Image
from io import BytesIO
from typing import List

def convert_to_jpeg(images: List[Image.Image]) -> List[Image.Image]:
    processed_images = []

    for img in images:
        # 步骤 A: 确保颜色模式是 RGB (防止 CMYK 导致保存 JPEG 失败)
        if img.mode != 'RGB':
            img = img.convert('RGB')
        
        buffer = BytesIO()
        img.save(buffer, format='JPEG')
        buffer.seek(0)
        
        new_img = Image.open(buffer)
        processed_images.append(new_img)
    
    return processed_images

pdf_doc = convert_from_path(file_path, first_page=1)
images = convert_to_jpeg(pdf_doc)
print(f"Total pages converted to images: {len(images)}")
# for idx, img in enumerate(images):
#     img.save(f"page_{idx + 1}.jpeg", format='JPEG')

KeyboardInterrupt: 

In [None]:
import base64
import json
from typing import List, Dict, Any
from openai import OpenAI
import httpx
from httpx import AsyncClient, Timeout
import numpy as np
from loguru import logger

logger = logger.bind(name="JinaEmbedding客户端")

class JinaEmbeddingClient():
    def __init__(self,baseurl: str = "", api_key: str = "", embedding_name: str = "",):
        self.base_url = baseurl if baseurl else settings.JINA_EMBEDDING_BASE_URL
        self.api_key = api_key if api_key else settings.JINA_EMBEDDING_MODEL_API_KEY
        self.embedding_dim = settings.JINA_EMBEDDING_MODEL_DIMS
        self.embedding_name = embedding_name if embedding_name else settings.JINA_EMBEDDING_MODEL_NAME
        
        # self.base_url = settings.QWEN3_EMBEDDING_MODEL_BASE_URL
        # self.api_key = settings.QWEN3_EMBEDDING_MODEL_API_KEY
        # self.embedding_dim = None
        # self.embedding_name = settings.QWEN3_EMBEDDING_MODEL_NAME
        self.timeout = Timeout(60.0, connect=10.0)

        self.headers = {
            "Content-type": "application/json",
            "User-Agent": "wenkai_test"
        }
        
        logger.info(f"通过HTTP请求访问JinaEmbedding服务: {self.embedding_name} at {self.base_url} 成功！")
    
    async def get_embedding(self, text: str = "", *, image: Image.Image=None, is_base64=True) -> List[float]:
        """
        [异步] 获取多模态向量
        
        Args:
            text: 提示词文本
            image: PIL Image 对象 (可选)
        Returns:
            List[float]: 嵌入向量
        """

        content_block: List[Dict[str, Any]] = []

        if text:
            content_block.append(
                {
                    "type": "text",
                    "text":text 
                },
            )

        if image:
            image_http_url = ""#TODO：日后再添加，测试miniserve的静态文件服务器功能
            images_base64 = self._convert_to_base64(image)
            content_block.append(
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{images_base64}"if is_base64 else f"{image_http_url}"
                    }
                }
            )
        
        if not content_block:
            logger.error(f"未提供文本或图片内容，无法获取嵌入向量！")
            raise ValueError("必须提供text或image内容至少一项！")
        
        payload = {
            "model": self.embedding_name,
            "messages": [
                {
                    "role": "user",
                    "content": content_block
                }
            ]
        }
        logger.info(f"payload构造完毕，前50字符: {str(payload)}...")

        async with AsyncClient(timeout=self.timeout) as client:
            try:
                response = await client.post(
                    url=self.base_url,
                    headers= self.headers,
                    json=payload
                )
                
                if response.status_code != 200:
                    raise ValueError(f"HTTP Error {response.status_code}: {response.text}")

                result = response.json()
                if "data" in result and len(result["data"])>0:
                    return result["data"][0]["embedding"]
                
            except httpx.RequestError as e:
                logger.warning(f"请求JinaEmbedding服务器时出现异常：{e}")
                raise 

    def _convert_to_base64(self, image: Image.Image) -> str:
        logger.info(f"正在将 1 张图片转换为 Base64 编码, 以便发送到 Jina Embedding 服务...")
        images_base64 = []
        buffer = BytesIO()
        image.save(buffer, format='JPEG')
        buffer.seek(0)
        img_bytes = buffer.read()
        img_base64 = base64.b64encode(img_bytes).decode('utf-8')
        images_base64.append(img_base64)

        logger.info(f"图片转换为 Base64 编码完成！")
        return images_base64[0]

    def _cal_cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
        """用numpy计算两个向量的余弦相似度"""
        v1 = np.array(vec1)
        v2 = np.array(vec2)

        dot_product = np.dot(v1,v2)
        norm_v1 = np.linalg.norm(v1)
        norm_v2 = np.linalg.norm(v2)

        #防止除零
        if norm_v1 ==0 or norm_v2 ==0:
            return 0.0
        
        return dot_product / (norm_v1 * norm_v2)


        

jinaclient = JinaEmbeddingClient()
for img in images:
    embedding = await jinaclient.get_embedding(image=img)
    print(f"Embedding前20个维度: {embedding[:20]}")
while True:
    user_input = input("请输入您的问题（输入 'exit' 退出）：")
    if user_input.lower() == 'exit':
        break
    user_input_embedding = await jinaclient.get_embedding(text=user_input)
    similarity = jinaclient._cal_cosine_similarity(user_input_embedding, embedding)
    print("余弦相似度为:", similarity)
    print(f"与文档相似"if similarity >0.5 else "与文档不相似")
    

[32m2026-01-12 13:00:34.599[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m30[0m - [1m通过HTTP请求访问JinaEmbedding服务: jina/jina-embeddings-v4-vllm-retrieval at http://localhost:9908/v1/embeddings 成功！[0m
[32m2026-01-12 13:00:34.601[0m | [1mINFO    [0m | [36m__main__[0m:[36m_convert_to_base64[0m:[36m100[0m - [1m正在将 1 张图片转换为 Base64 编码, 以便发送到 Jina Embedding 服务...[0m
[32m2026-01-12 13:00:34.631[0m | [1mINFO    [0m | [36m__main__[0m:[36m_convert_to_base64[0m:[36m109[0m - [1m图片转换为 Base64 编码完成！[0m
[32m2026-01-12 13:00:34.633[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_embedding[0m:[36m78[0m - [1mpayload构造完毕，前50字符: {'model': 'jina/jina-embeddings-v4-vllm-retrieval', 'messages': [{'role': 'user', 'content': [{'type': 'image_url', 'image_url': {'url': 'data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMj

Embedding前20个维度: [0.004197034053504467, 0.00379011919721961, 0.008603338152170181, 0.0019590032752603292, 0.007370967883616686, 0.019866734743118286, -0.05294075608253479, -0.0035134172067046165, 0.0029623385053128004, -0.01084485650062561, -0.03398551046848297, -0.015699928626418114, 0.01657421514391899, -0.017811235040426254, -0.0592653751373291, 0.008528931066393852, 0.0011754019651561975, -0.01011938601732254, 0.03394830599427223, -0.010110084898769855]


[32m2026-01-12 13:00:35.817[0m | [1mINFO    [0m | [36m__main__[0m:[36m_convert_to_base64[0m:[36m100[0m - [1m正在将 1 张图片转换为 Base64 编码, 以便发送到 Jina Embedding 服务...[0m
[32m2026-01-12 13:00:35.860[0m | [1mINFO    [0m | [36m__main__[0m:[36m_convert_to_base64[0m:[36m109[0m - [1m图片转换为 Base64 编码完成！[0m
[32m2026-01-12 13:00:35.862[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_embedding[0m:[36m78[0m - [1mpayload构造完毕，前50字符: {'model': 'jina/jina-embeddings-v4-vllm-retrieval', 'messages': [{'role': 'user', 'content': [{'type': 'image_url', 'image_url': {'url': '

Embedding前20个维度: [0.017288463190197945, -0.0018643067451193929, 0.02441837638616562, 0.0030286358669400215, 0.0034584812819957733, 0.017304236069321632, -0.05060345679521561, -0.004566614981740713, 0.010836837813258171, -0.006794712971895933, -0.015174726955592632, -0.01594766043126583, -0.0014354471350088716, -0.017919428646564484, -0.053568996489048004, 0.01664172299206257, -0.0021117636933922768, -0.013068879023194313, 0.05255945026874542, -0.002626395085826516]


[32m2026-01-12 13:00:36.410[0m | [1mINFO    [0m | [36m__main__[0m:[36m_convert_to_base64[0m:[36m100[0m - [1m正在将 1 张图片转换为 Base64 编码, 以便发送到 Jina Embedding 服务...[0m
[32m2026-01-12 13:00:36.451[0m | [1mINFO    [0m | [36m__main__[0m:[36m_convert_to_base64[0m:[36m109[0m - [1m图片转换为 Base64 编码完成！[0m
[32m2026-01-12 13:00:36.453[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_embedding[0m:[36m78[0m - [1mpayload构造完毕，前50字符: {'model': 'jina/jina-embeddings-v4-vllm-retrieval', 'messages': [{'role': 'user', 'content': [{'type': 'image_url', 'image_url': {'url': '

Embedding前20个维度: [0.01274822372943163, 0.0033756503835320473, 0.01793742924928665, 0.0017525950679555535, -0.007178019732236862, 0.020299622789025307, -0.0648612529039383, 0.0020231043454259634, 0.003674734616652131, -0.008298156782984734, -0.0124967647716403, -0.016687752678990364, 0.007703797891736031, -0.0202843826264143, -0.061965662986040115, 0.02001006342470646, -0.0012296739732846618, -0.024749688804149628, 0.047030504792928696, 0.006305532064288855]


[32m2026-01-12 13:00:37.004[0m | [1mINFO    [0m | [36m__main__[0m:[36m_convert_to_base64[0m:[36m100[0m - [1m正在将 1 张图片转换为 Base64 编码, 以便发送到 Jina Embedding 服务...[0m
[32m2026-01-12 13:00:37.050[0m | [1mINFO    [0m | [36m__main__[0m:[36m_convert_to_base64[0m:[36m109[0m - [1m图片转换为 Base64 编码完成！[0m
[32m2026-01-12 13:00:37.053[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_embedding[0m:[36m78[0m - [1mpayload构造完毕，前50字符: {'model': 'jina/jina-embeddings-v4-vllm-retrieval', 'messages': [{'role': 'user', 'content': [{'type': 'image_url', 'image_url': {'url': '

Embedding前20个维度: [0.011709267273545265, -0.0022188955917954445, 0.023248936980962753, -0.007154878228902817, -0.0031993789598345757, 0.02001245878636837, -0.06015044078230858, -0.0018691015429794788, 0.007893332280218601, -0.0026958873495459557, -0.020054858177900314, -0.013815098442137241, -0.006829816848039627, -0.012232191860675812, -0.0512748584151268, 0.01601279340684414, -0.0007101878873072565, -0.012528986670076847, 0.05059647187590599, 0.00891091488301754]


[32m2026-01-12 13:00:37.603[0m | [1mINFO    [0m | [36m__main__[0m:[36m_convert_to_base64[0m:[36m100[0m - [1m正在将 1 张图片转换为 Base64 编码, 以便发送到 Jina Embedding 服务...[0m
[32m2026-01-12 13:00:37.644[0m | [1mINFO    [0m | [36m__main__[0m:[36m_convert_to_base64[0m:[36m109[0m - [1m图片转换为 Base64 编码完成！[0m
[32m2026-01-12 13:00:37.646[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_embedding[0m:[36m78[0m - [1mpayload构造完毕，前50字符: {'model': 'jina/jina-embeddings-v4-vllm-retrieval', 'messages': [{'role': 'user', 'content': [{'type': 'image_url', 'image_url': {'url': '

Embedding前20个维度: [0.01828271895647049, -0.021264176815748215, 0.01883641816675663, 8.385347609873861e-05, 0.0016384702175855637, 0.01900678686797619, -0.04387000575661659, -0.0015186795499175787, 0.009865427389740944, 0.003955754451453686, -0.02589608170092106, -0.0061173103749752045, 0.0010628094896674156, -0.010775837115943432, -0.05651990324258804, 0.020976677536964417, 0.01047769095748663, -0.011936475522816181, 0.052686601877212524, -0.010323294438421726]


[32m2026-01-12 13:00:38.188[0m | [1mINFO    [0m | [36m__main__[0m:[36m_convert_to_base64[0m:[36m100[0m - [1m正在将 1 张图片转换为 Base64 编码, 以便发送到 Jina Embedding 服务...[0m
[32m2026-01-12 13:00:38.229[0m | [1mINFO    [0m | [36m__main__[0m:[36m_convert_to_base64[0m:[36m109[0m - [1m图片转换为 Base64 编码完成！[0m
[32m2026-01-12 13:00:38.231[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_embedding[0m:[36m78[0m - [1mpayload构造完毕，前50字符: {'model': 'jina/jina-embeddings-v4-vllm-retrieval', 'messages': [{'role': 'user', 'content': [{'type': 'image_url', 'image_url': {'url': '

Embedding前20个维度: [0.00945819728076458, -0.018543779850006104, 0.01997213624417782, -0.01247637439519167, -0.0003842586593236774, 0.013873678632080555, -0.03338625654578209, -0.01174977608025074, 0.01188640110194683, -0.009601032361388206, -0.03512512519955635, 0.0001901886280393228, -0.00267195631749928, -0.0024965170305222273, -0.047098468989133835, 0.009365043602883816, 0.0002055201621260494, -0.017140265554189682, 0.05070040747523308, 0.006169874686747789]


CancelledError: 

# 测试Qwen的embedding模型是否可用

In [None]:
jinaclient = JinaEmbeddingClient()
embedding = await jinaclient.get_embedding(text="""这是一个测试文本广东省政府采购 
公开招标文件 
采购计划编号：440403-2025-01826 
采购项目编号：DHH25-ZH1ZFGY-188 
项目名称：珠海市斗门区侨立中医院2025年病理服务采购项目 
采购人：珠海市斗门区侨立中医院 
采购代理机构：大航海（广东）项目咨询有限公司 
第一章投标邀请 """)
print(f"Embedding前20个维度: {embedding[:20]}")
while True:
    user_input = input("请输入您的问题（输入 'exit' 退出）：")
    if user_input.lower() == 'exit':
        break
    user_input_embedding = await jinaclient.get_embedding(text=user_input)
    similarity = jinaclient._cal_cosine_similarity(user_input_embedding, embedding)
    print("余弦相似度为:", similarity)
    print(f"与文档相似"if similarity >0.5 else "与文档不相似")

[32m2026-01-08 10:44:26.738[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m30[0m - [1m通过HTTP请求访问JinaEmbedding服务: jina/jina-embeddings-v4-vllm-retrieval at http://localhost:9908/v1/embeddings 成功！[0m
[32m2026-01-08 10:44:26.740[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_embedding[0m:[36m78[0m - [1mpayload构造完毕，前50字符: {'model': 'jina/jina-embeddings-v4-vllm-retrieval', 'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': '这是一个测试文本广东省政府采购 \n公开招标文件 \n采购计划编号：440403-2025-01826 \n采购项目编号：DHH25-ZH1ZFGY-188 \n项目名称：珠海市斗门区侨立中医院2025年病理服务采购项目 \n采购人：珠海市斗门区侨立中医院 \n采购代理机构：大航海（广东）项目咨询有限公司 \n第一章投标邀请 '}]}]}...[0m


Embedding前20个维度: [0.018903452903032303, -0.0022878132294863462, -0.0026976692024618387, -0.03616220876574516, 0.01030043140053749, -0.01578274928033352, -0.008133861236274242, -0.020252946764230728, 0.01981014385819435, 0.03498140349984169, -0.02521866001188755, 0.028086334466934204, 0.009056366980075836, 0.006842353846877813, -0.0300894882529974, -0.0015050015645101666, 0.017532873898744583, -0.03852382302284241, 0.019662542268633842, -0.027727875858545303]


[32m2026-01-08 10:44:28.104[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_embedding[0m:[36m78[0m - [1mpayload构造完毕，前50字符: {'model': 'jina/jina-embeddings-v4-vllm-retrieval', 'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': '### **第一页内容总结：**  - **文件类型**：广东省政府采购公开招标文件。 - **采购计划编号**：440403-2025-01826   - **采购项目编号**：DHH25-ZH1ZFGY-188   - **项目名称**：珠海市斗门区侨立中医院 2025 年病理服务采购项目   - **采购人**：珠海市斗门区侨立中医院   - **采购代理机构**：大航海(广东)项目咨询有限公司   - **章节信息**：本文件为《第一章 投标邀请》的起始页，标志着招标程序的正式开启。'}]}]}...[0m


余弦相似度为: 0.7734075732651663
与文档相似


[32m2026-01-08 10:44:50.540[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_embedding[0m:[36m78[0m - [1mpayload构造完毕，前50字符: {'model': 'jina/jina-embeddings-v4-vllm-retrieval', 'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': '我随便说一个话，我是张文凯，我是广东省省长'}]}]}...[0m


余弦相似度为: 0.19874629356852133
与文档不相似


# 保存image到本地

In [None]:
from PIL import Image
import os

def save_image_as_jpeg(img: Image.Image, output_folder: str, filename: str):
    # 1. 确保目标文件夹存在 (如果不存在会自动创建)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # 2. 拼接完整路径
    save_path = os.path.join(output_folder, filename)

    # 3. 【关键】处理透明度问题
    # 如果图片是 RGBA (带透明) 或 P (调色板) 模式，JPEG 不支持，需转为 RGB
    if img.mode in ("RGBA", "P"):
        img = img.convert("RGB")

    # 4. 保存文件
    # quality 参数可选 (1-95)，默认约 75，95 为最高质量
    img.save(save_path, "JPEG", quality=95)
    print(f"图片已保存到: {save_path}")

# --- 使用示例 ---
# 假设你有一个 Image 对象叫 my_img
for i,img in enumerate(images):
    save_image_as_jpeg(img, "../../demo_data_images", f"test_{i+1}.jpeg")

图片已保存到: ../../示例数据_images/test_1.jpeg
图片已保存到: ../../示例数据_images/test_2.jpeg
图片已保存到: ../../示例数据_images/test_3.jpeg
图片已保存到: ../../示例数据_images/test_4.jpeg
图片已保存到: ../../示例数据_images/test_5.jpeg
图片已保存到: ../../示例数据_images/test_6.jpeg
图片已保存到: ../../示例数据_images/test_7.jpeg
图片已保存到: ../../示例数据_images/test_8.jpeg
图片已保存到: ../../示例数据_images/test_9.jpeg
图片已保存到: ../../示例数据_images/test_10.jpeg
图片已保存到: ../../示例数据_images/test_11.jpeg
图片已保存到: ../../示例数据_images/test_12.jpeg
图片已保存到: ../../示例数据_images/test_13.jpeg
图片已保存到: ../../示例数据_images/test_14.jpeg
图片已保存到: ../../示例数据_images/test_15.jpeg
图片已保存到: ../../示例数据_images/test_16.jpeg
图片已保存到: ../../示例数据_images/test_17.jpeg
图片已保存到: ../../示例数据_images/test_18.jpeg
图片已保存到: ../../示例数据_images/test_19.jpeg
图片已保存到: ../../示例数据_images/test_20.jpeg
图片已保存到: ../../示例数据_images/test_21.jpeg
图片已保存到: ../../示例数据_images/test_22.jpeg
图片已保存到: ../../示例数据_images/test_23.jpeg
图片已保存到: ../../示例数据_images/test_24.jpeg
图片已保存到: ../../示例数据_images/test_25.jpeg
图片已保存到: ../../示例数据_images/test_26.

# 测试Qwen的两个模型怎么用


In [3]:
import sys
import os
import torch
from pathlib import Path
root_path = Path.cwd().parent.parent
sys.path.append(str(root_path))

from src.settings import settings

# -----------------------------------------------------------------------------
# 1. 【核心步骤】将 Qwen3-VL-Embedding 代码仓库的根目录加入 Python 路径
# -----------------------------------------------------------------------------
# 你的 GitHub 仓库代码所在的绝对路径（就是包含 src 文件夹的那个目录）
# 根据你的描述，推测是这个目录的上级：
repo_code_path = "/mnt/ssd2/steins/wenkai/models/Qwen3-VL-Embedding"

if repo_code_path not in sys.path:
    sys.path.append(repo_code_path)

# -----------------------------------------------------------------------------
# 2. 现在可以像官方一样导入了
# -----------------------------------------------------------------------------
try:
    from src.models.qwen3_vl_embedding import Qwen3VLEmbedder
    from src.models.qwen3_vl_reranker import Qwen3VLReranker
except ImportError as e:
    print("❌ 导入失败！请检查 'repo_code_path' 是否指向了包含 'src' 文件夹的目录。")
    print(f"当前尝试的路径: {repo_code_path}")
    raise e

# -----------------------------------------------------------------------------
# 3. 初始化模型 (使用绝对路径)
# -----------------------------------------------------------------------------
# 这是你权重的绝对路径
embed_model_weights_path = settings.QWEN3_VL_EMBEDDING_PATH

print(f"🚀 Loading model from: {embed_model_weights_path}")

embed_model = Qwen3VLEmbedder(
    model_name_or_path=embed_model_weights_path,
    # 强烈建议开启 flash_attention_2 以支持长文档并节省显存
    # 如果你的显卡是 3090/4090/A100/H100，请取消下面的注释
    attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16
)


rerank_model_weights_path = settings.QWEN3_VL_RERANKER_PATH
rerank_model = Qwen3VLReranker(
    model_name_or_path=rerank_model_weights_path,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
)

  from .autonotebook import tqdm as notebook_tqdm
`torch_dtype` is deprecated! Use `dtype` instead!


🚀 Loading model from: /mnt/ssd2/steins/wenkai/models/Qwen3-VL-Embedding/models/Qwen3-VL-Embedding-8B


Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 64.39it/s]
Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 88.36it/s]


In [None]:
inputs = [{
    "text": "A woman playing with her dog on a beach at sunset.",
    "instruction": "Retrieve images or text relevant to the user's query.",
}, {
    "text": "A woman shares a joyful moment with her golden retriever on a sun-drenched beach at sunset, as the dog offers its paw in a heartwarming display of companionship and trust."
}, {
    "image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
}, {
    "text": "A woman shares a joyful moment with her golden retriever on a sun-drenched beach at sunset, as the dog offers its paw in a heartwarming display of companionship and trust.", 
    "image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
}]

embeddings = embed_model.process(inputs)
print(embeddings)
print(embeddings @ embeddings.T)

tensor([[-0.0267, -0.0222, -0.0094,  ..., -0.0177,  0.0156, -0.0068],
        [-0.0298, -0.0342, -0.0129,  ..., -0.0098, -0.0002, -0.0153],
        [-0.0117, -0.0293,  0.0085,  ..., -0.0215,  0.0155, -0.0181],
        [-0.0262, -0.0371, -0.0148,  ..., -0.0117,  0.0103, -0.0126]],
       device='cuda:0', dtype=torch.bfloat16)
tensor([[1.0000, 0.7148, 0.6562, 0.6562],
        [0.7148, 1.0000, 0.7266, 0.8203],
        [0.6562, 0.7266, 1.0078, 0.8438],
        [0.6562, 0.8203, 0.8438, 1.0000]], device='cuda:0',
       dtype=torch.bfloat16)


In [4]:
from src.code.data_base.database import VectorDatabase, VectorSchema, convert_from_path, convert_to_jpeg
from loguru import logger
from typing import List, Dict, Any

COLLECTION_NAME = "WENKAI_reading_agent_with_Qwen3VL"

vector_db = VectorDatabase(collection_name=COLLECTION_NAME,vector_dim=4096)
# vector_db.delete_collection(COLLECTION_NAME)#先删除
# vector_db.create_collection(COLLECTION_NAME)#已创建

# file_path = Path.cwd().parent.parent / "demo_data" / "test.pdf"
# pdf_doc = convert_from_path(file_path, first_page=1)
# images = convert_to_jpeg(pdf_doc)
# logger.info(f"成功获取{len(images)}张pdf，并将其转换为JEPG图片")

# vectors: List[VectorSchema]=[]


# for idx, img in enumerate(images):
#     vectors.append(
#         VectorSchema(
#             id=idx,
#             vector=embed_model.process(
#                 [
#                     {
#                         "image": img
#                     }
#                 ]
#             ).detach().cpu().tolist()[0],
#             page_index=idx + 1,
#             image_url=f"/mnt/ssd2/steins/wenkai/project/doc-reading-agent-demo/demo_data_images/test_{idx+1}.jpeg"
#         )
#     )

# processed_vectors: List[Dict[str,Any]] = []
# for vec in vectors:
#     processed_vectors.append(vec.model_dump())

# insert_count = vector_db.insert_vectors(collection_name=COLLECTION_NAME, vectors=processed_vectors)

# logger.info(f"KIAEr:已添加文件 {file_path} 到向量数据库，共 {insert_count} 页。")


[32m2026-01-12 16:41:34.008[0m | [1mINFO    [0m | [36msrc.code.embedding.embedding_model[0m:[36m__init__[0m:[36m55[0m - [1m通过HTTP请求访问JinaEmbedding服务: jina/jina-embeddings-v4-vllm-retrieval at http://localhost:9908/v1/embeddings 成功！[0m


In [6]:
from src.code.visual_reasoner.model import VisionLanguageModel
# 初步查询
def query(query, top_k=10):
    vector = embed_model.process([
        {"text": query}
    ]).detach().cpu().tolist()[0]
    search_result = vector_db.client.search(
        collection_name=COLLECTION_NAME,
        data=[vector],
        limit=top_k,
        output_fields=["id", "vector", "page_index", "image_url"],
    )

    return search_result

def rerank(query, results):#对相关的结果进行重排序
    image_urls = [item['image_url'] for item in related_results[0]]

    rerank_block = {
            "query":{"text": query_text},
            "documents":[
                {"image": url} for url in image_urls
            ]
        }
    rerank_block
    reranked_results = rerank_model.process(rerank_block)

    #构建最终的输出
    output = []
    for idx, item in enumerate(reranked_results):
        output.append(
            {
                "original_id": related_results[0][idx]['id'],
                "page_index": related_results[0][idx]['page_index'],
                "image_url": related_results[0][idx]['image_url'],
                "rerank_score": item
            }
        )

    output.sort(key=lambda x: x['rerank_score'], reverse=True)#按重排序分数降序排列

    return output

while True:
    query_text = input("请输入您的查询内容（输入 'exit' 退出）：")
    if query_text.lower() == 'exit':
        break

    related_results = query(query_text, top_k=10)

    reranked_results = rerank(query_text, related_results)

    vlm_model = VisionLanguageModel()
    response = vlm_model.run(
        query=query_text,
        image_urls=[item["image_url"] for item in reranked_results[:3]]
    )
    print("回答内容:", response)

[32m2026-01-12 17:07:28.996[0m | [1mINFO    [0m | [36msrc.code.visual_reasoner.model[0m:[36m__init__[0m:[36m35[0m - [1mVisionLanguageModel 已就绪[0m


回答内容: 好的，我将根据 |<Page 43>|、|<Page 37>| 和 |<Page 44>| 的内容来回答您的问题。

根据 |<Page 43>| 的内容，投标报价得分采用了低价优先法计算，即满足招标文件要求且投标价格最低的投标报价为评标基准价，其投标报价得分为满分。其他投标人的投标报价得分统一按照下列公式计算：投标报价得分=（评标基准价/投标报价）×报价权重×100。这表明给出了低价优先法公式。

根据 |<Page 43>| 的内容，投标报价得分为10.0分，诚信记录为5.0分，合计15.0分。这表明价分比重为货物≥30%、服务≥10%。

根据 |<Page 44>| 的内容，评标结果按评审后总得分由高到低顺序排列。总得分相同的按投标报价由低到高顺序排列。这表明没有禁止剔除最高/最低价。

综上所述，针对政府采购，给出了低价优先法公式，价分比重为货物≥30%、服务≥10%，没有禁止剔除最高/最低价。


[32m2026-01-12 17:10:50.812[0m | [1mINFO    [0m | [36msrc.code.visual_reasoner.model[0m:[36m__init__[0m:[36m35[0m - [1mVisionLanguageModel 已就绪[0m


回答内容: 好的，我需要从图片中提取价格部分的计算方法，包括基准价确定方式、价格分计算公式及价格权重等。让我先看看每张图片的页码和内容，找出相关的信息。

首先，我注意到第三张图片的左上角显示的是 |<Page 36>|。让我仔细检查一下这张图片的内容，看看是否有关于价格计算的部分。

在第三张图片中，我找到了关于“投标报价得分”的描述。它写道：“投标报价得分采用低价优先法计算，即满足招标文件要求且投标价格最低的投标报价为评标基准价，其投标报价得分为满分。其他投标人的投标报价得分统一按照下列公式计算：投标报价得分=（评标基准价/投标报价）×报价权重×100。” 这里提到了评标基准价的确定方式，以及价格分的计算公式和价格权重。

让我再确认一下是否有遗漏。问题要求包括基准价确定方式、价格分计算公式及价格权重。根据第三张图片的内容，评标基准价是“满足招标文件要求且投标价格最低的投标报价”，价格分计算公式是“（评标基准价/投标报价）×报价权重×100”，并且明确提到了“报价权重”。

所以，价格部分的计算方法如下：

- **评标基准价确定方式**：满足招标文件要求且投标价格最低的投标报价为评标基准价。
- **价格分计算公式**：投标报价得分=（评标基准价/投标报价）×报价权重×100。
- **价格权重**：在公式中明确提及为“报价权重”。

这些信息都来自第三张图片，页码为 |<Page 36>|。
</think>

根据 |<Page 36>| 的内容，价格部分的计算方法如下：  
- **评标基准价确定方式**：满足招标文件要求且投标价格最低的投标报价为评标基准价。  
- **价格分计算公式**：投标报价得分=（评标基准价/投标报价）×报价权重×100。  
- **价格权重**：在公式中明确提及为“报价权重”。
