# 02-视觉理解

kimi-k2.5 视觉理解，参考：https://platform.moonshot.cn/docs/guide/kimi-k2-5-quickstart

**重要限制**：
- 图片/视频内容仅支持以 Base64 编码（data URL）传入，不支持直接传入网络 URL；网络图片需先下载再编码（见下文“从 URL 加载图片”）
- 图片：png、jpeg、webp、gif
- 视频：mp4、mpeg、mov、avi、x-flv、mpg、webm、wmv、3gpp

In [None]:
from openai import OpenAI
import base64
import os

# Shared API client for every cell below. The key is read from the
# MOONSHOT_API_KEY environment variable; the literal placeholder
# "your-api-key" is used when that variable is not set.
client = OpenAI(
    base_url="https://api.moonshot.cn/v1",
    api_key=os.environ.get("MOONSHOT_API_KEY", "your-api-key"),
)

## 单张图片理解（来自官方文档）

In [None]:
# Official example: read a local image and send it as a Base64 data URL.
image_path = "kimi.png"  # replace with the path to your image

# Derive the MIME subtype from the file extension. splitext only looks at the
# final path component, so dots in directory names are ignored; the extension
# is lower-cased and "jpg" is mapped to the registered subtype "jpeg".
image_ext = os.path.splitext(image_path)[1].lstrip(".").lower()
if not image_ext:
    raise ValueError(
        f"Cannot infer the image type: {image_path!r} has no file extension"
    )
image_subtype = "jpeg" if image_ext == "jpg" else image_ext

with open(image_path, "rb") as f:
    image_data = f.read()

# Base64-encode the raw bytes and wrap them in a data URL.
image_url = (
    f"data:image/{image_subtype};base64,"
    f"{base64.b64encode(image_data).decode('utf-8')}"
)

# Call the vision model: one image part followed by one text part.
completion = client.chat.completions.create(
    model="kimi-k2.5",
    messages=[
        {"role": "system", "content": "你是 Kimi。"},
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {"url": image_url},
                },
                {
                    "type": "text",
                    "text": "请描述图片的内容。",
                },
            ],
        },
    ],
)

print(completion.choices[0].message.content)

## 从 URL 加载图片

In [None]:
import requests

def encode_image_from_url(url, timeout=30):
    """Download an image and return its bytes as a Base64 string.

    Args:
        url: Address of the image to download.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``. Without a timeout a stalled server would
            block the call indefinitely.

    Returns:
        The Base64-encoded response body as ``str`` (no ``data:`` prefix)
        when the server answers with status 200, otherwise ``None``.

    Raises:
        requests.exceptions.RequestException: Propagated unchanged from
            ``requests.get`` (connection failure, timeout, ...).
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None
    return base64.b64encode(response.content).decode("utf-8")

# 示例（替换为实际 URL）
# image_url = "https://example.com/image.png"
# base64_data = encode_image_from_url(image_url)
# image_content = f"data:image/png;base64,{base64_data}"

## 多张图片

In [None]:
# Compare several images in a single request.
image_paths = ["image1.png", "image2.png"]  # replace with real paths

content = []
for path in image_paths:
    # Derive the MIME subtype from the extension of the final path component;
    # lower-case it and map "jpg" to the registered subtype "jpeg".
    ext = os.path.splitext(path)[1].lstrip(".").lower()
    if not ext:
        raise ValueError(
            f"Cannot infer the image type: {path!r} has no file extension"
        )
    subtype = "jpeg" if ext == "jpg" else ext

    with open(path, "rb") as f:
        raw = f.read()
    # Encode after the file is closed; only the read needs the handle.
    data = base64.b64encode(raw).decode("utf-8")

    content.append({
        "type": "image_url",
        "image_url": {"url": f"data:image/{subtype};base64,{data}"},
    })

# The text instruction goes after all image parts.
content.append({"type": "text", "text": "对比这两张图片的异同"})

# Call the model (uncomment to run)
# response = client.chat.completions.create(
#     model="kimi-k2.5",
#     messages=[{"role": "user", "content": content}]
# )

## 视觉 + 思考模式

In [None]:
# kimi-k2.5 enables thinking mode by default.
# When analysing an image the response also carries reasoning_content.
# The example below is left commented out; uncomment it and point it at a
# real "chart.png" to run it.

# with open("chart.png", "rb") as f:
#     image_data = base64.b64encode(f.read()).decode('utf-8')

# response = client.chat.completions.create(
#     model="kimi-k2.5",
#     messages=[{
#         "role": "user",
#         "content": [
#             {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_data}"}},
#             {"type": "text", "text": "详细分析这个图表的数据趋势"}
#         ]
#     }]
# )

# getattr with a None default keeps the example working when the message
# object has no reasoning_content attribute.
# message = response.choices[0].message
# reasoning = getattr(message, "reasoning_content", None)
# if reasoning:
#     print(f"思考过程: {reasoning}")
# print(f"分析结果: {message.content}")