In [28]:
# Lint ../app_image.py with flake8 and pylint
file_path = "../app_image.py"
# file_path = r"C:\Users\A\Project_InputCheck\test.py"
!flake8 {file_path} --max-line-length=240
!pylint {file_path}

../app_image.py:27:1: E402 module level import not at top of file
../app_image.py:37:1: E402 module level import not at top of file
************* Module Project_QRRag.app_image
c:\Users\A\Project_QRRag\app_image.py:27:0: E0401: Unable to import 'Module.Common.scripts.llm.utils.google_whisk' (import-error)
c:\Users\A\Project_QRRag\app_image.py:27:0: C0413: Import "from Module.Common.scripts.llm.utils.google_whisk import generate_image_base64, generate_caption, generate_image_fx, generate_story_board, DEFAULT_STYLE_PROMPT_DICT, DEFAULT_HEADERS, AspectRatio, Category" should be placed at the top of the module (wrong-import-position)
c:\Users\A\Project_QRRag\app_image.py:37:0: E0401: Unable to import 'Module.Common.scripts.common.auth_manager' (import-error)
c:\Users\A\Project_QRRag\app_image.py:37:0: C0413: Import "from Module.Common.scripts.common.auth_manager import AuthKeeper, sustain_auth" should be placed at the top of the module (wrong-import-position)
c:\Users\A\Project_QRRag\app_i

In [None]:
%%writefile ..\app.py
# pylint: disable=no-member  # Project structure requires dynamic path handling
"""
For more information on `huggingface_hub` Inference API support
please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
import os
import sys
import gradio as gr
import cv2
from pyzbar.pyzbar import decode
import numpy as np
from google import genai
from google.genai import types
from dotenv import load_dotenv

# ===== 2. 初始化配置 =====
# 获取当前文件所在目录的绝对路径
if "__file__" not in globals():
    # Notebook execution: no module file exists, so step up from the kernel's cwd.
    current_dir = os.path.join(os.getcwd(), "..")
    root_dir = os.path.normpath(current_dir)
else:
    # Script execution: the project directory is where this file lives.
    current_dir = os.path.dirname(os.path.abspath(__file__))
    root_dir = os.path.normpath(os.path.join(current_dir, ".."))

# Normalise the path and expose it to the import system.
current_dir = os.path.normpath(current_dir)
sys.path.append(current_dir)

# Read the system prompt that respond() can send as the system instruction.
with open(os.path.join(current_dir, "system_role_prompt.md"), encoding="utf-8") as f:
    system_role = f.read()


# Canned prompt substituted for the "开始" shortcut message.
BEGIN_PROMPT = """
总结一下最新的内容
"""


# Canned prompt substituted for the "确认" shortcut message.
CONFIRM_PROMPT = """
对比一下两个版本的差异
"""

# Load environment variables from the .env file in the project directory.
load_dotenv(dotenv_path=os.path.join(current_dir, ".env"))
api_key = os.getenv("GEMINI_API_KEY")
# The client stays None when no key is configured; the UI checks for that.
gemini_client = genai.Client(api_key=api_key) if api_key else None

MODEL_NAME = "gemini-2.0-flash-exp"


# Common safety settings for all requests
def get_safety_settings():
    """Return the safety settings shared by every request.

    Each listed harm category is configured with the ``"OFF"`` threshold.
    """
    harm_categories = (
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_CIVIC_INTEGRITY",
    )
    return [
        types.SafetySetting(category=category_name, threshold="OFF")
        for category_name in harm_categories
    ]


def format_content(role: str, text: str) -> types.Content:
    """Wrap one text message in a ``types.Content`` attributed to ``role``."""
    text_part = types.Part(text=text)
    return types.Content(role=role, parts=[text_part])


def respond(
    message,
    history: list[dict],
    use_system_message,
):
    """Stream a Gemini reply for the Gradio chat interface.

    Args:
        message: Text the user just submitted.
        history: Earlier turns in Gradio "messages" format, i.e. dicts with
            ``role`` and ``content`` keys (the interface below is created
            with ``type="messages"``).
        use_system_message: When truthy, ``system_role`` is sent as the
            system instruction.

    Yields:
        The accumulated response text after every streamed chunk that
        carries text. Nothing is yielded when ``message`` is empty.
    """
    def to_gemini_role(role):
        # Gradio labels bot turns "assistant", while the Gemini API only
        # accepts "user" and "model" as content roles.
        return "model" if role == "assistant" else role

    # Rebuild the conversation: history first, then the new user message.
    def build_contents(message=None, before_message=None):
        contents = []
        for val in history:
            # Shortcut words in the history are replayed as their canned prompts.
            if val["content"] == "开始":
                context = BEGIN_PROMPT
            elif val["content"] == "确认":
                context = CONFIRM_PROMPT
            else:
                context = val["content"]
            contents.append(format_content(to_gemini_role(val["role"]), context))

        if before_message:
            contents.append(format_content("model", before_message))

        if message:
            contents.append(format_content("user", message))
        return contents

    # "开始" is only expanded as the very first message of a conversation.
    if message == "开始" and not history:
        message = BEGIN_PROMPT

    # "确认" is only expanded right after the first exchange (two history entries).
    if message == "确认" and len(history) == 2:
        message = CONFIRM_PROMPT

    if not message:
        return

    contents = build_contents(message)
    config_kwargs = {"safety_settings": get_safety_settings()}
    if use_system_message:
        config_kwargs["system_instruction"] = system_role
    config = types.GenerateContentConfig(**config_kwargs)

    response = ""
    for chunk in gemini_client.models.generate_content_stream(
        model=MODEL_NAME,
        contents=contents,
        config=config
    ):
        if chunk.text:  # chunk.text can be None for chunks without text
            response += chunk.text
            yield response


def get_gradio_version():
    """Return the version string of the imported Gradio package."""
    version = gr.__version__
    return version


# Module-level state shown in the JSON panel; process_qr_frame updates it in place.
game_state = {"gr_version": get_gradio_version()}  # example game_state


def process_qr_frame(frame):
    """Detect and decode a QR code in an image frame.

    When a code is found, its text is stored in the module-level
    ``game_state`` under the key ``"设定"`` and, if the decoder reports a
    polygon, the code is outlined and labelled on a copy of the frame.

    Args:
        frame: Image to scan; a numpy array or anything ``np.array`` accepts.
            ``None`` is passed straight through.

    Returns:
        ``(image, game_state)`` where ``image`` is the (possibly annotated)
        copy of the frame, or ``None`` when ``frame`` is ``None``.
    """
    if frame is None:
        return frame, game_state

    # Work on a copy so the annotations never touch the caller's image.
    if isinstance(frame, np.ndarray):
        img = frame.copy()
    else:
        img = np.array(frame).copy()

    decoded_objects = decode(img)
    if not decoded_objects:
        return img, game_state

    # Only the first detected code is used.
    first_code = decoded_objects[0]

    # The payload is raw bytes and is not guaranteed to be valid UTF-8;
    # substitute undecodable bytes instead of raising from the UI callback.
    qr_data = first_code.data.decode('utf-8', errors='replace')
    game_state.update({"设定": qr_data})

    points = first_code.polygon
    if points:
        # Green outline around the recognised code.
        pts = np.array(points, np.int32)
        pts = pts.reshape((-1, 1, 2))
        cv2.polylines(img, [pts], True, (0, 255, 0), 2)

        # Text overlay signalling that the state was updated.
        cv2.putText(img, "QR Code Updated!", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    return img, game_state


def export_chat(history):
    """Render a chat history as plain ``User:`` / ``Bot:`` text.

    Two history layouts are accepted:

    * "messages" format: a flat list of dicts with ``role`` and ``content``
      keys. Entries with role ``"user"`` become ``User:`` lines; every
      other role becomes a ``Bot:`` line followed by a blank line.
    * pair format: a list of ``(user, bot)`` pairs whose members are either
      plain strings or dicts with a ``content`` key.

    Args:
        history: The chat history, or ``None``/empty for no history.

    Returns:
        The exported text; an empty string when there is no history.
    """
    def _text(msg):
        # Messages can be plain strings or dicts carrying a "content" key.
        if isinstance(msg, dict):
            return msg.get("content", "")
        return "" if msg is None else msg

    chunks = []
    for item in history or []:
        if isinstance(item, dict):
            if item.get("role") == "user":
                chunks.append(f"User: {_text(item)}\n")
            else:
                chunks.append(f"Bot: {_text(item)}\n\n")
        else:
            user_msg, bot_msg = item[0], item[1]
            chunks.append(f"User: {_text(user_msg)}\nBot: {_text(bot_msg)}\n\n")
    return "".join(chunks)


def get_chat_history(chatbot_component):
    """Return whatever ``chatbot_component.load_history()`` produces."""
    loaded_history = chatbot_component.load_history()
    return loaded_history


# Build the UI: a chat interface (only when a Gemini client exists) plus a
# collapsible panel that scans QR codes and shows the shared game_state.
with gr.Blocks(theme="soft") as demo:
    if gemini_client:
        chatbot = gr.ChatInterface(
            respond,
            title="知识助理",
            type="messages",
            additional_inputs=[
                gr.Checkbox(value=False, label="Use system message"),
            ],

        )
    else:
        # No API key was found at startup, so show a hint instead of the chat.
        gr.Markdown("Gemini API key not found. Please check your .env file.")

    with gr.Accordion("查看状态", open=False):
        # Input image; the webcam constraint asks for the "environment" facing mode.
        game_info_image = gr.Image(label="二维码设定",
                                   webcam_constraints={
                                        "video": {
                                            "facingMode": {"ideal": "environment"}
                                        }
                                    })
        output_img = gr.Image(label="识别结果")
        game_state_output = gr.JSON(value=game_state)  # initially shows game_state

        # Scan an uploaded image once
        game_info_image.upload(
            fn=process_qr_frame,
            inputs=[game_info_image],
            outputs=[output_img, game_state_output],
            show_progress=False,
        )
        # Scan streamed frames continuously
        game_info_image.stream(
            fn=process_qr_frame,
            inputs=[game_info_image],
            outputs=[output_img, game_state_output],
            show_progress=False,
            stream_every=0.5  # process a frame every 0.5 seconds
        )

if __name__ == "__main__":
    # Serve over HTTPS only when both local certificate files are present.
    cert_file = os.path.join(current_dir, "localhost+1.pem")
    key_file = os.path.join(current_dir, "localhost+1-key.pem")

    launch_options = {"server_name": "0.0.0.0"}
    if os.path.exists(cert_file) and os.path.exists(key_file):
        launch_options["ssl_certfile"] = cert_file
        launch_options["ssl_keyfile"] = key_file

    demo.launch(**launch_options)


In [30]:
%%writefile ..\app_image.py
# pylint: disable=no-member  # Project structure requires dynamic path handling
"""
whisk逆向图片生成
"""
import os
import sys
import hashlib
import json
from datetime import datetime
from typing import Optional, List, Dict
from PIL import Image
import gradio as gr
# ===== 2. 初始化配置 =====
# 获取当前文件所在目录的绝对路径
if "__file__" not in globals():
    # Notebook execution: no module file exists, so step up from the kernel's cwd.
    current_dir = os.path.join(os.getcwd(), "..")
    root_dir = os.path.normpath(current_dir)
else:
    # Script execution: the project directory is where this file lives.
    current_dir = os.path.dirname(os.path.abspath(__file__))
    root_dir = os.path.normpath(os.path.join(current_dir, ".."))

# Normalise the path and expose it to the import system before the
# project-local imports that follow.
current_dir = os.path.normpath(current_dir)
sys.path.append(current_dir)

from Module.Common.scripts.llm.utils.google_whisk import (
    generate_image_base64,
    generate_caption,
    generate_image_fx,
    generate_story_board,
    DEFAULT_STYLE_PROMPT_DICT,
    DEFAULT_HEADERS,
    AspectRatio,
    Category
)
from Module.Common.scripts.common.auth_manager import (
    AuthKeeper,
    sustain_auth
)

# 缓存文件路径
CACHE_DIR = "cache"

# Directory that receives generated image files.
IMAGE_CACHE_DIR = os.path.join(current_dir, CACHE_DIR, "image")
# JSON files that persist the caption and story-prompt caches.
STORY_CACHE_FILE = os.path.join(current_dir, CACHE_DIR, "story_prompt_cache.json")
CAPTION_CACHE_FILE = os.path.join(current_dir, CACHE_DIR, "image_caption_cache.json")

# Make sure the cache directories exist before anything reads or writes them.
os.makedirs(os.path.join(current_dir, CACHE_DIR), exist_ok=True)
os.makedirs(IMAGE_CACHE_DIR, exist_ok=True)

# In-memory caches; load_cache() replaces them with the on-disk contents.
story_prompt_cache = {}
image_caption_cache = {}


# 加载缓存
def load_cache():
    """Load both JSON caches from disk into the module-level dictionaries.

    A cache file that is missing, unreadable, not valid JSON, or that does
    not contain a JSON object yields an empty cache instead of raising.
    This function runs at import time, so a damaged cache file must not
    prevent the application from starting.
    """
    global image_caption_cache, story_prompt_cache

    def _read_cache(path):
        """Return the dict stored at ``path``, or ``{}`` when unusable."""
        if not os.path.exists(path):
            return {}
        try:
            with open(path, 'r', encoding='utf-8') as cache_file:
                data = json.load(cache_file)
        except (OSError, ValueError) as exc:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            print(f"Ignoring unreadable cache file {path}: {exc}")
            return {}
        # Callers use .get() and item assignment, so only a dict is usable.
        return data if isinstance(data, dict) else {}

    image_caption_cache = _read_cache(CAPTION_CACHE_FILE)
    story_prompt_cache = _read_cache(STORY_CACHE_FILE)


# 保存缓存
def save_cache():
    """Write both in-memory caches to their JSON files."""
    targets = (
        (CAPTION_CACHE_FILE, image_caption_cache),
        (STORY_CACHE_FILE, story_prompt_cache),
    )
    for target_path, cache_data in targets:
        with open(target_path, 'w', encoding='utf-8') as cache_file:
            json.dump(cache_data, cache_file, ensure_ascii=False, indent=2)


# Populate the caches once when the module is loaded
load_cache()


def get_cached_caption(image_base64: str):
    """Look up a cached caption by the MD5 digest of the image's base64 text.

    Returns ``None`` for an empty input or when nothing is cached under
    that digest.
    """
    if image_base64:
        digest = hashlib.md5(image_base64.encode()).hexdigest()
        return image_caption_cache.get(digest)
    return None


def cache_caption(image_base64: str, caption: str):
    """Store a caption under the MD5 digest of the image's base64 text.

    Does nothing when either argument is empty; otherwise the caches are
    written to disk right after the update.
    """
    if image_base64 and caption:
        digest = hashlib.md5(image_base64.encode()).hexdigest()
        image_caption_cache[digest] = caption
        save_cache()


def get_cached_story_prompt(caption: str, style_key: str, additional_text: str):
    """Look up a cached story prompt for a caption/style/extra-text combination.

    The cache key is the MD5 digest of the three inputs joined by
    underscores. Returns ``None`` for an empty caption or on a cache miss.
    """
    if caption:
        key_material = f"{caption}_{style_key}_{additional_text}"
        digest = hashlib.md5(key_material.encode()).hexdigest()
        return story_prompt_cache.get(digest)
    return None


def cache_story_prompt(caption: str, style_key: str, additional_text: str, prompt: str):
    """Store a story prompt for a caption/style/extra-text combination.

    Does nothing when ``caption`` or ``prompt`` is empty; otherwise the
    caches are written to disk right after the update.
    """
    if caption and prompt:
        key_material = f"{caption}_{style_key}_{additional_text}"
        digest = hashlib.md5(key_material.encode()).hexdigest()
        story_prompt_cache[digest] = prompt
        save_cache()


# Auth helper configured from auth_config.json in the project directory.
# NOTE(review): AuthKeeper/sustain_auth live in auth_manager, which is not
# visible here; presumably they keep the credentials valid - confirm there.
gradio_auth_keeper = AuthKeeper(
    config_path=os.path.join(current_dir, "auth_config.json"),
    default_headers=DEFAULT_HEADERS
)

# Decorators applied to the wrapper functions below, one per credential kind.
gradio_sustain_auth_cookies = sustain_auth(gradio_auth_keeper, 'cookies')
gradio_sustain_auth_token = sustain_auth(gradio_auth_keeper, 'auth_token')


@gradio_sustain_auth_cookies
def gradio_generate_caption(
    image_base64: str,
    category: Category = Category.CHARACTER,
    cookies: Optional[str] = None,  # declared explicitly; the decorator injects it
) -> Dict:
    """Forward to ``generate_caption`` with the decorator-supplied cookies."""
    caption_result = generate_caption(image_base64, category, cookies)
    return caption_result


@gradio_sustain_auth_cookies
def gradio_generate_story_board(
    characters: Optional[List[str]] = None,
    style_prompt: Optional[str] = None,
    location_prompt: Optional[str] = None,
    pose_prompt: Optional[str] = None,
    additional_input: str = "",
    cookies: Optional[str] = None,  # declared explicitly; the decorator injects it
) -> Dict:
    """Forward to ``generate_story_board`` with the decorator-supplied cookies."""
    # Arguments are passed positionally, in the order of this signature.
    board_args = (
        characters,
        style_prompt,
        location_prompt,
        pose_prompt,
        additional_input,
        cookies,
    )
    return generate_story_board(*board_args)


@gradio_sustain_auth_token
def gradio_generate_image_fx(
    prompt: str,
    seed: Optional[int] = None,
    aspect_ratio: AspectRatio = AspectRatio.LANDSCAPE,
    output_prefix: str = "generated_image",
    image_number: int = 4,
    auth_token: Optional[str] = None,  # declared explicitly; the decorator injects it
) -> Dict:
    """Forward to ``generate_image_fx`` with the decorator-supplied auth token."""
    # Arguments are passed positionally, in the order of this signature.
    fx_args = (
        prompt,
        seed,
        aspect_ratio,
        output_prefix,
        image_number,
        auth_token,
    )
    return generate_image_fx(*fx_args)


# 添加新的测试函数
def output_type_wrapper(image_path1, image_path2):
    """Open two image paths with ``Image.open``, passing ``None`` through.

    Returns:
        A 2-tuple ``(image1, image2)``; each entry is the result of
        ``Image.open`` for its path, or ``None`` when that path is ``None``.
    """
    opened_images = tuple(
        None if image_path is None else Image.open(image_path)
        for image_path in (image_path1, image_path2)
    )
    return opened_images


# 在 demo 定义之前添加函数
def generate_images(
    image_input1: str,
    image_input2: str,
    style_key: str,
    additional_text: str
) -> tuple:
    """Handle one image-generation request from the UI.

    Pipeline: caption each supplied image (using the caption cache), build a
    story prompt from the captions, style and extra text (using the story
    prompt cache), generate images from that prompt, and open the first two
    results for display.

    Args:
        image_input1: File path of the first input image, or ``None``.
        image_input2: File path of the second input image, or ``None``.
        style_key: Key into ``DEFAULT_STYLE_PROMPT_DICT``.
        additional_text: Extra user text for the story prompt.

    Returns:
        A 2-tuple of whatever ``output_type_wrapper`` returns for the first
        two generated files. ``(None, None)`` is returned when no input image
        was given, when any step produced nothing, or when an exception
        occurred (the exception is printed, not raised).
    """
    try:
        # 1. Nothing to do without at least one input image.
        if image_input1 is None and image_input2 is None:
            return None, None

        # 2. Encode each supplied image and look up its cached caption.
        image_data = []
        all_caption_cached = True

        for image_input in [image_input1, image_input2]:
            if image_input is not None:
                base64_str = generate_image_base64(image_input)
                cached_caption = get_cached_caption(base64_str)
                image_data.append((base64_str, cached_caption))
                if cached_caption is None:
                    all_caption_cached = False

        # The story-prompt cache can only hit when every caption is known.
        if all_caption_cached:
            caption_text = "|".join(
                caption for _, caption in image_data
            )
            story_prompt_cached = get_cached_story_prompt(
                caption_text,
                style_key,
                additional_text
            ) is not None if caption_text else False
        else:
            story_prompt_cached = False

        # Log which caches apply to this request.
        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(
            f"[{current_time}] 生成图片 | "
            f"素材提示词缓存: {'启用' if all_caption_cached else '未启用'} | "
            f"最终提示词缓存: {'启用' if story_prompt_cached else '未启用'}"
        )

        # 3. Collect captions, generating and caching any that are missing.
        captions = []
        for base64_str, cached_caption in image_data:
            if cached_caption:
                captions.append(cached_caption)
            else:
                new_caption = gradio_generate_caption(base64_str)
                if new_caption:
                    cache_caption(base64_str, new_caption)
                    captions.append(new_caption)

        if not captions:
            return None, None

        if not story_prompt_cached:
            caption_text = "|".join(captions)

        # 4. Fetch the story prompt from the cache or generate a new one.
        final_prompt = get_cached_story_prompt(caption_text, style_key, additional_text)
        if final_prompt is None:
            final_prompt = gradio_generate_story_board(
                characters=captions,
                style_prompt=DEFAULT_STYLE_PROMPT_DICT.get(style_key, ""),
                additional_input=additional_text
            )
            if final_prompt:
                cache_story_prompt(caption_text, style_key, additional_text, final_prompt)

        # 5. Generate the images.
        if not final_prompt:
            return None, None

        print(f"最终Prompt: \n{final_prompt}")

        # The date prefix is computed once so that the file count and the
        # output name cannot disagree if the date changes mid-request.
        current_date = datetime.now().strftime("%Y%m%d")
        prefix = f'generated_image_{current_date}'
        file_count = sum(
            1 for file_name in os.listdir(IMAGE_CACHE_DIR)
            if file_name.startswith(prefix)
        )

        image_files = gradio_generate_image_fx(
            prompt=final_prompt,
            output_prefix=os.path.join(
                IMAGE_CACHE_DIR,
                f"{prefix}_take{file_count + 1}"
            ),
            image_number=2
        )

        # An empty or missing result means there is nothing to display.
        if not image_files:
            return None, None

        return output_type_wrapper(
            image_files[0],
            image_files[1] if len(image_files) > 1 else None
        )

    except Exception as e:
        # Best effort for the UI: report the failure and show empty outputs.
        print(f"Error generating images: {e}")
        return None, None


# 在 demo 定义中添加新的界面
# Build the UI: inputs on the left, the two generated images on the right.
with gr.Blocks(theme="soft") as demo:
    with gr.Row():
        # Left column: inputs
        with gr.Column(scale=1):
            with gr.Row():
                # type="filepath" means generate_images receives file paths.
                input_image1 = gr.Image(
                    label="上传图片1",
                    type="filepath",
                    height=300
                )
                input_image2 = gr.Image(
                    label="上传图片2（可选）",
                    type="filepath",
                    height=300
                )
            # Style choices come from DEFAULT_STYLE_PROMPT_DICT; the first key is preselected.
            style_dropdown = gr.Dropdown(
                choices=list(DEFAULT_STYLE_PROMPT_DICT.keys()),
                value=list(DEFAULT_STYLE_PROMPT_DICT.keys())[0],
                label="选择风格"
            )
            additional_text_ui = gr.Textbox(
                label="补充提示词",
                placeholder="请输入额外的提示词...",
                lines=3
            )
            generate_btn = gr.Button("生成图片")

        # Right column: outputs
        with gr.Column(scale=2):
            output_image1 = gr.Image(label="生成结果 1")
            output_image2 = gr.Image(label="生成结果 2")

    # Clicking the button runs generate_images and fills both output images.
    generate_btn.click(
        fn=generate_images,
        inputs=[input_image1, input_image2, style_dropdown, additional_text_ui],
        outputs=[output_image1, output_image2]
    )

if __name__ == "__main__":
    # Launch settings gathered in one place, then passed through unchanged.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 80,
        "ssl_verify": False,
        "share": True,
        "allowed_paths": [IMAGE_CACHE_DIR],
    }
    demo.launch(**launch_options)


Overwriting ..\app_image.py


In [27]:
# Scratch check: a sample prompt payload containing an apostrophe round-trips through json.
valid_json_str = '{"prompt": "A whimsical close-up shot of a bento box. Inside, a miniature young woman, crafted from colored rice and vegetables, with a single sesame seed for an eye, is shown saluting.  Her kimono, rendered in shades of pastel red with a white floral pattern, has long, flowing sleeves.  A tiny pink heart-shaped sign with miniature Japanese characters is in front of her.  The woman\'s dark hair is neatly pulled back, adorned with a small pink flower. Her expression is pleasant. The bento box sits on a table, the surrounding environment outside the box is not visible. The overall style is kawaii, with soft pastel colors and delicate details. The lighting is soft and diffused.  All elements are miniature and made of edible foods, kept entirely within the bento box"}'
import json
# Parses successfully
parsed = json.loads(valid_json_str)
json.dumps(parsed)
# print(parsed["prompt"][:30])  # print the first 30 characters to verify

'{"prompt": "A whimsical close-up shot of a bento box. Inside, a miniature young woman, crafted from colored rice and vegetables, with a single sesame seed for an eye, is shown saluting.  Her kimono, rendered in shades of pastel red with a white floral pattern, has long, flowing sleeves.  A tiny pink heart-shaped sign with miniature Japanese characters is in front of her.  The woman\'s dark hair is neatly pulled back, adorned with a small pink flower. Her expression is pleasant. The bento box sits on a table, the surrounding environment outside the box is not visible. The overall style is kawaii, with soft pastel colors and delicate details. The lighting is soft and diffused.  All elements are miniature and made of edible foods, kept entirely within the bento box"}'

In [None]:
# 在这里，您可以通过 ‘args’  获取节点中的输入变量，并通过 'ret' 输出结果
# 'args' 和 'ret' 已经被正确地注入到环境中
# 下面是一个示例，首先获取节点的全部输入参数params，其次获取其中参数名为‘input’的值：
# params = args.params;
# input = params.input;
# 下面是一个示例，输出一个包含多种数据类型的 'ret' 对象：
# ret: Output =  { "name": ‘小明’, "hobbies": [“看书”, “旅游”] };
import requests_async as requests

# 公共配置
# Base HTTP headers copied into every request by _send_request, which adds
# 'authorization' and 'Cookie' entries when credentials are supplied.
DEFAULT_HEADERS = {
    'accept': '*/*',
    'accept-encoding': 'gzip, deflate, br, zstd',
    'accept-language': 'zh-CN,zh;q=0.9,ja;q=0.8,zh-TW;q=0.7,en;q=0.6',
    'cache-control': 'no-cache',
    'content-type': 'text/plain;charset=UTF-8',
    'origin': 'https://labs.google',
    'pragma': 'no-cache',
    'priority': 'u=1, i',
    'referer': 'https://labs.google/',
    'sec-ch-ua': '"Not A(Brand";v="8", "Chromium";v="132", "Google Chrome";v="132"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'cross-site',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
    'x-client-data': 'CKu1yQEIlLbJAQiitskBCKmdygEIuoXLAQiVocsBCIqjywEIhaDNAQjU284BCJDfzgEYj87NAQ=='
}

# Style prompts keyed by the style name chosen by the user
# ("贺卡" = greeting card, "巧克力" = chocolate, "便当" = bento).
# main() falls back to the "贺卡" entry when the requested key is unknown.
DEFAULT_STYLE_PROMPT_DICT = {
    "贺卡": """
A rectangular shaped Valentine's Day card depicting a layered papercut diorama of a dreamy, romantic scene at sunset with a cute rim embellished with ribbons.
Scene and subject should be made out of paper, ribbons, and stickers. No other materials allowed. Incorporate ALL SUBJECTS and ALL DETAILS FROM THE SCENE.

The color palette is soft and romantic, with pastel pinks, purples, and oranges dominating the sky.
The figures are small and cute, and the overall style is whimsical and charming.
The card is heavily embellished with glitter, adding to the romantic and festive atmosphere.

The final output should look like a card that was handcrafted with love and include a cute Valentine's day message.
""",
    "巧克力": """
A Valentine's day box of chocolates is shown in a close-up shot.
The chocolates are arranged in a dark brown plastic tray with individual compartments.
The chocolates are various shapes and sizes, and are decorated with different colors and patterns.
The lighting is soft and diffused, and the overall tone of the image is warm and inviting.
The image has a slightly desaturated look, with muted colors and a slightly grainy texture.

Exquisitely sculpt all characters as a cute, small, round, chocolate nestled into the box.
If provided a scene or environment, the chocolates are painted and made into creative shapes to depict elements of the scene in detail SEPARATE from the characters.
The characters are positioned within the chocolate box, surrounded by other beautiful chocolates, creating a visually appealing and balanced composition.
The final product should show ONLY a close up of the exquisite and colorful box of decorated chocolates.
""",
    "便当": """
A close-up shot of a super cute bento box scene featuring miniature, real, colorful Japanese foods and veggies.
Characters have single sesame seeds for eyes and are crafted out of different colored rice and veggies to be tiny, kawaii pieces nestled INTO the box.
The characters are positioned centrally within the bento box, surrounded by other miniature food items, creating a visually appealing and balanced composition.

Elements must be mini and created by edible foods only and always kept INSIDE the bento box.
If provided, incorporate a scene/environment OUTSIDE of the box. Make it such that the box sits on a table in that location.
The overall style is whimsical and charming, with a focus on soft, pastel colors and delicate details. The lighting is soft and diffused, enhancing the delicate textures and colors.

The final product should show ONLY a close up of the exquisite bento box sitting on a surface of a given location.
""",
}


def generate_imagen3_character_prompt(
    characters_descriptions=None,
    location=None,
    style_description=None,
    pose_description=None,
    user_instructions=None
) -> str:
    """Assemble the instruction text that asks an LLM to write an image prompt.

    The text always starts with a fixed instruction header and ends with a
    fixed closing section. In between, one section is added for each
    argument that is provided (truthy), in this order: characters, location,
    style, pose, user instructions. Sections are joined with newlines.

    Args:
        characters_descriptions: Sequence of character/subject descriptions.
        location: Background location description.
        style_description: Visual style description.
        pose_description: Pose description.
        user_instructions: Free-form user instructions.

    Returns:
        The assembled instruction text.
    """
    # Fixed opening section.
    sections = ["""你是一个提示词大师，请根据下面的信息制作一个提供给图片生成模型的提示词。
Instructions:
1. You will be creating a prompt for a text-to-image model that will be placing characters/subjects into a location background.
2. You will make sure to concisely describe each character/subject and what they are doing and will not lose track of the characters'/subjects' visual details.
3. You will make sure the background is the one provided by the location prompt."""]

    # Characters: one numbered line per description, numbered from 1.
    if characters_descriptions and len(characters_descriptions) > 0:
        character_lines = "\n".join(
            f"Character/Subject {index} description: '{description}'"
            for index, description in enumerate(characters_descriptions, 1)
        )
        character_count = len(characters_descriptions)
        sections.append(f"""
There are {character_count} characters/subjects in the scene. Make sure to ignore the locations from these character/subject prompts and only extract the character/subject:
{character_lines}""")

    # Location.
    if location:
        sections.append(f"""
This is the location used for the background (Ignore the locations in characters'/subjects' prompt above!)
It is critical to use this exact location: '{location}'""")

    # Style.
    if style_description:
        sections.append(f"""
This is the aesthetic/visual/artistic style to use (ignore any other style or visual aesthetic mentioned above);
ONLY extract the aesthetic/visual/artistic styles from this prompt and nothing about the subjects/location/objects/etc. within it:
'{style_description}'

Make sure to rewrite the prompt to conform the character/subject/location/etc. to these colors and visual styles and produce a strong style transfer.
Emphasize the updated styles early in the prompt and if applicable, include them as parts of the character/subject descriptions.
It's critical to use this exact style unless the user says otherwise.
When describing the scene and colors, it's critical that they conform to this style.""")

    # Pose.
    if pose_description:
        sections.append(f"""
This is the pose to use (ignore any other pose mentioned above);
ONLY extract the pose from this prompt and nothing about the subjects/location/style/etc. within it:
'{pose_description}'

Make sure to rewrite the prompt to conform to these poses and produce a pose transfer in a way that makes sense for the user's chosen subject.
Describe the pose of the character/subject in the image in detail so someone could draw it without the original reference.
Do not detail any of the actual objects or subjects in the scene. Purely describe the pose and action.
Emphasize the updated poses early in the prompt, but do so only concisely.
Make sure to overwrite any other poses mentioned above unless the user says otherwise.""")

    # User instructions.
    if user_instructions:
        sections.append(f"""
User Instructions: '{user_instructions}'""")

    # Fixed closing section.
    sections.append("""
Describe this picture in great detail, thinking of all the details someone would need to recreate that picture without seeing it.
Make sure to not exceed more than 200 words in this prompt.
Do not include details that do not conform to the user's specified characters/subjects, location, style, or pose.

Don't generate images, just write text. Do not prompt the user for more information. Only return the new prompt.""")

    return "\n".join(sections)


async def _send_request(
    url: str,
    payload,
    auth_token=None,
    cookies=None
):
    """POST ``payload`` as JSON to ``url`` and return the decoded JSON reply (async).

    ``DEFAULT_HEADERS`` is copied for each call; ``auth_token`` and
    ``cookies`` are added as the ``authorization`` and ``Cookie`` headers
    when provided. Failures are not raised: an ``{"error": ...}`` dict
    describing the problem is returned instead.
    """
    headers = dict(DEFAULT_HEADERS)
    optional_headers = (
        ('authorization', auth_token),
        ('Cookie', cookies),
    )
    for header_name, header_value in optional_headers:
        if header_value:
            headers[header_name] = header_value

    try:
        response = await requests.post(url, headers=headers, json=payload)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as http_err:
        return {"error": f"HTTP error: {str(http_err)}"}
    except Exception as err:
        return {"error": f"Request failed: {str(err)}"}


async def generate_image_fx(
    prompt: str,
    seed=None,
    aspect_ratio="IMAGE_ASPECT_RATIO_LANDSCAPE",
    image_number: int = 2,
    auth_token=None,
):
    """Call the image-generation endpoint and return the images as base64 data.

    Args:
        prompt: Text prompt to generate from.
        seed: Optional seed; sent only when it is not ``None``.
        aspect_ratio: Aspect-ratio identifier sent with the request.
        image_number: Number of candidates requested.
        auth_token: Value for the ``authorization`` header.

    Returns:
        The ``{"error": ...}`` dict from ``_send_request`` on failure;
        otherwise a dict with an ``images`` list (``base64``, ``seed``,
        ``format`` per image) and a ``metadata`` dict echoing the request.
    """
    user_input = {
        'candidatesCount': image_number,
        'prompts': [prompt],
    }
    if seed is not None:
        user_input['seed'] = seed

    payload = {
        'userInput': user_input,
        'clientContext': {'tool': 'BACKBONE'},
        'modelInput': {'modelNameType': 'IMAGEN_3_1'},
        'aspectRatio': aspect_ratio
    }

    response = await _send_request(
        'https://aisandbox-pa.googleapis.com/v1:runImageFx',
        payload,
        auth_token=auth_token
    )

    if 'error' in response:
        return response

    images = []
    if 'imagePanels' in response:
        for panel in response['imagePanels']:
            for img_data in panel['generatedImages']:
                # An entry without 'encodedImage' is reported and skipped.
                try:
                    image_entry = {
                        "base64": img_data['encodedImage'],
                        "seed": img_data.get('seed'),
                        "format": "png"
                    }
                except KeyError as e:
                    print(f"Invalid image data format: {e}")
                else:
                    images.append(image_entry)

    return {
        "images": images,
        "metadata": {
            "prompt": prompt,
            "seed": seed,
            "aspect_ratio": aspect_ratio
        }
    }


async def main(args: Args) -> Output:
    """Build the prompt-writing instruction text for this workflow node.

    Reads ``style``, ``image1``, ``image2`` and ``extra`` from
    ``args.params``. ``image1``/``image2`` are used as the character
    descriptions; empty ones are skipped so the prompt does not announce a
    character whose description is blank or ``None``. An unknown style key
    falls back to the "贺卡" style.

    Returns:
        A dict with a single ``prompt`` key.
    """
    params = args.params
    style_key = params['style']

    # Keep only the descriptions that were actually provided.
    character_list = [
        description
        for description in (params['image1'], params['image2'])
        if description
    ]

    style_info = DEFAULT_STYLE_PROMPT_DICT.get(style_key, DEFAULT_STYLE_PROMPT_DICT['贺卡'])
    instruction = params['extra']
    prompt = generate_imagen3_character_prompt(
        characters_descriptions=character_list,
        style_description=style_info,
        user_instructions=instruction
    )

    return {"prompt": prompt}


Temporary URL: https://tmpfiles.org/dl/20967136/image1.png
Coze API Status Code: 200
Coze API Response: {'code': 0, 'cost': '0', 'data': '{"data":"https://s.coze.cn/t/PAY1XI4j2JQ/","msg":"success"}', 'debug_url': 'https://www.coze.cn/work_flow?execute_id=7473074535089799202&space_id=7374905002604806179&workflow_id=7473020021918203940', 'msg': 'Success', 'token': 757}


In [6]:

# Test uploading a file to tmpfiles
# NOTE(review): upload_to_tmpfiles is not defined in the cells shown here; it
# must already exist in the kernel for this cell to run - confirm where it is defined.
image_path = r"E:\Download\image (1).png"  # replace with the actual image path
temp_url = upload_to_tmpfiles(image_path)
print(f"Temporary URL: {temp_url}")

Temporary URL: https://tmpfiles.org/dl/20967028/image1.png
