In [None]:
import base64
import json
import mimetypes
import os
from concurrent.futures import ThreadPoolExecutor, as_completed

import pandas as pd
from openai import OpenAI
from PIL import Image
from tqdm import tqdm

# ===== Configuration =====
# The API key is read from the environment so the secret never lives in the
# notebook source or in its saved outputs.
api_key = os.environ.get("DASHSCOPE_API_KEY")
if not api_key:
    raise RuntimeError(
        "DASHSCOPE_API_KEY is not set; export it before running this notebook."
    )
# The OpenAI SDK client is pointed at DashScope's OpenAI-compatible endpoint.
client = OpenAI(api_key=api_key, base_url="https://dashscope.aliyuncs.com/compatible-mode/v1")
model = "qwen-vl-plus-latest"

# Input dataset (JSON) and output table (TSV) locations.
json_path = "/root/autodl-tmp/data/RSCC-reason-order/dataset_create/remote_sensing_1k_train_dataset.json"
output_tsv = "/root/autodl-tmp/data/RSCC-reason-order/dataset_create/individual_image_descriptions.tsv"

# Instruction text sent with every image.
# NOTE(review): the last two fragments are joined without a separating space
# and "explainations" is misspelled; the text is left untouched here so the
# prompt stays identical to the one already printed/used by this notebook.
prompt = (
    "You are given a remote sensing image. Please provide a detailed and precise description of the scene. "
    "Include information about visible features such as buildings, roads, vegetation, water bodies, and any other land cover types. "
    "Describe their relative positions using spatial references such as 'in the north', 'on the southwest side', or 'towards the center'. "
    "Mention the functional characteristics if possible, like 'residential area', 'industrial zone', or 'agricultural field'. "
    "Be concise, but include as much relevant spatial and structural detail as possible."
    "Description Only!! No other explainations or inference!"
)
print(prompt)

max_workers = 12  # Thread-pool size; tune to the machine (8-16 suggested)
# ====================

def encode_image(path):
    """Read the file at ``path`` and return its bytes as a base64 text string."""
    with open(path, mode="rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

def describe_image(img_path):
    """Ask the vision model for a text description of one image.

    The image is base64-encoded and sent as a data URL, together with the
    module-level ``prompt``, in a single user message via ``client`` using
    ``model``.

    Args:
        img_path: Path of the image file to describe.

    Returns:
        dict: ``{"image_path": img_path, "description": text}``. If anything
        fails (unreadable file, API error, reply without text) the
        description is ``"ERROR: <message>"`` instead of an exception, so a
        single bad image does not abort a batch run.
    """
    try:
        img_base64 = encode_image(img_path)
        # Label the payload with the media type implied by the file extension
        # instead of always claiming PNG; fall back to PNG when the extension
        # is unknown or not an image type.
        mime_type, _ = mimetypes.guess_type(str(img_path))
        if mime_type is None or not mime_type.startswith("image/"):
            mime_type = "image/png"
        response = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{img_base64}"}}
                    ]
                }
            ]
        )
        content = response.choices[0].message.content
        if content is None:
            # Surface a readable message rather than an AttributeError on None.
            raise ValueError("model returned no text content")
        description = content.strip()
    except Exception as e:
        # Deliberate best-effort: record the failure in the result row.
        description = f"ERROR: {e}"
    return {"image_path": img_path, "description": description}

# Load the image list
# JSON is read as UTF-8 explicitly so decoding does not depend on the
# platform's locale default.
with open(json_path, "r", encoding="utf-8") as f:
    dataset = json.load(f)
# Flatten every item's "images" entries into one list, keeping dataset order
# (an image referenced by several items appears once per reference).
image_list = [img for item in dataset for img in item["images"]]

# Multithreaded inference
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    # One task per image; the dict keeps submission (image_list) order.
    futures = {executor.submit(describe_image, img_path): img_path for img_path in image_list}
    # Advance the progress bar as tasks finish, in whatever order they complete.
    for future in tqdm(as_completed(futures), total=len(futures), desc="Generating descriptions"):
        future.result()  # re-raise right away if a worker failed unexpectedly

# Collect in submission order rather than completion order so the rows of the
# resulting table are the same from run to run.
results = [future.result() for future in futures]

You are given a remote sensing image. Please provide a detailed and precise description of the scene. Include information about visible features such as buildings, roads, vegetation, water bodies, and any other land cover types. Describe their relative positions using spatial references such as 'in the north', 'on the southwest side', or 'towards the center'. Mention the functional characteristics if possible, like 'residential area', 'industrial zone', or 'agricultural field'. Be concise, but include as much relevant spatial and structural detail as possible.Description Only!! No other explainations or inference!


Generating descriptions:   0%|          | 0/7585 [00:01<?, ?it/s]


In [4]:
# Persist the collected descriptions as a tab-separated file
df = pd.DataFrame(data=results)
df.to_csv(
    path_or_buf=output_tsv,
    index=False,
    sep="\t",
)
print(f"✅ 完成！已保存至：{output_tsv}")

✅ 完成！已保存至：/root/autodl-tmp/data/RSCC-reason-order/dataset_create/individual_image_descriptions.tsv
