In [10]:
import pandas as pd
from google import genai
from PIL import Image
import io
import numpy as np
import time
import matplotlib.pyplot as plt
from IPython.display import display
from holoviews.operation import threshold
from menuinst.platforms.win_utils.knownfolders import folder_path


def load_image(image_path):
    with open(image_path, "rb") as img_file:
        image = Image.open(io.BytesIO(img_file.read()))
        if image_path.endswith(".tif"):
            # 转换为 numpy 数组
            img_array = np.array(image)
            
            # 归一化到 0~255（防止丢失图像信息）
            img_8bit = (255 * (img_array / img_array.max())).astype(np.uint8)
            
            # 转换为 Image 对象
            image = Image.fromarray(img_8bit)
        image=image.resize((1024,1024))
        return image

def identify_motif(image_path, name, description, max_retries=10):
    image = load_image(image_path)
    question=f"""
This is a silk image named {name}. There are also some description about this silk: {description}. Please identify all the motifs in this Chinese silk image and mark their locations with bounding boxes using the coordinates of the top-left and bottom-right corners. Bounding boxes should aim to fully enclose the motif. Please return a list includes the bounding boxes and corresponding labels.
"""
    # display(image)
    
    client = genai.Client(api_key="AIzaSyCrk1SnBIu-h59CEpubCi0h9Q86nnwVfmo")
    
    retries = 0
    wait_time = 3 # 初始等待时间（秒）
    while retries < max_retries:
       try:
            response = client.models.generate_content(
                model="gemini-2.0-flash",
                contents=[question, image],
            )
            return response.text
       except Exception as e:
            error_msg = str(e)
            if "429" in error_msg or "rate limit" in error_msg.lower():
                print(f"请求受限，等待 {wait_time} 秒后重试...（第 {retries + 1} 次）")
                time.sleep(wait_time)
                retries += 1
            else:
                print(f"请求失败: {e}")
                break

    print("达到最大重试次数，未能完成请求。")
    return None

In [4]:
import re
import json
import matplotlib.patches as patches

# Draw the bounding boxes on the image
def identify_motifs(image_path,results):
    #obtain list
    image = load_image(image_path)
    image = image.resize((1024, 1024))
    match = re.search(r'```json\s*(\[.*?\])\s*```', results, re.DOTALL)
    if match:
        json_str = match.group(1)
        detections = json.loads(json_str)  # 变成 Python list of dicts
    
        # 显示图像
        # fig, ax = plt.subplots()
        # ax.imshow(image)
        
        # 遍历每个 detection，画框并加标签
        # for det in detections:
        #     box = det["box_2d"]
        #     label = det["label"]
        #     y1, x1, y2, x2 = box
        #     width, height = x2 - x1, y2 - y1
        # 
        #     # 画框
        #     rect = patches.Rectangle((x1, y1), width, height, linewidth=2, edgecolor='red', facecolor='none')
        #     ax.add_patch(rect)
        # 
        #     # 加标签（显示在框上方）
        #     ax.text(x1, y1 - 5, label, fontsize=10, color='white',
        #             bbox=dict(facecolor='red', alpha=0.5, edgecolor='none', pad=1))
        # 
        # plt.axis('off')
        # plt.show()
        return detections
    else:
        print("No JSON found.")
        return None



In [6]:
#Motif 分成 6类
def classify_motif(labels, max_retries=5):
    question=f"""
Here are  a list of the motif’s names {labels}. Please classify them into 6 categories:
[0: "animals",
1: "plants",
2: "inanimate objects",
3: "geometric patterns",
4: "textual motifs",
5: "human motifs",
]
Please return a list only contains the number.
"""
    # display(image)
    retries = 0
    wait_time = 3 # 初始等待时间（秒）
    
    client = genai.Client(api_key="AIzaSyCrk1SnBIu-h59CEpubCi0h9Q86nnwVfmo")
    while retries < max_retries:
       try:
            response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=question,
    )
            return response.text
       except Exception as e:
            error_msg = str(e)
            if "429" in error_msg or "rate limit" in error_msg.lower():
                print(f"请求受限，等待 {wait_time} 秒后重试...（第 {retries + 1} 次）")
                time.sleep(wait_time)
                retries += 1
            else:
                print(f"请求失败: {e}")
                break

    print("达到最大重试次数，未能完成请求。")
    return None

    


In [11]:
import os

#存放所有图片的路径
folder_path="Data/Book_Dataset/Book_Dataset/Ming/Image"
#输出的dataframe
df = pd.DataFrame(columns=["ID", "boxes","labels"])
#Description所在的csv
metadata=pd.read_csv("Data/SilkPatternCollection.csv",encoding="utf-8-sig")
results=pd.read_csv("output.csv",encoding="utf-8-sig")
processed_ids = set(results["ID"])
for image_path in os.listdir(folder_path):
    #根据对应的名字提取description
    image_name = os.path.splitext(image_path)[0]
    if image_name in processed_ids:
        print(f"跳过已处理：{image_name}")
        continue
    print(image_name)
    imagepath=os.path.join(folder_path,image_path)
    meta_data=metadata[metadata['ID'].values[:]==image_name]
    results = identify_motif(imagepath, meta_data['Name_zh'].values, meta_data['Description'].values)
    #提取bound boxes
    boxes=identify_motifs(imagepath,results)
    
    if boxes is not None:
        labels=pd.DataFrame(boxes)
        labels=labels["label"]
        #motif分类
        res2=classify_motif(labels)
        
        
        newrow=pd.DataFrame([{"ID":image_name,"boxes":boxes,"labels":res2}])
        df = pd.concat([df, newrow], ignore_index=True)
    else:
        newrow=pd.DataFrame([{"ID":image_name,"boxes":boxes,"labels":None}])
        df = pd.concat([df, newrow], ignore_index=True)
    newrow.to_csv("output.csv", mode='a', index=False, header=not os.path.exists("output.csv"), encoding="utf-8-sig")

            
    

跳过已处理：Ming001
跳过已处理：Ming002
跳过已处理：Ming003
跳过已处理：Ming005
跳过已处理：Ming006
跳过已处理：Ming007
跳过已处理：Ming008
跳过已处理：Ming009
跳过已处理：Ming010
跳过已处理：Ming011
跳过已处理：Ming012
跳过已处理：Ming013
跳过已处理：Ming014
跳过已处理：Ming015
Ming016
Ming017
Ming018


PermissionError: [Errno 13] Permission denied: 'output.csv'

In [9]:
print(results)

Given the nature of the image and the description, the dominant motif is the "缠枝莲" (entwined lotus) pattern, which is spread all over the silk fabric. While pinpointing individual lotus patterns is very difficult and impractical due to the density and the resolution, I will return one bounding box enclosing a sample area showing the pattern.

```json
[
  {"box_2d": [312, 377, 621, 646]},
]
```
