In [8]:
import os
import time
import google.generativeai as genai
from PIL import Image, ImageDraw, ImageFont
import json
import typing_extensions as typing
from google.api_core import exceptions

# ---------------------------------------------------------
# Configuration
# ---------------------------------------------------------

# Models are tried in this order: FALLBACK_MODEL is only used when
# PRIMARY_MODEL yields no usable result.
PRIMARY_MODEL = "models/gemini-flash-latest"
FALLBACK_MODEL = "gemini-1.5-flash"

# INPUT_DIR is joined onto every entry of target_images_list by the main
# block; OUTPUT_DIR receives the annotated image and the JSON dump.
INPUT_DIR = "./test_images"
OUTPUT_DIR = "./results_final" # final output directory

# Images to process. The commented-out entries are earlier samples kept for
# quick re-enabling. NOTE: the active entry is an absolute path, so
# os.path.join(INPUT_DIR, entry) returns it unchanged and INPUT_DIR is
# effectively ignored for it.
target_images_list = [
    # "/Users/sakauchikanato/ハッカソン/asobitai/smart_doc_gen/services/API用いてみた/sample1.png",
    # "/Users/sakauchikanato/ハッカソン/asobitai/smart_doc_gen/services/API用いてみた/sample2.png",
    # "/Users/sakauchikanato/ハッカソン/asobitai/smart_doc_gen/services/API用いてみた/sample3.jpg",
    # "/Users/sakauchikanato/ハッカソン/asobitai/smart_doc_gen/services/API用いてみた/sample4.jpg",
    # "/Users/sakauchikanato/ハッカソン/asobitai/smart_doc_gen/services/API用いてみた/sample5.jpg",
    # "/Users/sakauchikanato/ハッカソン/asobitai/smart_doc_gen/services/API用いてみた/sample6.jpg",
    # "/Users/sakauchikanato/ハッカソン/asobitai/smart_doc_gen/services/API用いてみた/sample7.jpg",
    # "/Users/sakauchikanato/ハッカソン/asobitai/smart_doc_gen/services/API用いてみた/sample8.jpg",
    # "/Users/sakauchikanato/ハッカソン/asobitai/smart_doc_gen/services/API用いてみた/sample9.jpg",
    # "/Users/sakauchikanato/ハッカソン/asobitai/smart_doc_gen/services/API用いてみた/sample10.jpg",
    # "/Users/sakauchikanato/ハッカソン/asobitai/smart_doc_gen/services/API用いてみた/sample11.jpg",
    # "/Users/sakauchikanato/ハッカソン/asobitai/smart_doc_gen/services/API用いてみた/IMG_4578 (1).jpg"
    "/Users/sakauchikanato/ハッカソン/asobitai/smart_doc_gen/services/API用いてみた/sample13.jpg"
]

# ---------------------------------------------------------

# Labels that are never drawn, in case the model returns an excluded category.
_EXCLUDED_LABELS = frozenset({"score", "number", "text", "other"})


def _load_rgb_image(image_path):
    """Open ``image_path`` and return a loaded RGB copy, closing the file."""
    # Image.open is lazy and keeps the file open; convert() returns a copy,
    # so the source handle can be released as soon as the block exits.
    with Image.open(image_path) as src:
        return src.convert('RGB')


def _request_marks(prompt, img, response_schema, model_names,
                   max_retries=3, retry_wait=20):
    """Query each model in order; return the parsed JSON list or ``None``.

    A model is abandoned on NotFound, on any non-429 error, and on errors
    whose message contains "limit: 0". Errors whose message contains "429"
    are retried up to ``max_retries`` times, sleeping ``retry_wait`` seconds
    between attempts.
    """
    for model_name in model_names:
        model = genai.GenerativeModel(
            model_name=model_name,
            generation_config={
                "response_mime_type": "application/json",
                "response_schema": response_schema,
                "temperature": 0.0,
            },
        )

        for attempt in range(1, max_retries + 1):
            try:
                response = model.generate_content([prompt, img])
                marks_data = json.loads(response.text)
            except exceptions.NotFound:
                print(f"   [警告] モデルが見つかりません: {model_name}")
                break
            except Exception as e:
                # Per-model boundary: any failure here moves on to a retry or
                # to the next model instead of aborting the whole run.
                error_msg = str(e)
                # "limit: 0" presumably means a zero quota for this model
                # (TODO confirm), so waiting would not help.
                retryable = "429" in error_msg and "limit: 0" not in error_msg
                if not retryable:
                    print(f"   [警告] APIエラー (モデル: {model_name}): {e}")
                    break
                print(f"   [警告] 429 エラー ({attempt}/{max_retries}) (モデル: {model_name})")
                # No point sleeping after the last attempt: nothing follows it.
                if attempt < max_retries:
                    time.sleep(retry_wait)
            else:
                if not isinstance(marks_data, list):
                    # The rest of the pipeline iterates over a list of items.
                    print(f"   [警告] 想定外の応答形式 (モデル: {model_name})")
                    break
                print(f"   -> 成功！ (モデル: {model_name})")
                return marks_data

    return None


def _load_label_font(size=20):
    """Return the first loadable candidate font, or ``None`` if none load."""
    font_candidates = (
        "C:\\Windows\\Fonts\\msgothic.ttc",
        "/System/Library/Fonts/ヒラギノ角ゴシック W3.ttc",
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
        "arial.ttf",
    )
    for f_path in font_candidates:
        try:
            return ImageFont.truetype(f_path, size)
        except OSError:
            # Font not present on this platform; try the next candidate.
            continue
    return None


def _draw_marks(img, marks_data):
    """Draw a labelled rectangle on ``img`` for every usable item."""
    draw = ImageDraw.Draw(img)
    width, height = img.size
    font = _load_label_font()

    for item in marks_data:
        try:
            box = item['box_2d']
            label = str(item['mark_type'])
            # Box values are treated as positions on a 0-1000 scale and are
            # rescaled to pixels. Sorting guards against inverted corners,
            # which ImageDraw.rectangle rejects.
            abs_ymin, abs_ymax = sorted((
                (box['ymin'] / 1000) * height,
                (box['ymax'] / 1000) * height,
            ))
            abs_xmin, abs_xmax = sorted((
                (box['xmin'] / 1000) * width,
                (box['xmax'] / 1000) * width,
            ))
            conf = item.get('confidence', 'Medium')
        except (KeyError, TypeError, AttributeError) as e:
            # One malformed item must not discard the rest of the result.
            print(f"   [警告] 不正な検出項目をスキップ: {e!r}")
            continue

        # Safety net in case an excluded category slipped past the prompt.
        if label.lower() in _EXCLUDED_LABELS:
            continue

        # Lime green by default; yellow flags low-confidence detections.
        outline_color = "yellow" if conf == "Low" else "#00FF00"

        draw.rectangle(
            [(abs_xmin, abs_ymin), (abs_xmax, abs_ymax)],
            outline=outline_color,
            width=3,
        )

        if font:
            # Put the label just above the box, clamped to the top edge.
            text_pos = (abs_xmin, max(0, abs_ymin - 30))
            bbox = draw.textbbox(text_pos, label, font=font)
            draw.rectangle(bbox, fill=outline_color)
            draw.text(text_pos, label, fill="black", font=font)


def extract_marks_final_polished(image_path: str, output_dir: str, api_key: str) -> None:
    """Detect handwritten grading symbols in one image and save the results.

    The image is sent with a fixed prompt to PRIMARY_MODEL and, if that gives
    no usable result, to FALLBACK_MODEL. On success two files are written to
    ``output_dir``: ``<name>_final.jpg`` (the image with a box and label per
    drawn mark) and ``<name>_final.json`` (the model's list as returned,
    including any items that were skipped when drawing).

    Args:
        image_path: Path of the image to analyse.
        output_dir: Directory for the output files; created if missing.
        api_key: API key passed to ``genai.configure``.

    Returns:
        None. If the file is missing, cannot be read, or no model returns
        data, a message is printed and nothing is written.
    """
    file_name = os.path.basename(image_path)
    base_name = os.path.splitext(file_name)[0]

    # Guard clauses first, so nothing is configured or created for bad input.
    if not os.path.exists(image_path):
        print(f"[{file_name}] スキップ: ファイルなし")
        return

    try:
        img = _load_rgb_image(image_path)
    except Exception as e:
        # Per-file boundary of a batch run: report and move on.
        print(f"[{file_name}] 画像読み込みエラー: {e}")
        return

    genai.configure(api_key=api_key)

    # Response schema handed to the model via generation_config.
    class Box2D(typing.TypedDict):
        ymin: int
        xmin: int
        ymax: int
        xmax: int

    class MarkItem(typing.TypedDict):
        mark_type: str
        description: str
        confidence: str
        box_2d: Box2D

    # Final tuned prompt.
    prompt = """
    Analyze this image and detect ONLY the specific handwritten grading SYMBOLS (Red/Colored Ink).
    
    Target Objects (Detect these):
    - Circle (丸)
    - Cross (バツ)
    - Triangle (三角)
    - Checkmark (チェック)

    Excluded Objects (IGNORE these):
    - **NUMBERS / SCORES**: Do NOT detect numbers like "10", "24", "100" even if written in red.
    - **TEXT**: Do NOT detect Japanese characters or corrections (e.g., correct answers written by the teacher).
    - **LINES**: Do NOT detect simple underlines unless they are part of a cross/check.

    Strict Constraints:
    1. **Symbols Only**: If it's a number (score), ignore it. If it's a word, ignore it.
    2. **Tight Bounding Boxes**: The box must enclose ONLY the symbol's ink. Do NOT include surrounding text or the answer box.
    3. **Separate Marks**: Do not group a mark with nearby text. Box only the mark.
    4. **Description**: Output description in Japanese (e.g., "問1の正解マーク").
    
    Return the result as a list of JSON objects.
    """

    print(f"--- [{file_name}] 最終解析開始 ---")

    marks_data = _request_marks(
        prompt, img, list[MarkItem], [PRIMARY_MODEL, FALLBACK_MODEL]
    )
    if marks_data is None:
        print("   [失敗] データ取得不可")
        return

    # No post-filtering of the data: exclusion is left to the prompt.
    print(f"   -> 検出数: {len(marks_data)} 個")

    if marks_data:
        _draw_marks(img, marks_data)

    # Save the annotated image and the model output.
    os.makedirs(output_dir, exist_ok=True)

    out_img_path = os.path.join(output_dir, f"{base_name}_final.jpg")
    img.save(out_img_path, "JPEG", quality=95)

    out_json_path = os.path.join(output_dir, f"{base_name}_final.json")
    with open(out_json_path, "w", encoding="utf-8") as f:
        json.dump(marks_data, f, ensure_ascii=False, indent=2)

    print(f"   -> 保存完了: {out_img_path}")

if __name__ == '__main__':
    # SECURITY: the API key is read from the environment instead of being
    # embedded in the source. Any key that was previously committed in this
    # file must be considered leaked and should be revoked and reissued.
    API_KEY = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
    if not API_KEY:
        raise SystemExit(
            "環境変数 GOOGLE_API_KEY (または GEMINI_API_KEY) に API キーを設定してください。"
        )

    print(f"対象枚数: {len(target_images_list)} 枚")
    print("------------------------------------------------")

    for filename in target_images_list:
        # os.path.join returns `filename` unchanged when it is absolute, so
        # both bare file names under INPUT_DIR and absolute paths work here.
        full_path = os.path.join(INPUT_DIR, filename)
        extract_marks_final_polished(full_path, OUTPUT_DIR, API_KEY)
        # Short pause between images (presumably to stay under API rate
        # limits — confirm).
        time.sleep(2)

    print("\nすべての処理が完了しました。")

対象枚数: 1 枚
------------------------------------------------
--- [sample13.jpg] 最終解析開始 ---
   -> 成功！ (モデル: models/gemini-flash-latest)
   -> 検出数: 18 個
   -> 保存完了: ./results_final/sample13_final.jpg

すべての処理が完了しました。
