In [2]:
import json
import pandas as pd
from collections import defaultdict

def convert_to_task2(pred_json_path, output_csv_path, score_thresh=0.7, x_thresh=10):
    """Turn per-digit detections into one digit string per image and save it as CSV.

    The JSON file must contain a list of prediction dicts, each with the keys
    ``image_id``, ``score``, ``bbox`` and ``category_id``. ``bbox`` is unpacked
    as four values of which the first two are used as the x and y sort
    coordinates (presumably COCO-style ``[x, y, w, h]`` -- confirm against the
    detector that produced the file).

    For every ``image_id`` that appears in the file:

    * predictions with ``score < score_thresh`` are discarded;
    * if none remain, ``pred_label`` is ``-1``;
    * otherwise the remaining boxes are ordered by ``(x // x_thresh, y)`` and
      their ``category_id - 1`` values are concatenated into a string.

    Note that the x ordering is bucket based: two boxes fall back to the y
    comparison only when they land in the same ``x_thresh``-wide bucket, so
    boxes a few pixels apart but on opposite sides of a bucket boundary are
    still ordered by x.

    Images that have no entry at all in the JSON file cannot be known here and
    therefore get no row in the CSV.

    Args:
        pred_json_path: Path of the JSON file holding the list of predictions.
        output_csv_path: Destination path of the CSV with the columns
            ``image_id`` and ``pred_label``.
        score_thresh: Minimum score (inclusive) for a prediction to be kept.
        x_thresh: Width of one x bucket; must be greater than zero.

    Raises:
        ValueError: If ``x_thresh`` is not positive.
    """
    if x_thresh <= 0:
        raise ValueError(f"x_thresh must be positive, got {x_thresh!r}")

    with open(pred_json_path, encoding="utf-8") as f:
        predictions = json.load(f)

    # Group the sufficiently confident predictions by image_id.
    pred_by_image = defaultdict(list)
    for pred in predictions:
        if pred['score'] >= score_thresh:
            pred_by_image[pred['image_id']].append(pred)

    # Ordering rule: compare the x bucket first, then y inside the same bucket.
    def sort_key(p):
        x, y, _, _ = p['bbox']
        return (int(x // x_thresh), y)

    # Every image seen in the file gets a row, even if all its boxes were filtered out.
    all_image_ids = {p['image_id'] for p in predictions}

    results = []
    for image_id in sorted(all_image_ids):
        preds = pred_by_image.get(image_id)

        if not preds:
            results.append({"image_id": image_id, "pred_label": -1})
            continue

        sorted_preds = sorted(preds, key=sort_key)

        # category_id 1 corresponds to the digit 0, hence the -1 shift.
        digit_str = ''.join(str(p['category_id'] - 1) for p in sorted_preds)
        results.append({"image_id": image_id, "pred_label": digit_str})

    # Explicit columns keep the header row even when there are no results.
    df = pd.DataFrame(results, columns=["image_id", "pred_label"])
    df.to_csv(output_csv_path, index=False)
    print(f"✅ Task 2 prediction saved to: {output_csv_path}")

In [3]:
# Build the Task 2 CSV from the Task 1 detection output.
convert_to_task2(
    pred_json_path="/kaggle/input/task1-pred/pred.json",
    output_csv_path="pred.csv",
    score_thresh=0.7,
    x_thresh=10,
)

✅ Task 2 prediction saved to: pred.csv
