### Evaluate accuracy on production data



In [None]:
import json
import os
import re
from collections import defaultdict, Counter
from urllib.parse import unquote, urlparse

import boto3
import requests
# ------------------------------------------------------------------
# Configuration: fixed identifiers plus settings from the environment
# ------------------------------------------------------------------
# Literal settings for this evaluation run: the Label Studio project whose
# tasks are fetched, and the bucket that holds the uploaded images.
PROJECT_ID = 3
BUCKET_NAME = "plantseg-upload"

# Required environment variables. A missing variable raises KeyError right
# here, before any network call is attempted.
LABEL_STUDIO_URL = os.environ["LABEL_STUDIO_URL"]
LABEL_STUDIO_TOKEN = os.environ["LABEL_STUDIO_USER_TOKEN"]
MINIO_URL = os.environ["MINIO_URL"]
MINIO_ACCESS_KEY = os.environ["MINIO_USER"]
MINIO_SECRET_KEY = os.environ["MINIO_PASSWORD"]

# ------------------------------------------------------------------
# S3 client pointed at the MinIO endpoint
# ------------------------------------------------------------------
# boto3's generic S3 client is used, with endpoint_url redirecting it to
# MINIO_URL and the MinIO credentials supplied as the AWS key pair.
s3 = boto3.client(
    "s3",
    region_name="us-east-1",
    aws_secret_access_key=MINIO_SECRET_KEY,
    aws_access_key_id=MINIO_ACCESS_KEY,
    endpoint_url=MINIO_URL,
)

# ----------------------------
# Get Tasks from Label Studio
# ----------------------------
# Download the task list for PROJECT_ID, authenticating with the user token.
#
# * timeout: without one, requests waits forever on a stalled server.
# * raise_for_status(): an HTTP error (bad token, unknown project) comes back
#   as a JSON error object; without this check it would be assigned to
#   `tasks` and iterated key by key as if it were a list of tasks.
#
# NOTE(review): only a single response is read. If this endpoint paginates
# on your Label Studio version, later pages are not fetched - confirm.
resp = requests.get(
    f"{LABEL_STUDIO_URL}/api/projects/{PROJECT_ID}/tasks",
    headers={"Authorization": f"Token {LABEL_STUDIO_TOKEN}"},
    timeout=30,
)
resp.raise_for_status()
tasks = resp.json()

# The comparison loop expects a list of task dicts; fail loudly on anything
# else rather than producing one confusing per-item error downstream.
if not isinstance(tasks, list):
    raise ValueError(
        f"Expected a JSON list of tasks from Label Studio, got {type(tasks).__name__}"
    )

# ----------------------------
# Match & Compare
# ----------------------------
# For every annotated task, compare the annotator's choice with the
# `predicted_class` tag stored on the matching MinIO object, and count how
# many of the comparable pairs agree. Failures on a single task are reported
# and skipped so one bad task does not abort the whole evaluation.

# Strips a prefix like '1: "label"' down to the bare label. Compiled once
# because it is applied to every task.
_CHOICE_LABEL_RE = re.compile(r':\s*"?(.*?)"?[,"]?$')
_BUCKET_PREFIX = f"{BUCKET_NAME}/"

total, correct = 0, 0

for task in tasks:
    try:
        # ----- Extract human label -----
        # A task with no annotation, or an annotation with an empty result,
        # simply has nothing to compare. Skip it quietly instead of letting
        # the indexing below report "list index out of range" as an error.
        annotations = task.get('annotations') or []
        if not annotations:
            continue
        results = annotations[0].get('result') or []
        if not results:
            continue
        raw_choice = results[0]['value']['choices'][0]

        # Choices that do not carry the expected prefix yield no label and
        # are skipped.
        match = _CHOICE_LABEL_RE.search(raw_choice)
        human_label = match.group(1).strip().lower() if match else ""
        if not human_label:
            continue

        # ----- Extract image key from URL -----
        # URL paths are percent-encoded, object keys are not: decode first so
        # keys containing spaces or non-ASCII characters still resolve.
        image_url = task['data']['image']
        key = unquote(urlparse(image_url).path).lstrip("/")
        if key.startswith(_BUCKET_PREFIX):
            key = key[len(_BUCKET_PREFIX):]

        # Skip mask files
        if key.endswith('_mask.png'):
            continue

        # ----- Get MinIO predicted label -----
        tags = s3.get_object_tagging(Bucket=BUCKET_NAME, Key=key)['TagSet']
        tag_dict = {t['Key']: t['Value'] for t in tags}
        model_label = tag_dict.get('predicted_class', '').strip().lower()

        print(f"[{key}] human: '{human_label}' | model: '{model_label}'")

        # human_label is already known to be non-empty here; an object without
        # a predicted_class tag is printed above but not counted.
        if not model_label:
            continue
        total += 1
        if human_label == model_label:
            correct += 1
    except Exception as e:
        # Best-effort evaluation: report the problem and move on.
        print(f"⚠️ Error: {e}")
        continue

# ----------------------------
# Final Accuracy
# ----------------------------
# Guard against division by zero when nothing could be compared.
print(f"✅ Accuracy: {correct}/{total} = {correct/total:.2%}" if total else "❌ No valid comparisons made.")


[class_04/ace148a2-de0f-4d2f-9fe8-eae201d4e35f.jpg] human: 'apple black rot' | model: 'apple scab'
[class_04/72e6b7e2-b33e-4290-ae4b-5f0e0031b6a6.jpg] human: 'apple black rot' | model: 'apple scab'
✅ Accuracy: 0/2 = 0.00%
