In [None]:
import requests
import boto3
import os
from urllib.parse import urlparse
from datetime import datetime
from collections import defaultdict

In [None]:
# ---- Configuration with local-development fallbacks ----
# Each connection setting is read from the environment; the second argument is
# the value used when the variable is not set.

# Label Studio
PROJECT_ID = 1  # ID of the Label Studio project to evaluate
LABEL_STUDIO_URL = os.environ.get("LABEL_STUDIO_URL", "http://localhost:8080")
# The fallback is only a placeholder: provide the real token through
# LABEL_STUDIO_USER_TOKEN (e.g. in .env or the docker configuration).
LABEL_STUDIO_TOKEN = os.environ.get("LABEL_STUDIO_USER_TOKEN", "your-label-studio-token")

# MinIO object storage
BUCKET_NAME = "production"
MINIO_URL = os.environ.get("MINIO_URL", "http://localhost:9000")
MINIO_ACCESS_KEY = os.environ.get("MINIO_USER", "minioadmin")
MINIO_SECRET_KEY = os.environ.get("MINIO_PASSWORD", "minioadmin")

In [None]:
# Configuration for running inside the Jupyter container on node-eval-loop.
# Every connection setting is a required environment variable: a missing one
# raises KeyError instead of falling back to a default.
PROJECT_ID = 1  # use the first project set up in Label Studio
BUCKET_NAME = "production"

# Label Studio
LABEL_STUDIO_URL = os.environ["LABEL_STUDIO_URL"]
LABEL_STUDIO_TOKEN = os.environ["LABEL_STUDIO_USER_TOKEN"]

# MinIO object storage
MINIO_URL = os.environ["MINIO_URL"]
MINIO_ACCESS_KEY = os.environ["MINIO_USER"]
MINIO_SECRET_KEY = os.environ["MINIO_PASSWORD"]

In [None]:
# ---- Get Label Studio annotations ----
# Export the project's tasks (with their annotations) as JSON from the
# Label Studio API, authenticating with the user token.
response = requests.get(
    f"{LABEL_STUDIO_URL}/api/projects/{PROJECT_ID}/export",
    params={"exportType": "JSON"},
    headers={"Authorization": f"Token {LABEL_STUDIO_TOKEN}"},
    # (connect, read) timeouts in seconds: without them an unreachable server
    # blocks the cell forever. The read timeout is generous for big exports.
    timeout=(10, 300),
)
# Stop here on an HTTP error (bad token, unknown project, ...). Otherwise the
# JSON error body would be stored in `tasks` and only surface later as
# confusing per-task failures.
response.raise_for_status()

tasks = response.json()

In [None]:
# ---- Evaluation ----
# For every exported task, compare the first choice of the first human
# annotation with the "predicted_class" tag stored on the corresponding object
# in the bucket, then report the accuracy and list the disagreements.

# Create the S3 client in this cell so the evaluation runs on a fresh kernel
# (Restart & Run All); it points boto3 at the MinIO endpoint configured above.
s3 = boto3.client(
    "s3",
    endpoint_url=MINIO_URL,
    aws_access_key_id=MINIO_ACCESS_KEY,
    aws_secret_access_key=MINIO_SECRET_KEY,
)


def _object_key(image_url, bucket):
    """Return the object key inside `bucket` that `image_url` points to.

    The key is the URL path without its leading slashes and, when the path
    starts with "<bucket>/", without that bucket component.
    """
    key = urlparse(image_url).path.lstrip('/')
    bucket_prefix = f"{bucket}/"
    if key.startswith(bucket_prefix):
        key = key[len(bucket_prefix):]
    return key


total = 0
correct = 0
mismatches = []

for task in tasks:
    try:
        human_label = task['annotations'][0]['result'][0]['value']['choices'][0]
        image_url = task['data']['image']

        tags = s3.get_object_tagging(
            Bucket=BUCKET_NAME,
            Key=_object_key(image_url, BUCKET_NAME),
        )['TagSet']
        tag_dict = {t['Key']: t['Value'] for t in tags}
    except (KeyError, IndexError, TypeError, s3.exceptions.ClientError) as e:
        # Only per-task problems are tolerated: a task without the expected
        # annotation structure, or an S3 error response (e.g. missing object).
        # Anything else (such as a programming error) is allowed to propagate
        # instead of silently producing a 0/0 result.
        print(f"Error processing task: {e}")
        continue

    predicted_label = tag_dict.get("predicted_class")

    # Tasks lacking either label are left out of the accuracy denominator.
    if predicted_label and human_label:
        total += 1
        if predicted_label == human_label:
            correct += 1
        else:
            mismatches.append({
                "image": image_url,
                "predicted": predicted_label,
                "actual": human_label
            })

# Guard against division by zero when no task could be compared.
accuracy = correct / total if total else 0
print(f"\n🎯 Production Accuracy: {correct}/{total} = {accuracy:.2%}")

if mismatches:
    print("\n⚠️ Mismatched Predictions:")
    for m in mismatches:
        print(f"- {m['image']} | predicted: {m['predicted']} | actual: {m['actual']}")