In [1]:
import os
import csv
import lmstudio as lms
from difflib import SequenceMatcher

In [2]:
# Everything lives in one dataset folder: the labelled CSV that is read and
# the per-image results CSV that is written at the end of the run.
TEST_FOLDER = r"dataset"
GROUND_TRUTH_FILE, OUTPUT_CSV = (
    os.path.join(TEST_FOLDER, filename)
    for filename in ("ground-truth.csv", "prediction_results.csv")
)

In [3]:
# Key of the model the evaluation loop queries; change it here to benchmark
# a different model without touching the rest of the notebook.
MODEL_KEY = "google/gemma-3-4b"

# Handle used below via `model.respond(chat)`.
# NOTE(review): presumably requires a running LM Studio instance with this
# model available — confirm against the lmstudio SDK docs.
model = lms.llm(MODEL_KEY)

In [4]:
def character_error_rate(pred, truth):
    """Return the character error rate (CER) of ``pred`` measured against ``truth``.

    CER is the Levenshtein distance between the two strings (the minimum
    number of single-character substitutions, deletions and insertions that
    turn ``truth`` into ``pred``) divided by ``len(truth)``.

    The distance is computed with an explicit dynamic programme rather than
    ``difflib.SequenceMatcher``: SequenceMatcher anchors on the longest common
    substring and does not produce a minimal edit script, so it can
    overestimate the error count (e.g. pred "XYa3b4" vs truth "a1b2XY" needs
    6 edits, but the opcode-based count is 8).

    Args:
        pred: Predicted string.
        truth: Reference string.

    Returns:
        float: ``edit_distance / len(truth)``. The value can exceed 1.0 when
        ``pred`` is much longer than ``truth``. If ``truth`` is empty the
        result is 0.0 for an empty ``pred`` and 1.0 otherwise.
    """
    n_truth = len(truth)
    if n_truth == 0:
        # No reference characters to normalise by: any output counts as a full error.
        return 1.0 if len(pred) > 0 else 0.0

    # Row-by-row Levenshtein: previous[j] is the distance between the truth
    # prefix processed so far and pred[:j].
    previous = list(range(len(pred) + 1))
    for i, truth_char in enumerate(truth, start=1):
        current = [i]  # turning truth[:i] into "" takes i deletions
        for j, pred_char in enumerate(pred, start=1):
            current.append(min(
                previous[j] + 1,                            # delete truth_char
                current[j - 1] + 1,                         # insert pred_char
                previous[j - 1] + (truth_char != pred_char),  # keep or substitute
            ))
        previous = current

    return previous[-1] / n_truth

In [5]:
# Load every labelled sample as a dict keyed by the CSV header; the evaluation
# loop reads the "images" and "ground_truth" columns from each row.
# newline="" is what the csv module requires of file objects it parses, and
# "utf-8-sig" drops a leading byte-order mark (if any) so the first header
# name is not corrupted.
with open(GROUND_TRUTH_FILE, "r", newline="", encoding="utf-8-sig") as f:
    ground_truths = list(csv.DictReader(f))

In [7]:
# Ask the model to read the plate in each labelled image, score the answer
# against its label, and collect one record per evaluated image.
prompt = "What is the license plate number shown in this image? Respond only with the plate number without spaces or punctuation, and do not include any expiration date."

results = []
for row in ground_truths:
    image_file, gt = row["images"], row["ground_truth"]

    image_path = os.path.join(TEST_FOLDER, image_file)
    if not os.path.exists(image_path):
        # A label whose image is missing cannot be evaluated; report and move on.
        print(f"[WARNING] File {image_file} tidak ditemukan, skip...")
        continue

    # Each image gets its own single-message conversation.
    image_handle = lms.prepare_image(image_path)
    chat = lms.Chat()
    chat.add_user_message(prompt, images=[image_handle])

    # Only surrounding whitespace is trimmed from the reply before scoring.
    prediction = model.respond(chat).content.strip()
    cer_score = character_error_rate(prediction, gt)

    results.append(dict(
        image=image_file,
        ground_truth=gt,
        prediction=prediction,
        CER_score=cer_score,
    ))

    # One log entry per image, followed by a blank separator line.
    print(
        f"[INFO] File: {image_file}\n"
        f"       Ground Truth: {gt}\n"
        f"       Prediction  : {prediction}\n"
        f"       CER Score   : {cer_score:.4f}\n"
    )

[INFO] File: test001_2.jpg
       Ground Truth: B2407UZ0
       Prediction  : B2407UZO
       CER Score   : 0.1250

[INFO] File: test002_1.jpg
       Ground Truth: BG1352AE
       Prediction  : BG1352AE
       CER Score   : 0.0000

[INFO] File: test003_1.jpg
       Ground Truth: B2634UZF
       Prediction  : B2634UZF
       CER Score   : 0.0000

[INFO] File: test014_1.jpg
       Ground Truth: B1388DFX
       Prediction  : 1388DFX
       CER Score   : 0.1250

[INFO] File: test035_3.jpg
       Ground Truth: L2498AAM
       Prediction  : 2498AAM
       CER Score   : 0.1250

[INFO] File: test035_4.jpg
       Ground Truth: W1696YO
       Prediction  : W1696YO
       CER Score   : 0.0000

[INFO] File: test036_2.jpg
       Ground Truth: B1618UAE
       Prediction  : B1618UAE
       CER Score   : 0.0000

[INFO] File: test050_1.jpg
       Ground Truth: L1265OF
       Prediction  : 1265OF
       CER Score   : 0.1429

[INFO] File: test055_1.jpg
       Ground Truth: N1610KS
       Prediction  : N1

In [8]:
# Persist one row per evaluated image.
# The encoding is pinned to UTF-8 because the "prediction" column holds
# free-form model output: with the platform default encoding a single
# unexpected non-ASCII character would raise UnicodeEncodeError here, after
# the whole inference run has already completed.
# newline="" lets the csv module control line endings itself.
with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["image", "ground_truth", "prediction", "CER_score"])
    writer.writeheader()
    writer.writerows(results)

print(f"[INFO] Semua hasil tersimpan di {OUTPUT_CSV}")

[INFO] Semua hasil tersimpan di dataset\prediction_results.csv
