# **Nama : Nedia Waty**
# **Nim : 4222201010**
# **Prodi : Teknik Robotika**

**Library**

In [9]:
import os
import pandas as pd
from tqdm import tqdm
import Levenshtein
import lmstudio as lms

**Konfigurasi OCR (Optical Character Recognition)**

In [None]:
# === CONFIGURASI ===
IMAGE_FOLDER = "test"
GROUND_TRUTH_CSV = os.path.join(IMAGE_FOLDER, "ground_truth.csv")
OUTPUT_CSV =  "result.csv"
MODEL_NAME = "google/gemma-3-4b" 

 **Load Data pada Ground Truth**

In [4]:
# === Load ground truth ===
ground_truth_df = pd.read_csv(GROUND_TRUTH_CSV, header=None, names=["image", "ground_truth"])
ground_truth_dict = dict(zip(ground_truth_df["image"], ground_truth_df["ground_truth"]))

**Load Model pada OCR**

In [5]:
# === Load model ===
model = lms.llm(MODEL_NAME)

**Perhitungan Evaluasi (CER)**

In [6]:
# === CER calculation ===
def calculate_cer(gt, pred):
    return Levenshtein.distance(gt, pred) / max(1, len(gt))

**Proses Inference dan Evaluasi**

In [8]:
# === Proses inference dan Evaluasi ===
results = []

for filename in tqdm(sorted(os.listdir(IMAGE_FOLDER))):
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue

    image_path = os.path.join(IMAGE_FOLDER, filename)
    image_handle = lms.prepare_image(image_path)

    chat = lms.Chat()
    chat.add_user_message(
        "What is the license plate number shown in this image? Respond only with the license plate characters, without any spaces, or punctuation. Do not include the expiration date.",
        images=[image_handle]
    )

    try:
        prediction_result = model.respond(chat)
        prediction =  prediction_result.content.strip()
        
    except Exception as e:
        prediction = "ERROR"
        print(f"Gagal memproses {filename}: {e}")

    ground_truth = ground_truth_dict.get(filename, "")
    cer = calculate_cer(ground_truth, prediction)

    results.append([filename, ground_truth, prediction, cer])

100%|██████████| 101/101 [2:57:50<00:00, 105.65s/it] 


**Penyimpanan Hasil dan Statistik Evaluasi**

In [10]:
# Simpan ke CSV
out_df = pd.DataFrame(results, columns=["image", "ground_truth", "prediction", "CER_score"])
out_df.to_csv(OUTPUT_CSV, index=False)

print("\n✅ Selesai memproses semua gambar.")
print(f"📁 Hasil disimpan ke '{OUTPUT_CSV}'")

# Statistik evaluasi
exact_match = (out_df["CER_score"] == 0).sum()
total = len(out_df)
mean_cer = out_df["CER_score"].mean()

print(f"\n📊 Statistik Evaluasi:")
print(f"- Jumlah Gambar       : {total}")
print(f"- Exact Match         : {exact_match} ({(exact_match / total) * 100:.2f}%)")
print(f"- Rata-rata CER Score : {mean_cer:.4f}")


✅ Selesai memproses semua gambar.
📁 Hasil disimpan ke 'result.csv'

📊 Statistik Evaluasi:
- Jumlah Gambar       : 50
- Exact Match         : 22 (44.00%)
- Rata-rata CER Score : 0.1173
