In [1]:
import os

# Kurangi konflik OpenMP/MKL
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"

# Sering membantu kalau ada error libomp duplicate
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"


#PaddeOCR Crop From Yolov11#

In [None]:
import os
import csv
import re
import cv2
from paddleocr import PaddleOCR

# ============================================================
# 0) CONFIG
# ============================================================
INPUT_FOLDER = r"D:\Uniska\~Disertasi\~Projek Jurnal\sr_crop-RJ"  # <-- ganti
OUT_CSV      = r"Result_Crop_Yolov11.csv"          # <-- ganti

# ============================================================
# 1) OCR INIT
# ============================================================
ocr = PaddleOCR(
    use_angle_cls=True,
    lang='en',
    show_log=False,
    det_db_thresh=0.3
)

# ============================================================
# 2) BASIC HELPERS
# ============================================================
def clean_raw(text: str) -> str:
    return re.sub(r"[^A-Z0-9]", "", str(text).upper())

def pick_best_line(result):
    
    if result is None:
        return "", 0.0
    if not isinstance(result, list) or len(result) == 0:
        return "", 0.0

    # kasus umum: result = [ [line1, line2, ...] ]
    lines = result[0] if (len(result) == 1 and isinstance(result[0], list)) else result
    if lines is None:
        return "", 0.0

    best_text, best_conf = "", 0.0
    for line in lines:
        try:
            txt = line[1][0]
            conf = float(line[1][1])
        except Exception:
            continue
        if conf > best_conf and txt:
            best_text, best_conf = str(txt), conf
    return best_text, best_conf

# ============================================================
# 3) VARIANTS (TANPA CLAHE & TANPA SR2x)
# ============================================================
def make_variants(img):
    # Hanya RAW (no preprocessing)
    return [("RAW", img)]
    # Jika mau pembanding ringan tapi tetap tanpa CLAHE/SR:
    # gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # return [("RAW", img), ("GRAY", gray)]

# ============================================================
# 4) NORMALIZATION (RJ fixed position)
# ============================================================
def normalize_rj_fixedpos_with_meta(raw_text: str):
    """
    Normalisasi plat format fixed-length=10:
      [0]=R, [1]=J, [2-3]=digit, [4-5]=alpha, [6-9]=digit
    Return: final_plate (str), casts (int), anchor_bonus (float)
    """
    t0 = clean_raw(raw_text)

    # 1) Anchor RJ
    anchor_bonus = 0.0
    if "RJ" in t0:
        anchor_bonus = 0.15
        t = t0[t0.find("RJ"):]
    else:
        t = "RJ" + t0

    # 2) Enforce length 10
    if len(t) < 10:
        t = t.ljust(10, "0")
    else:
        t = t[:10]

    chars = list(t)

    # Hard prefix
    chars[0], chars[1] = "R", "J"

    # 3) Casting berbasis posisi
    digit_idx = {2, 3, 6, 7, 8, 9}
    alpha_idx = {4, 5}

    to_digit = {
        "O": "0", "Q": "0", "D": "0",
        "I": "1", "L": "1",
        "Z": "2",
        "S": "5",
        "B": "8",
    }
    to_alpha = {"0": "O", "1": "I", "2": "Z", "5": "S", "8": "B"}

    casts = 0

    for i in range(10):
        c = chars[i]

        if i in digit_idx and c in to_digit:
            newc = to_digit[c]
            if newc != c:
                chars[i] = newc
                casts += 1

        elif i in alpha_idx and c in to_alpha:
            newc = to_alpha[c]
            if newc != c:
                chars[i] = newc
                casts += 1

    # 4) Rule khusus pasangan ambiguitas pada posisi [4-5]
    #    Mendukung: OC, CO, 0C, C0  -> Target QC / CQ
    pair = chars[4] + chars[5]
    pair_map = {
        "OC": ("Q", "C"),
        "CO": ("C", "Q"),
        "0C": ("Q", "C"),
        "C0": ("C", "Q"),
    }
    if pair in pair_map:
        new4, new5 = pair_map[pair]
        if (chars[4], chars[5]) != (new4, new5):
            chars[4], chars[5] = new4, new5
            casts += 1

    final_plate = "".join(chars)
    return final_plate, casts, anchor_bonus

def struct_score_rj(plate10: str) -> float:
    if not plate10 or len(plate10) != 10:
        return 0.0
    c = list(plate10)
    ok = 0
    ok += 1 if c[0] == "R" else 0
    ok += 1 if c[1] == "J" else 0
    for i in [2, 3, 6, 7, 8, 9]:
        ok += 1 if c[i].isdigit() else 0
    for i in [4, 5]:
        ok += 1 if c[i].isalpha() else 0
    return ok / 10.0

# ============================================================
# 5) QC/CQ AMBIGUITY FIX WITH GATE (FIXED INDENTATION)
# ============================================================
def apply_qc_gate(plate10: str, raw_text: str, other_candidates_have_qpair: bool):
    """
    Fix:
      OC -> QC
      CO -> CQ
      0C -> QC
      C0 -> CQ
    ONLY if evidence (gate):
      - raw OCR text contains 'Q' anywhere, OR
      - another variant produced QC/CQ explicitly
    Return: (new_plate, add_cast)
    """
    if not plate10 or len(plate10) != 10:
        return plate10, 0

    chars = list(plate10)
    pair = chars[4] + chars[5]
    if pair not in ("OC", "CO", "0C", "C0"):
        return plate10, 0

    t = clean_raw(raw_text)
    gate_raw_has_q = ("Q" in t)
    gate_other = bool(other_candidates_have_qpair)

    if not (gate_raw_has_q or gate_other):
        return plate10, 0

    if pair == "OC":
        chars[4], chars[5] = "Q", "C"
        return "".join(chars), 1
    if pair == "CO":
        chars[4], chars[5] = "C", "Q"
        return "".join(chars), 1
    if pair == "0C":
        chars[4], chars[5] = "Q", "C"
        return "".join(chars), 1
    if pair == "C0":
        chars[4], chars[5] = "C", "Q"
        return "".join(chars), 1

    return plate10, 0

# ============================================================
# 6) PLATE-AWARE SCORING
# ============================================================
def score_candidate(conf, struct_score, casts, anchor_bonus):
    w_conf = 0.55
    w_struct = 0.45
    cast_penalty = min(casts, 6) * 0.02  # max 0.12
    return (w_conf * float(conf)) + (w_struct * float(struct_score)) + float(anchor_bonus) - cast_penalty

def select_best_across_variants(variant_results):
    """
    variant_results: list of (variant_name, paddle_result)
    Return dict best or None
    """
    # gate evidence: apakah ada kandidat yang eksplisit menghasilkan QC/CQ di raw OCR?
    other_has_qpair = False
    for vname, pres in variant_results:
        raw_text, conf = pick_best_line(pres)
        if not raw_text:
            continue
        t = clean_raw(raw_text)
        if ("QC" in t) or ("CQ" in t):
            other_has_qpair = True
            break

    best = None
    for vname, pres in variant_results:
        raw_text, conf = pick_best_line(pres)
        if not raw_text:
            continue

        final_plate, casts, anchor_bonus = normalize_rj_fixedpos_with_meta(raw_text)

        # apply gated ambiguity fix
        final_plate2, add_cast = apply_qc_gate(final_plate, raw_text, other_has_qpair)
        casts += add_cast
        final_plate = final_plate2

        s_struct = struct_score_rj(final_plate)
        s_total = score_candidate(conf, s_struct, casts, anchor_bonus)

        cand = {
            "variant": vname,
            "raw_text": raw_text,
            "final_plate": final_plate,
            "conf": float(conf),
            "score_total": float(s_total),
            "casts": int(casts),
            "struct_score": float(s_struct),
            "anchor_bonus": float(anchor_bonus),
        }

        if (best is None) or (cand["score_total"] > best["score_total"]):
            best = cand

    return best

# ============================================================
# 7) MAIN LOOP -> CSV
# ============================================================
image_files = sorted([
    f for f in os.listdir(INPUT_FOLDER)
    if f.lower().endswith((".png", ".jpg", ".jpeg"))
])

with open(OUT_CSV, "w", newline="", encoding="utf-8") as f:
    w = csv.writer(f)
    w.writerow([
        "File_Name","Variant","Raw_OCR","Final_Plate",
        "Conf_0to1","Score","Casts","StructScore","AnchorBonus"
    ])

    for fn in image_files:
        img_path = os.path.join(INPUT_FOLDER, fn)
        img = cv2.imread(img_path)

        if img is None:
            w.writerow([fn,"","","",0,0,0,0,0])
            print("BAD IMAGE:", fn)
            continue

        variant_results = []
        for vname, vimg in make_variants(img):
            try:
                pres = ocr.ocr(vimg, cls=True)
            except Exception:
                pres = None
            variant_results.append((vname, pres))

        best = select_best_across_variants(variant_results)

        if best is None:
            w.writerow([fn,"","","",0,0,0,0,0])
            print("NO TEXT:", fn)
        else:
            w.writerow([
                fn,
                best["variant"],
                best["raw_text"],
                best["final_plate"],
                round(best["conf"], 6),
                round(best["score_total"], 6),
                best["casts"],
                round(best["struct_score"], 6),
                round(best["anchor_bonus"], 6)
            ])
            print(f"OK: {fn} | {best['variant']} | {best['raw_text']} → {best['final_plate']}")

print("DONE. Saved:", OUT_CSV)


OK: 100.png | RAW | RJ45CK4748 → RJ45CK4748
OK: 1000.png | RAW | RJ14CG-6581 → RJ14CG6581
OK: 1001.png | RAW | FRJ14CK8305 → RJ14CK8305
OK: 101.png | RAW | RJ14CZ0391 → RJ14CZ0391
OK: 102.png | RAW | RJ45CH4583 → RJ45CH4583
OK: 103.png | RAW | FRJ45CJ9969 → RJ45CJ9969
OK: 104.png | RAW | RJ14LC6763 → RJ14LC6763
OK: 105.png | RAW | RJ31CF7171 → RJ31CF7171
OK: 106.png | RAW | -RJ08CA6767 → RJ08CA6767
OK: 107.png | RAW | RJ45CH9693 → RJ45CH9693
OK: 108.png | RAW | RJ45CK7827 → RJ45CK7827
OK: 109.png | RAW | RJ45CQ1702 → RJ45CQ1702
OK: 111.png | RAW | RJ14CF 9928 → RJ14CF9928
OK: 112.png | RAW | -RJ45CQ9137 → RJ45CQ9137
OK: 113.png | RAW | RS14CY6039 → RJR5I4CY60
OK: 114.png | RAW | RJ14 CG 9094 → RJ14CG9094
OK: 115.png | RAW | FRJ14VC0628 → RJ14VC0628
OK: 116.png | RAW | RJ14CD 4810 → RJ14CD4810
OK: 118.png | RAW | RJ14XC6909 → RJ14XC6909
NO TEXT: 119.png
OK: 120.png | RAW | RJ45CN2934 → RJ45CN2934
OK: 122.png | RAW | FRJ45C00611 → RJ45CQ0611
OK: 123.png | RAW | FRJ14079165 → RJ14O79165
N

#EVALUATION RESULT (RAW CROPING YOLOv11)#

In [None]:
import csv, re

def clean_raw(text):
    return re.sub(r"[^A-Z0-9]", "", str(text).upper())

def normalize_rj_fixedpos(raw_text):
    t = clean_raw(raw_text)
    if "RJ" in t:
        t = t[t.find("RJ"):]
    else:
        t = "RJ" + t
    t = (t + "0"*10)[:10]
    chars = list(t)
    chars[0], chars[1] = "R", "J"

    DIGIT_IDX = {2,3,6,7,8,9}
    ALPHA_IDX = {4,5}
    TO_DIGIT = {'O':'0','Q':'0','D':'0','I':'1','L':'1','Z':'2','S':'5','B':'8'}
    TO_ALPHA = {'0':'O','1':'I','2':'Z','5':'S','8':'B'}

    for i in range(10):
        if i in DIGIT_IDX and chars[i] in TO_DIGIT:
            chars[i] = TO_DIGIT[chars[i]]
        elif i in ALPHA_IDX and chars[i] in TO_ALPHA:
            chars[i] = TO_ALPHA[chars[i]]
    return "".join(chars)

def levenshtein(a, b):
    if a == b: return 0
    if not a: return len(b)
    if not b: return len(a)
    prev = list(range(len(b)+1))
    for i, ca in enumerate(a, 1):
        cur = [i]
        for j, cb in enumerate(b, 1):
            cur.append(min(cur[j-1]+1, prev[j]+1, prev[j-1]+(ca!=cb)))
        prev = cur
    return prev[-1]

# --- LOAD GT ---
GT_PATH = r"ground truth.txt"   # ganti jika perlu
gt = {}
with open(GT_PATH, "r", encoding="utf-8", errors="ignore") as f:
    for line in f:
        if "\t" in line:
            img, plate = line.strip().split("\t")
            fname = img.split("/")[-1]
            gt[fname] = normalize_rj_fixedpos(plate)

# --- LOAD CSV ---
CSV_PATH = r"Result_Crop_Yolov11.csv"  # <-- CSV hasil pipeline
rows = []
with open(CSV_PATH, newline="", encoding="utf-8", errors="replace") as f:
    rows = list(csv.DictReader(f))

total = unreadable = readable = correct_e2e = 0
recog_total = recog_correct = 0
char_sum = 0.0

for r in rows:
    fname = r.get("File_Name","").strip()
    if fname not in gt: continue
    total += 1

    raw = r.get("Raw_OCR","").strip()
    conf_raw = r.get("Conf_0to1","").strip()
    try:
        conf = float(conf_raw)
    except:
        conf = 1.0 if raw != "" else 0.0

    if raw == "" or conf == 0.0:
        unreadable += 1
        continue

    readable += 1
    recog_total += 1

    pred = normalize_rj_fixedpos(r.get("Final_Plate",""))
    gt_plate = gt[fname]

    if pred == gt_plate:
        recog_correct += 1
        correct_e2e += 1

    dist = levenshtein(pred, gt_plate)
    char_sum += (1 - dist/10)

print("\n=== EVALUATION RESULT (RAW CROPING YOLOv11) ===")
print("Total samples       :", total)
print("Unreadable (OCR=∅)  :", unreadable)
print("Readable            :", readable)
print("----------------------------------")
print(f"Readability Rate          : {readable/total*100:.2f}%")
print(f"End-to-End Exact Accuracy : {correct_e2e/total*100:.2f}%")
if recog_total:
    print(f"Recognition Exact Acc     : {recog_correct/recog_total*100:.2f}%")
    print(f"Average Character Acc     : {char_sum/recog_total*100:.2f}%")



=== EVALUATION RESULT (RAW CROPING YOLOv11) ===
Total samples       : 750
Unreadable (OCR=∅)  : 8
Readable            : 742
----------------------------------
Readability Rate          : 98.93%
End-to-End Exact Accuracy : 89.33%
Recognition Exact Acc     : 90.30%
Average Character Acc     : 97.41%


#PaddeOCR Real-ESRGAN #

In [None]:
import os
import csv
import re
import cv2
from paddleocr import PaddleOCR

# ============================================================
# 0) CONFIG
# ============================================================
INPUT_FOLDER = r"D:\Uniska\~Disertasi\~Projek Jurnal\sr_crops-SR"  
OUT_CSV      = r"Result_crop_REAL-ESRGAN.csv"          

# ============================================================
# 1) OCR INIT
# ============================================================
ocr = PaddleOCR(
    use_angle_cls=True,
    lang='en',
    show_log=False,
    det_db_thresh=0.3
)

# ============================================================
# 2) BASIC HELPERS
# ============================================================
def clean_raw(text: str) -> str:
    return re.sub(r"[^A-Z0-9]", "", str(text).upper())

def pick_best_line(result):
    
    if result is None:
        return "", 0.0
    if not isinstance(result, list) or len(result) == 0:
        return "", 0.0

    # kasus umum: result = [ [line1, line2, ...] ]
    lines = result[0] if (len(result) == 1 and isinstance(result[0], list)) else result
    if lines is None:
        return "", 0.0

    best_text, best_conf = "", 0.0
    for line in lines:
        try:
            txt = line[1][0]
            conf = float(line[1][1])
        except Exception:
            continue
        if conf > best_conf and txt:
            best_text, best_conf = str(txt), conf
    return best_text, best_conf

# ============================================================
# 3) VARIANTS (TANPA CLAHE & TANPA SR2x)
# ============================================================
def make_variants(img):
    # Hanya RAW (no preprocessing)
    return [("RAW", img)]
    # Jika mau pembanding ringan tapi tetap tanpa CLAHE/SR:
    # gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # return [("RAW", img), ("GRAY", gray)]

# ============================================================
# 4) NORMALIZATION (RJ fixed position)
# ============================================================
def normalize_rj_fixedpos_with_meta(raw_text: str):
    """
    Normalisasi plat format fixed-length=10:
      [0]=R, [1]=J, [2-3]=digit, [4-5]=alpha, [6-9]=digit
    Return: final_plate (str), casts (int), anchor_bonus (float)
    """
    t0 = clean_raw(raw_text)

    # 1) Anchor RJ
    anchor_bonus = 0.0
    if "RJ" in t0:
        anchor_bonus = 0.15
        t = t0[t0.find("RJ"):]
    else:
        t = "RJ" + t0

    # 2) Enforce length 10
    if len(t) < 10:
        t = t.ljust(10, "0")
    else:
        t = t[:10]

    chars = list(t)

    # Hard prefix
    chars[0], chars[1] = "R", "J"

    # 3) Casting berbasis posisi
    digit_idx = {2, 3, 6, 7, 8, 9}
    alpha_idx = {4, 5}

    to_digit = {
        "O": "0", "Q": "0", "D": "0",
        "I": "1", "L": "1",
        "Z": "2",
        "S": "5",
        "B": "8",
    }
    to_alpha = {"0": "O", "1": "I", "2": "Z", "5": "S", "8": "B"}

    casts = 0

    for i in range(10):
        c = chars[i]

        if i in digit_idx and c in to_digit:
            newc = to_digit[c]
            if newc != c:
                chars[i] = newc
                casts += 1

        elif i in alpha_idx and c in to_alpha:
            newc = to_alpha[c]
            if newc != c:
                chars[i] = newc
                casts += 1

    # 4) Rule khusus pasangan ambiguitas pada posisi [4-5]
    #    Mendukung: OC, CO, 0C, C0  -> Target QC / CQ
    pair = chars[4] + chars[5]
    pair_map = {
        "OC": ("Q", "C"),
        "CO": ("C", "Q"),
        "0C": ("Q", "C"),
        "C0": ("C", "Q"),
    }
    if pair in pair_map:
        new4, new5 = pair_map[pair]
        if (chars[4], chars[5]) != (new4, new5):
            chars[4], chars[5] = new4, new5
            casts += 1

    final_plate = "".join(chars)
    return final_plate, casts, anchor_bonus

def struct_score_rj(plate10: str) -> float:
    if not plate10 or len(plate10) != 10:
        return 0.0
    c = list(plate10)
    ok = 0
    ok += 1 if c[0] == "R" else 0
    ok += 1 if c[1] == "J" else 0
    for i in [2, 3, 6, 7, 8, 9]:
        ok += 1 if c[i].isdigit() else 0
    for i in [4, 5]:
        ok += 1 if c[i].isalpha() else 0
    return ok / 10.0

# ============================================================
# 5) QC/CQ AMBIGUITY FIX WITH GATE (FIXED INDENTATION)
# ============================================================
def apply_qc_gate(plate10: str, raw_text: str, other_candidates_have_qpair: bool):
    """
    Fix:
      OC -> QC
      CO -> CQ
      0C -> QC
      C0 -> CQ
    ONLY if evidence (gate):
      - raw OCR text contains 'Q' anywhere, OR
      - another variant produced QC/CQ explicitly
    Return: (new_plate, add_cast)
    """
    if not plate10 or len(plate10) != 10:
        return plate10, 0

    chars = list(plate10)
    pair = chars[4] + chars[5]
    if pair not in ("OC", "CO", "0C", "C0"):
        return plate10, 0

    t = clean_raw(raw_text)
    gate_raw_has_q = ("Q" in t)
    gate_other = bool(other_candidates_have_qpair)

    if not (gate_raw_has_q or gate_other):
        return plate10, 0

    if pair == "OC":
        chars[4], chars[5] = "Q", "C"
        return "".join(chars), 1
    if pair == "CO":
        chars[4], chars[5] = "C", "Q"
        return "".join(chars), 1
    if pair == "0C":
        chars[4], chars[5] = "Q", "C"
        return "".join(chars), 1
    if pair == "C0":
        chars[4], chars[5] = "C", "Q"
        return "".join(chars), 1

    return plate10, 0

# ============================================================
# 6) PLATE-AWARE SCORING
# ============================================================
def score_candidate(conf, struct_score, casts, anchor_bonus):
    w_conf = 0.55
    w_struct = 0.45
    cast_penalty = min(casts, 6) * 0.02  # max 0.12
    return (w_conf * float(conf)) + (w_struct * float(struct_score)) + float(anchor_bonus) - cast_penalty

def select_best_across_variants(variant_results):
    """
    variant_results: list of (variant_name, paddle_result)
    Return dict best or None
    """
    # gate evidence: apakah ada kandidat yang eksplisit menghasilkan QC/CQ di raw OCR?
    other_has_qpair = False
    for vname, pres in variant_results:
        raw_text, conf = pick_best_line(pres)
        if not raw_text:
            continue
        t = clean_raw(raw_text)
        if ("QC" in t) or ("CQ" in t):
            other_has_qpair = True
            break

    best = None
    for vname, pres in variant_results:
        raw_text, conf = pick_best_line(pres)
        if not raw_text:
            continue

        final_plate, casts, anchor_bonus = normalize_rj_fixedpos_with_meta(raw_text)

        # apply gated ambiguity fix
        final_plate2, add_cast = apply_qc_gate(final_plate, raw_text, other_has_qpair)
        casts += add_cast
        final_plate = final_plate2

        s_struct = struct_score_rj(final_plate)
        s_total = score_candidate(conf, s_struct, casts, anchor_bonus)

        cand = {
            "variant": vname,
            "raw_text": raw_text,
            "final_plate": final_plate,
            "conf": float(conf),
            "score_total": float(s_total),
            "casts": int(casts),
            "struct_score": float(s_struct),
            "anchor_bonus": float(anchor_bonus),
        }

        if (best is None) or (cand["score_total"] > best["score_total"]):
            best = cand

    return best

# ============================================================
# 7) MAIN LOOP -> CSV
# ============================================================
image_files = sorted([
    f for f in os.listdir(INPUT_FOLDER)
    if f.lower().endswith((".png", ".jpg", ".jpeg"))
])

with open(OUT_CSV, "w", newline="", encoding="utf-8") as f:
    w = csv.writer(f)
    w.writerow([
        "File_Name","Variant","Raw_OCR","Final_Plate",
        "Conf_0to1","Score","Casts","StructScore","AnchorBonus"
    ])

    for fn in image_files:
        img_path = os.path.join(INPUT_FOLDER, fn)
        img = cv2.imread(img_path)

        if img is None:
            w.writerow([fn,"","","",0,0,0,0,0])
            print("BAD IMAGE:", fn)
            continue

        variant_results = []
        for vname, vimg in make_variants(img):
            try:
                pres = ocr.ocr(vimg, cls=True)
            except Exception:
                pres = None
            variant_results.append((vname, pres))

        best = select_best_across_variants(variant_results)

        if best is None:
            w.writerow([fn,"","","",0,0,0,0,0])
            print("NO TEXT:", fn)
        else:
            w.writerow([
                fn,
                best["variant"],
                best["raw_text"],
                best["final_plate"],
                round(best["conf"], 6),
                round(best["score_total"], 6),
                best["casts"],
                round(best["struct_score"], 6),
                round(best["anchor_bonus"], 6)
            ])
            print(f"OK: {fn} | {best['variant']} | {best['raw_text']} → {best['final_plate']}")

print("DONE. Saved:", OUT_CSV)


OK: 100.png | RAW | RJ45 CK4748 → RJ45CK4748
OK: 1000.png | RAW | RJ14CG6581 → RJ14CG6581
OK: 1001.png | RAW | RJ14CK0305 → RJ14CK0305
OK: 101.png | RAW | "RJ14CZ0391 → RJ14CZ0391
OK: 102.png | RAW | RJ45CH4583 → RJ45CH4583
OK: 103.png | RAW | RJ45CJ9969 → RJ45CJ9969
OK: 104.png | RAW | RJ14LC6763 → RJ14LC6763
OK: 105.png | RAW | FRJ31CF7171 → RJ31CF7171
OK: 106.png | RAW | RJ08CA6767 → RJ08CA6767
OK: 107.png | RAW | -RJ45CN9693 → RJ45CN9693
OK: 108.png | RAW | RJ45CK7827 → RJ45CK7827
OK: 109.png | RAW | FRJ45CQ1702 → RJ45CQ1702
OK: 111.png | RAW | RJ14 CF 9928 → RJ14CF9928
OK: 112.png | RAW | RJ45C09137 → RJ45CQ9137
OK: 113.png | RAW | RJ14CY6039 → RJ14CY6039
OK: 114.png | RAW | 9094 → RJ90940000
OK: 115.png | RAW | RJ14VC0628 → RJ14VC0628
OK: 116.png | RAW | RJ14 CD 4810 → RJ14CD4810
OK: 118.png | RAW | RJ14XC6909 → RJ14XC6909
OK: 119.png | RAW | RJ01CB7236 → RJ01CB7236
OK: 120.png | RAW | RJ45CN2934 → RJ45CN2934
OK: 122.png | RAW | RJ45C00611 → RJ45CQ0611
OK: 123.png | RAW | PRJ14C7

#EVALUATION RESULT (RAW REAL-ESRGAN)#

In [None]:
import csv, re

def clean_raw(text):
    return re.sub(r"[^A-Z0-9]", "", str(text).upper())

def normalize_rj_fixedpos(raw_text):
    t = clean_raw(raw_text)
    if "RJ" in t:
        t = t[t.find("RJ"):]
    else:
        t = "RJ" + t
    t = (t + "0"*10)[:10]
    chars = list(t)
    chars[0], chars[1] = "R", "J"

    DIGIT_IDX = {2,3,6,7,8,9}
    ALPHA_IDX = {4,5}
    TO_DIGIT = {'O':'0','Q':'0','D':'0','I':'1','L':'1','Z':'2','S':'5','B':'8'}
    TO_ALPHA = {'0':'O','1':'I','2':'Z','5':'S','8':'B'}

    for i in range(10):
        if i in DIGIT_IDX and chars[i] in TO_DIGIT:
            chars[i] = TO_DIGIT[chars[i]]
        elif i in ALPHA_IDX and chars[i] in TO_ALPHA:
            chars[i] = TO_ALPHA[chars[i]]
    return "".join(chars)

def levenshtein(a, b):
    if a == b: return 0
    if not a: return len(b)
    if not b: return len(a)
    prev = list(range(len(b)+1))
    for i, ca in enumerate(a, 1):
        cur = [i]
        for j, cb in enumerate(b, 1):
            cur.append(min(cur[j-1]+1, prev[j]+1, prev[j-1]+(ca!=cb)))
        prev = cur
    return prev[-1]

# --- LOAD GT ---
GT_PATH = r"ground truth.txt"   # ganti jika perlu
gt = {}
with open(GT_PATH, "r", encoding="utf-8", errors="ignore") as f:
    for line in f:
        if "\t" in line:
            img, plate = line.strip().split("\t")
            fname = img.split("/")[-1]
            gt[fname] = normalize_rj_fixedpos(plate)

# --- LOAD CSV ---
CSV_PATH = r"Result_crop_REAL-ESRGAN.csv"  # <-- CSV hasil pipeline
rows = []
with open(CSV_PATH, newline="", encoding="utf-8", errors="replace") as f:
    rows = list(csv.DictReader(f))

total = unreadable = readable = correct_e2e = 0
recog_total = recog_correct = 0
char_sum = 0.0

for r in rows:
    fname = r.get("File_Name","").strip()
    if fname not in gt: continue
    total += 1

    raw = r.get("Raw_OCR","").strip()
    conf_raw = r.get("Conf_0to1","").strip()
    try:
        conf = float(conf_raw)
    except:
        conf = 1.0 if raw != "" else 0.0

    if raw == "" or conf == 0.0:
        unreadable += 1
        continue

    readable += 1
    recog_total += 1

    pred = normalize_rj_fixedpos(r.get("Final_Plate",""))
    gt_plate = gt[fname]

    if pred == gt_plate:
        recog_correct += 1
        correct_e2e += 1

    dist = levenshtein(pred, gt_plate)
    char_sum += (1 - dist/10)

print("\n=== EVALUATION RESULT (RAW REAL-ESRGAN) ===")
print("Total samples       :", total)
print("Unreadable (OCR=∅)  :", unreadable)
print("Readable            :", readable)
print("----------------------------------")
print(f"Readability Rate          : {readable/total*100:.2f}%")
print(f"End-to-End Exact Accuracy : {correct_e2e/total*100:.2f}%")
if recog_total:
    print(f"Recognition Exact Acc     : {recog_correct/recog_total*100:.2f}%")
    print(f"Average Character Acc     : {char_sum/recog_total*100:.2f}%")



=== EVALUATION RESULT (RAW REAL-ESRGAN) ===
Total samples       : 750
Unreadable (OCR=∅)  : 1
Readable            : 749
----------------------------------
Readability Rate          : 99.87%
End-to-End Exact Accuracy : 84.67%
Recognition Exact Acc     : 84.78%
Average Character Acc     : 96.17%


##

#PaddleOCR Adaptive Enhancement#

In [None]:
import os
import csv
import re
import cv2
from paddleocr import PaddleOCR

# ============================================================
# 0) CONFIG
# ============================================================
INPUT_FOLDER = r"D:\Uniska\~Disertasi\~Projek Jurnal\sr_crop-RJ"   # <-- ganti
OUT_CSV      = r"result_adaptive_initial.csv"                      # <-- output untuk Adaptive (initial)

# ============================================================
# 1) OCR INIT
# ============================================================
ocr = PaddleOCR(
    use_angle_cls=True,
    lang='en',
    show_log=False,
    det_db_thresh=0.3
)

# ============================================================
# 2) BASIC HELPERS
# ============================================================
def clean_raw(text: str) -> str:
    return re.sub(r"[^A-Z0-9]", "", str(text).upper())

def pick_best_line(result):
    """
    Ambil 1 baris terbaik berdasarkan confidence.
    Robust: aman untuk None / [] / [[...]].
    """
    if result is None:
        return "", 0.0
    if not isinstance(result, list) or len(result) == 0:
        return "", 0.0

    # PaddleOCR umum: result = [ [line1, line2, ...] ]
    lines = result[0] if (len(result) == 1 and isinstance(result[0], list)) else result
    if not lines:
        return "", 0.0

    best_text, best_conf = "", 0.0
    for line in lines:
        try:
            txt = line[1][0]
            conf = float(line[1][1])
        except Exception:
            continue
        if txt and conf > best_conf:
            best_text, best_conf = str(txt), conf
    return best_text, best_conf

# ============================================================
# 3) VARIANTS (Adaptive Enhancement) -> untuk Adaptive (initial)
#    (tetap pakai varian ringan: RAW, CLAHE-light, SR2x+CLAHE-light)
# ============================================================
def clahe_light(bgr, clip=2.0, grid=(8, 8)):
    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    clahe = cv2.createCLAHE(clipLimit=clip, tileGridSize=grid)
    enhanced = clahe.apply(gray)
    return cv2.cvtColor(enhanced, cv2.COLOR_GRAY2BGR)

def sr2x_bicubic(bgr):
    h, w = bgr.shape[:2]
    return cv2.resize(bgr, (w * 2, h * 2), interpolation=cv2.INTER_CUBIC)

def sr2x_plus_clahe(bgr, clip=2.0, grid=(8, 8)):
    up = sr2x_bicubic(bgr)
    return clahe_light(up, clip=clip, grid=grid)

def make_variants(img):
    return [
        ("RAW", img),
        ("CLAHE-light", clahe_light(img, clip=2.0, grid=(8, 8))),
        ("SR2x+CLAHE-light", sr2x_plus_clahe(img, clip=2.0, grid=(8, 8))),
    ]

# ============================================================
# 4) NORMALIZATION (Optional)
#    Untuk Adaptive (initial) di paper biasanya:
#    - hanya pilih hasil OCR tertinggi (confidence-based)
#    - boleh simpan RAW_OCR cleaned untuk evaluasi
#
#    Jika evaluasi Anda butuh format fixed 10 (RJ...), Anda bisa aktifkan
#    normalize_rj_fixedpos() di bawah. Kalau tidak perlu, set USE_NORMALIZE=False.
# ============================================================
USE_NORMALIZE = True  # <-- set False jika mau pure raw OCR (tanpa normalisasi)

def normalize_rj_fixedpos(raw_text: str) -> str:
    """
    Versi ringan (tanpa plate-aware scoring/gating):
    - anchor RJ
    - enforce length 10
    - casting per slot (digit/alpha)
    - pasangan [4-5] OC/CO/0C/C0 -> QC/CQ (ini masih "normalisasi", bukan selection)
    """
    t0 = clean_raw(raw_text)

    if "RJ" in t0:
        t = t0[t0.find("RJ"):]
    else:
        t = "RJ" + t0

    if len(t) < 10:
        t = t.ljust(10, "0")
    else:
        t = t[:10]

    chars = list(t)
    chars[0], chars[1] = "R", "J"

    digit_idx = {2, 3, 6, 7, 8, 9}
    alpha_idx = {4, 5}

    to_digit = {"O": "0", "Q": "0", "D": "0", "I": "1", "L": "1", "Z": "2", "S": "5", "B": "8"}
    to_alpha = {"0": "O", "1": "I", "2": "Z", "5": "S", "8": "B"}

    for i in range(10):
        c = chars[i]
        if i in digit_idx and c in to_digit:
            chars[i] = to_digit[c]
        elif i in alpha_idx and c in to_alpha:
            chars[i] = to_alpha[c]

    pair = chars[4] + chars[5]
    pair_map = {"OC": ("Q", "C"), "CO": ("C", "Q"), "0C": ("Q", "C"), "C0": ("C", "Q")}
    if pair in pair_map:
        chars[4], chars[5] = pair_map[pair]

    return "".join(chars)

# ============================================================
# 5) ADAPTIVE (INITIAL) SELECTION
#    Pilih kandidat terbaik hanya berdasarkan CONFIDENCE OCR
#    (tanpa plate-aware scoring, tanpa gating antar-variant).
# ============================================================
def select_best_initial_confidence(variant_results):
    """
    variant_results: list of (variant_name, paddle_result)
    Return dict best or None
    """
    best = None
    for vname, pres in variant_results:
        raw_text, conf = pick_best_line(pres)
        if not raw_text:
            continue

        final_text = normalize_rj_fixedpos(raw_text) if USE_NORMALIZE else clean_raw(raw_text)

        cand = {
            "variant": vname,
            "raw_text": raw_text,
            "final_plate": final_text,
            "conf": float(conf),
        }

        if (best is None) or (cand["conf"] > best["conf"]):
            best = cand

    return best

# ============================================================
# 6) MAIN LOOP -> CSV
# ============================================================
image_files = sorted([
    f for f in os.listdir(INPUT_FOLDER)
    if f.lower().endswith((".png", ".jpg", ".jpeg"))
])

with open(OUT_CSV, "w", newline="", encoding="utf-8") as f:
    w = csv.writer(f)
    w.writerow([
        "File_Name", "Variant", "Raw_OCR", "Final_Text", "Conf_0to1"
    ])

    for fn in image_files:
        img_path = os.path.join(INPUT_FOLDER, fn)
        img = cv2.imread(img_path)

        if img is None:
            w.writerow([fn, "", "", "", 0])
            print("BAD IMAGE:", fn)
            continue

        variant_results = []
        for vname, vimg in make_variants(img):
            try:
                pres = ocr.ocr(vimg, cls=True)
            except Exception:
                pres = None
            variant_results.append((vname, pres))

        best = select_best_initial_confidence(variant_results)

        if best is None:
            w.writerow([fn, "", "", "", 0])
            print("NO TEXT:", fn)
        else:
            w.writerow([
                fn,
                best["variant"],
                best["raw_text"],
                best["final_plate"],
                round(best["conf"], 6),
            ])
            print(f"OK: {fn} | {best['variant']} | {best['raw_text']} → {best['final_plate']}")

print("DONE. Saved:", OUT_CSV)


OK: 100.png | SR2x+CLAHE-light | RJ45CK4748 → RJ45CK4748
OK: 1000.png | SR2x+CLAHE-light | RJ14CG6581 → RJ14CG6581
OK: 1001.png | CLAHE-light | RJ14CK8305 → RJ14CK8305
OK: 101.png | CLAHE-light | RJ14CZ0391 → RJ14CZ0391
OK: 102.png | SR2x+CLAHE-light | RJ45CH4583 → RJ45CH4583
OK: 103.png | SR2x+CLAHE-light | FRJ45CJ9969 → RJ45CJ9969
OK: 104.png | SR2x+CLAHE-light | RJ14LC6763 → RJ14LC6763
OK: 105.png | SR2x+CLAHE-light | RJ31CF7171 → RJ31CF7171
OK: 106.png | SR2x+CLAHE-light | RJ08CA6767 → RJ08CA6767
OK: 107.png | SR2x+CLAHE-light | RJ45CN9693 → RJ45CN9693
OK: 108.png | SR2x+CLAHE-light | RJ45CK7827 → RJ45CK7827
OK: 109.png | RAW | RJ45CQ1702 → RJ45CQ1702
OK: 111.png | RAW | RJ14CF 9928 → RJ14CF9928
OK: 112.png | SR2x+CLAHE-light | RJ45C09137 → RJ45CQ9137
OK: 113.png | CLAHE-light | RJ14CY6039 → RJ14CY6039
OK: 114.png | RAW | RJ14 CG 9094 → RJ14CG9094
OK: 115.png | RAW | FRJ14VC0628 → RJ14VC0628
OK: 116.png | SR2x+CLAHE-light | RJ14CD4810 → RJ14CD4810
OK: 118.png | SR2x+CLAHE-light | R

#EVALUATION (NO NUMPY) - ADAPTIVE (INITIAL)#

In [None]:
import csv, re

# ------------------------------------------------------
# 1) Same helpers as pipeline
# ------------------------------------------------------
def clean_raw(text):
    return re.sub(r"[^A-Z0-9]", "", str(text).upper())

def normalize_rj_fixedpos(raw_text):
    """
    HARUS sama dengan normalize_rj_fixedpos() di script Adaptive (initial).
    (anchor RJ + enforce length 10 + casting digit/alpha + pair fix QC/CQ)
    """
    t0 = clean_raw(raw_text)

    if "RJ" in t0:
        t = t0[t0.find("RJ"):]
    else:
        t = "RJ" + t0

    if len(t) < 10:
        t = t.ljust(10, "0")
    else:
        t = t[:10]

    chars = list(t)
    chars[0], chars[1] = "R", "J"

    DIGIT_IDX = {2, 3, 6, 7, 8, 9}
    ALPHA_IDX = {4, 5}
    TO_DIGIT = {'O': '0', 'Q': '0', 'D': '0', 'I': '1', 'L': '1', 'Z': '2', 'S': '5', 'B': '8'}
    TO_ALPHA = {'0': 'O', '1': 'I', '2': 'Z', '5': 'S', '8': 'B'}

    for i in range(10):
        if i in DIGIT_IDX and chars[i] in TO_DIGIT:
            chars[i] = TO_DIGIT[chars[i]]
        elif i in ALPHA_IDX and chars[i] in TO_ALPHA:
            chars[i] = TO_ALPHA[chars[i]]

    # pair fix [4-5]
    pair = chars[4] + chars[5]
    pair_map = {"OC": ("Q", "C"), "CO": ("C", "Q"), "0C": ("Q", "C"), "C0": ("C", "Q")}
    if pair in pair_map:
        chars[4], chars[5] = pair_map[pair]

    return "".join(chars)

def levenshtein(a, b):
    if a == b: return 0
    if not a: return len(b)
    if not b: return len(a)
    prev = list(range(len(b)+1))
    for i, ca in enumerate(a, 1):
        cur = [i]
        for j, cb in enumerate(b, 1):
            cur.append(min(cur[j-1]+1, prev[j]+1, prev[j-1]+(ca!=cb)))
        prev = cur
    return prev[-1]

# ------------------------------------------------------
# 2) Load Ground Truth
#    GT format: "filename<TAB>plate"
# ------------------------------------------------------
GT_PATH = r"ground truth.txt"  # <-- ganti jika perlu
gt = {}
with open(GT_PATH, "r", encoding="utf-8", errors="ignore") as f:
    for line in f:
        line = line.strip()
        if not line or "\t" not in line:
            continue
        img, plate = line.split("\t", 1)
        fname = img.split("/")[-1].split("\\")[-1]
        gt[fname] = normalize_rj_fixedpos(plate)

# ------------------------------------------------------
# 3) Load CSV Adaptive (initial)
#    CSV columns from script: File_Name, Variant, Raw_OCR, Final_Text, Conf_0to1
# ------------------------------------------------------
CSV_PATH = r"result_adaptive_initial.csv"  # <-- CSV hasil Adaptive (initial)
with open(CSV_PATH, newline="", encoding="utf-8", errors="replace") as f:
    rows = list(csv.DictReader(f))

# ------------------------------------------------------
# 4) Evaluation Metrics
#    - Readability: raw OCR not empty and conf > 0
#    - Exact Accuracy: pred == GT (normalized to fixed 10)
#    - Character Accuracy: 1 - Levenshtein/10
# ------------------------------------------------------
total = 0
unreadable = 0
readable = 0

correct_e2e = 0       # exact / total
recog_total = 0       # readable count
recog_correct = 0     # exact among readable
char_sum = 0.0        # avg char acc among readable

for r in rows:
    fname = (r.get("File_Name") or "").strip()
    if not fname or fname not in gt:
        continue

    total += 1

    raw = (r.get("Raw_OCR") or "").strip()
    conf_raw = (r.get("Conf_0to1") or "").strip()
    try:
        conf = float(conf_raw)
    except:
        conf = 1.0 if raw != "" else 0.0

    # Readability criterion (selaras dengan pipeline)
    if raw == "" or conf <= 0.0:
        unreadable += 1
        continue

    readable += 1
    recog_total += 1

    # pred di CSV sudah Final_Text (dari pipeline)
    pred_csv = (r.get("Final_Text") or "").strip()

    # Untuk aman: normalize lagi ke fixed 10 (selaras GT)
    pred = normalize_rj_fixedpos(pred_csv)
    gt_plate = gt[fname]

    if pred == gt_plate:
        recog_correct += 1
        correct_e2e += 1

    dist = levenshtein(pred, gt_plate)
    char_sum += (1 - dist/10.0)

# ------------------------------------------------------
# 5) Print Results
# ------------------------------------------------------
print("\n=== EVALUATION RESULT (ADAPTIVE INITIAL) ===")
print("Total samples       :", total)
print("Unreadable (OCR=∅)  :", unreadable)
print("Readable            :", readable)
print("----------------------------------")

if total > 0:
    print(f"Readability Rate          : {readable/total*100:.2f}%")
    print(f"End-to-End Exact Accuracy : {correct_e2e/total*100:.2f}%")
else:
    print("Readability Rate          : 0.00%")
    print("End-to-End Exact Accuracy : 0.00%")

if recog_total:
    print(f"Recognition Exact Acc     : {recog_correct/recog_total*100:.2f}%")
    print(f"Average Character Acc     : {char_sum/recog_total*100:.2f}%")
else:
    print("Recognition Exact Acc     : 0.00%")
    print("Average Character Acc     : 0.00%")




=== EVALUATION RESULT (ADAPTIVE INITIAL) ===
Total samples       : 750
Unreadable (OCR=∅)  : 0
Readable            : 750
----------------------------------
Readability Rate          : 100.00%
End-to-End Exact Accuracy : 90.93%
Recognition Exact Acc     : 90.93%
Average Character Acc     : 97.04%
