In [94]:
import cv2
import numpy as np
import os, glob, time
import tensorflow as tf
import keras
from tensorflow.keras import backend as K
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd, string
from ultralytics import YOLO
from concurrent.futures import ThreadPoolExecutor

In [95]:
# YOLO detectors: `corners_model` is used for the corner classes in
# class_map_corners, `info_model` for the field classes in class_map_info.
corners_model = YOLO("detect_corners_n.pt")
info_model = YOLO("detect_info_n.pt")
# Keras text-recognition models for the cropped id and dob regions.
# compile=False: the models are only used for inference here (model.predict).
# NOTE(review): safe_mode=False disables Keras' protection against unsafe
# deserialization (e.g. Lambda layers), so loading can execute code stored in
# the file — only load .keras files from a trusted source.
predict_id_model = tf.keras.models.load_model('model_id_1.keras', compile=False, safe_mode=False)
predict_dob_model = tf.keras.models.load_model('dob_model_1_.keras', compile=False, safe_mode=False) 

In [96]:
# ---- Folder layout -------------------------------------------------------
# Everything lives under IMG_DIR: the input images sit directly inside it,
# and each pipeline stage writes to its own sub-folder.
IMG_DIR = "test_1_img"
WARP_DIR = os.path.join(IMG_DIR, "warped")      # perspective-corrected images
REGION_ROOT = os.path.join(IMG_DIR, "regions")  # one sub-folder per detected field
PREVIEW_DIR = os.path.join(IMG_DIR, "preview")  # defined only; not created here

os.makedirs(WARP_DIR, exist_ok=True)
os.makedirs(REGION_ROOT, exist_ok=True)

# ---- Detector class ids -> readable names --------------------------------
class_map_corners = {
    2: 'top_left',
    3: 'top_right',
    1: 'bottom_right',
    0: 'bottom_left',
}
class_map_info = {
    2: "name",
    1: "id",
    0: "dob",
}

# ---- Text-recognition vocabulary -----------------------------------------
# Letters (lower then upper case) followed by digits; the index just past the
# last character is kept under two names, blank_index and BLANK.
char_list = string.ascii_letters + string.digits
blank_index = len(char_list)

BLANK = blank_index
index_to_char = dict(enumerate(char_list))

In [97]:
def find_miss_corner(coord):
    """Return the position of the first corner name absent from `coord`.

    Positions follow the fixed order top_left (0), top_right (1),
    bottom_left (2), bottom_right (3). Only key membership is checked.
    Returns -1 when all four names are present.
    """
    expected = ('top_left', 'top_right', 'bottom_left', 'bottom_right')
    return next(
        (pos for pos, label in enumerate(expected) if label not in coord),
        -1,
    )

def calculate_missed_coord_corner(coord):
    """Fill in the one missing corner of a quadrilateral, in place.

    The missing point is obtained by reflecting the corner diagonally opposite
    to it through the midpoint of the other diagonal (i.e. the shape is
    completed as a parallelogram). `coord` maps corner names to (x, y) points;
    the new point is stored as a two-element list. If no corner is missing the
    mapping is returned untouched.

    Returns the same `coord` object that was passed in.
    """
    # missing-corner index -> (diagonal end, diagonal end, opposite corner, key to fill)
    recipes = {
        0: ('top_right', 'bottom_left', 'bottom_right', 'top_left'),
        1: ('top_left', 'bottom_right', 'bottom_left', 'top_right'),
        2: ('top_left', 'bottom_right', 'top_right', 'bottom_left'),
        3: ('bottom_left', 'top_right', 'top_left', 'bottom_right'),
    }
    recipe = recipes.get(find_miss_corner(coord))
    if recipe is not None:
        diag_a, diag_b, opposite, missing = recipe
        midpoint = (np.array(coord[diag_a]) + np.array(coord[diag_b])) / 2
        coord[missing] = (2 * midpoint - coord[opposite]).tolist()
    return coord

def perspective_transform(image, src_pts, out_size=(500, 300)):
    """Warp `image` so that the quadrilateral `src_pts` fills the output frame.

    Args:
        image: source image array as accepted by cv2.warpPerspective.
        src_pts: four (x, y) points ordered top-left, top-right, bottom-right,
            bottom-left. Any array-like is accepted; it is converted to
            float32, which cv2.getPerspectiveTransform requires.
        out_size: (width, height) of the result. Defaults to (500, 300), the
            size that used to be hard-coded.

    Returns:
        The warped image of size `out_size`.
    """
    width, height = out_size
    src = np.asarray(src_pts, dtype=np.float32)
    # Destination corners are listed in the same order as the source corners.
    dst = np.float32([[0, 0], [width, 0], [width, height], [0, height]])
    matrix = cv2.getPerspectiveTransform(src, dst)
    return cv2.warpPerspective(image, matrix, (width, height))

def process_image_corner(img_path):
    """Detect the corners in one image and save a perspective-corrected copy.

    Steps:
      1. Read the image and run `corners_model` on it.
      2. Keep the highest-confidence box for each corner class and take the
         box centre as that corner's position.
      3. With fewer than three corners, write the image to WARP_DIR under its
         original file name and stop. With exactly three, estimate the fourth.
      4. Warp with `perspective_transform` and write `<base>.jpg` to WARP_DIR.

    Args:
        img_path: path of the image to process.

    Returns:
        None. Results are written to disk; problems are reported with print().
    """
    fname = os.path.basename(img_path)
    img_np = cv2.imread(img_path)
    if img_np is None:
        # cv2.imread returns None (it does not raise) for files it cannot
        # decode, e.g. a non-image file picked up by the caller's glob.
        print(f"[{fname}] ⚠️ could not read image — skipped")
        return

    res     = corners_model(img_np, imgsz=640-32, verbose=False)[0]
    boxes   = res.boxes.xyxy.cpu().numpy()
    scores  = res.boxes.conf.cpu().numpy()
    classes = res.boxes.cls.cpu().numpy().astype(int)

    # Select best-confidence box for each corner
    coord_dict = {}
    for cls_id, bbox, conf in zip(classes, boxes, scores):
        corner = class_map_corners.get(int(cls_id))
        if corner is None:
            continue  # class id not mapped to a corner name
        prev = coord_dict.get(corner)
        if prev is None or conf > prev[1]:
            coord_dict[corner] = (bbox, conf)

    # Calculate center points of the selected boxes (xyxy -> centre)
    centers = {
        name: ((b[0]+b[2])/2, (b[1]+b[3])/2)
        for name, (b, _) in coord_dict.items()
    }

    if len(centers) < 3:
        print(f"[{fname}] ⚠️ only {len(centers)}/4 corners — saving original unchanged")
        cv2.imwrite(os.path.join(WARP_DIR, fname), img_np)
        return
    if len(centers) == 3:
        centers = calculate_missed_coord_corner(centers)

    # Warp and save; point order must match perspective_transform's
    # destination order (top-left, top-right, bottom-right, bottom-left).
    src = np.float32([
        centers['top_left'],
        centers['top_right'],
        centers['bottom_right'],
        centers['bottom_left']
    ])
    warp = perspective_transform(img_np, src)
    out_name = os.path.splitext(fname)[0] + ".jpg"
    out_path = os.path.join(WARP_DIR, out_name)
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    cv2.imwrite(out_path, warp)


def process_image_infor(img_path):
    """Detect the text fields in one image and save one crop per field.

    Runs `info_model` on the image, keeps the highest-confidence detection for
    each field named in `class_map_info`, and writes the crop to
    REGION_ROOT/<field>/<base>.jpg.

    Previously every detection of a field was written to the same path, so
    when a field was detected more than once the last box won regardless of
    its confidence. The best box is now selected first, the same way
    `process_image_corner` selects corners.

    Args:
        img_path: path of the image to process.

    Returns:
        None. Crops are written to disk; problems are reported with print().
    """
    fname = os.path.basename(img_path)
    base, _ = os.path.splitext(fname)

    # Decode once and reuse the array for both inference and cropping.
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None (it does not raise) for undecodable files.
        print(f"[{fname}] ⚠️ could not read image — skipped")
        return

    # 1) Run inference
    res     = info_model(img, imgsz=480, conf=0.25, verbose=False)[0]
    boxes   = res.boxes.xyxy.cpu().numpy()
    scores  = res.boxes.conf.cpu().numpy()
    classes = res.boxes.cls.cpu().numpy().astype(int)

    # 2) Keep the most confident box per field
    best = {}
    for cls_id, box, conf in zip(classes, boxes, scores):
        field = class_map_info.get(int(cls_id))
        if field is None:
            continue
        prev = best.get(field)
        if prev is None or conf > prev[1]:
            best[field] = (box, conf)

    # 3) Crop and save
    img_h, img_w = img.shape[:2]
    for field, (box, _) in best.items():
        x1, y1, x2, y2 = box.astype(int)
        # Clamp to the image: a negative index would wrap around when slicing.
        x1, y1 = max(x1, 0), max(y1, 0)
        x2, y2 = min(x2, img_w), min(y2, img_h)
        if x2 <= x1 or y2 <= y1:
            # An empty crop cannot be encoded by cv2.imwrite.
            print(f"[{fname}] ⚠️ empty '{field}' box — skipped")
            continue
        crop = img[y1:y2, x1:x2]

        out_name = f"{base}.jpg"
        out_path = os.path.join(REGION_ROOT, field, out_name)
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        cv2.imwrite(out_path, crop)


def preprocess(img, target_h=15, max_w=128):
    """
    Resize a grayscale image to a fixed-size network input.

    The image is scaled so its height equals target_h (width scaled by the
    same factor), then right-padded with white or right-truncated so its width
    equals max_w.

    Args:
        img: 2-D grayscale image array.
        target_h: output height in pixels.
        max_w: output width in pixels.

    Returns:
        float32 array of shape (target_h, max_w, 1) with values in [0, 1].

    Raises:
        ValueError: if img is None or has a zero-sized dimension.
    """
    if img is None:
        raise ValueError("preprocess() received None instead of an image")
    h, w = img.shape[:2]
    if h == 0 or w == 0:
        raise ValueError(f"preprocess() received an empty image of shape {img.shape}")

    # 1) Scale so height == target_h
    scale = target_h / float(h)
    # int() truncates, so a very narrow crop could come out 0 px wide, which
    # cv2.resize rejects; keep at least one column.
    new_w = max(1, int(w * scale))
    img = cv2.resize(img, (new_w, target_h))

    # 2) Pad or truncate width to max_w
    if new_w < max_w:
        # pad right side with white (255)
        pad = np.full((target_h, max_w - new_w), 255, dtype=img.dtype)
        img = np.concatenate([img, pad], axis=1)
    else:
        # truncate any extras on the right
        img = img[:, :max_w]

    # 3) Normalize to [0,1] and add channel dim
    img = img.astype('float32') / 255.0
    return img[..., np.newaxis]  # shape (target_h, max_w, 1)
# Reverse lookup for the vocabulary: character -> integer label.
char_to_idx = dict(zip(char_list, range(len(char_list))))
def encode_to_labels(txt):
    """Convert `txt` to a list of integer labels using `char_to_idx`.

    Characters that are not in the vocabulary are dropped silently.
    """
    labels = []
    for ch in txt:
        if ch in char_to_idx:
            labels.append(char_to_idx[ch])
    return labels

def predict_texts(model, paths, target_h):
    """Recognise the text in a batch of cropped images.

    Each image is read as grayscale, run through `preprocess`, and the whole
    batch is passed to `model.predict`. The output is decoded with greedy CTC
    decoding and mapped back to characters through `index_to_char`.

    Args:
        model: model whose predict() output is accepted by K.ctc_decode.
        paths: iterable of image file paths.
        target_h: image height passed to `preprocess`.

    Returns:
        (bases, texts): two lists of equal length. `bases` holds each
        successfully read file's name without extension, `texts` the decoded
        string for it. Unreadable files are reported and left out of both.
        Both lists are empty when there is nothing to predict.
    """
    imgs, bases = [], []
    for p in paths:
        raw = cv2.imread(p, cv2.IMREAD_GRAYSCALE)
        if raw is None:
            # cv2.imread returns None (it does not raise) for undecodable files.
            print(f"[{os.path.basename(p)}] ⚠️ could not read crop — skipped")
            continue
        imgs.append(preprocess(raw, target_h=target_h, max_w=128))
        bases.append(os.path.splitext(os.path.basename(p))[0])

    if not imgs:
        # np.stack raises on an empty list; there is nothing to predict anyway.
        return [], []
    batch = np.stack(imgs, axis=0)

    y_pred = model.predict(batch)
    # Every sample uses the full time dimension of the model output.
    input_len = np.full((y_pred.shape[0],), y_pred.shape[1], dtype='int32')
    decoded, _ = K.ctc_decode(y_pred, input_length=input_len, greedy=True)
    decoded = decoded[0].numpy()

    texts = []
    for seq in decoded:
        # Keep only valid vocabulary indices; anything outside that range
        # (such as negative padding values) is ignored.
        s = ''.join(index_to_char[int(i)] for i in seq if 0 <= i < len(char_list))
        texts.append(s)
    return bases, texts

def format_dob(s):
    """Insert '/' after the 2nd and 4th characters of `s`.

    Strings shorter than 8 characters are returned unchanged; any characters
    beyond the 8th are kept at the end of the result.
    """
    if len(s) < 8:
        return s
    first, second, rest = s[:2], s[2:4], s[4:]
    return "/".join((first, second, rest))

In [98]:
def _run_in_pool(worker, paths, max_workers=6):
    """Call `worker(path)` for every path on a thread pool and report failures.

    Returns the number of paths for which the worker raised.
    """
    failures = 0
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        jobs = [(path, executor.submit(worker, path)) for path in paths]
        for path, job in jobs:
            try:
                job.result()
            except Exception as exc:
                # Report per-image failures instead of losing them, and keep
                # processing the remaining images.
                failures += 1
                print(f"[{os.path.basename(path)}] ❌ {worker.__name__} failed: {exc!r}")
    return failures


def predict():
    """Run the whole pipeline and write `predictions.csv`.

    Stages:
      1. `process_image_corner` on every file in IMG_DIR -> WARP_DIR.
      2. `process_image_infor` on every file in WARP_DIR -> REGION_ROOT/<field>.
      3. `predict_texts` on the id and dob crops, merged by image name into
         one row per image.

    Earlier, the stage-1/2 workers were started with `executor.map` without
    reading its results, so any exception raised in a worker was never shown.
    The id and dob predictions were also combined by list position, which
    fails or pairs the wrong values when the two crop folders do not contain
    exactly the same images. Rows are now keyed by image name, and a field
    with no prediction is left empty.

    NOTE(review): a single YOLO instance is shared by all worker threads;
    Ultralytics' thread-safe inference guide recommends one model instance per
    thread — confirm this is safe for these models.
    NOTE(review): the globs below pick up whatever is in WARP_DIR/REGION_ROOT,
    including files left over from earlier runs.
    """
    #detect 4 corners and save warped images
    t1 = time.perf_counter()
    img_paths = sorted(glob.glob(os.path.join(IMG_DIR, "*.*")))
    _run_in_pool(process_image_corner, img_paths)
    print(f"✅ Corners & warp done in {time.perf_counter() - t1:.2f} sec")

    #detect info and save regions
    t2 = time.perf_counter()
    image_paths = sorted(glob.glob(os.path.join(WARP_DIR, "*.*")))
    _run_in_pool(process_image_infor, image_paths)
    print(f"✅ Info detection done in {time.perf_counter() - t2:.2f} sec")

    #Predict ID and DOB and save to CSV
    t3 = time.perf_counter()
    id_paths = sorted(glob.glob(os.path.join(REGION_ROOT, "id","*.jpg")))
    # Skip the model call entirely when a crop folder is empty.
    id_bases, id_preds = (
        predict_texts(predict_id_model, id_paths, target_h=15) if id_paths else ([], [])
    )

    dob_paths = sorted(glob.glob(os.path.join(REGION_ROOT, "dob","*.jpg")))
    dob_bases, dob_preds = (
        predict_texts(predict_dob_model, dob_paths, target_h=20) if dob_paths else ([], [])
    )

    # Join the two result sets on the image name rather than on list position.
    id_by_base = dict(zip(id_bases, id_preds))
    dob_by_base = dict(zip(dob_bases, (format_dob(s) for s in dob_preds)))
    # Keep the id ordering first, then append images that only have a dob crop.
    all_bases = list(id_bases) + [b for b in dob_bases if b not in id_by_base]

    df = pd.DataFrame({
        'image_file': all_bases,
        'id': [id_by_base.get(b, "") for b in all_bases],
        'dob': [dob_by_base.get(b, "") for b in all_bases],
    })
    df.to_csv('predictions.csv', index=False)
    print(f"✅ Predictions done and saved in {time.perf_counter() - t3:.2f} sec")


In [135]:
# Run the full pipeline: warp the images, crop the fields, recognise id/dob
# text, and write predictions.csv to the current working directory.
predict()

✅ Corners & warp done in 0.06 sec
✅ Info detection done in 0.03 sec
✅ Predictions done and saved in 0.10 sec
