In [1]:
import cv2
import numpy as np
import glob
import os, glob, cv2, numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd, string
import os, glob, cv2
import numpy as np
from ultralytics import YOLO


In [2]:
# ── Helper functions ─────────────────────────────────────────────────────────

def find_miss_corner(coord):
    # must match this exact order:
    keys = ['top_left','top_right','bottom_left','bottom_right']
    for i,k in enumerate(keys):
        if k not in coord:
            return i
    return -1

def calculate_missed_coord_corner(coord):
    idx = find_miss_corner(coord)
    # 0 → top_left missing
    if idx == 0:
        m = (np.array(coord['top_right']) + np.array(coord['bottom_left'])) / 2
        coord['top_left'] = (2*m - coord['bottom_right']).tolist()
    # 1 → top_right missing
    elif idx == 1:
        m = (np.array(coord['top_left']) + np.array(coord['bottom_right'])) / 2
        coord['top_right'] = (2*m - coord['bottom_left']).tolist()
    # 2 → bottom_left missing
    elif idx == 2:
        m = (np.array(coord['top_left']) + np.array(coord['bottom_right'])) / 2
        coord['bottom_left'] = (2*m - coord['top_right']).tolist()
    # 3 → bottom_right missing
    elif idx == 3:
        m = (np.array(coord['bottom_left']) + np.array(coord['top_right'])) / 2
        coord['bottom_right'] = (2*m - coord['top_left']).tolist()
    return coord

def perspective_transform(image, src_pts):
    dst_pts = np.float32([[0,0],[500,0],[500,300],[0,300]])
    M = cv2.getPerspectiveTransform(src_pts, dst_pts)
    return cv2.warpPerspective(image, M, (500,300))

In [None]:
corners_model = YOLO("detect_corners_n.pt")

In [None]:
import glob, os, cv2
import numpy as np
from concurrent.futures import ThreadPoolExecutor

IMG_DIR  = "testset1"
PREV_DIR = os.path.join(IMG_DIR, "4_corners")
WARP_DIR = os.path.join(IMG_DIR, "warped")

os.makedirs(PREV_DIR, exist_ok=True)
os.makedirs(WARP_DIR, exist_ok=True)

class_map = {
    2: 'top_left',
    3: 'top_right',
    1: 'bottom_right',
    0: 'bottom_left'
}

def process_image(img_path):
    
    fname = os.path.basename(img_path)
    img_np = cv2.imread(img_path)

    res     = corners_model(img_np, imgsz=640-32, verbose=False)[0]
    boxes   = res.boxes.xyxy.cpu().numpy()
    scores  = res.boxes.conf.cpu().numpy()
    classes = res.boxes.cls.cpu().numpy().astype(int)

    # Save annotated preview
    annotated = res.plot()
    cv2.imwrite(os.path.join(PREV_DIR, fname), annotated)

    # Select best-confidence box for each corner
    coord_dict = {}
    for cls_id, bbox, conf in zip(classes, boxes, scores):
        corner = class_map.get(int(cls_id))
        if not corner:
            continue
        prev = coord_dict.get(corner)
        if prev is None or conf > prev[1]:
            coord_dict[corner] = (bbox, conf)

    # Calculate center points
    centers = {
        name: ((b[0]+b[2])/2, (b[1]+b[3])/2)
        for name,(b,_) in coord_dict.items()
    }

    if len(centers) < 3:
        print(f"[{fname}] ⚠️ only {len(centers)}/4 corners — saving original unchanged")
        count+=1
        cv2.imwrite(os.path.join(WARP_DIR, fname), img_np)
        return
    if len(centers) == 3:
        centers = calculate_missed_coord_corner(centers)

    # Warp and save
    src = np.float32([
        centers['top_left'],
        centers['top_right'],
        centers['bottom_right'],
        centers['bottom_left']
    ])
    warp = perspective_transform(img_np, src)
    out_name = os.path.splitext(fname)[0] + ".jpg"
    cv2.imwrite(os.path.join(WARP_DIR, out_name), warp)

# Run in parallel (4 threads)
img_paths = sorted(glob.glob(os.path.join(IMG_DIR, "*.*")))
with ThreadPoolExecutor(max_workers=4) as executor:
    executor.map(process_image, img_paths)

print("✅ All images processed.")

[img1278.jpg] ⚠️ only 2/4 corners — saving original unchanged
[img1280.jpg] ⚠️ only 2/4 corners — saving original unchanged
[img1283.jpg] ⚠️ only 2/4 corners — saving original unchanged
[img1287.jpg] ⚠️ only 2/4 corners — saving original unchanged
[img1294.jpg] ⚠️ only 2/4 corners — saving original unchanged
[img1297.jpg] ⚠️ only 0/4 corners — saving original unchanged
[img1299.jpg] ⚠️ only 2/4 corners — saving original unchanged
[img1333.jpg] ⚠️ only 2/4 corners — saving original unchanged
[img1375.jpg] ⚠️ only 0/4 corners — saving original unchanged
[img1621.jpg] ⚠️ only 2/4 corners — saving original unchanged
[img932.jpg] ⚠️ only 2/4 corners — saving original unchanged
✅ All images processed.


In [214]:
# 1) Load your info-detection model
info_model = YOLO("detect_info_n.pt")

# 2) Folders
WARP_DIR     = "testset1/warped"
REGION_ROOT  = "testset1/regions"
os.makedirs(REGION_ROOT, exist_ok=True)

# 3) Map model class IDs → field names
#    adjust these IDs to whatever your model uses
class_map_info = {
    2: "name",
    1: "id",
    0: "dob"
}

# Create one subfolder per field
for field in class_map_info.values():
    os.makedirs(os.path.join(REGION_ROOT, field), exist_ok=True)

In [216]:
def process_image(img_path):
    fname = os.path.basename(img_path)
    base, ext = os.path.splitext(fname)

    # 1) Run inference
    res     = info_model(img_path, imgsz=480, conf=0.25, verbose=False)[0]
    boxes   = res.boxes.xyxy.cpu().numpy()
    classes = res.boxes.cls.cpu().numpy().astype(int)

    # 2) For each detection, crop and save
    img = cv2.imread(img_path)
    for box, cls_id in zip(boxes, classes):
        field = class_map_info.get(int(cls_id))
        if field is None:
            continue

        x1, y1, x2, y2 = box.astype(int)
        crop = img[y1:y2, x1:x2]

        out_name = f"{base}.jpg"
        out_path = os.path.join(REGION_ROOT, field, out_name)
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        cv2.imwrite(out_path, crop)

# Use thread pool to process multiple images concurrently
image_paths = sorted(glob.glob(os.path.join(WARP_DIR, "*.*")))

with ThreadPoolExecutor(max_workers=6) as executor:
    executor.map(process_image, image_paths)

print("\n✅ Done! Cropped field images in:", REGION_ROOT)



✅ Done! Cropped field images in: testset1/regions


In [226]:
import cv2
import numpy as np

def preprocess(img, target_h=15, max_w=128):
    """
    Resize grayscale img so its height == target_h, then
    scale width, and pad or truncate to max_w. Returns
    a float32 array shape (target_h, max_w, 1) in [0,1].
    """
    h, w = img.shape[:2]

    # 1) Scale so height == target_h
    scale = target_h / float(h)
    new_w = int(w * scale)
    img = cv2.resize(img, (new_w, target_h))

    # 2) Pad or truncate width to max_w
    if new_w < max_w:
        # pad right side with white (255)
        pad = np.ones((target_h, max_w - new_w), dtype=img.dtype) * 255
        img = np.concatenate([img, pad], axis=1)
    else:
        # truncate any extras on the right
        img = img[:, :max_w]

    # 3) Normalize to [0,1] and add channel dim
    img = img.astype('float32') / 255.0
    return img[..., np.newaxis]  # shape (32, max_w, 1)

# Vocabulary & encoder
char_list   = string.ascii_letters + string.digits
blank_index = len(char_list)
char_to_idx = {c:i for i,c in enumerate(char_list)}

def encode_to_labels(txt):
    return [char_to_idx[c] for c in txt if c in char_to_idx]


In [254]:
import glob
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
import matplotlib.pyplot as plt

In [256]:
# 2) Now load via standalone Keras
import keras
model = tf.keras.models.load_model('model_id_1.keras', compile=False, safe_mode=False)

In [243]:
import glob
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
import matplotlib.pyplot as plt

# 3) You can now do inference as usual
#    e.g. y_pred = model.predict(batch_of_images)


# 3) Prepare your mappings
BLANK = len(char_list)
index_to_char = {i: c for i, c in enumerate(char_list)}

# 4) Load + preprocess all validation images
valid_paths = sorted(glob.glob('testset1/regions/id/*.jpg'))
imgs, bases = [], []
for p in valid_paths:
    raw = cv2.imread(p, cv2.IMREAD_GRAYSCALE)
    img = preprocess(raw, target_h=15, max_w=128)   # → (15, w, 1)
    imgs.append(img)
    bases.append(os.path.splitext(os.path.basename(p))[0])
imgs = np.stack(imgs, axis=0)    # → (N_valid, 15, w, 1)

# 5) Predict the per-timestep softmax
y_pred = model.predict(imgs)  
#    shape = (N_valid, time_steps, num_classes)

# 6) Build input lengths for CTC
input_len = np.ones((y_pred.shape[0],), dtype='int32') * y_pred.shape[1]

# 7) Greedy CTC decode
decoded, _ = K.ctc_decode(    
    y_pred,
    input_length=input_len,
    greedy=True
)
decoded = decoded[0].numpy()     # → (N_valid, ≤time_steps)

# 8) Convert integer sequences back to strings
pred_texts = []

for seq in decoded:
    chars = [index_to_char[i] for i in seq if 0 <= i < len(char_list)]
    pred_texts.append(''.join(chars))

# for path, base, pred in zip(valid_paths, bases, pred_texts):
#     img = cv2.imread(path)
#     rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    
#     plt.figure()
#     plt.imshow(rgb)              # display as grayscale
#     plt.title(f"Predicted: {pred}")
#     plt.axis('off')
#     plt.show()

print(pred_texts)


['052201008654', '094204000594', '049300009355', '066203002942', '042203008521', '048203006839', '034203011524', '082203007927', '049300009355', '058304002184', '033206007176', '086203000075', '058304000867', '082203000690', '056201009956', '075203019088', '075178009364', '095202002441', '033206007176', '024202006474', '056205011350', '094204000594', '08620300075', '049303000052', '066203002942', '080203009952', '042205007349', '060203003122', '080203009952', '022190002155', '024202006474', '067305005161', '022206004066', '075303000545', '067305005161', '058205000955', '054187010523', '075303000545', '079206032383', '079203034457', '079308045547', '079203021222', '022206004066', '086203009857', '042168010024', '049203011774', '082203019158', '089203011696', '042203008521', '040203008081', '048304006054', '033089011981', '089203011672', '079089000970', '079203030140', '067203000435', '082203000690', '079203021222', '075087004519', '075203019088', '042168010024', '075203019088', '0262050

In [246]:
import tensorflow as tf
from tensorflow.keras import backend as K

# 1.1) Enable XLA
tf.config.optimizer.set_jit(True)

# 1.2) Wrap your model call + CTC‐decode in a tf.function
@tf.function(input_signature=[tf.TensorSpec([None,20,128,1], tf.float32)])
def infer_and_decode_dob(batch_images):
    # batch_images: [B,15,128,1]
    y = model(batch_images, training=False)  # (B, T, C)
    # build a length vector [T,T,...]
    lengths = tf.fill([tf.shape(y)[0]], tf.shape(y)[1])
    decoded, _ = K.ctc_decode(y, input_length=lengths, greedy=True)
    return decoded[0]   # shape: (B, ≤T)

In [250]:
import glob
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
import matplotlib.pyplot as plt

# 2) Now load via standalone Keras
import keras
model = tf.keras.models.load_model('dob_model_1_.keras', compile=False, safe_mode=False)

# 3) You can now do inference as usual
#    e.g. y_pred = model.predict(batch_of_images)


# 3) Prepare your mappings
BLANK = len(char_list)
index_to_char = {i: c for i, c in enumerate(char_list)}

# 4) Load + preprocess all validation images
valid_paths = sorted(glob.glob('testset1/regions/dob/*.jpg'))
imgs, bases = [], []
for p in valid_paths:
    raw = cv2.imread(p, cv2.IMREAD_GRAYSCALE)
    img = preprocess(raw, target_h=20, max_w=128)   # → (15, w, 1)
    imgs.append(img)
    bases.append(os.path.splitext(os.path.basename(p))[0])
imgs = np.stack(imgs, axis=0)    # → (N_valid, 15, w, 1)

# 5) Predict the per-timestep softmax
batch_size = 13
all_seqs = []
for i in range(0, len(imgs), batch_size):
    batch = imgs[i:i+batch_size].astype('float32')
    seqs = infer_and_decode_dob(tf.constant(batch)).numpy()  # one graph launch
    all_seqs.extend(seqs)  
#    shape = (N_valid, time_steps, num_classes)

    # → (N_valid, ≤time_steps)

# 8) Convert integer sequences back to strings
pred_texts = []

for seq in all_seqs:
    # turn indices → raw digit string
    s = ''.join(index_to_char[i] for i in seq if 0 <= i < len(char_list))

    # if we have at least 8 digits, format the DOB portion
    if len(s) >= 8:
        dd, mm, yyyy = s[:2], s[2:4], s[4:8]
        rest = s[8:]   # any extra trailing digits
        s = f"{dd}/{mm}/{yyyy}{rest}"

    pred_texts.append(s)

print(pred_texts)


['24/03/2001', '05/09/2004', '14/05/2000', '09/12/2003', '10/07/2003', '13/04/2003', '27/02/2003', '23/08/2003', '14/05/2000', '31/10/2004', '19/06/2006', '21/11/2003', '28/10/2004', '26/05/2003', '20/06/2001', '17/11/2003', '20/10/1978', '17/11/2002', '19/06/2006', '07/03/2002', '25/11/2005', '05/09/2004', '21/11/2003', '17/04/2003', '09/12/2003', '27/10/2003', '29/07/2005', '28/05/2003', '27/10/2003', '23/05/1990', '07/03/2002', '04/03/2005', '03/12/2006', '19/03/2003', '04/03/2005', '20/70/62005', '10/06/1987', '19/03/2003', '26/08/2006', '22/02/2003', '08/05/2008', '03/10/2003', '03/12/2006', '9122003', '09/09/1968', '27/10/2003', '22/11/2003', '25/11/2003', '10/07/2003', '05/02/2003', '20/11/2004', '12/07/1989', '02/01/02003', '01/10/1989', '14/04/2003', '30/04/2003', '26/05/2003', '03/10/2003', '03/11/1987', '17/11/2003', '09/09/1968', '17/11/2003', '23/08/2005', '15/11/2003', '21/10/2003', '10/07/2003', '14/04/2003', '04/12/2003', '14/05/2003', '0712003', '02/01/2003', '22/03/20