
# Answer Sheet — Group by Question Number **with Horizontal Line Stops** (EasyOCR, EXIF-safe)

Improved grouping:
- Detects **question numbers** in the **left strip** and starts a new group.
- Also detects **horizontal separator lines**. If a line appears after a question, that line **ends the current group**, even if the next question number is farther down.
- Transparent overlay; EXIF-aware; no page warp by default.

Outputs: `debug_overlay.png`, `crops/word_####.png`, `words.json`, `groups_index.json`, `groups/Q###/` folders.



## Install (run locally)
```bash
pip install easyocr opencv-python numpy pillow
# (Optional GPU) install torch+CUDA first, then: pip install easyocr
```


In [8]:

from pathlib import Path

# --- Paths ---
# INPUT_PATH is the scanned answer sheet to process; OUTPUT_DIR receives every
# artifact the notebook writes (overlay, crops, JSON indexes, group folders).
INPUT_PATH = Path(r"E:\EvaluationAI\Dataset\29.jpg")
OUTPUT_DIR = Path(r"E:\EvaluationAI\autoevalaioutputs5")
# Created up front so the directory exists before any cell writes into it.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# --- Orientation & alignment ---
# RESPECT_EXIF: when True, ensure_img loads through PIL and applies the EXIF
# orientation; when False it falls back to a plain cv2.imread.
RESPECT_EXIF       = True
# NOTE(review): ALIGN_TO_A4 and A4_SIZE_PX are not referenced by the code
# visible in this notebook; presumably reserved for an optional page warp
# (2480x3508 looks like A4 at 300 DPI) -- confirm before relying on them.
ALIGN_TO_A4        = False
A4_SIZE_PX         = (2480, 3508)

# --- Detection (EasyOCR/CRAFT) ---
USE_GPU            = False  # forwarded to easyocr.Reader(gpu=...)
# The next four values are forwarded to reader.readtext() as
# text_threshold, link_threshold, low_text and min_size respectively.
TEXT_THRESHOLD     = 0.7
LINK_THRESHOLD     = 0.4
LOW_TEXT           = 0.4
MIN_SIZE           = 5
WORD_PAD           = 2      # NOTE(review): not referenced by the code visible here
MIN_AREA_PX        = 30     # word boxes with a smaller bbox area are skipped

# --- Left strip recognition (anchors) ---
LEFT_STRIP_R_INIT  = 0.32   # scan up to 32% width for numbers
ANCHOR_CONF_MIN    = 0.32   # OCR results below this confidence are not used as anchors
# Regexes describing a question-number token, e.g. "12", "(3)", "4.", "Q5)".
ANCHOR_REGEXES     = [
    r"^\(?\d{1,2}\)?$",
    r"^\(?\d{1,2}\)?[.)]$",
    r"^[Qq]\s*\d{1,2}[.)]?$",
]

# --- Grouping / border ---
LEFT_PAD_PX        = 12     # added to the right of the anchors when placing the vertical border
TOP_PAD            = 6      # pixels added below a band's upper boundary
BOTTOM_PAD         = 6      # pixels removed above a band's lower boundary

# --- Horizontal line detection ---
# These three are the default arguments of detect_horizontal_separators.
MIN_LINE_REL_LEN   = 0.55   # line must span >=55% of width
MAX_LINE_REL_THK   = 0.02   # thickness <=2% of height
MERGE_LINE_TOL_PX  = 12     # merge nearby lines within 12px

# Echo the resolved paths so the run is easy to audit from the cell output.
print("INPUT_PATH:", INPUT_PATH)
print("OUTPUT_DIR:", OUTPUT_DIR)


INPUT_PATH: E:\EvaluationAI\Dataset\29.jpg
OUTPUT_DIR: E:\EvaluationAI\autoevalaioutputs5


In [9]:

import os, json, math, re
from typing import List, Dict, Any, Tuple, Optional
import cv2
import numpy as np
from PIL import Image, ImageOps

def read_image_exif_bgr(path: Path) -> np.ndarray:
    """Load an image with its EXIF orientation applied and return it as BGR.

    Args:
        path: Location of the image file on disk.

    Returns:
        The upright image as an array in OpenCV's BGR channel order.

    Raises:
        Whatever ``PIL.Image.open`` raises when the file is missing or is not
        a readable image.
    """
    # Image.open is lazy and keeps the file handle open; the context manager
    # releases it deterministically, even if decoding fails part-way.
    with Image.open(path) as im:
        upright = ImageOps.exif_transpose(im)
        # Materialise the pixel data while the source file is still open.
        rgb = np.array(upright.convert("RGB"))
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

def ensure_img(path: Path) -> np.ndarray:
    """Load the image at *path* as a BGR array.

    When ``RESPECT_EXIF`` is set the EXIF-aware loader is used; otherwise the
    file is read directly with OpenCV.

    Raises:
        FileNotFoundError: If the OpenCV path is taken and ``cv2.imread``
            cannot load the file.
    """
    if not RESPECT_EXIF:
        loaded = cv2.imread(str(path))
        # cv2.imread signals failure by returning None instead of raising.
        if loaded is None:
            raise FileNotFoundError(f"Could not load image: {path}")
        return loaded
    return read_image_exif_bgr(path)

def save_image(img_bgr: np.ndarray, name: str) -> Path:
    """Write *img_bgr* to ``OUTPUT_DIR / name`` and return that path.

    Missing parent folders are created first.
    """
    target = OUTPUT_DIR.joinpath(name)
    folder = target.parent
    folder.mkdir(parents=True, exist_ok=True)
    cv2.imwrite(str(target), img_bgr)
    return target

def save_json(obj: Any, name: str) -> Path:
    """Serialise *obj* as indented UTF-8 JSON at ``OUTPUT_DIR / name``.

    Missing parent folders are created first. Non-ASCII characters are
    written verbatim rather than escaped. Returns the path written.
    """
    destination = OUTPUT_DIR.joinpath(name)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open("w", encoding="utf-8") as handle:
        json.dump(obj, handle, indent=2, ensure_ascii=False)
    return destination

def poly_to_bbox(poly: np.ndarray) -> Tuple[int,int,int,int]:
    """Return the integer axis-aligned box ``(x0, y0, x1, y1)`` enclosing *poly*.

    Column 0 of *poly* is read as x and column 1 as y. Minima are floored and
    maxima are ceiled so the box never cuts into the polygon.
    """
    columns = (poly[:, 0], poly[:, 1])
    left, top = (int(np.floor(col.min())) for col in columns)
    right, bottom = (int(np.ceil(col.max())) for col in columns)
    return left, top, right, bottom

def save_crop(img: np.ndarray, bbox_xyxy, path: Path):
    """Write the region of *img* covered by *bbox_xyxy* to *path*.

    Args:
        img: Source image; only its first two dimensions (rows, columns) are
            used for clamping.
        bbox_xyxy: ``(x0, y0, x1, y1)`` box. It is clamped to the image, and
            ``x1`` / ``y1`` are treated as exclusive slice ends.
        path: Destination file; missing parent folders are created.

    Nothing is written when the clamped box is empty.
    """
    x0, y0, x1, y1 = bbox_xyxy
    h, w = img.shape[:2]
    x0, y0 = max(0, x0), max(0, y0)
    # Slice ends are exclusive, so the far edges clamp to w / h. Clamping to
    # w-1 / h-1 would drop the last column/row of any box touching the edge.
    x1, y1 = min(w, x1), min(h, y1)
    if x1 > x0 and y1 > y0:
        path.parent.mkdir(parents=True, exist_ok=True)
        cv2.imwrite(str(path), img[y0:y1, x0:x1])


### Transparent overlay helpers

In [10]:

def draw_transparent_overlay(base_bgr, word_polys, anchor_boxes, x_border, sep_lines):
    """Return a copy of *base_bgr* with semi-transparent debug annotations.

    Args:
        base_bgr: Image to annotate; it is not modified.
        word_polys: Word outlines, each an array of points.
        anchor_boxes: ``(x0, y0, x1, y1, text, conf)`` tuples; the text is
            drawn just above its box.
        x_border: x position of the vertical border line.
        sep_lines: y positions of the horizontal separator lines.
    """
    height, width = base_bgr.shape[:2]
    blended = base_bgr.copy()
    drawing = base_bgr.copy()

    # Colours are in BGR order.
    green = (0, 255, 0)
    azure = (255, 128, 0)
    cyan = (255, 255, 0)
    orange = (0, 165, 255)

    # Word outlines.
    for outline in word_polys:
        cv2.polylines(drawing, [outline.astype(np.int32)], True, green, 2, lineType=cv2.LINE_AA)

    # Anchor boxes with their recognised text; the confidence is not drawn.
    for (left, top, right, bottom, label, _conf) in anchor_boxes:
        cv2.rectangle(drawing, (left, top), (right, bottom), azure, 1, lineType=cv2.LINE_AA)
        label_origin = (left, max(0, top - 5))
        cv2.putText(drawing, label, label_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, azure, 1, cv2.LINE_AA)

    # Horizontal separators spanning the full width.
    for sep_y in sep_lines:
        cv2.line(drawing, (0, sep_y), (width - 1, sep_y), cyan, 2, lineType=cv2.LINE_AA)

    # Vertical border spanning the full height.
    cv2.line(drawing, (x_border, 0), (x_border, height - 1), orange, 2, lineType=cv2.LINE_AA)

    # Blend the annotated layer over the untouched copy so the page stays readable.
    alpha = 0.55
    cv2.addWeighted(drawing, alpha, blended, 1 - alpha, 0, blended)
    return blended


### EasyOCR detector (boxes) + left-strip recognition

In [11]:

import easyocr

# Cached EasyOCR reader; built on the first get_reader() call.
_reader = None


def get_reader():
    """Return the shared English EasyOCR reader, creating it on first use."""
    global _reader
    if _reader is not None:
        return _reader
    _reader = easyocr.Reader(['en'], gpu=USE_GPU, verbose=False)
    return _reader

def detect_words(img_bgr: np.ndarray) -> List[np.ndarray]:
    """Run EasyOCR on the whole image and return the detected text polygons.

    Only the geometry of each result is kept. Polygons that are not
    two-dimensional or have fewer than four points are discarded.
    """
    ocr = get_reader()
    detections = ocr.readtext(
        cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB),
        detail=1,
        paragraph=False,
        min_size=MIN_SIZE,
        text_threshold=TEXT_THRESHOLD,
        low_text=LOW_TEXT,
        link_threshold=LINK_THRESHOLD
    )
    # The first field of every detection is its outline.
    outlines = (np.array(det[0], dtype=np.float32) for det in detections)
    return [quad for quad in outlines if quad.ndim == 2 and quad.shape[0] >= 4]

def find_number_like(text: str, patterns: Optional[List[str]] = None) -> bool:
    """Tell whether *text* looks like a question number.

    Args:
        text: Recognised token; surrounding whitespace is ignored.
        patterns: Regexes tried in order with ``re.match``. When omitted, the
            notebook's ``ANCHOR_REGEXES`` configuration is used, so editing
            that setting actually changes what counts as an anchor.

    Returns:
        True if any pattern matches the stripped text, otherwise False.
    """
    if patterns is None:
        # Read the shared configuration instead of a private copy of it.
        patterns = ANCHOR_REGEXES
    candidate = text.strip()
    return any(re.match(pattern, candidate) for pattern in patterns)

def recognize_left_strip(img_bgr: np.ndarray, xr: float) -> List[Tuple[int,int,int,int,str,float]]:
    """OCR the left part of the page and return the question-number boxes.

    Args:
        img_bgr: Full page image in BGR order.
        xr: Fraction of the image width to scan, measured from the left edge.

    Returns:
        ``(x0, y0, x1, y1, text, conf)`` tuples for every result that reaches
        ``ANCHOR_CONF_MIN`` and is accepted by ``find_number_like``. The strip
        starts at x = 0, so the coordinates are also valid in the full image.
    """
    _, width = img_bgr.shape[:2]
    # Keep the strip between 1 pixel wide and one pixel short of the full width.
    strip_right = int(max(1, min(width - 1, xr * width)))
    strip = img_bgr[:, :strip_right].copy()
    ocr = get_reader()
    strip_rgb = cv2.cvtColor(strip, cv2.COLOR_BGR2RGB)
    detections = ocr.readtext(
        strip_rgb,
        detail=1,
        paragraph=False,
        min_size=MIN_SIZE,
        text_threshold=TEXT_THRESHOLD,
        low_text=LOW_TEXT,
        link_threshold=LINK_THRESHOLD
    )

    anchors = []
    for quad, label, score in detections:
        # Reject weak reads first, then anything that is not number-like.
        if score < ANCHOR_CONF_MIN or not find_number_like(label):
            continue
        left, top, right, bottom = poly_to_bbox(np.array(quad, dtype=np.float32))
        anchors.append((left, top, right, bottom, label, float(score)))
    return anchors


### Horizontal separator line detection

In [12]:

def detect_horizontal_separators(img_bgr: np.ndarray,
                                 min_rel_len=MIN_LINE_REL_LEN,
                                 max_rel_thk=MAX_LINE_REL_THK,
                                 merge_tol=MERGE_LINE_TOL_PX) -> List[int]:
    """Find long horizontal ruling lines and return their y positions.

    Args:
        img_bgr: Page image in BGR order.
        min_rel_len: Minimum line length as a fraction of the image width.
        max_rel_thk: Maximum line thickness as a fraction of the image height.
        merge_tol: Detections at most this many pixels apart are merged.

    Returns:
        Ascending list of y coordinates, or an empty list if nothing is found.
    """
    blurred = cv2.GaussianBlur(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY), (5,5), 0)
    # Inverted adaptive threshold: ink becomes white on a black background.
    binary = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                   cv2.THRESH_BINARY_INV, 35, 10)
    H, W = binary.shape[:2]

    # Opening with a wide one-pixel-high kernel keeps only long horizontal
    # runs; the small dilation afterwards thickens what is left.
    kernel_len = max(30, int(W * 0.35))
    horizontal = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_len, 1))
    line_mask = cv2.morphologyEx(binary, cv2.MORPH_OPEN, horizontal, iterations=1)
    line_mask = cv2.dilate(line_mask, np.ones((3,3), np.uint8), iterations=1)

    # [-2] selects the contour list whether findContours returns 2 or 3 values.
    contours = cv2.findContours(line_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    candidates = [
        int(y + h/2)
        for (x, y, w, h) in map(cv2.boundingRect, contours)
        if w >= min_rel_len * W and h <= max_rel_thk * H
    ]

    if not candidates:
        # Fallback: probabilistic Hough transform on Canny edges.
        edges = cv2.Canny(blurred, 50, 150, apertureSize=3)
        segments = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=160,
                                   minLineLength=int(min_rel_len*W),
                                   maxLineGap=12)
        if segments is not None:
            for seg in segments[:,0]:
                x1, y1, x2, y2 = seg.tolist()
                # Keep only segments that are close to horizontal.
                if abs(y1 - y2) <= int(max_rel_thk * H):
                    candidates.append(int((y1+y2)/2))

    # Collapse neighbouring detections into a single averaged position.
    merged = []
    for y in sorted(candidates):
        if merged and abs(y - merged[-1]) <= merge_tol:
            merged[-1] = int((merged[-1] + y)/2)
        else:
            merged.append(y)
    return merged


## Pipeline: detect → left numbers → separators → border → grouping

In [13]:

def _collect_group_words(words, grp_dir, x_border, y_lo, y_hi):
    """Copy the crops of words inside one band into *grp_dir* and return their records.

    A word belongs to the band when its centre lies right of *x_border* and
    its centre y is within ``[y_lo, y_hi]``. Each returned record is the word
    dict plus a ``group_path`` entry pointing at the copied crop.
    """
    picked = []
    for w in words:
        cx, cy = w["center"]
        if cx > x_border and y_lo <= cy <= y_hi:
            src = Path(w["crop_path"])
            dst = grp_dir / src.name
            im = cv2.imread(str(src))
            # The crop may be missing (save_crop skips empty boxes); the word
            # is still listed, only the copy is skipped.
            if im is not None:
                cv2.imwrite(str(dst), im)
            picked.append({**w, "group_path": str(dst.as_posix())})
    return picked


def run_pipeline():
    """Run detection, anchoring, separator detection and grouping on INPUT_PATH.

    Steps: detect word boxes, read question numbers in the left strip, find
    horizontal separator lines, place the vertical border, then assign words
    to one group per question number. Without any question number, the bands
    between separators are used instead.

    Writes ``crops/word_####.png``, ``debug_overlay.png``, ``groups/Q###/``,
    ``words.json`` and ``groups_index.json`` under ``OUTPUT_DIR`` and prints a
    short summary. Returns nothing.
    """
    img = ensure_img(INPUT_PATH)
    H, W = img.shape[:2]

    # 1) Word polygons
    polys = detect_words(img)

    # 2) Left-strip numbers (anchors), ordered top to bottom by box centre
    anchors_raw = recognize_left_strip(img, LEFT_STRIP_R_INIT)
    anchors_raw.sort(key=lambda t: (t[1]+t[3]) / 2.0)

    # Dedupe anchors whose centres are less than 18px apart vertically,
    # keeping whichever box has the larger area.
    deduped = []
    for a in anchors_raw:
        cy = (a[1]+a[3]) / 2.0
        if deduped and abs(((deduped[-1][1]+deduped[-1][3])/2.0) - cy) < 18:
            prev = deduped[-1]
            prev_area = (prev[2]-prev[0])*(prev[3]-prev[1])
            curr_area = (a[2]-a[0])*(a[3]-a[1])
            if curr_area > prev_area:
                deduped[-1] = a
        else:
            deduped.append(a)
    anchors_raw = deduped

    # 3) All word records (+ crops); boxes below MIN_AREA_PX are skipped
    words = []
    crops_dir = OUTPUT_DIR / "crops"
    idx = 1
    for poly in polys:
        x0,y0,x1,y1 = poly_to_bbox(poly)
        if (x1-x0)*(y1-y0) < MIN_AREA_PX:
            continue
        cx = (x0+x1)/2.0; cy = (y0+y1)/2.0
        fname = f"word_{idx:04d}.png"
        save_crop(img, (x0,y0,x1,y1), crops_dir / fname)
        words.append({
            "id": idx,
            "poly": poly.round(2).tolist(),
            "bbox_xyxy": [int(x0),int(y0),int(x1),int(y1)],
            "center": [float(cx), float(cy)],
            "size": [int(x1-x0), int(y1-y0)],
            "crop_path": str((crops_dir / fname).as_posix())
        })
        idx += 1

    # 4) Horizontal separator lines
    seps = detect_horizontal_separators(img)

    # 5) Border: to the right of question numbers if present; else safe heuristic
    if anchors_raw:
        right_edges = [a[2] for a in anchors_raw]
        x_border = int(np.percentile(np.array(right_edges), 95) + LEFT_PAD_PX)
        # Keep the border between 8% and 50% of the page width.
        x_border = min(max(x_border, int(0.08*W)), int(0.5*W))
    else:
        xs = [w["center"][0] for w in words if w["center"][0] <= 0.5*W]
        x_border = int((np.percentile(xs, 95) if xs else 0.28*W) + LEFT_PAD_PX)

    # 6) Build anchor structs
    anchors = []
    for (x0,y0,x1,y1,text,conf) in anchors_raw:
        cx = (x0+x1)/2.0; cy = (y0+y1)/2.0
        anchors.append({"bbox_xyxy":[x0,y0,x1,y1], "center":[float(cx), float(cy)], "text":text, "conf":conf})
    anchors.sort(key=lambda a: a["center"][1])

    # 7) Overlay
    overlay = draw_transparent_overlay(
        img,
        [np.array(w["poly"], dtype=np.float32) for w in words],
        [(a["bbox_xyxy"][0],a["bbox_xyxy"][1],a["bbox_xyxy"][2],a["bbox_xyxy"][3], a["text"], a["conf"]) for a in anchors],
        x_border,
        seps
    )
    save_image(overlay, "debug_overlay.png")

    # 8) Group using separators as hard stops
    groups = []
    prev_bottom = 0
    last_index = len(anchors) - 1
    for i, anc in enumerate(anchors):
        a_y = int(anc["center"][1])

        # End boundary: the earliest of page bottom, midpoint to the next
        # anchor, and the first separator below this anchor.
        limits = [H-1]
        if i < last_index:
            next_a_y = int(anchors[i+1]["center"][1])
            limits.append(int((a_y + next_a_y)/2))
        # For the last anchor there is no next anchor, so no midpoint applies.
        # Using a midpoint to the page bottom would cut the final group short
        # and leave the words below it in no group at all.
        sep_candidates = [y for y in seps if y > a_y + 8]   # a small offset
        if sep_candidates:
            limits.append(min(sep_candidates))
        y_end = min(limits)

        # Start boundary is the previous group's end (with padding)
        y_start = max(0, prev_bottom + TOP_PAD)
        y_end   = max(y_start+1, y_end - BOTTOM_PAD)  # ensure increasing

        grp_dir = OUTPUT_DIR / "groups" / f"Q{i+1:03d}"
        grp_dir.mkdir(parents=True, exist_ok=True)

        ax0,ay0,ax1,ay1 = anc["bbox_xyxy"]
        save_crop(img, (ax0,ay0,ax1,ay1), grp_dir / "anchor.png")

        pack = {"group": f"Q{i+1:03d}", "y_range": [int(y_start), int(y_end)],
                "anchor_text": anc["text"], "anchor_conf": anc["conf"],
                "anchor_box": anc["bbox_xyxy"],
                "words": _collect_group_words(words, grp_dir, x_border, y_start, y_end)}
        groups.append(pack)
        prev_bottom = y_end

    if not anchors:
        # Fallback: one band per gap between consecutive separators
        bands = [0] + seps + [H-1]
        for bi, (band_top, band_bottom) in enumerate(zip(bands, bands[1:])):
            y0 = band_top + TOP_PAD
            y1 = band_bottom - BOTTOM_PAD
            # Skipped bands still consume a number, so names follow band index.
            if y1 <= y0:
                continue
            grp_dir = OUTPUT_DIR / "groups" / f"Q{bi+1:03d}"
            grp_dir.mkdir(parents=True, exist_ok=True)
            pack = {"group": f"Q{bi+1:03d}", "y_range": [int(y0), int(y1)],
                    "anchor_text": None, "anchor_conf": None, "anchor_box": None,
                    "words": _collect_group_words(words, grp_dir, x_border, y0, y1)}
            groups.append(pack)

    save_json({"border_x": x_border, "width": W, "height": H, "words": words}, "words.json")
    save_json({"anchors": anchors, "separators": seps, "groups": groups}, "groups_index.json")

    print(f"Border x = {x_border} | anchors={len(anchors)} | separators={len(seps)} | words={len(words)}")
    for g in groups:
        print(f"{g['group']} | y_range={g['y_range']} | anchor={g.get('anchor_text')} | words={len(g['words'])}")

# Execute the pipeline once; its printed summary becomes this cell's output.
run_pipeline()


Border x = 1281 | anchors=0 | separators=1 | words=88
Q001 | y_range=[6, 1165] | anchor=None | words=13
Q002 | y_range=[1177, 4009] | anchor=None | words=37


## Inspect outputs

In [14]:

from pprint import pprint
# Gather every file under OUTPUT_DIR (recursively) as POSIX-style strings,
# show the first 60 in sorted order, then report the total count.
files = [p.as_posix() for p in OUTPUT_DIR.rglob("*") if p.is_file()]
files.sort()
pprint(files[:60])
print("... total files:", len(files))


['E:/EvaluationAI/autoevalaioutputs5/crops/word_0001.png',
 'E:/EvaluationAI/autoevalaioutputs5/crops/word_0002.png',
 'E:/EvaluationAI/autoevalaioutputs5/crops/word_0003.png',
 'E:/EvaluationAI/autoevalaioutputs5/crops/word_0004.png',
 'E:/EvaluationAI/autoevalaioutputs5/crops/word_0005.png',
 'E:/EvaluationAI/autoevalaioutputs5/crops/word_0006.png',
 'E:/EvaluationAI/autoevalaioutputs5/crops/word_0007.png',
 'E:/EvaluationAI/autoevalaioutputs5/crops/word_0008.png',
 'E:/EvaluationAI/autoevalaioutputs5/crops/word_0009.png',
 'E:/EvaluationAI/autoevalaioutputs5/crops/word_0010.png',
 'E:/EvaluationAI/autoevalaioutputs5/crops/word_0011.png',
 'E:/EvaluationAI/autoevalaioutputs5/crops/word_0012.png',
 'E:/EvaluationAI/autoevalaioutputs5/crops/word_0013.png',
 'E:/EvaluationAI/autoevalaioutputs5/crops/word_0014.png',
 'E:/EvaluationAI/autoevalaioutputs5/crops/word_0015.png',
 'E:/EvaluationAI/autoevalaioutputs5/crops/word_0016.png',
 'E:/EvaluationAI/autoevalaioutputs5/crops/word_0017.png