
# Answer Sheet — Word Segmentation & Grouping (**EasyOCR CRAFT**, No Paddle)

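This notebook segments **every word** with the CRAFT detector bundled in **EasyOCR** (only the detected boxes are kept; the recognized text is discarded). Words to the left of a vertical **border** act as **left-column anchors**, and the words to the right of the border are grouped under the anchor whose vertical band they fall in.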

**Highlights**
- **EXIF-aware** image loading (fixes rotation mismatches)
- Page **warp** is optional and **off** by default (so overlays don't shift or hide tokens)
- **Transparent** overlay (no boxes covering text)
- Auto or fixed **border**; outputs per-anchor groups (`groups/Q001`, `Q002`, …)
- Saves: `debug_overlay.png`, `crops/word_####.png`, `words.json`, `groups_index.json`



## Install (run locally, not here)
```bash
pip install easyocr opencv-python numpy pillow
# Optional GPU (install torch+CUDA first)
# pip install torch torchvision --index-url https://download.pytorch.org/whl/cu121
# then: pip install easyocr
```


In [6]:

from pathlib import Path

# --- Input image and output folder ---
INPUT_PATH = Path(r"E:\EvaluationAI\Dataset\30.jpg")     # change if needed
OUTPUT_DIR = Path(r"E:\EvaluationAI\autoevalaioutputs30")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)            # created up front so later saves can assume it exists

# --- Image orientation / page alignment ---
RESPECT_EXIF = True               # load through PIL and apply the EXIF orientation tag
ALIGN_TO_A4 = False               # keep False to avoid overlay shifts while debugging
A4_SIZE_PX = (2480, 3508)         # (W, H) target size if warping is enabled later

# --- EasyOCR / CRAFT detection settings ---
USE_GPU = False
TEXT_THRESHOLD = 0.7
LINK_THRESHOLD = 0.4
LOW_TEXT = 0.4
MIN_SIZE = 5                      # min text height
WORD_PAD = 2                      # pad around each bbox
MIN_AREA_PX = 30                  # boxes with a smaller area are dropped

# --- Vertical border separating anchors (left) from answers (right) ---
BORDER_MODE = "fixed"             # "fixed" or "auto"
FIXED_BORDER_RATIO = 0.26         # fraction of image width (26%); tune per template
LEFT_PAD_PX = 12                  # "auto" mode only: push border slightly right of anchors

# --- Vertical padding applied to each group's y-range ---
TOP_PAD = BOTTOM_PAD = 6

print("INPUT_PATH:", INPUT_PATH)
print("OUTPUT_DIR:", OUTPUT_DIR)


INPUT_PATH: E:\EvaluationAI\Dataset\30.jpg
OUTPUT_DIR: E:\EvaluationAI\autoevalaioutputs30


In [7]:

import os, json, math
from typing import List, Dict, Any, Tuple, Optional
import cv2
import numpy as np
from PIL import Image, ImageOps

def read_image_exif_bgr(path: Path) -> np.ndarray:
    """Load an image with PIL, apply its EXIF orientation, and return it as BGR.

    Args:
        path: Location of the image file.

    Returns:
        The image as a NumPy array in BGR channel order (the order the
        OpenCV calls in this notebook work with).

    Raises:
        Whatever ``PIL.Image.open`` raises for a missing or unreadable file.
    """
    # The context manager closes the underlying file handle; the pixel data
    # is materialised by convert()/np.array() before the handle goes away.
    with Image.open(path) as src:
        upright = ImageOps.exif_transpose(src)  # respect EXIF orientation
        rgb = np.array(upright.convert("RGB"))  # RGB
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

def ensure_img(path: Path) -> np.ndarray:
    """Load the image at ``path`` as a BGR array.

    When RESPECT_EXIF is set, loading goes through ``read_image_exif_bgr``;
    otherwise ``cv2.imread`` is used directly.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load the file
            (only on the non-EXIF path).
    """
    if not RESPECT_EXIF:
        loaded = cv2.imread(str(path))
        if loaded is None:
            raise FileNotFoundError(f"Could not load image: {path}")
        return loaded
    return read_image_exif_bgr(path)

def save_image(img_bgr: np.ndarray, name: str) -> Path:
    """Write ``img_bgr`` to ``OUTPUT_DIR / name`` and return the written path.

    Args:
        img_bgr: Image array to encode.
        name: File name (may include sub-folders) relative to OUTPUT_DIR.

    Returns:
        The full path of the file that was written.

    Raises:
        OSError: If OpenCV reports that the image could not be written.
    """
    p = OUTPUT_DIR / name
    p.parent.mkdir(parents=True, exist_ok=True)
    # cv2.imwrite signals failure through its return value rather than an
    # exception; surface it instead of returning a path to a missing file.
    if not cv2.imwrite(str(p), img_bgr):
        raise OSError(f"Could not write image: {p}")
    return p

def save_json(obj: Any, name: str) -> Path:
    """Serialise ``obj`` as indented UTF-8 JSON under OUTPUT_DIR.

    Args:
        obj: Any JSON-serialisable object.
        name: File name (may include sub-folders) relative to OUTPUT_DIR.

    Returns:
        The full path of the JSON file.
    """
    target = OUTPUT_DIR / name
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w", encoding="utf-8") as handle:
        # ensure_ascii=False keeps non-ASCII characters readable in the file.
        json.dump(obj, handle, indent=2, ensure_ascii=False)
    return target

def poly_to_bbox(poly: np.ndarray) -> Tuple[int,int,int,int]:
    """Return the integer axis-aligned bounding box ``(x0, y0, x1, y1)`` of a polygon.

    ``poly`` is indexed as rows of points with x in column 0 and y in
    column 1. The minimum corner is rounded down and the maximum corner is
    rounded up, so the box always encloses the polygon.
    """
    xy = poly[:, :2]
    left, top = np.floor(xy.min(axis=0))
    right, bottom = np.ceil(xy.max(axis=0))
    return int(left), int(top), int(right), int(bottom)

def save_crop(img: np.ndarray, bbox_xyxy, path: Path):
    """Write the region of ``img`` covered by ``bbox_xyxy`` to ``path``.

    Args:
        img: Source image; its first two dimensions are read as (height, width).
        bbox_xyxy: ``(x0, y0, x1, y1)`` box; it is clipped to the image.
        path: Destination file. Parent folders are created when needed.

    Nothing is written when the clipped box is empty.
    """
    x0, y0, x1, y1 = bbox_xyxy
    h, w = img.shape[:2]
    x0 = max(0, x0)
    y0 = max(0, y0)
    # Slice ends are exclusive, so the upper bounds clamp to w / h (not
    # w-1 / h-1); otherwise the last image row and column could never be
    # part of a crop.
    x1 = min(w, x1)
    y1 = min(h, y1)
    if x1 > x0 and y1 > y0:
        path.parent.mkdir(parents=True, exist_ok=True)
        cv2.imwrite(str(path), img[y0:y1, x0:x1])

def draw_transparent_boxes(base_bgr, polys, anchors, x_border, alpha=0.55):
    """Return a copy of ``base_bgr`` with semi-transparent debug markings.

    Args:
        base_bgr: Image to annotate; it is not modified.
        polys: Word polygons, each an array of points; drawn as closed
            green outlines.
        anchors: Dicts carrying a ``"bbox_xyxy"`` entry; drawn as thin red
            rectangles.
        x_border: X position of the vertical border line, drawn in orange.
        alpha: Weight of the drawn layer in the blend; the untouched image
            gets ``1 - alpha``. Defaults to the previously hard-coded 0.55.

    Returns:
        A new image: the drawn layer blended with the original so the
        markings do not fully cover the underlying text.
    """
    height = base_bgr.shape[0]
    layer = base_bgr.copy()

    # all word polygons (green)
    for poly in polys:
        cv2.polylines(layer, [poly.astype(np.int32)], True, (0,255,0), 2, lineType=cv2.LINE_AA)

    # anchors (red thin)
    for a in anchors:
        x0, y0, x1, y1 = a["bbox_xyxy"]
        cv2.rectangle(layer, (x0,y0), (x1,y1), (0,0,255), 1, lineType=cv2.LINE_AA)

    # border (orange); OpenCV point coordinates must be plain ints
    bx = int(x_border)
    cv2.line(layer, (bx, 0), (bx, height-1), (0,165,255), 2, lineType=cv2.LINE_AA)

    # blend to avoid hiding text
    return cv2.addWeighted(layer, alpha, base_bgr, 1-alpha, 0)

def auto_border_from_left(words, W, pad_px=12):
    """Estimate the x position of the vertical border from left-half words.

    Args:
        words: Dicts with a ``"center"`` entry whose first item is the x centre.
        W: Image width in pixels.
        pad_px: Pixels added to the right of the estimated left-column edge.

    Returns:
        The 95th percentile of the x centres lying in the left half of the
        image, plus ``pad_px``, limited to the range 10%..80% of ``W``.
        When no word centre lies in the left half, 28% of ``W`` is returned.
    """
    half_width = 0.5 * W
    left_centres = []
    for word in words:
        x_centre = word["center"][0]
        if x_centre <= half_width:
            left_centres.append(x_centre)

    if len(left_centres) == 0:
        return int(0.28 * W)

    candidate = int(np.percentile(left_centres, 95)) + pad_px
    lowest = int(0.10 * W)
    highest = int(0.80 * W)
    if candidate < lowest:
        candidate = lowest
    if candidate > highest:
        candidate = highest
    return candidate


## EasyOCR detector (boxes only)

In [8]:

import easyocr

# Module-level cache so the EasyOCR reader is constructed at most once.
_reader = None
def get_reader():
    """Return the shared ``easyocr.Reader``, creating it on first use.

    The reader is built for English with ``gpu=USE_GPU`` and
    ``verbose=False``; later calls return the same instance.
    """
    global _reader
    if _reader is not None:
        return _reader
    _reader = easyocr.Reader(['en'], gpu=USE_GPU, verbose=False)
    return _reader

def run_easyocr_detector(img_bgr: np.ndarray) -> List[np.ndarray]:
    """Detect text regions in a BGR image and return their polygons.

    The image is converted to RGB and passed to ``readtext`` with the
    module-level detection thresholds. Only the first element of each
    result (the polygon) is kept; the remaining fields are dropped.

    NOTE(review): ``readtext`` presumably also runs the recognition stage
    even though its output is unused here — confirm whether a detector-only
    call would be an acceptable, cheaper replacement.

    Returns:
        One float32 array per detection, restricted to 2-D arrays with at
        least four points.
    """
    rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    detections = get_reader().readtext(
        rgb,
        detail=1,               # [[poly, text, conf], ...]
        paragraph=False,
        min_size=MIN_SIZE,
        text_threshold=TEXT_THRESHOLD,
        low_text=LOW_TEXT,
        link_threshold=LINK_THRESHOLD
    )
    candidates = (np.array(det[0], dtype=np.float32) for det in detections)
    return [pts for pts in candidates if pts.ndim == 2 and pts.shape[0] >= 4]


## Run segmentation → border → grouping

In [9]:

def _build_word_records(img, polys, crops_dir):
    """Turn detector polygons into word records and save one crop per kept word."""
    words = []
    for poly in polys:
        x0, y0, x1, y1 = poly_to_bbox(poly)
        if (x1 - x0) * (y1 - y0) < MIN_AREA_PX:
            continue
        # Ids are 1-based and contiguous over the *kept* words, and match
        # the word_####.png crop file names.
        word_id = len(words) + 1
        crop_path = crops_dir / f"word_{word_id:04d}.png"
        save_crop(img, (x0, y0, x1, y1), crop_path)
        words.append({
            "id": word_id,
            "poly": poly.round(2).tolist(),
            "bbox_xyxy": [int(x0), int(y0), int(x1), int(y1)],
            "center": [float((x0 + x1) / 2.0), float((y0 + y1) / 2.0)],
            "size": [int(x1 - x0), int(y1 - y0)],
            "crop_path": crop_path.as_posix(),
        })
    return words


def _group_bounds(anchors, H):
    """Return ``(group_number, y0, y1)`` for each anchor, in anchor order."""
    # Each band runs between the midpoints to the neighbouring anchors (or
    # to the top/bottom image edge), shrunk by TOP_PAD / BOTTOM_PAD.
    edges_y = [0] + [int(a["center"][1]) for a in anchors] + [H - 1]
    bounds = []
    for i in range(1, len(edges_y) - 1):
        y_mid_prev = int((edges_y[i - 1] + edges_y[i]) / 2)
        y_mid_next = int((edges_y[i] + edges_y[i + 1]) / 2)
        y0 = max(0, y_mid_prev + TOP_PAD)
        y1 = min(H - 1, y_mid_next - BOTTOM_PAD)
        bounds.append((i, y0, y1))
    return bounds


def _collect_group_words(words, grp_dir, x_border, y_range=None):
    """Copy the crops of right-side words (optionally within ``y_range``) into ``grp_dir``."""
    import shutil

    members = []
    for w in words:
        cx, cy = w["center"]
        if cx <= x_border:
            continue
        if y_range is not None and not (y_range[0] <= cy <= y_range[1]):
            continue
        src = Path(w["crop_path"])
        dst = grp_dir / src.name
        # Plain file copy instead of decoding and re-encoding the PNG.
        # Still best-effort: a word whose crop was never written is listed
        # in the group but has no file copied.
        if src.is_file():
            shutil.copyfile(src, dst)
        members.append({**w, "group_path": dst.as_posix()})
    return members


def run_pipeline():
    """Run detection, border selection and grouping for INPUT_PATH.

    Steps:
      1. Load the image and detect word polygons.
      2. Keep boxes of at least MIN_AREA_PX, save a crop for each under
         ``crops/`` and build the word records.
      3. Choose the border x (fixed ratio of the width, or estimated from
         the left-half words).
      4. Treat words whose centre is at or left of the border as anchors,
         sorted top to bottom, and save ``debug_overlay.png``.
      5. For each anchor, collect the right-side words whose centre y lies
         in that anchor's band into ``groups/Q###``; with no anchors, all
         right-side words go to ``groups/Q001``.
      6. Write ``words.json`` and ``groups_index.json`` and print a summary.

    Words whose centre falls outside every band (above the first band,
    below the last, or inside the padding gap between two bands) are not
    assigned to any group.

    NOTE(review): WORD_PAD is declared in the configuration but is not
    applied anywhere in this function — confirm whether crops and boxes
    were meant to be padded.
    """
    img = ensure_img(INPUT_PATH)
    H, W = img.shape[:2]

    polys = run_easyocr_detector(img)
    words = _build_word_records(img, polys, OUTPUT_DIR / "crops")

    # border
    if BORDER_MODE == "fixed":
        x_border = int(FIXED_BORDER_RATIO * W)
    else:
        x_border = auto_border_from_left(words, W, pad_px=LEFT_PAD_PX)

    # anchors, top to bottom
    anchors = sorted(
        (w for w in words if w["center"][0] <= x_border),
        key=lambda w: w["center"][1],
    )

    # overlay
    overlay = draw_transparent_boxes(
        img,
        [np.array(w["poly"], dtype=np.float32) for w in words],
        anchors,
        x_border
    )
    save_image(overlay, "debug_overlay.png")

    # grouping
    groups = []
    groups_root = OUTPUT_DIR / "groups"
    if anchors:
        for gi, y0, y1 in _group_bounds(anchors, H):
            grp_dir = groups_root / f"Q{gi:03d}"
            grp_dir.mkdir(parents=True, exist_ok=True)

            anc = anchors[gi - 1]  # group numbers are 1-based
            save_crop(img, tuple(anc["bbox_xyxy"]), grp_dir / "anchor.png")

            groups.append({
                "group": f"Q{gi:03d}",
                "y_range": [y0, y1],
                "anchor_id": anc["id"],
                "words": _collect_group_words(words, grp_dir, x_border, (y0, y1)),
            })
    else:
        # No anchors detected: put every right-side word in a single group.
        grp_dir = groups_root / "Q001"
        grp_dir.mkdir(parents=True, exist_ok=True)
        groups.append({
            "group": "Q001",
            "y_range": [0, H - 1],
            "anchor_id": None,
            "words": _collect_group_words(words, grp_dir, x_border),
        })

    save_json({"border_x": x_border, "width": W, "height": H, "words": words}, "words.json")
    save_json({"groups": groups}, "groups_index.json")

    print(f"Border x = {x_border} (W={W}) | anchors={len(anchors)} | total words={len(words)}")
    for g in groups:
        print(f"{g['group']}: {len(g['words'])} right-side words; y_range={g['y_range']}")

# Execute the pipeline once; it writes its outputs under OUTPUT_DIR and prints a per-group summary.
run_pipeline()


Border x = 636 (W=2448) | anchors=14 | total words=79
Q001: 5 right-side words; y_range=[257, 713]
Q002: 9 right-side words; y_range=[725, 1034]
Q003: 5 right-side words; y_range=[1046, 1246]
Q004: 4 right-side words; y_range=[1258, 1416]
Q005: 0 right-side words; y_range=[1428, 1569]
Q006: 1 right-side words; y_range=[1581, 1853]
Q007: 12 right-side words; y_range=[1865, 2146]
Q008: 3 right-side words; y_range=[2158, 2343]
Q009: 5 right-side words; y_range=[2355, 2602]
Q010: 6 right-side words; y_range=[2614, 2886]
Q011: 3 right-side words; y_range=[2898, 3190]
Q012: 10 right-side words; y_range=[3202, 3461]
Q013: 0 right-side words; y_range=[3473, 3555]
Q014: 2 right-side words; y_range=[3567, 3721]


## Inspect outputs

In [10]:

from pprint import pprint
# Collect every regular file under OUTPUT_DIR (recursively) as a sorted list
# of POSIX-style paths, show the first 60, then report the total count.
files = sorted(
    entry.as_posix()
    for entry in OUTPUT_DIR.rglob("*")
    if entry.is_file()
)
pprint(files[:60])
print("... total files:", len(files))


['E:/EvaluationAI/autoevalaioutputs30/crops/word_0001.png',
 'E:/EvaluationAI/autoevalaioutputs30/crops/word_0002.png',
 'E:/EvaluationAI/autoevalaioutputs30/crops/word_0003.png',
 'E:/EvaluationAI/autoevalaioutputs30/crops/word_0004.png',
 'E:/EvaluationAI/autoevalaioutputs30/crops/word_0005.png',
 'E:/EvaluationAI/autoevalaioutputs30/crops/word_0006.png',
 'E:/EvaluationAI/autoevalaioutputs30/crops/word_0007.png',
 'E:/EvaluationAI/autoevalaioutputs30/crops/word_0008.png',
 'E:/EvaluationAI/autoevalaioutputs30/crops/word_0009.png',
 'E:/EvaluationAI/autoevalaioutputs30/crops/word_0010.png',
 'E:/EvaluationAI/autoevalaioutputs30/crops/word_0011.png',
 'E:/EvaluationAI/autoevalaioutputs30/crops/word_0012.png',
 'E:/EvaluationAI/autoevalaioutputs30/crops/word_0013.png',
 'E:/EvaluationAI/autoevalaioutputs30/crops/word_0014.png',
 'E:/EvaluationAI/autoevalaioutputs30/crops/word_0015.png',
 'E:/EvaluationAI/autoevalaioutputs30/crops/word_0016.png',
 'E:/EvaluationAI/autoevalaioutputs30/cr