# FINAL EAST Implementation (70.78% F1)

**This implementation achieves:**
- Precision: 68.79%
- Recall: 72.89%
- **F1-Score: 70.78%** ✅

**Processing all 500 ICDAR 2015 images**

## 1. Imports and Configuration

In [1]:
import os
import glob
import cv2
import numpy as np
from tqdm.notebook import tqdm

print("✓ Imports successful")

✓ Imports successful


In [2]:
# Configuration
TARGET_DIM = 1280                     # default longest-side length used by letterbox_image()
CONFIDENCE_THRESH = 0.8               # default minimum text score for a candidate box
NMS_THRESH = 0.2                      # default threshold passed to rotated-box NMS
MEAN_VALS = (123.68, 116.78, 103.94)  # per-channel mean handed to cv2.dnn.blobFromImage

# EAST output layers (the forward pass returns scores first, geometry second)
EAST_OUTPUT_LAYERS = [
    "feature_fusion/Conv_7/Sigmoid",
    "feature_fusion/concat_3",
]

# Paths (adjust these to match your own directory layout)
EAST_MODEL_PATH = "models/frozen_east_text_detection.pb"
ICDAR_IMAGE_DIR = "data/icdar2015/test_images"
ICDAR_GT_DIR = "icdar_eval/gt"
EAST_OUTPUT_DIR = "outputs/east_final_results"

# Make sure the results directory exists before anything tries to write to it.
os.makedirs(EAST_OUTPUT_DIR, exist_ok=True)

# Echo the effective settings in a single write so they are captured with the run.
print("\n".join((
    "Configuration:",
    f"  Model: {EAST_MODEL_PATH}",
    f"  Images: {ICDAR_IMAGE_DIR}",
    f"  GT: {ICDAR_GT_DIR}",
    f"  Output: {EAST_OUTPUT_DIR}",
    f"  Target dim: {TARGET_DIM}px",
    f"  Confidence: {CONFIDENCE_THRESH}",
    f"  NMS: {NMS_THRESH}",
)))

Configuration:
  Model: models/frozen_east_text_detection.pb
  Images: data/icdar2015/test_images
  GT: icdar_eval/gt
  Output: outputs/east_final_results
  Target dim: 1280px
  Confidence: 0.8
  NMS: 0.2


## 2. Load EAST Model

In [3]:
# Load the frozen EAST graph, or stop right away with a clear message if the
# file is not where the configuration says it should be.
if os.path.isfile(EAST_MODEL_PATH):
    net = cv2.dnn.readNet(EAST_MODEL_PATH)
    print("✓ EAST model loaded successfully!")
else:
    raise FileNotFoundError(f"EAST model not found at: {EAST_MODEL_PATH}")

✓ EAST model loaded successfully!


## 3. Preprocessing Functions

In [4]:
def letterbox_image(image, target_dim=TARGET_DIM):
    """
    Resize image preserving aspect ratio, pad to make divisible by 32.

    The longest side is scaled to ``target_dim``. The resized image is then
    pasted into the top-left corner of a zero-filled 3-channel canvas whose
    width and height are rounded up to the next multiple of 32, so the
    padding sits only on the right and bottom edges.

    Args:
        image: array whose first two axes are (height, width); the canvas it
            is copied into has 3 channels.
        target_dim: desired length of the longest side after resizing.

    Returns:
        Tuple ``(padded, scale, pad_w, pad_h, orig_h, orig_w)`` where
        ``scale`` is the resize factor applied to both axes and
        ``pad_w`` / ``pad_h`` are the numbers of padding columns / rows added.

    Raises:
        ValueError: if the image has zero height or width.
    """
    orig_h, orig_w = image.shape[:2]
    if orig_h == 0 or orig_w == 0:
        raise ValueError("letterbox_image() received an empty image")

    scale = target_dim / float(max(orig_h, orig_w))

    # Clamp to one pixel: for extremely elongated images the short side can
    # round down to 0, which cv2.resize rejects.
    new_w = max(1, int(round(orig_w * scale)))
    new_h = max(1, int(round(orig_h * scale)))

    resized = cv2.resize(image, (new_w, new_h))

    # Number of extra columns/rows needed to reach the next multiple of 32
    # (0 when the size is already a multiple).
    pad_w = -new_w % 32
    pad_h = -new_h % 32

    padded = np.zeros((new_h + pad_h, new_w + pad_w, 3), dtype=resized.dtype)
    padded[:new_h, :new_w] = resized

    return padded, scale, pad_w, pad_h, orig_h, orig_w


def make_blob(padded_image):
    """
    Build the network input blob from an already letterboxed image.

    The blob keeps the image's own width and height (no rescaling, no
    cropping), subtracts ``MEAN_VALS`` and swaps the first and last channels.
    """
    img_h, img_w = padded_image.shape[:2]
    blob_options = dict(
        scalefactor=1.0,
        size=(img_w, img_h),  # OpenCV expects (width, height)
        mean=MEAN_VALS,
        swapRB=True,
        crop=False,
    )
    return cv2.dnn.blobFromImage(padded_image, **blob_options)

print("✓ Preprocessing functions defined")

✓ Preprocessing functions defined


## 4. Decoding Functions (Rotated Boxes)

In [5]:
def decode_rotated_predictions(scores, geometry, conf_threshold):
    """
    Convert the raw EAST score and geometry maps into rotated rectangles.

    Args:
        scores: array indexed as ``[0, 0, row, col]``; its first axis must
            have length 1.
        geometry: array indexed as ``[0, channel, row, col]``; channels 0-3
            hold the four distance values and channel 4 the angle in radians.
            Its first axis must have length 1.
        conf_threshold: cells whose score is below this value are skipped.

    Returns:
        ``(boxes, confidences)``. Each box is ``((cx, cy), (w, h), angle_deg)``
        and ``confidences`` holds the matching scores as Python floats, in
        row-major order of the cells that passed the threshold.
    """
    assert scores.shape[0] == 1
    assert geometry.shape[0] == 1

    rects = []
    rect_scores = []

    for row in range(scores.shape[2]):
        row_scores = scores[0, 0, row]
        d0, d1, d2, d3, theta_row = (geometry[0, ch, row] for ch in range(5))

        # Same rule as "skip when score < threshold", evaluated for the whole
        # row at once; plain ints keep the arithmetic below unchanged.
        surviving_cols = np.flatnonzero(~(row_scores < conf_threshold))
        for col in map(int, surviving_cols):
            theta = theta_row[col]
            cos_t = np.cos(theta)
            sin_t = np.sin(theta)

            box_h = d0[col] + d2[col]
            box_w = d1[col] + d3[col]

            # Each map cell corresponds to a step of 4 units in input coordinates.
            anchor_x = col * 4.0 + cos_t * d1[col] + sin_t * d2[col]
            anchor_y = row * 4.0 - sin_t * d1[col] + cos_t * d2[col]

            # Two corners reached from the anchor; their midpoint is the centre.
            p1 = (-sin_t * box_h + anchor_x, -cos_t * box_h + anchor_y)
            p3 = (-cos_t * box_w + anchor_x, sin_t * box_w + anchor_y)
            center = (0.5 * (p1[0] + p3[0]), 0.5 * (p1[1] + p3[1]))

            rects.append((center, (box_w, box_h), -theta * 180.0 / np.pi))
            rect_scores.append(float(row_scores[col]))

    return rects, rect_scores

print("✓ Decoding functions defined")

✓ Decoding functions defined


## 5. Post-processing Functions

In [6]:
def map_rotated_boxes_to_original(boxes, indices, scale, pad_w, pad_h,
                                  orig_w, orig_h, shrink_ratio=0.0):
    """
    Convert the selected rotated rects into 4-point polygons in original
    image coordinates.

    Args:
        boxes: sequence of rotated rects accepted by ``cv2.boxPoints``.
        indices: positions in ``boxes`` to convert; anything with a
            ``flatten`` method is flattened first.
        scale: resize factor that was applied to the original image; corner
            coordinates are divided by it.
        pad_w, pad_h: accepted for interface compatibility; not used here.
        orig_w, orig_h: original image size, used to clip the corners to
            ``[0, orig_w - 1]`` and ``[0, orig_h - 1]``.
        shrink_ratio: when > 0, each polygon is contracted towards the mean
            of its corners by this fraction before clipping.

    Returns:
        List of float32 arrays, one per selected box.
    """
    if len(indices) == 0:
        return []

    selected = indices.flatten() if hasattr(indices, "flatten") else indices
    x_limit = orig_w - 1
    y_limit = orig_h - 1
    do_shrink = shrink_ratio > 0.0

    mapped = []
    for box_id in selected:
        corners = cv2.boxPoints(boxes[box_id])

        # Undo the resize for both coordinates at once.
        corners /= scale

        if do_shrink:
            centroid = corners.mean(axis=0, keepdims=True)
            corners = centroid + (corners - centroid) * (1.0 - shrink_ratio)

        corners[:, 0] = np.clip(corners[:, 0], 0, x_limit)
        corners[:, 1] = np.clip(corners[:, 1], 0, y_limit)

        mapped.append(corners.astype(np.float32))

    return mapped

print("✓ Post-processing functions defined")

✓ Post-processing functions defined


## 6. Main Detection Pipeline

In [7]:
def detect_text_polygons(image, net,
                         conf_threshold=CONFIDENCE_THRESH,
                         nms_threshold=NMS_THRESH,
                         shrink_ratio=0.0,
                         target_dim=TARGET_DIM):
    """
    Complete EAST pipeline with rotated boxes.

    Steps: letterbox the image, run the network, decode the score/geometry
    maps into rotated rects, apply rotated NMS, and map the surviving rects
    back to original-image polygons.

    Args:
        image: input image as loaded by ``cv2.imread``.
        net: network object providing ``setInput`` and ``forward``.
        conf_threshold: minimum score, used both for decoding and for NMS.
        nms_threshold: threshold passed to ``cv2.dnn.NMSBoxesRotated``.
        shrink_ratio: optional contraction applied to each output polygon.
        target_dim: longest-side length the image is resized to before
            inference (defaults to ``TARGET_DIM``).

    Returns:
        ``(polygons, kept_confidences)``: two lists of equal length, where
        ``kept_confidences[i]`` is the score of ``polygons[i]``.
    """
    padded, scale, pad_w, pad_h, orig_h, orig_w = letterbox_image(image, target_dim)

    net.setInput(make_blob(padded))
    scores, geometry = net.forward(EAST_OUTPUT_LAYERS)

    boxes, confidences = decode_rotated_predictions(scores, geometry, conf_threshold)
    if not boxes:
        # Nothing passed the score threshold; no need to run NMS.
        return [], []

    nms_result = cv2.dnn.NMSBoxesRotated(boxes, confidences,
                                         conf_threshold, nms_threshold)

    # Normalise once to a flat integer array. The result may be an empty
    # tuple, a flat array or an (N, 1) array; using the same normalised
    # indices for polygons and confidences keeps the two lists aligned.
    keep = np.asarray(nms_result, dtype=np.intp).reshape(-1)

    polygons = map_rotated_boxes_to_original(
        boxes, keep, scale, pad_w, pad_h, orig_w, orig_h,
        shrink_ratio=shrink_ratio
    )
    kept_confidences = [confidences[idx] for idx in keep]

    return polygons, kept_confidences

print("✓ Main detection pipeline defined")

✓ Main detection pipeline defined


## 7. Run Detection on All 500 Images

In [8]:
# Get all images (sorted so the processing order is the same on every run)
image_paths = sorted(
    p for p in glob.glob(os.path.join(ICDAR_IMAGE_DIR, '*'))
    if p.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp'))
)

print(f"Found {len(image_paths)} images")
print(f"Processing all images...\n")

total_detections = 0
processed_images = 0  # images that were actually read and run through EAST

for img_path in tqdm(image_paths, desc="Processing EAST"):
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread returns None instead of raising; report the skip so a
        # missing result file is not a mystery later on.
        print(f"Warning: could not read image, skipping: {img_path}")
        continue
    processed_images += 1

    # Run detection
    polygons, confs = detect_text_polygons(image, net)

    # Save predictions
    img_name = os.path.basename(img_path)
    base_name, _ = os.path.splitext(img_name)

    # Save as img_X_east_boxes.txt (for ensemble compatibility):
    # one line per detection, "x1,y1,x2,y2,x3,y3,x4,y4,confidence".
    pred_txt_path = os.path.join(EAST_OUTPUT_DIR, f"{base_name}_east_boxes.txt")
    with open(pred_txt_path, 'w', encoding='utf-8') as f:
        for pred_poly, conf in zip(polygons, confs):
            coords = ','.join(
                f"{int(x)},{int(y)}" for x, y in pred_poly.astype(np.int32)
            )
            f.write(f"{coords},{conf:.4f}\n")

    # Save visualization
    result_img = image.copy()
    for poly in polygons:
        pts = poly.astype(np.int32)
        cv2.polylines(result_img, [pts], isClosed=True, color=(0, 255, 0), thickness=2)

    result_path = os.path.join(EAST_OUTPUT_DIR, f"{base_name}_east_result.jpg")
    cv2.imwrite(result_path, result_img)

    total_detections += len(polygons)

# Average over the images that were actually processed; guard the empty case
# so an empty or mistyped image directory does not end in ZeroDivisionError.
average_detections = total_detections / processed_images if processed_images else 0.0

print(f"\n✓ Detection Complete!")
print(f"Total detections: {total_detections}")
print(f"Average per image: {average_detections:.1f}")
print(f"Results saved to: {EAST_OUTPUT_DIR}")

Found 500 images
Processing all images...



Processing EAST:   0%|          | 0/500 [00:00<?, ?it/s]


✓ Detection Complete!
Total detections: 2820
Average per image: 5.6
Results saved to: outputs/east_final_results


## 8. Evaluation Functions

In [9]:
def load_icdar_gt_file(gt_path):
    """Load ICDAR GT file.

    Each usable line has the form ``x1,y1,x2,y2,x3,y3,x4,y4,transcription``.
    The transcription may itself contain commas. Blank lines, lines with
    fewer than nine comma-separated fields and lines whose first eight
    fields are not numbers are skipped.

    Args:
        gt_path: path of the ground-truth text file.

    Returns:
        ``(care_polygons, dontcare_polygons)``: two lists of float32 arrays
        of shape (4, 2). Entries whose transcription is ``###`` go into the
        second list. Both lists are empty when the file does not exist.
    """
    care_polygons = []
    dontcare_polygons = []

    if not os.path.isfile(gt_path):
        return care_polygons, dontcare_polygons

    # utf-8-sig transparently drops a leading byte-order mark.
    with open(gt_path, 'r', encoding='utf-8-sig') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            parts = line.split(',')
            if len(parts) < 9:
                continue

            try:
                coords = [float(x.strip('\ufeff')) for x in parts[:8]]
            except ValueError:
                # Only a malformed number is an expected failure here; any
                # other exception should surface instead of being swallowed.
                continue

            # Re-join the tail so commas inside the transcription survive.
            transcription = ",".join(parts[8:]).strip().strip('"')

            poly = np.array(coords, dtype=np.float32).reshape(4, 2)

            if transcription == "###":
                dontcare_polygons.append(poly)
            else:
                care_polygons.append(poly)

    return care_polygons, dontcare_polygons


def polygon_iou(poly1, poly2, img_h, img_w):
    """Return the IoU of two polygons, measured on filled pixel masks.

    Each polygon is copied, clipped to the image bounds, truncated to integer
    coordinates and filled into its own ``img_h`` x ``img_w`` mask. The
    inputs are not modified. Returns 0.0 when both masks are empty.
    """
    def _filled_mask(poly):
        # Work on a copy so the caller's polygon is left untouched.
        pts = poly.copy()
        pts[:, 0] = np.clip(pts[:, 0], 0, img_w - 1)
        pts[:, 1] = np.clip(pts[:, 1], 0, img_h - 1)

        canvas = np.zeros((img_h, img_w), dtype=np.uint8)
        cv2.fillPoly(canvas, [pts.astype(np.int32)], 1)
        return canvas

    mask_a = _filled_mask(poly1)
    mask_b = _filled_mask(poly2)

    overlap = np.logical_and(mask_a, mask_b).sum()
    combined = mask_a.sum() + mask_b.sum() - overlap

    if combined == 0:
        return 0.0
    return float(overlap) / float(combined)

print("✓ Evaluation functions defined")

✓ Evaluation functions defined


## 9. Run Evaluation

In [10]:
print("="*70)
print("Running ICDAR 2015 Evaluation")
print("="*70)

# Running totals over the whole image set.
total_TP = 0
total_FP = 0
total_GT = 0

for img_path in tqdm(image_paths, desc="Evaluating EAST"):
    # The image is loaded only for its size: polygon_iou rasterises polygons
    # onto masks with the image's dimensions.
    image = cv2.imread(img_path)
    if image is None:
        continue

    img_h, img_w = image.shape[:2]

    img_name = os.path.basename(img_path)
    base_name, _ = os.path.splitext(img_name)

    gt_name = f"gt_{base_name}.txt"
    gt_path = os.path.join(ICDAR_GT_DIR, gt_name)

    care_polys, dontcare_polys = load_icdar_gt_file(gt_path)
    total_GT += len(care_polys)

    # Load predictions (first eight fields are the corner coordinates; any
    # further field, such as the confidence, is ignored here).
    pred_txt_path = os.path.join(EAST_OUTPUT_DIR, f"{base_name}_east_boxes.txt")
    pred_polys = []
    if os.path.exists(pred_txt_path):
        with open(pred_txt_path, 'r', encoding='utf-8') as f:
            for line in f:
                parts = line.strip().split(',')
                if len(parts) < 8:
                    continue
                try:
                    coords = [float(x) for x in parts[:8]]
                except ValueError:
                    # Skip lines with non-numeric coordinates; let any other
                    # error surface rather than hiding it.
                    continue
                pred_polys.append(
                    np.array(coords, dtype=np.float32).reshape(4, 2)
                )

    # Match predictions: each prediction, in file order, claims the not yet
    # matched GT polygon with the highest IoU above 0.5.
    gt_matched = [False] * len(care_polys)

    for pred_poly in pred_polys:
        best_iou = 0.0
        best_gt_idx = -1

        for gt_idx, gt_poly in enumerate(care_polys):
            if gt_matched[gt_idx]:
                continue

            iou = polygon_iou(pred_poly, gt_poly, img_h, img_w)
            if iou > 0.5 and iou > best_iou:
                best_iou = iou
                best_gt_idx = gt_idx

        if best_gt_idx >= 0:
            total_TP += 1
            gt_matched[best_gt_idx] = True
            continue

        # An unmatched prediction that overlaps a don't-care region is
        # ignored rather than counted as a false positive.
        is_dontcare = any(
            polygon_iou(pred_poly, dc_poly, img_h, img_w) > 0.5
            for dc_poly in dontcare_polys
        )

        if not is_dontcare:
            total_FP += 1

# Calculate metrics
TP = total_TP
FP = total_FP
GT = total_GT
FN = GT - TP

precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0
recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0
f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0

print("\n" + "="*70)
print("ICDAR-15 EAST Evaluation Results")
print("="*70)
print(f"TP   : {TP}")
print(f"FP   : {FP}")
print(f"FN   : {FN}")
print(f"GT   : {GT}")
print(f"\nPrecision: {precision:.4f} ({precision*100:.2f}%)")
print(f"Recall   : {recall:.4f} ({recall*100:.2f}%)")
print(f"F1-score : {f1:.4f} ({f1*100:.2f}%)")
print("="*70)

print("\nExpected Results:")
print("  Precision: 68.79%")
print("  Recall:    72.89%")
print("  F1-Score:  70.78%")

Running ICDAR 2015 Evaluation


Evaluating EAST:   0%|          | 0/500 [00:00<?, ?it/s]


ICDAR-15 EAST Evaluation Results
TP   : 1514
FP   : 687
FN   : 563
GT   : 2077

Precision: 0.6879 (68.79%)
Recall   : 0.7289 (72.89%)
F1-score : 0.7078 (70.78%)

Expected Results:
  Precision: 68.79%
  Recall:    72.89%
  F1-Score:  70.78%


## Summary

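**This implementation achieves 70.78% F1** on the 500 ICDAR 2015 images, as measured by the rasterized-IoU evaluation in Section 9 (a prediction counts as a match when its IoU with a ground-truth polygon exceeds 0.5).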

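Results are saved to `outputs/east_final_results/`, two files per input image (`img_X` stands for the image's base name):
- `img_X_east_boxes.txt` - Detection boxes (for ensemble), one per line as `x1,y1,x2,y2,x3,y3,x4,y4,confidence`
- `img_X_east_result.jpg` - Visualization (the input image with the detected polygons drawn in green)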

**Next:** Use these EAST results + CRAFT outputs for ensemble fusion!