### Note
This notebook is run in a Kaggle environment, where certain libraries are pre-installed. If you are running this notebook locally, please ensure you have the necessary libraries installed.

In this notebook, I have made some modifications to the original code to ensure compatibility with the Kaggle environment (mainly related to file paths and data loading). The core logic and functionality of the code remain unchanged.

### Import Libraries

In [1]:
import os
import sys
import shutil
import time
import yaml
import glob
from tqdm import tqdm
from tabulate import tabulate

import cv2
import numpy as np 
import pandas as pd

import torch
from torch.utils.data import DataLoader
from torch import optim

In [2]:
# Merge two folders containing images and text box annotations into a single folder
def merge_folders(src1: str, src2: str, dst: str) -> None:
    """Copy the directory trees of ``src1`` and ``src2`` into ``dst``.

    The relative layout of each source is reproduced under ``dst``. ``src1`` is
    copied first and ``src2`` second, so when both contain a file at the same
    relative path the copy from ``src2`` overwrites the one from ``src1``.

    Args:
        src1: First source directory.
        src2: Second source directory.
        dst: Destination directory; created if it does not exist.

    Raises:
        FileNotFoundError: If ``src1`` or ``src2`` is not an existing directory.
    """
    # os.walk() yields nothing for a missing path, which would silently produce
    # an empty/partial dataset. Validate both sources before copying anything.
    for src in (src1, src2):
        if not os.path.isdir(src):
            raise FileNotFoundError(f"Source folder not found: {src}")

    os.makedirs(dst, exist_ok=True)

    def copy_all(src: str) -> None:
        """Mirror every file below ``src`` into the same relative spot in ``dst``."""
        for root, _dirs, files in os.walk(src):
            rel_path = os.path.relpath(root, src)
            target_root = os.path.join(dst, rel_path)
            os.makedirs(target_root, exist_ok=True)

            for f in files:
                # copy2 also preserves file metadata such as timestamps
                shutil.copy2(
                    os.path.join(root, f),
                    os.path.join(target_root, f)
                )

    copy_all(src1)
    copy_all(src2)

### Prepare Data and Clone Repository

In [3]:
# Put the training annotations and the training images side by side in one folder
merge_folders(
    src1="/kaggle/input/icdar-2015/ch4_training_localization_transcription_gt",
    src2="/kaggle/input/icdar-2015/ch4_training_images",
    dst="/kaggle/working/Training",
)

In [4]:
# Put the test ground truth and the test images side by side in one folder
merge_folders(
    src1="/kaggle/input/icdar-2015/Challenge4_Test_Task1_GT",
    src2="/kaggle/input/icdar-2015/ch4_test_images",
    dst="/kaggle/working/Testing",
)

In [5]:
# Fetch the project source. No destination is given, so git clones into ./locr
# relative to the current working directory; the sys.path entry added in the
# next cell expects the result to live at /kaggle/working/locr.
!git clone https://github.com/CryAndRRich/locr.git

Cloning into 'locr'...
remote: Enumerating objects: 135, done.[K
remote: Counting objects: 100% (1495/135), done.[K
remote: Compressing objects: 100% (96/96), done.[K
remote: Total 135 (delta 33), reused 125 (delta 27), pack-reused 0 (from 0)[K
Receiving objects: 100% (1495/135), 872.19 KiB | 13.84 MiB/s, done.
Resolving deltas: 100% (33/33), done.


In [6]:
# Make the EAST sources from the cloned repository importable (the later
# `models`, `data` and `utils` imports resolve against this directory).
# Guarded so that re-running the cell does not add a duplicate entry.
EAST_ROOT = "/kaggle/working/locr/text_detection/EAST"
if EAST_ROOT not in sys.path:
    sys.path.append(EAST_ROOT)

In [7]:
from models.setup import setup_lanms

# Build the `lanms` extension, then smoke-test the import and one call.
if setup_lanms():
    print("\nBUILD SUCCESSFUL!")
    # Check .so file (looked up relative to the current working directory)
    so = glob.glob("lanms_cpu*.so")
    print(f"   Shared Object: {so[0] if so else 'Missing'}")
    
    # Test import
    print("\nTesting Import...")
    try:
        import lanms
        if hasattr(lanms, 'merge_quadrangle_n9'):
            print("Successfully imported 'lanms' module. Function found")
            # Four sample rows of 9 values each; presumably 8 quad coordinates
            # followed by a score - confirm against the lanms wrapper.
            d = np.array([
                    [10, 10, 20, 10, 20, 20, 10, 20, 0.9],
                    [12, 12, 22, 12, 22, 22, 12, 22, 0.8],
                    [30, 30, 40, 30, 40, 40, 30, 40, 0.7],
                    [32, 32, 42, 32, 42, 42, 32, 42, 0.6]
                ], dtype=np.float32)
            res = lanms.merge_quadrangle_n9(d, 0.5)
            # Fixed: the message used to end with a stray ")" after the result
            print(f"   Success test execution: {res}")
        else:
            print("Import succeeded but function not found")
    except ImportError as e:
        print(f"Import Error: {e}")
else:
    # Previously a failed build produced no message at all
    print("\nBUILD FAILED: setup_lanms() did not report success; 'lanms' may be unavailable")

Downloading Clipper...
Creating lanms.h...
Creating adaptor.cpp...
Compiling 'lanms_cpu' extension...


lanms_src/clipper.cpp: In function ‘void ClipperLib::InitEdge(ClipperLib::TEdge*, ClipperLib::TEdge*, ClipperLib::TEdge*, const ClipperLib::IntPoint&)’:
  721 |   std::memset(e, 0, sizeof(TEdge));
      |   ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~
lanms_src/clipper.cpp:66:8: note: ‘struct ClipperLib::TEdge’ declared here
   66 | struct TEdge {
      |        ^~~~~


Creating wrapper 'lanms.py'...

BUILD SUCCESSFUL!
   Shared Object: lanms_cpu.cpython-311-x86_64-linux-gnu.so

Testing Import...
C++ NMS loaded via lanms_cpu
Successfully imported 'lanms' module. Function found
   Success test execution: [[10.0, 10.0, 20.0, 10.0, 20.0, 20.0, 10.0, 20.0, 0.8999999761581421], [12.0, 12.0, 22.0, 12.0, 22.0, 22.0, 12.0, 22.0, 0.800000011920929], [30.0, 30.0, 40.0, 30.0, 40.0, 40.0, 30.0, 40.0, 0.699999988079071], [32.0, 32.0, 42.0, 32.0, 42.0, 42.0, 32.0, 42.0, 0.6000000238418579]])


In [8]:
from data.dataset import ICDAR2015Dataset

from models.loss import EASTLoss
from models.east import EAST

from utils.checkpoint import save_checkpoint, load_checkpoint
from utils.evaluation import detect, Evaluator
from utils.visualization import draw_boxes

### Training

In [9]:
def train(
    config_path: str = "/kaggle/working/locr/text_detection/EAST/configs/east_config.yaml",
    data_dir: str = "/kaggle/working/Training",
    resume_path: str = "/kaggle/input/east-icdar2015/EAST_ResNet50_ICDAR2015_latest.pth",
) -> None:
    """Train EAST on the merged ICDAR 2015 training folder, resuming if possible.

    Hyper-parameters are read from the YAML file at ``config_path``. After every
    epoch two checkpoints are written below
    ``/kaggle/working/checkpoints/<experiment_name>``: one named after the epoch
    and one named ``<experiment_name>_latest.pth``.

    Args:
        config_path: Path of the YAML configuration file.
        data_dir: Folder holding the training images and their annotations.
        resume_path: Checkpoint to resume from. If it is empty or the file does
            not exist, training starts from epoch 0.

    Raises:
        ValueError: If the data loader yields no batches (for example because
            ``data_dir`` holds fewer samples than one batch).
    """
    # Load Configuration
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)

    device = torch.device(config["device"] if torch.cuda.is_available() else "cpu")
    print(f"Running on device: {device}")

    # Prepare Dataset & DataLoader
    train_dataset = ICDAR2015Dataset(
        data_dir=data_dir,
        input_size=config["data"]["input_size"],
        is_train=True
    )

    train_loader = DataLoader(
        train_dataset,
        batch_size=config["data"]["batch_size"],
        shuffle=True,
        num_workers=config["data"]["num_workers"],
        drop_last=True,
        # Pinned host memory only helps host-to-GPU copies; skip it on CPU runs
        pin_memory=(device.type == "cuda")
    )
    num_batches = len(train_loader)
    if num_batches == 0:
        # With drop_last=True a dataset smaller than one batch yields no steps,
        # which would otherwise end in a ZeroDivisionError when averaging.
        raise ValueError(
            f"No training batches available from {data_dir!r}; "
            "check the data folder and the configured batch size"
        )

    # Prepare Model
    model = EAST(
        backbone=config["model"].get("backbone", "resnet50"),
        pretrained=config["model"]["pretrained"]
    ).to(device)

    # Optimizer & Loss
    optimizer = optim.Adam(model.parameters(), lr=config["train"]["lr"])

    # Learning Rate Scheduler: LR is multiplied by 0.1 after 300 and 500 scheduler steps
    # (the scheduler is stepped once per epoch below)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[300, 500], gamma=0.1)

    criterion = EASTLoss(lambda_geo=config["train"]["lambda_geometry"])

    # Resume training if needed
    checkpoint_dir = "/kaggle/working/checkpoints/" + config["experiment_name"]
    start_epoch = 0
    if resume_path and os.path.isfile(resume_path):
        # load_checkpoint restores model/optimizer/scheduler and returns the epoch to resume from
        start_epoch = load_checkpoint(resume_path, model, optimizer, scheduler, device)
    else:
        print(f"No checkpoint found at {resume_path!r}, training from scratch")

    # Fixed: the loop bound was a hard-coded 40 while the log line reported
    # config['train']['max_epochs']; resuming past epoch 40 trained nothing.
    max_epochs = config["train"]["max_epochs"]
    if start_epoch >= max_epochs:
        print(f"Checkpoint epoch {start_epoch} >= max_epochs {max_epochs}; nothing to train")
        return

    # Training Loop
    model.train()
    print("Start Training...")

    for epoch in range(start_epoch, max_epochs):
        epoch_loss = 0.0
        start_time = time.time()

        for i, (img, gt_score, gt_geo, gt_mask) in enumerate(train_loader):
            img = img.to(device)
            gt_score = gt_score.to(device)
            gt_geo = gt_geo.to(device)
            gt_mask = gt_mask.to(device)

            # Forward
            pred_score, pred_geo = model(img)

            # Compute Loss
            loss = criterion((pred_score, pred_geo), (gt_score, gt_geo, gt_mask))

            # Backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

            if (i + 1) % config["train"]["log_interval"] == 0:
                print(f"Epoch [{epoch + 1}/{max_epochs}] | "
                      f"Step [{i + 1}/{num_batches}] | "
                      f"Loss: {loss.item():.4f}")

        scheduler.step()
        avg_loss = epoch_loss / num_batches
        duration = time.time() - start_time
        print(f"End Epoch {epoch + 1}, Avg Loss: {avg_loss:.4f}, Time: {duration:.1f}s")

        # Save Checkpoint after every epoch: a per-epoch snapshot plus a rolling "latest"
        state = {
            "epoch": epoch + 1,
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "scheduler_state_dict": scheduler.state_dict(),
            "loss": avg_loss
        }
        save_checkpoint(state, checkpoint_dir, f"{config['experiment_name']}_epoch{epoch + 1}.pth")
        save_checkpoint(state, checkpoint_dir, f"{config['experiment_name']}_latest.pth")

In [10]:
# Run training; train() loads the saved checkpoint itself before entering the loop.
# In the logged run below this executed on CPU at roughly 40 minutes per epoch.
train()

Running on device: cpu


Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 159MB/s]


Loaded checkpoint from /kaggle/input/east-icdar2015/EAST_ResNet50_ICDAR2015_latest.pth, resuming from epoch 494
Start Training...
Epoch [495/500] | Step [10/62] | Loss: 0.6886
Epoch [495/500] | Step [20/62] | Loss: 0.7306
Epoch [495/500] | Step [30/62] | Loss: 0.6391
Epoch [495/500] | Step [40/62] | Loss: 1.6020
Epoch [495/500] | Step [50/62] | Loss: 1.1085
Epoch [495/500] | Step [60/62] | Loss: 0.6722
End Epoch 495, Avg Loss: 0.9306, Time: 2441.0s
Saved checkpoint to /kaggle/working/checkpoints/EAST_ResNet50_ICDAR2015/EAST_ResNet50_ICDAR2015_epoch495.pth
Saved checkpoint to /kaggle/working/checkpoints/EAST_ResNet50_ICDAR2015/EAST_ResNet50_ICDAR2015_latest.pth
Epoch [496/500] | Step [10/62] | Loss: 0.7195
Epoch [496/500] | Step [20/62] | Loss: 1.7484
Epoch [496/500] | Step [30/62] | Loss: 1.1467
Epoch [496/500] | Step [40/62] | Loss: 0.7571
Epoch [496/500] | Step [50/62] | Loss: 0.7701
Epoch [496/500] | Step [60/62] | Loss: 0.9661
End Epoch 496, Avg Loss: 0.9116, Time: 2392.6s
Saved ch

### Evaluation

In [12]:
def evaluate(
    config_path: str = "/kaggle/working/locr/text_detection/EAST/configs/east_config.yaml",
    test_dir: str = "/kaggle/working/Testing",
    checkpoint_path: str = "/kaggle/input/east-icdar2015/EAST_ResNet50_ICDAR2015_latest.pth",
) -> None:
    """Run EAST over the test folder, score the detections and save the results.

    For every test image the detections are scored against the ground truth,
    an annotated image ``res_<filename>`` is written, and the eight box
    coordinates of each detection (truncated to integers) are written one
    detection per line to ``res_<stem>.txt``. Everything is saved under
    ``outputs/<experiment_name>``. Precision, recall and H-mean are printed
    as a table at the end.

    Args:
        config_path: Path of the YAML configuration file.
        test_dir: Folder holding the test images and their ground-truth files.
        checkpoint_path: Checkpoint whose weights are loaded into the model.
    """
    # Load Config
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)

    device = torch.device(config["device"] if torch.cuda.is_available() else "cpu")
    print(f"Evaluating on device: {device}")

    # Load Model
    model = EAST(
        backbone=config["model"].get("backbone", "resnet50"),
        pretrained=False
    ).to(device)

    # Load Weights
    load_checkpoint(checkpoint_path, model, device=device)

    # Inference must not run in training mode (dropout / batch-norm statistics).
    # NOTE(review): detect() may already switch modes itself; calling eval() here
    # is idempotent either way.
    model.eval()

    # Prepare Dataset
    test_dataset = ICDAR2015Dataset(
        data_dir=test_dir,
        input_size=config["data"]["input_size"],
        is_train=False
    )

    # Setup Input/Output
    output_dir = f"outputs/{config['experiment_name']}"
    os.makedirs(output_dir, exist_ok=True)

    # Report the number of samples that are actually iterated below, rather
    # than an independent glob over the folder that could disagree with it.
    print(f"Found {len(test_dataset)} images for evaluation")

    # Initialize Evaluator
    eval_config = config.get("eval", {})
    iou_thresh = eval_config.get("iou_thresh", 0.5)
    conf_thresh = eval_config.get("conf_thresh", 0.8)
    nms_thresh = eval_config.get("nms_thresh", 0.2)

    evaluator = Evaluator(iou_thresh=iou_thresh)

    # Images that cannot be read are excluded from the metrics; remember them
    # so the exclusion is visible instead of silently shrinking the test set.
    unreadable = []

    # Inference loop
    for i in tqdm(range(len(test_dataset))):
        img_path = test_dataset.img_files[i]
        filename = os.path.basename(img_path)

        # Load Image (cv2.imread returns None instead of raising on failure)
        image = cv2.imread(img_path)
        if image is None:
            unreadable.append(img_path)
            continue

        # Detect
        boxes = detect(
            model,
            image,
            input_size=config["data"]["input_size"],
            device=device,
            conf_thresh=conf_thresh,
            nms_thresh=nms_thresh
        )

        # Load Ground Truth
        gt_polys, gt_tags = test_dataset.load_gt(img_path)
        evaluator.evaluate_image(gt_polys, gt_tags, boxes)

        # Draw & Save
        # Boxes output format: [x1, y1, x2, y2, x3, y3, x4, y4, score]
        res_path = os.path.join(output_dir, "res_" + filename)
        draw_boxes(image, boxes, output_path=res_path)

        # Save result text file for submitting to ICDAR challenges (if needed)
        txt_path = os.path.join(output_dir, "res_" + os.path.splitext(filename)[0] + ".txt")
        with open(txt_path, "w") as f:
            for box in boxes:
                # Format: x1, y1, x2, y2, x3, y3, x4, y4 (score is dropped)
                coords = [str(int(x)) for x in box[:8]]
                f.write(",".join(coords) + "\n")

    if unreadable:
        print(f"Warning: skipped {len(unreadable)} unreadable image(s), e.g. {unreadable[0]}")

    metrics = evaluator.get_metrics()

    results_table = [
        ["Metric", "Value"],
        ["Precision", f"{metrics['precision']}"],
        ["Recall", f"{metrics['recall']}"],
        ["H-mean (F1)", f"{metrics['hmean']}"],
    ]

    print("\n" + "=" * 40)
    print(f"Experiment: {config['experiment_name']}")
    print("=" * 40)
    print(tabulate(results_table, headers="firstrow", tablefmt="fancy_grid"))
    print(f"Visualization saved to: {output_dir}")
    print("=" * 40)

    print(f"Evaluation finished. Results saved in {output_dir}")

In [13]:
# Score the checkpoint on the test set; prints Precision / Recall / H-mean and
# writes per-image results under outputs/<experiment_name>.
evaluate()

Evaluating on device: cpu
Loaded checkpoint from /kaggle/input/east-icdar2015/EAST_ResNet50_ICDAR2015_latest.pth, resuming from epoch 500
Found 500 images for evaluation


100%|██████████| 500/500 [05:34<00:00,  1.49it/s]


Experiment: EAST_ResNet50_ICDAR2015
╒═══════════════╤═════════════╕
│ Metric        │       Value │
╞═══════════════╪═════════════╡
│ Precision     │    0.739959 │
├───────────────┼─────────────┤
│ Recall        │    0.692704 │
├───────────────┼─────────────┤
│ H-mean (F1)   │    0.768015 │
╘═══════════════╧═════════════╛
Visualization saved to: outputs/EAST_ResNet50_ICDAR2015
Evaluation finished. Results saved in outputs/EAST_ResNet50_ICDAR2015



