Table Transformer Training Notebook (with Split + Training)

In [1]:
import sys
sys.path.append('table-transformer/detr')
import sys
sys.path.append('table-transformer/detr')
import os
import json
import random
import shutil
import argparse
from pathlib import Path
from PIL import Image
import torch

from engine import train_one_epoch, evaluate
from models import build_model
from datasets.coco import build as build_coco_dataset
import util.misc as utils



  from .autonotebook import tqdm as notebook_tqdm


STEP 1: Prepare Dataset from Mixed Inputs

In [2]:
# --- STEP 1: Prepare Dataset from Mixed Inputs ---
import xml.etree.ElementTree as ET
images_dir = Path("PubTables-1M-Detection_Images_Test")
annotations_dir = Path("PubTables-1M-Detection_Annotations_Test")
coco_out = Path("coco_split")
(coco_out / "train2017").mkdir(parents=True, exist_ok=True)
(coco_out / "val2017").mkdir(exist_ok=True)
(coco_out / "annotations").mkdir(exist_ok=True)

def parse_voc_xml(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    file_name = root.find('filename').text
    size = root.find('size')
    width = int(float(size.find('width').text))
    height = int(float(size.find('height').text))
    anns = []
    for obj in root.findall('object'):
        bbox = obj.find('bndbox')
        xmin = float(bbox.find('xmin').text)
        ymin = float(bbox.find('ymin').text)
        xmax = float(bbox.find('xmax').text)
        ymax = float(bbox.find('ymax').text)
        anns.append({
            'bbox': [xmin, ymin, xmax - xmin, ymax - ymin],
            'category_id': 1
        })
    return file_name, width, height, anns

def split_list(lst, frac=0.8):
    random.shuffle(lst)
    k = int(frac * len(lst))
    return lst[:k], lst[k:]

xml_files = list(annotations_dir.glob("*.xml"))
train_xmls, val_xmls = split_list(xml_files)

def build_coco_from_xmls(xml_list, split_name, start_img=0, start_ann=0):
    coco = {"images": [], "annotations": [], "categories": [{"id": 1, "name": "table"}]}
    img_id, ann_id = start_img, start_ann
    for xml_file in xml_list:
        file_name, w, h, anns = parse_voc_xml(xml_file)
        img_path = images_dir / file_name
        if not img_path.exists():
            continue
        coco["images"].append({"id": img_id, "file_name": file_name, "width": w, "height": h})
        for a in anns:
            x, y, bw, bh = a["bbox"]
            coco["annotations"].append({"id": ann_id, "image_id": img_id, "category_id": 1, "bbox": [x, y, bw, bh], "area": bw*bh, "iscrowd": 0})
            ann_id += 1
        shutil.copy2(img_path, coco_out / split_name / file_name)
        img_id += 1
    return coco, img_id, ann_id

train_coco, next_img, next_ann = build_coco_from_xmls(train_xmls, "train2017")
val_coco, _, _ = build_coco_from_xmls(val_xmls, "val2017", start_img=next_img, start_ann=next_ann)

with open(coco_out / "annotations" / "instances_train2017.json", "w") as f:
    json.dump(train_coco, f)
with open(coco_out / "annotations" / "instances_val2017.json", "w") as f:
    json.dump(val_coco, f)



In [3]:
# --- STEP 2: Training Starts Here ---
import os
import json
import torch
import argparse
from table_transformer.src.models import build_model
from table_transformer.src.datasets import build_coco_dataset
from table_transformer.src.engine import train_one_epoch, evaluate
import table_transformer.src.util.misc as utils

# --- 📁 Config path ---
config_path = "table-transformer/src/detection_config.json"
with open(config_path, 'r') as f:
    config_dict = json.load(f)

config = argparse.Namespace(**config_dict)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data_path = str(coco_out)

args_for_dataset = argparse.Namespace(**{**config_dict, 'coco_path': data_path, 'masks': False})

# --- 🧠 Build model ---
model, criterion, postprocessors = build_model(config)
model.to(device)

# --- 📊 Dataset loaders ---
dataset_train = build_coco_dataset(image_set='train', args=args_for_dataset)
dataset_val = build_coco_dataset(image_set='val', args=args_for_dataset)

sampler_train = torch.utils.data.RandomSampler(dataset_train)
sampler_val = torch.utils.data.SequentialSampler(dataset_val)
batch_size = getattr(config, 'batch_size', 2)

data_loader_train = torch.utils.data.DataLoader(
    dataset_train, batch_size=batch_size, sampler=sampler_train, drop_last=True, num_workers=2, collate_fn=utils.collate_fn)
data_loader_val = torch.utils.data.DataLoader(
    dataset_val, batch_size=batch_size, sampler=sampler_val, drop_last=False, num_workers=2, collate_fn=utils.collate_fn)

# --- 🔧 Optimizer & Scheduler ---
param_dicts = [
    {"params": [p for n, p in model.named_parameters() if "backbone" not in n and p.requires_grad]},
    {"params": [p for n, p in model.named_parameters() if "backbone" in n and p.requires_grad], "lr": getattr(config, 'lr_backbone', 1e-5)}
]
optimizer = torch.optim.AdamW(param_dicts, lr=getattr(config, 'lr', 1e-4), weight_decay=getattr(config, 'weight_decay', 1e-4))
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=getattr(config, 'lr_drop', 40), gamma=0.1)

output_dir = os.path.join(data_path, "output")
os.makedirs(output_dir, exist_ok=True)
num_epochs = getattr(config, 'epochs', 2)

# --- 📈 Training Loop with Dynamic Config Reload ---
for epoch in range(num_epochs):
    # 🔄 Reload config each epoch
    with open(config_path, 'r') as f:
        updated_config_dict = json.load(f)
    config = argparse.Namespace(**updated_config_dict)

    # 💡 Allow early stopping or extending
    updated_epochs = getattr(config, 'epochs', num_epochs)
    if epoch >= updated_epochs:
        print(f"🛑 Stopping early at epoch {epoch} (new config epochs: {updated_epochs})")
        break

    # 🔄 Update optimizer learning rates dynamically
    optimizer.param_groups[0]['lr'] = getattr(config, 'lr', 1e-4)
    optimizer.param_groups[1]['lr'] = getattr(config, 'lr_backbone', 1e-5)
    lr_scheduler.step_size = getattr(config, 'lr_drop', 40)

    print(f"\n📘 Epoch {epoch+1}/{updated_epochs} — LR: {optimizer.param_groups[0]['lr']}")

    # --- 🔁 Training ---
    train_stats = train_one_epoch(model, criterion, data_loader_train, optimizer, device, epoch, print_freq=10)
    lr_scheduler.step()

    # --- ✅ Validation ---
    base_ds_val = dataset_val.coco
    val_stats, coco_evaluator = evaluate(
        model, criterion, postprocessors, data_loader_val, base_ds_val, device, output_dir
    )

    val_loss = val_stats.get('loss', float('inf'))
    print(f"📊 Validation loss: {val_loss:.4f}")

    if coco_evaluator and coco_evaluator.coco_eval:
        stats = coco_evaluator.coco_eval['bbox'].stats
        print(f"🧪 mAP@0.5: {stats[1]:.3f}, AP@[0.5:0.95]: {stats[0]:.3f}")

    if 'class_error' in val_stats:
        print(f"🎯 Validation Accuracy: {100 - val_stats['class_error']:.2f}%")

    # --- 💾 Save checkpoint ---
    checkpoint = {
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'epoch': epoch + 1,
        'config': updated_config_dict
    }
    torch.save(checkpoint, os.path.join(output_dir, f"checkpoint_epoch_{epoch+1}.pth"))
    torch.save(model.state_dict(), os.path.join(output_dir, "model_final.pth"))



ModuleNotFoundError: No module named 'table_transformer'

COCO Evaluation Function

In [None]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

def evaluate_predictions(pred_json_path, gt_json_path):
    """
    Evaluate predictions in COCO format against ground truth annotations.
    """
    coco_gt = COCO(gt_json_path)
    coco_dt = coco_gt.loadRes(pred_json_path)
    coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

# Example usage:
evaluate_predictions(
    "output/predictions.json",
    "coco_split/annotations/instances_val2017.json"
)


loading annotations into memory...
Done (t=0.32s)
creating index...
index created!
Loading and preparing results...


FileNotFoundError: [Errno 2] No such file or directory: 'output/predictions.json'