In [4]:
# 📦 Imports
import os
import json
import xml.etree.ElementTree as ET
from pathlib import Path
from tqdm import tqdm
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
from PIL import Image
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# 💡 Parse XML Annotations
def parse_structure_annotations(xml_path):
    tree = ET.parse(xml_path)
    root = tree.getroot()
    boxes, labels = [], []
    for obj in root.findall('object'):
        label = obj.find('name').text
        bbox = obj.find('bndbox')
        box = [
            int(float(bbox.find('xmin').text)),
            int(float(bbox.find('ymin').text)),
            int(float(bbox.find('xmax').text)),
            int(float(bbox.find('ymax').text)),
        ]
        boxes.append(box)
        labels.append(label)
    return boxes, labels

# 📂 Dataset Class
class PubTablesTSRDataset(Dataset):
    def __init__(self, img_dir, ann_dir, label_map, transforms=None):
        self.img_dir = Path(img_dir)
        self.ann_dir = Path(ann_dir)
        self.transforms = transforms
        self.label_map = label_map
        self.img_files = sorted(self.img_dir.glob("*.jpg"))

    def __len__(self):
        return len(self.img_files)

    def __getitem__(self, idx):
        img_path = self.img_files[idx]
        ann_path = self.ann_dir / (img_path.stem + ".xml")
        image = Image.open(img_path).convert("RGB")
        boxes, labels = parse_structure_annotations(ann_path)
        labels = [self.label_map[lbl] for lbl in labels]

        boxes = torch.tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)
        target = {"boxes": boxes, "labels": labels, "image_id": torch.tensor([idx])}

        if self.transforms:
            image = self.transforms(image)
        return image, target

# 🏷️ Label Map
label_map = {
    "table": 0,
    "table row": 1,
    "table column": 2,
    "column header": 3,
    "projected row header": 4,
    "spanning cell": 5,
    "no cell": 6,
    "table spanning cell": 7,
    "table column header": 8,
}

# 🔁 Transforms
transform = T.Compose([
    T.Resize((512, 512)),
    T.ToTensor(),
])

# 📁 Paths
base_path = Path("archive")  # ✅ Replace this if your root folder is different
image_path = base_path / "images" 
train_dataset = PubTablesTSRDataset(image_path, base_path / "train", label_map, transforms=transform)
val_dataset   = PubTablesTSRDataset(image_path, base_path / "val", label_map, transforms=transform)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
val_loader   = DataLoader(val_dataset, batch_size=2, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

# ⚙️ Load Model
model = fasterrcnn_resnet50_fpn(pretrained=True)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, len(label_map))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# 🧠 Optimizer
optimizer = optim.AdamW(model.parameters(), lr=1e-4)

# 🔁 Train One Epoch
def train_one_epoch(model, loader, optimizer, device):
    model.train()
    total_loss = 0.0
    for images, targets in tqdm(loader, desc="Training"):
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        total_loss += losses.item()
    return total_loss / len(loader)

# 📏 Evaluation
@torch.no_grad()
def evaluate_model(model, loader, device):
    model.eval()
    for images, targets in tqdm(loader, desc="Evaluating"):
        images = [img.to(device) for img in images]
        outputs = model(images)
        # Add evaluation metrics here if needed (e.g., mAP)
    print("✅ Evaluation completed.")

# 🚀 Training Loop
os.makedirs("checkpoints", exist_ok=True)
epochs = 10
best_loss = float("inf")

for epoch in range(epochs):
    print(f"\n📘 Epoch {epoch+1}/{epochs}")
    loss = train_one_epoch(model, train_loader, optimizer, device)
    print(f"📉 Training Loss: {loss:.4f}")

    # Save checkpoint
    torch.save(model.state_dict(), f"checkpoints/tsr_model_epoch_{epoch+1}.pth")
    print(f"💾 Saved checkpoint: checkpoints/tsr_model_epoch_{epoch+1}.pth")

    # Evaluate
    evaluate_model(model, val_loader, device)

    # Save best
    if loss < best_loss:
        best_loss = loss
        torch.save(model.state_dict(), "best_tsr_model.pth")
        print("✅ Saved best model (best_tsr_model.pth)")

# 🏁 Save Final Model
torch.save(model.state_dict(), "final_tsr_model.pth")
print("✅ Saved final model (final_tsr_model.pth)")



📘 Epoch 1/10


Training:   0%|          | 0/236911 [00:00<?, ?it/s]


FileNotFoundError: [Errno 2] No such file or directory: 'archive\\train\\PMC5050567_table_1.xml'