In [3]:
from ultralytics import YOLO
import os
import json
import numpy as np
from shapely.geometry import box, mapping
import cv2
import shutil

# Load model
model = YOLO('../runs/detect/yolov11s-imgsz512-webis-webseg-full/weights/best.pt')

# Define data paths
data_dir = '../data/bcubed-test'
image_dir = os.path.join(data_dir, 'images', 'val')  # Adjust as needed
output_dir = '../data/yolo_predictions_for_webisseg'
os.makedirs(output_dir, exist_ok=True)

# Process each image
for img_file in os.listdir(image_dir):
    if not img_file.endswith(('.jpg', '.png')):
        continue
        
    img_path = os.path.join(image_dir, img_file)
    print(img_path)
    img = cv2.imread(img_path)
    height, width = img.shape[:2]
    
    # Get page ID from filename
    page_id = os.path.splitext(img_file)[0]
    
    # Process YOLO model predictions
    results = model.predict(img_path, conf=0.2)
    
    # Convert results to the expected format
    predictions = []
    for resultbox in results[0].boxes:
        x1, y1, x2, y2 = resultbox.xyxy[0].cpu().numpy()
        class_id = int(resultbox.cls.item())
        
        # Create polygon from bounding box
        polygon = box(x1, y1, x2, y2)
        
        predictions.append({
            "polygon": mapping(polygon),
            "tagType": f"class_{class_id}"  # Adjust class naming as needed
        })
    
    # Get ground truth
    ground_truth = []
    ground_truth_path = os.path.join(data_dir, 'labels', 'val', f'{page_id}.txt')
    if os.path.exists(ground_truth_path):
        with open(ground_truth_path, 'r') as f:
            for line in f:
                parts = line.strip().split()
                class_id = int(parts[0])
                
                # YOLO format is class_id, x_center, y_center, w, h (normalized)
                x_center, y_center, w, h = map(float, parts[1:5])
                
                # Convert to absolute coordinates
                x1 = (x_center - w/2) * width
                y1 = (y_center - h/2) * height
                x2 = (x_center + w/2) * width
                y2 = (y_center + h/2) * height
                
                polygon = box(x1, y1, x2, y2)
                
                ground_truth.append({
                    "polygon": mapping(polygon),
                    "tagType": f"class_{class_id}"
                })
    
    # Create JSON structure with correct key names - using proper annotator IDs
    result_json = {
        "id": page_id,
        "height": height,
        "width": width,
        "segmentations": {
            "predicted": predictions,      # Changed from "predicted" to "model"
            "ground_truth": ground_truth
        }
    }
    
    # Save to JSON file
    with open(os.path.join(output_dir, f'{page_id}.json'), 'w') as f:
        json.dump(result_json, f, indent=2)

def prepare_folder_structure(predictions_dir, original_images_dir, output_base_dir, annotation_postfix='FC'):
    # Loop through all prediction files
    for json_file in os.listdir(predictions_dir):
        if not json_file.endswith('.json'):
            continue
            
        page_id = os.path.splitext(json_file)[0]
        
        # Create folder for each page
        page_dir = os.path.join(output_base_dir, page_id)
        os.makedirs(page_dir, exist_ok=True)
        
        # Copy the annotation file with proper naming
        src_json = os.path.join(predictions_dir, json_file)
        dst_json = os.path.join(page_dir, f'annotations_{annotation_postfix}.json')
        
        # Read and fix the JSON structure if needed
        with open(src_json, 'r') as f:
            json_data = json.load(f)
        
        # Save the fixed JSON
        with open(dst_json, 'w') as f:
            json.dump(json_data, f, indent=2)
        
        # Copy the corresponding image
        # First try with png extension
        src_img = os.path.join(original_images_dir, f"{page_id}.png")
        if not os.path.exists(src_img):
            # Try with jpg extension
            src_img = os.path.join(original_images_dir, f"{page_id}.jpg")
        
        if os.path.exists(src_img):
            dst_img = os.path.join(page_dir, "screenshot.png")
            # If source is jpg, convert to png
            if src_img.endswith('.jpg'):
                img = cv2.imread(src_img)
                cv2.imwrite(dst_img, img)
            else:
                shutil.copy(src_img, dst_img)
        else:
            print(f"Warning: No image found for {page_id}")

# Prepare folders with images
prepare_folder_structure(
    output_dir,
    '../data/bcubed-test/images/val',  # Path to your original images
    '../data/yolo_for_webisseg_metrics'
)

../data/bcubed-test/images/val/000009.png

image 1/1 /home/bruno/vt2-visual-webseg/src/../data/bcubed-test/images/val/000009.png: 512x160 20 webpage_segments, 27.8ms
Speed: 2.0ms preprocess, 27.8ms inference, 1.8ms postprocess per image at shape (1, 3, 512, 160)
../data/bcubed-test/images/val/000059.png

image 1/1 /home/bruno/vt2-visual-webseg/src/../data/bcubed-test/images/val/000059.png: 512x128 12 webpage_segments, 23.0ms
Speed: 2.3ms preprocess, 23.0ms inference, 1.6ms postprocess per image at shape (1, 3, 512, 128)
../data/bcubed-test/images/val/000053.png

image 1/1 /home/bruno/vt2-visual-webseg/src/../data/bcubed-test/images/val/000053.png: 512x448 7 webpage_segments, 71.8ms
Speed: 3.8ms preprocess, 71.8ms inference, 2.0ms postprocess per image at shape (1, 3, 512, 448)
../data/bcubed-test/images/val/000044.png

image 1/1 /home/bruno/vt2-visual-webseg/src/../data/bcubed-test/images/val/000044.png: 512x480 6 webpage_segments, 72.9ms
Speed: 4.2ms preprocess, 72.9ms inference, 2.0m

In [4]:
!cd ./bcubed-f1/ && python main.py --folder_path ../../data/yolo_for_webisseg_metrics --file_postfix 'FC' --operation 'prediction' --pixel_based


    Processing folder: ../../data/yolo_for_webisseg_metrics
    With options: 
        File Postfix 			 FC
        Operation 			 prediction
        Calculations per Class 		 False
        Scoring Measure 		 F1
        Atomic Element node based 	 False
        Atomic Element pixel based 	 True

  0%|                                                    | 0/23 [00:00<?, ?it/s]['predicted', 'ground_truth']
Agreement matrices
[array([[1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.],
       [1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.],
       [0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        1., 0.],
       [0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 1., 

In [None]:
import torch
import numpy as np
import os
import json
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm
from pathlib import Path
import shutil
from websam.segment_anything.build_sam import build_sam_vit_b
from shapely.geometry import box, mapping
import torch.nn.functional as F

# Define paths
DATA_DIR = "../data/webis-webseg-20-sam-full"
VAL_DIR = os.path.join(DATA_DIR, "val")
OUTPUT_DIR = "../data/websam_predictions_for_webisseg"
METRICS_DIR = "../data/websam_for_webisseg_metrics"
CHECKPOINT_PATH = "../models/websam/websam/run_20250225_163753/models/checkpoint_epoch_20.pth"
BASE_MODEL_PATH = "../models/websam/websam/sam_vit_b_01ec64.pth"

# Create output directories
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(METRICS_DIR, exist_ok=True)

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load the model
def load_websam_model():
    print("Loading WebSAM model...")
    
    # Initialize the base SAM model
    model = build_sam_vit_b(
        checkpoint=BASE_MODEL_PATH,
        strict_weights=False, 
        freeze_encoder=True
    )
    
    # Load checkpoint
    checkpoint = torch.load(CHECKPOINT_PATH, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.to(device)
    model.eval()
    
    print("Model loaded successfully!")
    return model

# Function to predict with WebSAM
def predict_mask(model, image, boxes):
    """Generate predictions using WebSAM model"""
    with torch.no_grad():
        # Prepare input in the format expected by WebSAM
        batched_input = [{
            "image": image,
            "boxes": boxes,
            "original_size": (1024, 1024)
        }]
        
        # Get prediction from model
        outputs = model(batched_input, multimask_output=False)
        
        # Process output
        pred_mask = outputs[0]["low_res_logits"][0]  # Shape [1, 256, 256]
        pred_mask = torch.sigmoid(pred_mask) > 0.5  # Convert to binary mask
        
        # Resize to original image size
        pred_mask = F.interpolate(
            pred_mask.float().unsqueeze(0),  # Add batch dimension
            size=(1024, 1024),  # Original image size
            mode='bilinear'
        ).squeeze(0).squeeze(0)  # Remove batch and channel dimensions
        
        return pred_mask.cpu().numpy()

# Prepare mask for visualization
def prepare_mask_overlay(image, mask, alpha=0.5):
    """Create a visualization of mask overlaid on image"""
    # Create a colored mask (red in this case)
    colored_mask = np.zeros_like(image)
    colored_mask[mask > 0] = [255, 0, 0]  # Red color for mask
    
    # Blend the image and colored mask
    blend = cv2.addWeighted(image, 1, colored_mask, alpha, 0)
    return blend

# Process and save predictions
def process_websam_predictions():
    print("Loading model...")
    model = load_websam_model()
    
    # Get list of images in validation set
    val_images_dir = os.path.join(VAL_DIR, "images")
    val_boxes_dir = os.path.join(VAL_DIR, "boxes")
    image_files = sorted([f for f in os.listdir(val_images_dir) if f.endswith('.png')])
    image_files = image_files[:1000]  # Limit to first 10 images for testing
    print(f"Found {len(image_files)} validation images")
    
    # Process each image
    for img_file in tqdm(image_files, desc="Generating predictions"):
        # Get image and box paths
        img_path = os.path.join(val_images_dir, img_file)
        box_path = os.path.join(val_boxes_dir, img_file.replace(".png", ".npy"))
        
        # Get page ID from filename
        page_id = os.path.splitext(img_file)[0]
        
        # Load image and boxes
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
        image_tensor = torch.tensor(image).permute(2, 0, 1).to(device)
        
        # Load bounding boxes
        boxes = np.load(box_path)
        boxes_tensor = torch.tensor(boxes, dtype=torch.float32).to(device)
        
        # Generate prediction
        pred_mask = predict_mask(model, image_tensor, boxes_tensor)
        
        # Convert mask to polygon format (using bounding boxes of connected components)
        contours, _ = cv2.findContours(
            (pred_mask * 255).astype(np.uint8), 
            cv2.RETR_EXTERNAL, 
            cv2.CHAIN_APPROX_SIMPLE
        )
        
        # Convert predictions to expected format
        predictions = []
        for contour in contours:
            # Get bounding box of contour
            x, y, w, h = cv2.boundingRect(contour)
            
            # Create polygon from bounding box
            polygon = box(x, y, x+w, y+h)
            
            # Add to predictions
            predictions.append({
                "polygon": mapping(polygon),
                "tagType": "webpage_segment"  # Use generic class
            })
        
        # Create ground truth data from boxes
        ground_truth = []
        for bbox in boxes:
            x1, y1, x2, y2 = bbox
            polygon = box(x1, y1, x2, y2)
            
            ground_truth.append({
                "polygon": mapping(polygon),
                "tagType": "webpage_segment"
            })
        
        # Create JSON structure with correct key names for BCubed evaluation
        result_json = {
            "id": page_id,
            "height": 1024,
            "width": 1024,
            "segmentations": {
                "predicted": predictions,
                "ground_truth": ground_truth
            }
        }
        
        # Save to JSON file
        with open(os.path.join(OUTPUT_DIR, f'{page_id}.json'), 'w') as f:
            json.dump(result_json, f, indent=2)
        
        # Optional: Save visualization
        if False:  # Set to True if you want visualizations
            vis_dir = os.path.join(OUTPUT_DIR, "visualizations")
            os.makedirs(vis_dir, exist_ok=True)
            
            # Convert image back to 0-255 range and BGR for OpenCV
            vis_image = (image * 255).astype(np.uint8)
            overlay = prepare_mask_overlay(vis_image, pred_mask)
            
            # Save visualization
            cv2.imwrite(os.path.join(vis_dir, f'{page_id}_prediction.png'), 
                        cv2.cvtColor(overlay, cv2.COLOR_RGB2BGR))
    
    print(f"Saved predictions to {OUTPUT_DIR}")
    return

# Prepare folder structure for BCubed F1 evaluation
def prepare_folder_structure(predictions_dir, original_images_dir, output_base_dir, annotation_postfix='FC'):
    print("Preparing folder structure for BCubed F1 evaluation...")
    
    # Loop through all prediction files
    json_files = [f for f in os.listdir(predictions_dir) if f.endswith('.json')]
    
    for json_file in tqdm(json_files, desc="Preparing folders"):
        page_id = os.path.splitext(json_file)[0]
        
        # Create folder for each page
        page_dir = os.path.join(output_base_dir, page_id)
        os.makedirs(page_dir, exist_ok=True)
        
        # Copy the annotation file with proper naming
        src_json = os.path.join(predictions_dir, json_file)
        dst_json = os.path.join(page_dir, f'annotations_{annotation_postfix}.json')
        
        # Read and copy the JSON
        with open(src_json, 'r') as f:
            json_data = json.load(f)
        
        with open(dst_json, 'w') as f:
            json.dump(json_data, f, indent=2)
        
        # Copy the corresponding image
        src_img = os.path.join(original_images_dir, f"{page_id}.png")
        if os.path.exists(src_img):
            dst_img = os.path.join(page_dir, "screenshot.png")
            shutil.copy(src_img, dst_img)
        else:
            print(f"Warning: No image found for {page_id}")
    
    print(f"Prepared {len(json_files)} folders for BCubed F1 evaluation in {output_base_dir}")
    return

# Main execution
print("Starting WebSAM prediction and evaluation process...")

# Step 1: Generate predictions using WebSAM
process_websam_predictions()

# Step 2: Prepare folder structure for BCubed F1 evaluation
prepare_folder_structure(
    OUTPUT_DIR,
    os.path.join(VAL_DIR, "images"),
    METRICS_DIR
)

print("All done! You can now run the BCubed F1 evaluation with:")
print(f"cd ./src/bcubed-f1/ && python main.py --folder_path {METRICS_DIR} --file_postfix 'FC' --operation 'prediction' --pixel_based")

Using device: cuda
Starting WebSAM prediction and evaluation process...
Loading model...
Loading WebSAM model...
../models/websam/websam/sam_vit_b_01ec64.pth


  checkpoint = torch.load(CHECKPOINT_PATH, map_location=device)


Model loaded successfully!
Found 10 validation images


Generating predictions: 100%|██████████| 10/10 [00:04<00:00,  2.18it/s]


Saved predictions to ../data/websam_predictions_for_webisseg
Preparing folder structure for BCubed F1 evaluation...


Preparing folders: 100%|██████████| 10/10 [00:00<00:00, 1050.05it/s]

Prepared 10 folders for BCubed F1 evaluation in ../data/websam_for_webisseg_metrics
All done! You can now run the BCubed F1 evaluation with:
cd ./src/bcubed-f1/ && python main.py --folder_path ../data/websam_for_webisseg_metrics --file_postfix 'FC' --operation 'prediction' --pixel_based





In [9]:
!cd ./bcubed-f1/ && python main.py --folder_path ../../data/websam_for_webisseg_metrics --file_postfix 'FC' --operation 'prediction' --pixel_based


    Processing folder: ../../data/websam_for_webisseg_metrics
    With options: 
        File Postfix 			 FC
        Operation 			 prediction
        Calculations per Class 		 False
        Scoring Measure 		 F1
        Atomic Element node based 	 False
        Atomic Element pixel based 	 True

  0%|                                                    | 0/10 [00:00<?, ?it/s]['predicted', 'ground_truth']
Agreement matrices
[array([[1., 0., 1., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 1., ..., 0., 0., 1.],
       ...,
       [0., 0., 0., ..., 1., 1., 0.],
       [0., 0., 0., ..., 1., 1., 0.],
       [1., 0., 1., ..., 0., 0., 1.]], shape=(60, 60)),
 array([[1., 1., 1., ..., 1., 1., 0.],
       [1., 1., 1., ..., 1., 1., 0.],
       [1., 1., 2., ..., 2., 2., 1.],
       ...,
       [1., 1., 2., ..., 3., 2., 1.],
       [1., 1., 2., ..., 2., 3., 1.],
       [0., 0., 1., ..., 1., 1., 1.]], shape=(60, 60))]
BCubed precision matrix
array([[1.        , 0.42154594]