In [1]:
import os

def check_yolo_annotation_type(labels_dir: str):
    """
    Classify YOLO ``.txt`` label files as normalized, absolute, or mixed.

    Walks ``labels_dir`` recursively. For every ``.txt`` file (extension
    matched case-insensitively) the four coordinate values of each 5-token
    line are collected, and the file is then classified:

    * normalized -- every collected value is <= 1
    * absolute   -- every collected value is > 1
    * mixed      -- anything else (a warning with the file path is printed)

    Lines that do not have exactly 5 tokens, or whose coordinates are not
    numeric, are reported and ignored. Files without any non-blank line and
    files left with no usable line are counted separately, so the summary
    categories add up to the number of files scanned.

    Args:
        labels_dir: Directory that contains the label files.

    Returns:
        None. All results are printed.
    """
    total_files = 0
    absolute_count = 0
    normalized_count = 0
    mixed_count = 0
    empty_count = 0      # files with no non-blank lines
    unusable_count = 0   # files whose lines were all skipped

    for root, _, files in os.walk(labels_dir):
        for f in files:
            if not f.lower().endswith(".txt"):
                continue

            total_files += 1
            path = os.path.join(root, f)

            # Iterate the handle directly; blank lines are dropped here.
            with open(path, "r") as fh:
                lines = [text for text in (raw.strip() for raw in fh) if text]

            # Empty label file: count it instead of dropping it silently.
            if not lines:
                empty_count += 1
                continue

            values = []
            for line in lines:
                parts = line.split()
                if len(parts) != 5:
                    print(f"⚠️ Skipping malformed line in {f}: {line}")
                    continue
                try:
                    # Convert the whole line first so a bad token does not
                    # leave a partially-added line in `values`.
                    nums = [float(token) for token in parts[1:]]  # skip class_id
                except ValueError:
                    print(f"⚠️ Non-numeric value in {f}: {line}")
                    continue
                values.extend(nums)

            # Every line was skipped: nothing to classify, but still report it.
            if not values:
                unusable_count += 1
                continue

            has_over_one = any(v > 1 for v in values)
            has_under_one = any(v <= 1 for v in values)

            if has_over_one and not has_under_one:
                absolute_count += 1
            elif has_under_one and not has_over_one:
                normalized_count += 1
            else:
                mixed_count += 1
                print(f"⚠️ Mixed values (some >1 and some <=1) in {path}")

    print("\n📊 YOLO Annotation Summary")
    print(f"Total files scanned: {total_files}")
    print(f"Normalized (0–1): {normalized_count}")
    print(f"Absolute (pixels): {absolute_count}")
    print(f"Mixed: {mixed_count}")
    print(f"Empty (no annotations): {empty_count}")
    print(f"No valid bbox lines: {unusable_count}")


In [None]:
# Run the annotation-type check on the train labels of the container.v1i.yolov11 dataset.
check_yolo_annotation_type("/home/emma/facultad/pps/datasets/containers/raw/container.v1i.yolov11/train/labels")




📊 YOLO Annotation Summary
Total files scanned: 1424
Normalized (0–1): 1424
Absolute (pixels): 0
Mixed: 0


In [3]:
# Run the normalized-vs-absolute annotation check on the train split of
# every container dataset listed below.
base_path = "/home/emma/facultad/pps/datasets/containers/raw"

datasets_to_check = [
    'Cargo Containers.v1i.yolov11',
    'Container Detection.v7i.yolov11',
    'container.v1i.yolov11',
    'container.v5i.yolov11',
    'Shipping Containers.v4i.yolov11',
    'shipping-container.v1i.yolov11',
]

for dataset in datasets_to_check:
    labels_path = os.path.join(base_path, dataset, "train", "labels")

    # Banner so each dataset's report is easy to find in the cell output.
    print(f"\n{'=' * 60}")
    print(f"🔍 Checking dataset: {dataset}")
    print('=' * 60)

    # Guard clause: report a missing labels folder and move on.
    if not os.path.exists(labels_path):
        print(f"❌ Labels directory not found: {labels_path}")
        continue

    check_yolo_annotation_type(labels_path)



🔍 Checking dataset: Cargo Containers.v1i.yolov11

📊 YOLO Annotation Summary
Total files scanned: 450
Normalized (0–1): 303
Absolute (pixels): 0
Mixed: 0

🔍 Checking dataset: Container Detection.v7i.yolov11
⚠️ Skipping malformed line in 01271fd08504ad62bf5d8f73c55f229ea55ac829_jpg.rf.305fd7ac3dec5a2270c0e3a9aba69bc9.txt: 0 0.9903846156249999 0.578125 0.9927884609375001 0.55889423125 0.997596153125 0.12860576875000002 0.9146634609375001 0.096153846875 0.6646634609375 0.03125 0.6262019234375 0.03125 0.049278846875 0.341346153125 0.03125 0.359375 0.0168269234375 0.5540865390625 0.025240384375 0.58173076875 0.3954326921875 0.57451923125 0.44831730781249995 0.5865384609375 0.4699519234375 0.5865384609375 0.47235576874999996 0.5793269234375 0.47956730781249995 0.5865384609375 0.48677884687499995 0.5793269234375 0.5108173078125 0.5865384609375 0.669471153125 0.5865384609375 0.6959134609375 0.5697115390625 0.743990384375 0.5697115390625 0.8581730765625 0.5853365390625 0.921875 0.58173076875 0.

In [None]:
def verify_labels_are_bbox_only(labels_dir: str):
    """
    Check that every label line in a directory is a bounding box (5 tokens).

    Only the ``.txt`` files directly inside ``labels_dir`` are inspected
    (no recursion); the extension is matched case-insensitively. Each
    non-blank line is split on whitespace: exactly 5 tokens counts as a
    bounding-box line, any other token count is counted as a segmentation
    line. A file with no non-blank lines is reported as empty.

    A summary is printed, listing up to 10 files that contain segmentation
    lines and up to 5 empty files.

    Args:
        labels_dir: Path to the labels directory.

    Returns:
        True if no segmentation line was found, otherwise False. False is
        also returned when ``labels_dir`` is not an existing directory.
    """
    # isdir (rather than exists) so that a path pointing at a regular file
    # takes this branch instead of crashing in os.listdir below.
    if not os.path.isdir(labels_dir):
        print(f"❌ Labels directory not found: {labels_dir}")
        return False

    total_files = 0
    total_lines = 0
    bbox_lines = 0
    seg_lines = 0
    files_with_seg = []
    files_with_bbox_only = []
    files_with_empty = []

    print(f"🔍 Checking labels in: {labels_dir}")
    print("-" * 80)

    for filename in sorted(os.listdir(labels_dir)):
        if not filename.lower().endswith('.txt'):
            continue

        total_files += 1
        label_file = os.path.join(labels_dir, filename)

        # Keep only non-blank lines, so a whitespace-only file counts as empty.
        with open(label_file, 'r') as f:
            lines = [text for text in (raw.strip() for raw in f) if text]

        if not lines:
            files_with_empty.append(filename)
            continue

        file_has_seg = False
        for line in lines:
            total_lines += 1
            if len(line.split()) == 5:
                bbox_lines += 1
            else:
                seg_lines += 1
                file_has_seg = True

        # `lines` is non-empty here, so a file without segmentation lines
        # necessarily consists of bounding-box lines only.
        if file_has_seg:
            files_with_seg.append(filename)
        else:
            files_with_bbox_only.append(filename)

    # Print summary
    print("\n📊 Summary:")
    print(f"   Total label files: {total_files}")
    print(f"   Total annotation lines: {total_lines}")
    print(f"   Bounding box lines: {bbox_lines}")
    print(f"   Segmentation lines: {seg_lines}")
    print(f"   Empty files: {len(files_with_empty)}")
    print(f"\n✅ Files with only bounding boxes: {len(files_with_bbox_only)}")
    print(f"❌ Files with segmentation labels: {len(files_with_seg)}")

    if files_with_seg:
        print(f"\n⚠️  Files containing segmentation labels ({len(files_with_seg)}):")
        for name in files_with_seg[:10]:  # Show first 10
            print(f"   - {name}")
        if len(files_with_seg) > 10:
            print(f"   ... and {len(files_with_seg) - 10} more")

    if files_with_empty:
        print(f"\n⚠️  Empty files ({len(files_with_empty)}):")
        for name in files_with_empty[:5]:  # Show first 5
            print(f"   - {name}")
        if len(files_with_empty) > 5:
            print(f"   ... and {len(files_with_empty) - 5} more")

    is_clean = seg_lines == 0
    if is_clean:
        print("\n✅ All labels are bounding box type!")
    else:
        print(f"\n❌ Found {seg_lines} segmentation labels out of {total_lines} total labels")

    return is_clean

In [7]:
# Check the test-split labels of the validation dataset for non-bounding-box lines;
# as the last expression in the cell, the returned bool is shown as the cell result.
verify_labels_are_bbox_only("/home/emma/facultad/pps/validacion/containers/dataset/test/labels")

🔍 Checking labels in: /home/emma/facultad/pps/validacion/containers/dataset/test/labels
--------------------------------------------------------------------------------

📊 Summary:
   Total label files: 246
   Total annotation lines: 1540
   Bounding box lines: 1458
   Segmentation lines: 82
   Empty files: 0

✅ Files with only bounding boxes: 188
❌ Files with segmentation labels: 58

⚠️  Files containing segmentation labels (58):
   - -_jpg.rf.b34a5bdb85a4fda92495b9c8718423c6.txt
   - -_png.rf.60a95e5bc934b296ff8e43d8bf64df90.txt
   - 046fe09d14a478a4048da69f34ff05cf590706f3_jpg.rf.58bb590282c7ef9186d19476ffee25ea.txt
   - 172958f9cbf5856badac84d9ad9264327c335861_jpg.rf.32092d910f6f004d77faddd054b3d4c0.txt
   - 172958f9cbf5856badac84d9ad9264327c335861_jpg.rf.6459978695a95f180add873bd31b7c4a.txt
   - 3ebd5bb006922f23933e8ee9d19f4cc182adb556_jpg.rf.3de0fbd91950579abbf6ccd9f2afcf6d.txt
   - 5ffe220420ad3a1cc157dfe04b2d881fa97bb863_jpg.rf.c83190ca545f38e9397b4fb2883763e5.txt
   - BEAU5167

False