# Check Test Images and Annotations
This notebook counts the images in the test folder and the annotated images recorded in the JSON label file, and lists the images that have no smoke labels.

In [17]:
import os
import json
from collections import defaultdict

In [18]:
# Dataset locations, given relative to the parent of the current working
# directory. Both paths share the same dataset root, so build it once.
_dataset_root = os.path.join('..', 'smoke-segmentation.v5i.coco-segmentation')
test_dir = os.path.join(_dataset_root, 'test')  # directory of the test split
json_path = os.path.join(_dataset_root, 'image_level_labels.json')  # label file

In [19]:
# Load the annotation records from the JSON label file into `data`.
# If the file is missing or is not valid JSON, report the problem and fall
# back to an empty list so the cells below still run (over zero records).
try:
    # JSON text is UTF-8 (RFC 8259); pass the encoding explicitly so the read
    # does not depend on the platform's locale default.
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
except FileNotFoundError:
    print(f"Error: JSON file not found at {json_path}")
    data = []
except json.JSONDecodeError:
    print(f"Error: Could not decode JSON from {json_path}")
    data = []
else:
    # Only reached when the file was opened and parsed successfully.
    print(f"Successfully loaded {len(data)} records from {json_path}")

Successfully loaded 900 records from ../smoke-segmentation.v5i.coco-segmentation/image_level_labels.json


In [20]:
# Tally the label combinations found in the annotation records.
#   category_counts: category name -> number of records in that category
#   category_files:  category name -> file names of the records in it
category_counts = defaultdict(int)
category_files = defaultdict(list)

# Label strings as they are looked up in each record's 'labels' entry
label_0 = 'no-smoke'
label_1 = 'low-opacity-smoke'
label_2 = 'high-opacity-smoke'

for item in data:
    # A missing 'labels' key and an explicit JSON null both mean "no labels".
    # set(None) would raise TypeError, so substitute an empty list first.
    labels = set(item.get('labels') or [])
    file_name = item.get('file_name', 'UnknownFile')  # placeholder when absent

    has_0 = label_0 in labels
    has_1 = label_1 in labels
    has_2 = label_2 in labels

    # The smoke labels (1 and 2) are tested before 'no-smoke', so a record
    # carrying 'no-smoke' together with a smoke label is filed under the smoke
    # category. The 'only_*' names therefore assume 'no-smoke' never coexists
    # with labels 1 or 2 in the dataset; adjust the priority here if it can.
    if has_1 and has_2:
        category = 'only_1_and_2'
    elif has_1:
        category = 'only_1'
    elif has_2:
        category = 'only_2'
    elif has_0:
        category = 'only_0'
    else:
        # None of the three known labels is present (or the list is empty)
        category = 'other_or_no_labels'

    # Single bookkeeping step for whichever category was selected
    category_counts[category] += 1
    category_files[category].append(file_name)

print(category_counts)

defaultdict(<class 'int'>, {'only_1_and_2': 90, 'only_1': 582, 'only_2': 221, 'only_0': 7})
