In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import json

# PACE Data Statistics

In [None]:
# Object categories. The position of a name in this list is the integer
# class id used to look it up when label files are parsed.
class_names = [
    'toy_car',
    'can',
    'snack_box',
    'ramen_box',
]

In [None]:
# Dataset path
dataset_path = "/home/data/pace/pace_v3/train"
images_path = os.path.join(dataset_path, "images")
labels_path = os.path.join(dataset_path, "labels")


def _describe(values):
	"""Return min/max/mean/std/median of `values` (all NaN when `values` is empty)."""
	if len(values) == 0:
		# numpy's min/max raise on empty input; report NaN instead of crashing.
		return {key: float('nan') for key in ('min', 'max', 'mean', 'std', 'median')}
	return {
		'min': np.min(values),
		'max': np.max(values),
		'mean': np.mean(values),
		'std': np.std(values),
		'median': np.median(values)
	}


# Load YOLO annotations: one dict per label line, plus a per-class counter.
annotations = []
class_counts = {name: 0 for name in class_names}

# sorted() keeps the annotation order reproducible; os.listdir order is arbitrary.
for label_file in sorted(os.listdir(labels_path)):
	# Only per-image .txt label files are parsed; skip anything else in the folder.
	if not label_file.lower().endswith('.txt'):
		continue
	filepath = os.path.join(labels_path, label_file)
	# splitext removes only the extension (str.replace would hit '.txt' anywhere).
	image_stem = os.path.splitext(label_file)[0]
	with open(filepath, 'r') as f:
		lines = f.readlines()

	for line_number, line in enumerate(lines, start=1):
		parts = line.strip().split()
		if not parts:
			continue  # blank line, nothing to parse
		if len(parts) != 5:
			raise ValueError(
				f"{filepath}:{line_number}: expected 5 fields "
				f"(class x_center y_center width height), got {len(parts)}"
			)
		class_id, x_center, y_center, width, height = map(float, parts)
		class_id = int(class_id)
		# Reject negative ids explicitly; they would otherwise index from the end.
		if not 0 <= class_id < len(class_names):
			raise IndexError(
				f"{filepath}:{line_number}: class id {class_id} is outside "
				f"0..{len(class_names) - 1}"
			)
		class_name = class_names[class_id]

		annotations.append({
			'image': image_stem,
			'class_id': class_id,
			'class_name': class_name,
			'x_center': x_center,
			'y_center': y_center,
			'width': width,
			'height': height
		})

		class_counts[class_name] += 1

# Basic dataset statistics
num_images = len([f for f in os.listdir(images_path) if f.lower().endswith(('.jpg', '.jpeg', '.png'))])
num_annotations = len(annotations)
# Guard against an empty image folder instead of raising ZeroDivisionError.
avg_annotations_per_image = num_annotations / num_images if num_images else 0.0

print("=== Dataset Statistics ===")
print(f"Total Images: {num_images}")
print(f"Total Annotations: {num_annotations}")
print(f"Average Annotations per Image: {avg_annotations_per_image:.2f}")
print(f"Classes: {len(class_names)}")
print("\n=== Class Distribution ===")
for class_name, count in class_counts.items():
	share = (count / num_annotations) * 100 if num_annotations else 0.0
	print(f"{class_name}: {count} ({share:.1f}%)")

# Per-annotation geometry, in the same (normalized) units as the label values.
bbox_widths = [ann['width'] for ann in annotations]
bbox_heights = [ann['height'] for ann in annotations]
bbox_areas = [ann['width'] * ann['height'] for ann in annotations]
# A zero-height box has no defined aspect ratio, so it is left out here.
aspect_ratios = [ann['width'] / ann['height'] for ann in annotations if ann['height'] > 0]

width_stats = _describe(bbox_widths)
height_stats = _describe(bbox_heights)

In [None]:
# Report the summary statistics computed in the loading cell, then draw a
# 2x3 overview figure of the class, size, shape and position distributions.
print("\n=== Bounding Box Statistics (Normalized) ===")
print("Width Statistics:")
for key, value in width_stats.items():
	print(f"  {key}: {value:.4f}")

print("Height Statistics:")
for key, value in height_stats.items():
	print(f"  {key}: {value:.4f}")

print(f"Average Aspect Ratio: {np.mean(aspect_ratios):.2f}")
print(f"Average Area: {np.mean(bbox_areas):.4f}")

# Create visualizations
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
fig.suptitle('Dataset Analysis Visualizations', fontsize=16)

# 1. Class distribution bar chart
# Dict views are materialized as lists so older Matplotlib releases accept them.
axes[0, 0].bar(
	list(class_counts.keys()),
	list(class_counts.values()),
	color=['skyblue', 'lightcoral', 'lightgreen', 'gold']
)
axes[0, 0].set_title('Class Distribution')
axes[0, 0].set_ylabel('Number of Annotations')
axes[0, 0].tick_params(axis='x', rotation=45)

# 2. Bounding box width distribution
axes[0, 1].hist(bbox_widths, bins=30, alpha=0.7, color='skyblue', edgecolor='black')
axes[0, 1].set_title('Bounding Box Width Distribution')
axes[0, 1].set_xlabel('Normalized Width')
axes[0, 1].set_ylabel('Frequency')

# 3. Bounding box height distribution
axes[0, 2].hist(bbox_heights, bins=30, alpha=0.7, color='lightcoral', edgecolor='black')
axes[0, 2].set_title('Bounding Box Height Distribution')
axes[0, 2].set_xlabel('Normalized Height')
axes[0, 2].set_ylabel('Frequency')

# 4. Aspect ratio distribution
axes[1, 0].hist(aspect_ratios, bins=30, alpha=0.7, color='lightgreen', edgecolor='black')
axes[1, 0].set_title('Aspect Ratio Distribution')
axes[1, 0].set_xlabel('Width/Height Ratio')
axes[1, 0].set_ylabel('Frequency')

# 5. Bounding box area distribution
axes[1, 1].hist(bbox_areas, bins=30, alpha=0.7, color='gold', edgecolor='black')
axes[1, 1].set_title('Bounding Box Area Distribution')
axes[1, 1].set_xlabel('Normalized Area')
axes[1, 1].set_ylabel('Frequency')

# 6. Spatial distribution of object centers
x_centers = [ann['x_center'] for ann in annotations]
y_centers = [ann['y_center'] for ann in annotations]
axes[1, 2].scatter(x_centers, y_centers, alpha=0.5, s=10)
axes[1, 2].set_title('Spatial Distribution of Object Centers')
axes[1, 2].set_xlabel('X Center (Normalized)')
axes[1, 2].set_ylabel('Y Center (Normalized)')
axes[1, 2].set_xlim(0, 1)
# YOLO label coordinates are measured from the image's top-left corner, so the
# y-axis is flipped (0 at the top) to make the scatter match the image layout.
axes[1, 2].set_ylim(1, 0)

plt.tight_layout()
plt.show()

In [None]:
# Class-specific statistics
# Bucket the annotations by class once; every step below reuses these buckets
# instead of re-scanning the full annotation list for each class.
annotations_by_class = {name: [] for name in class_names}
for ann in annotations:
	bucket = annotations_by_class.get(ann['class_name'])
	if bucket is not None:
		bucket.append(ann)

print("\n=== Class-Specific Statistics ===")
for class_name in class_names:
	class_annotations = annotations_by_class[class_name]
	if not class_annotations:
		continue  # classes without annotations are not reported

	class_widths = [ann['width'] for ann in class_annotations]
	class_heights = [ann['height'] for ann in class_annotations]
	class_areas = [w * h for w, h in zip(class_widths, class_heights)]
	# Zero-height boxes have no defined aspect ratio; skip them rather than crash.
	class_ratios = [w / h for w, h in zip(class_widths, class_heights) if h > 0]
	avg_ratio = np.mean(class_ratios) if class_ratios else float('nan')

	print(f"\n{class_name}:")
	print(f"  Count: {len(class_annotations)}")
	print(f"  Avg Width: {np.mean(class_widths):.4f}")
	print(f"  Avg Height: {np.mean(class_heights):.4f}")
	print(f"  Avg Area: {np.mean(class_areas):.4f}")
	print(f"  Avg Aspect Ratio: {avg_ratio:.2f}")

# Create class-specific size comparison
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Only classes that actually have annotations get a box in the plots.
class_labels_for_plot = [name for name in class_names if annotations_by_class[name]]
class_width_data = [
	[ann['width'] for ann in annotations_by_class[name]]
	for name in class_labels_for_plot
]
class_height_data = [
	[ann['height'] for ann in annotations_by_class[name]]
	for name in class_labels_for_plot
]

# Box plots for widths (left) and heights (right) by class
for ax, data, dimension in ((ax1, class_width_data, 'Width'), (ax2, class_height_data, 'Height')):
	if data:
		ax.boxplot(data)
		# boxplot's `labels=` keyword is deprecated since Matplotlib 3.9 (renamed
		# `tick_labels`); labelling the default 1..N box positions by hand works
		# on both older and newer releases.
		ax.set_xticks(range(1, len(class_labels_for_plot) + 1))
		ax.set_xticklabels(class_labels_for_plot)
	ax.set_title(f'Bounding Box {dimension} by Class')
	ax.set_ylabel(f'Normalized {dimension}')
	ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

# Summary statistics
print("\n=== Summary ===")
print(f"Dataset contains {num_images} images with {num_annotations} object annotations across {len(class_names)} classes.")
print(f"Objects have an average size of {np.mean(bbox_areas):.4f} normalized area with aspect ratio {np.mean(aspect_ratios):.2f}.")
print(f"Class distribution ranges from {min(class_counts.values())} to {max(class_counts.values())} instances per class.")

In [None]:
def _box_corners(box):
    """Turn a center-format box mapping into (x_min, y_min, x_max, y_max)."""
    half_w = box['width'] / 2
    half_h = box['height'] / 2
    cx, cy = box['x_center'], box['y_center']
    return cx - half_w, cy - half_h, cx + half_w, cy + half_h


def calculate_bbox_overlap(box1, box2):
    """Return the Intersection over Union (IoU) of two center-format boxes.

    Each box is a mapping with 'x_center', 'y_center', 'width' and 'height'
    entries. The result is 0.0 when the boxes do not intersect or when their
    union has no positive area.
    """
    a_x0, a_y0, a_x1, a_y1 = _box_corners(box1)
    b_x0, b_y0, b_x1, b_y1 = _box_corners(box2)

    # Edges of the candidate intersection rectangle
    left = max(a_x0, b_x0)
    top = max(a_y0, b_y0)
    right = min(a_x1, b_x1)
    bottom = min(a_y1, b_y1)

    # An inverted rectangle means the boxes are disjoint along that axis.
    if bottom < top or right < left:
        return 0.0

    shared_area = (right - left) * (bottom - top)

    # Union = sum of both areas minus the part counted twice
    total_area = box1['width'] * box1['height'] + box2['width'] * box2['height'] - shared_area

    if total_area > 0:
        return shared_area / total_area
    return 0.0

def analyze_occlusion():
    """Analyze pairwise bounding-box overlap between objects sharing an image.

    Reads the module-level ``annotations`` list and ``class_names`` list and
    scores every unordered pair of boxes within the same image with
    ``calculate_bbox_overlap``. A pair counts as overlapping when its IoU is
    greater than zero.

    Returns:
        dict with the keys
        - 'total_pairs': number of box pairs examined
        - 'overlapping_pairs': number of pairs with IoU > 0
        - 'ious': IoU of every examined pair (zeros included)
        - 'images_with_occlusion': images containing at least one overlapping pair
        - 'total_images_with_multiple_objects': images with two or more boxes
        - 'occlusion_by_class': symmetric nested dict
          ``[class_a][class_b] -> {'count', 'total_pairs'}``; each pair is
          counted exactly once per cell, including same-class pairs
        - 'overlap_percentages': for each overlapping pair, the overlapped
          share of each box's own area, stored as fractions (not multiplied
          by 100)
    """
    # Group annotations by image
    image_annotations = {}
    for ann in annotations:
        image_annotations.setdefault(ann['image'], []).append(ann)

    # Class-by-class counters, initialised for every ordered class combination
    occlusion_by_class = {
        class1: {class2: {'count': 0, 'total_pairs': 0} for class2 in class_names}
        for class1 in class_names
    }

    occlusion_stats = {
        'total_pairs': 0,
        'overlapping_pairs': 0,
        'ious': [],
        'images_with_occlusion': 0,
        'total_images_with_multiple_objects': 0,
        'occlusion_by_class': occlusion_by_class,
        'overlap_percentages': []
    }

    # Analyze each image
    for image_anns in image_annotations.values():
        if len(image_anns) < 2:
            continue

        occlusion_stats['total_images_with_multiple_objects'] += 1
        image_has_occlusion = False

        # Check all unordered pairs of objects in the image
        for i, box1 in enumerate(image_anns):
            for box2 in image_anns[i + 1:]:
                iou = calculate_bbox_overlap(box1, box2)
                occlusion_stats['total_pairs'] += 1
                occlusion_stats['ious'].append(iou)

                # Track class-specific occlusion. The matrix is kept symmetric,
                # but for a same-class pair [c][c] is one single cell, so it is
                # updated once; updating "both sides" would double its counts.
                class1 = box1['class_name']
                class2 = box2['class_name']
                cells = [occlusion_by_class[class1][class2]]
                if class1 != class2:
                    cells.append(occlusion_by_class[class2][class1])

                for cell in cells:
                    cell['total_pairs'] += 1

                if iou > 0:
                    occlusion_stats['overlapping_pairs'] += 1
                    image_has_occlusion = True
                    for cell in cells:
                        cell['count'] += 1

                    # Recover the intersection area from the IoU:
                    # iou = I / (A1 + A2 - I)  =>  I = iou * (A1 + A2) / (1 + iou)
                    area1 = box1['width'] * box1['height']
                    area2 = box2['width'] * box2['height']
                    overlap_area = iou * (area1 + area2) / (1 + iou)
                    # Share of each box covered by the other (iou > 0 implies
                    # both areas are positive, so the divisions are safe).
                    occlusion_stats['overlap_percentages'].extend(
                        [overlap_area / area1, overlap_area / area2]
                    )

        if image_has_occlusion:
            occlusion_stats['images_with_occlusion'] += 1

    return occlusion_stats

# Analyze occlusion
print("Analyzing occlusion patterns...")
occlusion_stats = analyze_occlusion()

# IoUs of the pairs that actually overlap; computed once and reused by the
# printed report, the second histogram and the closing summary.
overlapping_ious = [iou for iou in occlusion_stats['ious'] if iou > 0]

# Print occlusion statistics
print("\n=== Occlusion Analysis ===")
print(f"Images with multiple objects: {occlusion_stats['total_images_with_multiple_objects']}")
print(f"Images with occlusion: {occlusion_stats['images_with_occlusion']}")
total_occlusion_rate = (occlusion_stats['images_with_occlusion'] / occlusion_stats['total_images_with_multiple_objects'] * 100) if occlusion_stats['total_images_with_multiple_objects'] > 0 else 0
print(f"Occlusion rate: {total_occlusion_rate:.1f}% of multi-object images")

print(f"\nTotal object pairs analyzed: {occlusion_stats['total_pairs']}")
print(f"Overlapping pairs: {occlusion_stats['overlapping_pairs']}")
total_overlap_rate = (occlusion_stats['overlapping_pairs'] / occlusion_stats['total_pairs'] * 100) if occlusion_stats['total_pairs'] > 0 else 0
print(f"Pair-wise overlap rate: {total_overlap_rate:.1f}%")

if occlusion_stats['ious']:
    print(f"Average IoU: {np.mean(occlusion_stats['ious']):.4f}")
    print(f"Max IoU: {np.max(occlusion_stats['ious']):.4f}")
    if overlapping_ious:
        print(f"Average IoU (overlapping pairs only): {np.mean(overlapping_ious):.4f}")

# Create visualizations
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('Occlusion Analysis Visualizations', fontsize=16)

# 1. IoU distribution
axes[0, 0].hist(occlusion_stats['ious'], bins=50, alpha=0.7, color='lightblue', edgecolor='black')
axes[0, 0].set_title('IoU Distribution (All Pairs)')
axes[0, 0].set_xlabel('Intersection over Union (IoU)')
axes[0, 0].set_ylabel('Frequency')
axes[0, 0].axvline(x=0.1, color='red', linestyle='--', alpha=0.7, label='IoU = 0.1')
axes[0, 0].axvline(x=0.5, color='orange', linestyle='--', alpha=0.7, label='IoU = 0.5')
axes[0, 0].legend()

# 2. Overlapping IoU distribution (excluding zero)
if overlapping_ious:
    axes[0, 1].hist(overlapping_ious, bins=30, alpha=0.7, color='coral', edgecolor='black')
    axes[0, 1].set_title('IoU Distribution (Overlapping Pairs Only)')
    axes[0, 1].set_xlabel('Intersection over Union (IoU)')
    axes[0, 1].set_ylabel('Frequency')
else:
    axes[0, 1].text(0.5, 0.5, 'No overlapping pairs found', ha='center', va='center', transform=axes[0, 1].transAxes)
    axes[0, 1].set_title('IoU Distribution (Overlapping Pairs Only)')

# 3. Overlap percentage distribution
# The stored values are fractions of each box's area; scale them to 0-100 so
# the data agrees with the "Percentage" axis label.
overlap_percent_values = [fraction * 100 for fraction in occlusion_stats['overlap_percentages']]
if overlap_percent_values:
    axes[1, 0].hist(overlap_percent_values, bins=30, alpha=0.7, color='lightgreen', edgecolor='black')
    axes[1, 0].set_title('Object Overlap Percentage Distribution')
    axes[1, 0].set_xlabel('Percentage of Object Area Overlapped')
    axes[1, 0].set_ylabel('Frequency')
else:
    axes[1, 0].text(0.5, 0.5, 'No overlaps found', ha='center', va='center', transform=axes[1, 0].transAxes)
    axes[1, 0].set_title('Object Overlap Percentage Distribution')

# 4. Class-specific occlusion heatmap
# Cell (i, j) = overlapping pairs / all pairs for that class combination;
# combinations that never share an image stay at 0.
class_occlusion_matrix = np.zeros((len(class_names), len(class_names)))
for i, class1 in enumerate(class_names):
    for j, class2 in enumerate(class_names):
        pair_stats = occlusion_stats['occlusion_by_class'][class1][class2]
        if pair_stats['total_pairs'] > 0:
            class_occlusion_matrix[i, j] = pair_stats['count'] / pair_stats['total_pairs']

im = axes[1, 1].imshow(class_occlusion_matrix, cmap='YlOrRd', aspect='auto')
axes[1, 1].set_title('Class-Specific Occlusion Rates')
axes[1, 1].set_xticks(range(len(class_names)))
axes[1, 1].set_yticks(range(len(class_names)))
axes[1, 1].set_xticklabels(class_names, rotation=45)
axes[1, 1].set_yticklabels(class_names)

# Add text annotations to heatmap (imshow puts column j on x and row i on y)
for i in range(len(class_names)):
    for j in range(len(class_names)):
        axes[1, 1].text(j, i, f'{class_occlusion_matrix[i, j]:.2f}',
                        ha="center", va="center", color="black", fontsize=8)

plt.colorbar(im, ax=axes[1, 1])
plt.tight_layout()
plt.show()

# Summary for paper
print("\n=== Occlusion Summary for Paper ===")
if occlusion_stats['total_images_with_multiple_objects'] > 0:
    print(f"Occlusion analysis reveals that {total_occlusion_rate:.1f}% of multi-object images contain overlapping objects.")
    if overlapping_ious:
        print(f"Among overlapping object pairs, the average IoU is {np.mean(overlapping_ious):.3f}.")
    print(f"Overall, {total_overlap_rate:.1f}% of all object pairs in the dataset exhibit some degree of overlap.")
else:
    print("No images with multiple objects found for occlusion analysis.")