In [None]:
import os
import sys
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import cv2
import yaml
from IPython.display import display
import ipywidgets as widgets

# Make the parent directory importable so the project's `src` package resolves.
# The entry is relative to the notebook's working directory. The membership
# check keeps re-running this cell from stacking duplicate sys.path entries.
if '../' not in sys.path:
    sys.path.insert(0, '../')
from src.data.dataset import load_image_paths_from_file, get_label_path_from_image_path
from src.utils.visualization import draw_yolo_labels

# Default figure size for every plot in this notebook.
plt.rcParams['figure.figsize'] = (15, 10)

print("Libraries imported successfully!")

## Load Dataset

In [None]:
# Load dataset configuration
config_path = '../configs/dataset.yaml'

# Explicit encoding so the YAML is decoded the same way on every platform.
with open(config_path, 'r', encoding='utf-8') as f:
    dataset_config = yaml.safe_load(f)

data_root = Path(dataset_config['path'])
train_txt = data_root / dataset_config['train']

# Load image paths
if train_txt.exists():
    train_images = load_image_paths_from_file(str(train_txt), str(data_root))
else:
    # Say why the list is empty instead of silently reporting zero images.
    print(f"Warning: training list file not found: {train_txt}")
    train_images = []

print(f"Loaded {len(train_images)} training images")
print(f"Class names: {dataset_config['names']}")

## Interactive Image Browser

In [None]:
def visualize_image(idx):
    """Show one training image with its annotations and print the details.

    Reads the module-level ``train_images`` list and ``dataset_config`` mapping.
    Prints a message and returns early when ``idx`` is outside
    ``0 .. len(train_images) - 1`` or when ``cv2.imread`` returns ``None``.

    Args:
        idx: Zero-based position of the image in ``train_images``.

    Returns:
        None. The figure is shown with matplotlib and the details are printed.
    """
    # Reject negative indices as well: Python would otherwise wrap around to
    # the end of the list and the title would report a misleading position.
    if not 0 <= idx < len(train_images):
        print("Index out of range")
        return

    img_path = train_images[idx]
    label_path = get_label_path_from_image_path(img_path)

    # Load image
    img = cv2.imread(img_path)
    if img is None:
        print(f"Failed to load image: {img_path}")
        return

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Load labels: keep the first five fields of every line that has at least five.
    labels = []
    if os.path.exists(label_path):
        with open(label_path, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) >= 5:
                    labels.append([float(p) for p in parts[:5]])

    # Always hand downstream code a 2-D array, even when there are no labels.
    labels = np.array(labels) if labels else np.zeros((0, 5))

    class_names = dataset_config['names']

    # Draw labels
    if len(labels) > 0:
        img = draw_yolo_labels(img, labels, class_names)

    # Display
    plt.figure(figsize=(15, 10))
    plt.imshow(img)
    plt.title(f"Image {idx+1}/{len(train_images)} - {len(labels)} objects\n{Path(img_path).name}")
    plt.axis('off')
    plt.tight_layout()
    plt.show()

    # Print details
    print(f"Image: {img_path}")
    print(f"Size: {img.shape[1]} x {img.shape[0]}")
    print(f"Number of objects: {len(labels)}")
    if len(labels) > 0:
        print("\nAnnotations:")
        for i, label in enumerate(labels):
            class_id = int(label[0])
            try:
                # A negative id would silently index a list from its end,
                # so treat it as unknown instead of looking it up.
                class_name = class_names[class_id] if class_id >= 0 else None
            except (IndexError, KeyError):
                class_name = None
            if class_name is None:
                class_name = f"<unknown class {class_id}>"
            print(f"  {i+1}. Class: {class_name}, BBox: [{label[1]:.3f}, {label[2]:.3f}, {label[3]:.3f}, {label[4]:.3f}]")

# Build the slider-driven browser, or explain why there is nothing to browse.
if not train_images:
    print("No training images found. Please prepare your dataset first.")
else:
    # The slider covers every valid position in train_images.
    interact = widgets.interact(
        visualize_image,
        idx=widgets.IntSlider(
            value=0,
            min=0,
            max=len(train_images) - 1,
            step=1,
            description='Image Index:',
        ),
    )

## Verify Annotation Quality

In [None]:
def _first_label_issue(lines):
    """Return ``(category, message)`` for the first bad label line, else ``None``."""
    for line in lines:
        parts = line.split()
        if len(parts) < 5:
            return 'invalid_format', f"Expected 5 values, got {len(parts)}"

        try:
            class_id = int(parts[0])
            x, y, w, h = map(float, parts[1:5])
        except ValueError as e:
            return 'invalid_format', str(e)

        # The class id is an index into the class-name list, so it cannot be negative.
        if class_id < 0:
            return 'invalid_format', f"Negative class id: {class_id}"

        # Centre coordinates may touch 0; width and height must be strictly positive.
        if not (0 <= x <= 1 and 0 <= y <= 1 and 0 < w <= 1 and 0 < h <= 1):
            return 'out_of_bounds', "Values out of range [0, 1]"

    return None


def check_annotation_issues(image_paths, max_check=100):
    """Scan label files for common annotation problems.

    For each of the first ``max_check`` image paths the matching label file is
    looked up with ``get_label_path_from_image_path`` and classified. Only the
    first problem found in a file is recorded.

    Args:
        image_paths: Sequence of image paths to inspect.
        max_check: Upper bound on how many leading entries are inspected.

    Returns:
        dict with four lists:
            ``missing_labels``: image paths whose label file does not exist.
            ``empty_labels``: image paths whose label file has no non-blank line.
            ``invalid_format``: ``(image_path, message)`` tuples for unreadable
                files, lines with fewer than five fields, unparsable numbers,
                or negative class ids.
            ``out_of_bounds``: ``(image_path, message)`` tuples for boxes whose
                values fall outside the accepted [0, 1] range.
    """
    issues = {
        'missing_labels': [],
        'invalid_format': [],
        'out_of_bounds': [],
        'empty_labels': []
    }

    for img_path in image_paths[:max_check]:
        label_path = get_label_path_from_image_path(img_path)

        # Check if label file exists
        if not os.path.exists(label_path):
            issues['missing_labels'].append(img_path)
            continue

        # Deliberately broad: any failure to read the file is reported as a
        # format problem for that image rather than aborting the whole scan.
        try:
            with open(label_path, 'r') as f:
                lines = [line.strip() for line in f if line.strip()]
        except Exception as e:
            issues['invalid_format'].append((img_path, str(e)))
            continue

        if not lines:
            issues['empty_labels'].append(img_path)
            continue

        problem = _first_label_issue(lines)
        if problem is not None:
            category, message = problem
            issues[category].append((img_path, message))

    return issues

# Run the checker over the whole training list and print a summary report.
if not train_images:
    print("No images to check.")
else:
    print("Checking annotation quality...")
    issues = check_annotation_issues(train_images, max_check=len(train_images))

    divider = "=" * 60
    print("\n" + divider)
    print("Annotation Quality Report")
    print(divider)

    total_issues = sum(map(len, issues.values()))

    if total_issues:
        print(f"Found {total_issues} issues:\n")

        # Missing label files: list up to three examples, then summarise the rest.
        missing = issues['missing_labels']
        if missing:
            print(f"  • Missing labels: {len(missing)} images")
            for missing_path in missing[:3]:
                print(f"    - {missing_path}")
            if len(missing) > 3:
                print(f"    ... and {len(missing)-3} more")

        empty = issues['empty_labels']
        if empty:
            print(f"\n  • Empty label files: {len(empty)} images")

        # Format errors carry a message alongside the image path.
        malformed = issues['invalid_format']
        if malformed:
            print(f"\n  • Invalid format: {len(malformed)} images")
            for bad_path, reason in malformed[:3]:
                print(f"    - {bad_path}: {reason}")

        outside = issues['out_of_bounds']
        if outside:
            print(f"\n  • Out of bounds: {len(outside)} images")
    else:
        print("✓ No issues found! All annotations are valid.")

    print(divider)

## Grid Visualization

In [None]:
def visualize_grid(start_idx=0, n_images=9):
    """Show up to ``n_images`` consecutive training images in a three-column grid.

    Reads the module-level ``train_images`` list and ``dataset_config`` mapping.
    Cells that have no image (padding in the last row, positions outside
    ``train_images``) are left blank. Images that ``cv2.imread`` cannot load
    get a title saying so.

    Args:
        start_idx: Zero-based position in ``train_images`` of the first image.
        n_images: Maximum number of images to draw. Values <= 0 draw nothing.

    Returns:
        None. The figure is shown with matplotlib.
    """
    if n_images <= 0:
        print("Nothing to display.")
        return

    n_cols = 3
    n_rows = (n_images + n_cols - 1) // n_cols

    # squeeze=False always returns a 2-D array of axes, so a single-row grid
    # (including n_images == 1) is flattened the same way as any other.
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(18, 6*n_rows), squeeze=False)

    for i, ax in enumerate(axes.ravel()):
        idx = start_idx + i

        # Padding cells beyond n_images and positions outside the list stay
        # blank. The lower bound stops a negative start_idx from wrapping around.
        if i >= n_images or not 0 <= idx < len(train_images):
            ax.axis('off')
            continue

        img_path = train_images[idx]
        label_path = get_label_path_from_image_path(img_path)

        # Load image
        img = cv2.imread(img_path)
        if img is None:
            ax.set_title(f"Image {idx+1} (failed to load)")
            ax.axis('off')
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Load labels: keep the first five fields of every line that has at least five.
        labels = []
        if os.path.exists(label_path):
            with open(label_path, 'r') as f:
                for line in f:
                    parts = line.strip().split()
                    if len(parts) >= 5:
                        labels.append([float(p) for p in parts[:5]])

        labels = np.array(labels) if labels else np.zeros((0, 5))

        # Draw labels
        if len(labels) > 0:
            img = draw_yolo_labels(img, labels, dataset_config['names'])

        ax.imshow(img)
        ax.set_title(f"Image {idx+1} ({len(labels)} objects)")
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Preview the first nine images, if the dataset has any.
if not train_images:
    print("No images available.")
else:
    visualize_grid(start_idx=0, n_images=9)

## Export Annotated Samples

Save annotated images for documentation or review.

In [None]:
def export_annotated_samples(n_samples=10, output_dir='../outputs/visualizations', seed=None):
    """Save a random sample of annotated training images as JPEG files.

    Reads the module-level ``train_images`` list and ``dataset_config`` mapping.
    Files are written as ``sample_1.jpg``, ``sample_2.jpg``, ... inside
    ``output_dir``, which is created if needed. Images that cannot be read or
    written are reported and left out of the final count.

    Args:
        n_samples: Maximum number of images to export. Capped at the number of
            available images; values <= 0 export nothing.
        output_dir: Directory that receives the exported files.
        seed: Optional seed for the sample. ``None`` (the default) keeps
            drawing from NumPy's global random state.

    Returns:
        None. A summary line with the number of exported files is printed.
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Clamp to the available range so a negative or oversized request cannot raise.
    n_pick = max(0, min(n_samples, len(train_images)))
    if n_pick == 0:
        print(f"Exported 0 annotated samples to {output_path}")
        return

    # Both the legacy module and a Generator expose choice(a, size, replace=...).
    rng = np.random if seed is None else np.random.default_rng(seed)
    indices = rng.choice(len(train_images), n_pick, replace=False)

    exported = 0
    for idx in indices:
        img_path = train_images[idx]
        label_path = get_label_path_from_image_path(img_path)

        # Load image
        img = cv2.imread(img_path)
        if img is None:
            print(f"Failed to load image: {img_path}")
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Load labels: keep the first five fields of every line that has at least five.
        labels = []
        if os.path.exists(label_path):
            with open(label_path, 'r') as f:
                for line in f:
                    parts = line.strip().split()
                    if len(parts) >= 5:
                        labels.append([float(p) for p in parts[:5]])

        labels = np.array(labels) if labels else np.zeros((0, 5))

        # Draw labels
        if len(labels) > 0:
            img = draw_yolo_labels(img, labels, dataset_config['names'])

        # Save: number files by successful exports so the sequence has no gaps.
        save_path = output_path / f"sample_{exported+1}.jpg"
        # The image was converted to RGB above; convert back before writing.
        if cv2.imwrite(str(save_path), cv2.cvtColor(img, cv2.COLOR_RGB2BGR)):
            exported += 1
        else:
            print(f"Failed to write image: {save_path}")

    print(f"Exported {exported} annotated samples to {output_path}")

# Export ten annotated samples, if the dataset has any images.
if not train_images:
    print("No images available for export.")
else:
    export_annotated_samples(n_samples=10)