In [1]:
import os
import glob
import shutil
from pathlib import Path

In [5]:
def _annotation_kind(num_fields):
    """Classify a YOLO label line by its number of whitespace-separated fields.

    Returns "bbox" for exactly 5 fields (class + x_center y_center width height),
    "polygon" for an odd count of at least 7 (class + three or more x/y pairs),
    and None for any other count.
    """
    if num_fields == 5:
        return "bbox"
    if num_fields >= 7 and num_fields % 2 == 1:
        return "polygon"
    return None


def yolo_format(annotations_dir, output_dir=None):
    """
    Rewrite every YOLO label line so its class index is 0 ("Tree-crown").

    Each ``*.txt`` file directly inside ``annotations_dir`` is read, the first
    field (class id) of every valid line is replaced with ``0`` and the
    remaining fields are re-joined with single spaces. Blank lines are dropped.
    Lines that are neither a bounding box (5 fields) nor a polygon (odd field
    count >= 7) are reported and skipped. Progress and a final summary are
    printed to stdout.

    Args:
        annotations_dir: Directory containing original YOLO label files (.txt)
        output_dir: Optional output directory (created if missing). If None or
            empty, the original files are overwritten in place.

    Returns:
        None. A file that cannot be read or written is reported and skipped;
        it does not contribute to any of the summary counters.
    """

    # Create output directory if specified
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
        print(f"üìÅ Output directory: {output_dir}")
    else:
        print("‚ö†Ô∏è  Warning: Will overwrite original files")

    # glob returns files in arbitrary order; sort so that the processing order
    # (and therefore which file supplies the format sample) is reproducible.
    annotation_files = sorted(glob.glob(os.path.join(annotations_dir, '*.txt')))

    if not annotation_files:
        print(f"‚ùå No .txt files found in {annotations_dir}")
        return

    print(f"\nüîÑ Processing {len(annotation_files)} annotation files...")
    print("="*70)

    files_processed = 0
    converted = {"bbox": 0, "polygon": 0}

    for ann_file in annotation_files:
        file_name = os.path.basename(ann_file)
        new_lines = []
        # Per-file tallies: merged into the global totals only after the file
        # has been written, so a failed write cannot inflate the summary.
        in_file = {"bbox": 0, "polygon": 0}

        try:
            with open(ann_file, 'r', encoding='utf-8') as f:
                lines = f.readlines()

            for line in lines:
                parts = line.split()

                # Skip empty / whitespace-only lines
                if not parts:
                    continue

                kind = _annotation_kind(len(parts))
                if kind is None:
                    print(f"‚ö†Ô∏è  Skipping invalid line in {file_name}")
                    print(f"    Line has {len(parts)} parts (expected 5 for bbox or odd number for polygon)")
                    continue

                # Keep every coordinate untouched; only the class id changes.
                new_lines.append(f"0 {' '.join(parts[1:])}\n")
                in_file[kind] += 1

            output_file = os.path.join(output_dir, file_name) if output_dir else ann_file

            with open(output_file, 'w', encoding='utf-8') as f:
                f.writelines(new_lines)

        except (OSError, UnicodeError) as e:
            # Best effort: report the unreadable/unwritable file and move on.
            print(f"‚ùå Error processing {file_name}: {e}")
            continue

        annotations_in_file = len(new_lines)
        for kind, count in in_file.items():
            converted[kind] += count
        files_processed += 1

        # Show a sample taken from the first successfully processed file
        if files_processed == 1 and annotations_in_file > 0:
            num_coords = len(new_lines[0].split()) - 1
            print(f"üìù Detected format: {'Polygon Segmentation' if num_coords > 4 else 'Bounding Box'}")
            print(f"   First annotation has {num_coords} coordinate values ({num_coords//2} points)")
            print(f"   Sample: {new_lines[0][:80]}..." if len(new_lines[0]) > 80 else f"   Sample: {new_lines[0].strip()}")
            print()

        print(f"‚úì {file_name}: {annotations_in_file} annotations ‚Üí Tree-crown (class 0)")

    print("="*70)
    print("\n‚úÖ Conversion Complete!")
    print(f"   Files processed: {files_processed}/{len(annotation_files)}")
    print(f"   Total annotations converted: {converted['bbox'] + converted['polygon']}")
    if converted["bbox"] > 0:
        print(f"   - Bounding boxes: {converted['bbox']}")
    if converted["polygon"] > 0:
        print(f"   - Polygon segmentations: {converted['polygon']}")
    print("   All classes are now: 0 (Tree-crown)")


def verify_conversion(directory, num_samples=5):
    """
    Print the first few annotations of a sample of label files for inspection.

    Up to ``num_samples`` ``*.txt`` files (in sorted name order) are read. For
    each file the first three non-blank lines are shown with their class id,
    number of coordinate values and a display string truncated to 100
    characters. Blank or whitespace-only lines are ignored, so a file that
    contains nothing else is reported as empty instead of raising IndexError.

    Args:
        directory: Directory containing converted label files
        num_samples: Number of sample files to display
    """
    print(f"\n{'='*70}")
    print("VERIFICATION - Sample Annotations")
    print(f"{'='*70}")

    # Sorted so the same files are sampled on every run (glob order is arbitrary).
    annotation_files = sorted(glob.glob(os.path.join(directory, '*.txt')))[:num_samples]

    for ann_file in annotation_files:
        print(f"\nüìÑ File: {os.path.basename(ann_file)}")
        print("-"*70)

        with open(ann_file, 'r', encoding='utf-8') as f:
            stripped = (line.strip() for line in f)
            annotations = [line for line in stripped if line]

        if not annotations:
            print("  (empty file)")
            continue

        # Show first 3 annotations
        for i, annotation in enumerate(annotations[:3], 1):
            parts = annotation.split()
            num_coords = len(parts) - 1

            # Truncate long lines for display
            display_line = annotation
            if len(display_line) > 100:
                display_line = display_line[:97] + "..."

            print(f"  Annotation {i}:")
            print(f"    Class: {parts[0]} (Tree-crown)")
            print(f"    Coordinates: {num_coords} values ({num_coords//2} points)")
            print(f"    Data: {display_line}")

        if len(annotations) > 3:
            print(f"  ... ({len(annotations) - 3} more annotations)")


def analyze_labels(directory):
    """
    Print class distribution and format statistics for YOLO label files.

    Every ``*.txt`` file directly inside ``directory`` is scanned. A line with
    at least 3 whitespace-separated fields counts as one annotation: exactly 4
    coordinate values is tallied as a bounding box, anything else as a polygon.
    Shorter and blank lines are ignored. Point statistics (average, min, max)
    are computed over polygon annotations only.

    Args:
        directory: Directory containing label files
    """
    print(f"\n{'='*70}")
    print("LABEL ANALYSIS")
    print(f"{'='*70}")

    annotation_files = glob.glob(os.path.join(directory, '*.txt'))

    class_counts = {}
    total_annotations = 0
    polygon_points = []
    format_types = {'bbox': 0, 'polygon': 0}

    for ann_file in annotation_files:
        with open(ann_file, 'r', encoding='utf-8') as f:
            for line in f:
                parts = line.split()

                # At minimum: class + 1 coordinate pair
                if len(parts) < 3:
                    continue

                class_id = parts[0]
                num_coords = len(parts) - 1

                class_counts[class_id] = class_counts.get(class_id, 0) + 1
                total_annotations += 1

                if num_coords == 4:
                    format_types['bbox'] += 1
                else:
                    format_types['polygon'] += 1
                    # Only polygons feed the per-polygon point statistics;
                    # bounding boxes would otherwise skew them with "2 points".
                    polygon_points.append(num_coords // 2)

    def class_order(item):
        # Numeric ids sort by value (2 before 10); anything else sorts after
        # them, alphabetically.
        class_id = item[0]
        if class_id.isdecimal():
            return (0, int(class_id), class_id)
        return (1, 0, class_id)

    print(f"\nüìä Class Distribution:")
    print("-"*70)
    for class_id, count in sorted(class_counts.items(), key=class_order):
        percentage = (count / total_annotations * 100) if total_annotations > 0 else 0
        print(f"  Class {class_id}: {count:,} annotations ({percentage:.2f}%)")

    print(f"\nüìê Format Information:")
    print("-"*70)
    if format_types['bbox'] > 0:
        print(f"  Bounding boxes: {format_types['bbox']:,}")
    if format_types['polygon'] > 0:
        print(f"  Polygon segmentations: {format_types['polygon']:,}")
        avg_points = sum(polygon_points) / len(polygon_points)
        print(f"    - Average points per polygon: {avg_points:.1f}")
        print(f"    - Points range: {min(polygon_points)} to {max(polygon_points)}")

    print(f"\n  Total: {total_annotations:,} annotations in {len(annotation_files)} files")

In [8]:
if __name__ == "__main__":
    # Where the original labels live and where the single-class copies go.
    annotations_dir = "/content/drive/MyDrive/AGRI/TreeCrown_Segmentation/dataset/valid/labels"
    output_dir = "/content/drive/MyDrive/AGRI/TreeCrown_Segmentation/dataset/valid/formatLabels"

    # The workflow as (heading, action) pairs, executed strictly in order:
    # inspect the originals, convert, spot-check, then inspect the result.
    pipeline = (
        ("\nüîç STEP 1: Analyzing ORIGINAL labels...",
         lambda: analyze_labels(annotations_dir)),
        ("\n\nüîÑ STEP 2: Converting labels...",
         lambda: yolo_format(annotations_dir, output_dir)),
        ("\n\nüîç STEP 3: Verifying conversion...",
         lambda: verify_conversion(output_dir, num_samples=3)),
        ("\n\nüìä STEP 4: Analyzing CONVERTED labels...",
         lambda: analyze_labels(output_dir)),
    )
    for heading, action in pipeline:
        print(heading)
        action()

    # Closing banner with the input and output locations.
    rule = "=" * 70
    print(f"\n{rule}")
    print("‚úÖ ALL DONE!")
    print(rule)
    print(f"\nOriginal files: {annotations_dir}")
    print(f"Converted files: {output_dir}")
    print("\nAll classes have been converted to class 0 (Tree-crown)")
    print(f"{rule}\n")


üîç STEP 1: Analyzing ORIGINAL labels...

LABEL ANALYSIS

üìä Class Distribution:
----------------------------------------------------------------------
  Class 0: 1,334 annotations (41.26%)
  Class 1: 318 annotations (9.84%)
  Class 2: 1,581 annotations (48.90%)

üìê Format Information:
----------------------------------------------------------------------
  Polygon segmentations: 3,233
    - Average points per polygon: 45.0
    - Points range: 4 to 140

  Total: 3,233 annotations in 49 files


üîÑ STEP 2: Converting labels...
üìÅ Output directory: /content/drive/MyDrive/AGRI/TreeCrown_Segmentation/dataset/valid/formatLabels

üîÑ Processing 49 annotation files...
üìù Detected format: Polygon Segmentation
   First annotation has 78 coordinate values (39 points)
   Sample: 0 0.44375 0.8234375 0.4421875 0.825 0.4421875 0.8375 0.44375 0.8390625 0.44375 0...

‚úì 52000_20000_2773_1031_jpg.rf.eabce235b72bd9ad360f86afeeab1a0d.txt: 71 annotations ‚Üí Tree-crown (class 0)
‚úì img-112-_