In [1]:
import os
import shutil
import json
import numpy as np
from ultralytics import YOLO
import matplotlib.pyplot as plt

In [2]:
# --- Inputs -----------------------------------------------------------------
# Local folder the unlabeled data is saved to after being fetched from GCS.
unlabeled_data_path = "./unlabeled_data"
# Local path to the test images folder.
test_images_path = "./test/images"

# --- Model checkpoints (downloaded manually from GCS) -----------------------
# Base model weights.
model_path = "./best.pt"
# Augmented model weights.
augmented_model_path = "./augmented.pt"

# --- Outputs ----------------------------------------------------------------
# Local folder for test results.
test_results_path = "./test_results"
# Root folder for everything produced from the unlabeled data.
unlabeled_results_path = "./unlabeled_test_results"
# One sub-folder per model, plus one for the model-vs-model comparison.
base_results_path = os.path.join(unlabeled_results_path, "base_model_unlabeled_results")
augmented_results_path = os.path.join(unlabeled_results_path, "augmented_model_unlabeled_results")
comparison_results_path = os.path.join(unlabeled_results_path, "comparison_metrics")

# --- Labels -----------------------------------------------------------------
# Class index -> short class name used in summaries.
class_names = {
    0: "V",
    1: "C",
    2: "S",
}

# Evaluation Function

In [3]:
# Initialize YOLO models
# Both checkpoints are loaded once here so the evaluation cells further down
# can reuse the same objects: `base_model` comes from `model_path` and
# `augmented_model` from `augmented_model_path`.
base_model = YOLO(model_path)
augmented_model = YOLO(augmented_model_path)

In [4]:
def evaluate_model(model, unlabeled_data_path, results_path, class_names,
                   test_sets=range(1, 6), conf_threshold=0.5):
    """
    Evaluate a YOLO model on unlabeled test datasets.

    For every test set ``n`` the function runs ``model.predict`` on
    ``<unlabeled_data_path>/test_set_<n>`` and writes, under
    ``<results_path>/test_set_<n>/``:

      * ``predictions.json`` - one record per detection with the keys
        ``image``, ``box`` (xyxy), ``label`` (class index) and ``confidence``
      * ``summary.txt`` - class counts and ratios, confidence statistics and
        the mean box width/height
      * ``annotated_images/`` - the two histogram PNGs; the predictor is also
        called with ``save=True``, ``project=<this folder>`` and
        ``name="annotated_images"``

    Parameters:
        model (YOLO): The YOLO model object to evaluate.
        unlabeled_data_path (str): Path to the folder containing test sets.
        results_path (str): Path to save evaluation results.
        class_names (dict): Mapping of class indices to class names.
        test_sets (iterable): Test set identifiers to process; defaults to 1..5.
        conf_threshold (float): Confidence threshold passed to the predictor
            as ``conf``; defaults to 0.5.

    Returns:
        None. All results are written to disk.
    """
    # Process each test set
    for test_set in test_sets:
        print(f"\n[INFO] Processing Test Set {test_set}...")

        # Initialize paths
        test_set_path = os.path.join(unlabeled_data_path, f"test_set_{test_set}")
        annotated_dir = os.path.join(results_path, f"test_set_{test_set}/annotated_images")
        predictions_path = os.path.join(results_path, f"test_set_{test_set}/predictions.json")
        summary_path = os.path.join(results_path, f"test_set_{test_set}/summary.txt")

        # Create the output folders up front: the JSON, summary and plots are
        # written here, and their existence must not depend on the predictor
        # having created them as a side effect of save=True.
        os.makedirs(annotated_dir, exist_ok=True)

        # Perform predictions
        results = model.predict(
            source=test_set_path,
            save=True,
            conf=conf_threshold,
            project=annotated_dir,
            name="annotated_images"
        )
        print(f"[INFO] Predictions completed for Test Set {test_set}.")

        # Initialize variables for processing results
        predictions = []
        class_counts = {}
        confidence_scores = []
        box_widths = []
        box_heights = []

        # Process results
        for result in results:
            # Skip results that expose no box container (attribute missing or
            # set to None); there is nothing to record for them.
            result_boxes = getattr(result, "boxes", None)
            if result_boxes is None:
                continue

            image_name = os.path.basename(result.path)  # File name of the source image

            boxes = result_boxes.xyxy.tolist()  # Bounding box coordinates
            confidences = result_boxes.conf.tolist()  # Confidence scores
            class_ids = result_boxes.cls.tolist()  # Class IDs

            for box, conf, cls in zip(boxes, confidences, class_ids):
                cls = int(cls)
                class_name = class_names.get(cls, "Unknown")

                class_counts[class_name] = class_counts.get(class_name, 0) + 1
                confidence_scores.append(conf)

                # Save box dimensions (xyxy layout: x2 - x1, y2 - y1)
                box_widths.append(box[2] - box[0])
                box_heights.append(box[3] - box[1])

                # Save prediction
                predictions.append({
                    "image": image_name,
                    "box": box,
                    "label": cls,
                    "confidence": conf
                })

        # Save predictions to JSON
        with open(predictions_path, "w") as f:
            json.dump(predictions, f)
        print(f"[INFO] Predictions saved for Test Set {test_set}.")

        # Metrics Calculation (fall back to 0 when nothing was detected)
        avg_confidence = np.mean(confidence_scores) if confidence_scores else 0
        median_confidence = np.median(confidence_scores) if confidence_scores else 0
        std_confidence = np.std(confidence_scores) if confidence_scores else 0

        avg_width = np.mean(box_widths) if box_widths else 0
        avg_height = np.mean(box_heights) if box_heights else 0

        # Class Ratios
        total_predictions = sum(class_counts.values())
        class_ratios = {cls: (count / total_predictions) * 100 for cls, count in class_counts.items()} if total_predictions > 0 else {}

        # Save metrics to summary (explicit encoding so class names are
        # written the same way on every platform)
        with open(summary_path, "w", encoding="utf-8") as f:
            f.write(f"Metrics for Test Set {test_set}:\n")
            f.write(f"Class Distribution:\n")
            for class_name, count in class_counts.items():
                f.write(f"  {class_name}: {count}\n")
            f.write("\nClass Ratios (Percentages):\n")
            for class_name, ratio in class_ratios.items():
                f.write(f"  {class_name}: {ratio:.2f}%\n")
            f.write("\nConfidence Score Statistics:\n")
            f.write(f"  Average Confidence: {avg_confidence:.4f}\n")
            f.write(f"  Median Confidence: {median_confidence:.4f}\n")
            f.write(f"  Standard Deviation of Confidence: {std_confidence:.4f}\n")
            f.write("\nAverage Box Size:\n")
            f.write(f"  Average Width: {avg_width:.2f}\n")
            f.write(f"  Average Height: {avg_height:.2f}\n")

        # Visualization: Confidence Distribution
        if confidence_scores:
            plt.figure()
            plt.hist(confidence_scores, bins=20, alpha=0.75, edgecolor="black")
            plt.title(f"Confidence Score Distribution for Test Set {test_set}")
            plt.xlabel("Confidence Score")
            plt.ylabel("Frequency")
            plt.savefig(os.path.join(annotated_dir, f"confidence_distribution_test_set_{test_set}.png"))
            plt.close()  # release the figure; this runs once per test set

        # Visualization: Box Size Distribution
        if box_widths and box_heights:
            plt.figure()
            plt.hist(box_widths, bins=20, alpha=0.75, edgecolor="black", label="Widths")
            plt.hist(box_heights, bins=20, alpha=0.75, edgecolor="black", label="Heights")
            plt.title(f"Box Size Distribution for Test Set {test_set}")
            plt.xlabel("Size")
            plt.ylabel("Frequency")
            plt.legend()
            plt.savefig(os.path.join(annotated_dir, f"box_size_distribution_test_set_{test_set}.png"))
            plt.close()

# Evaluation on Base Model

In [5]:
# Run the base model over the unlabeled test sets; outputs land in base_results_path.
evaluate_model(
    model=base_model,
    unlabeled_data_path=unlabeled_data_path,
    results_path=base_results_path,
    class_names=class_names,
)


[INFO] Processing Test Set 1...

image 1/840 C:\Users\binmh\AI group Project\unlabeled_data\test_set_1\frame_20241126_050545.jpg: 384x640 1 S, 84.0ms
image 2/840 C:\Users\binmh\AI group Project\unlabeled_data\test_set_1\frame_20241126_050643.jpg: 384x640 2 Vs, 65.2ms
image 3/840 C:\Users\binmh\AI group Project\unlabeled_data\test_set_1\frame_20241126_050703.jpg: 384x640 1 S, 65.0ms
image 4/840 C:\Users\binmh\AI group Project\unlabeled_data\test_set_1\frame_20241126_050801.jpg: 384x640 2 Vs, 1 C, 1 S, 77.0ms
image 5/840 C:\Users\binmh\AI group Project\unlabeled_data\test_set_1\frame_20241126_050820.jpg: 384x640 3 Vs, 1 S, 71.1ms
image 6/840 C:\Users\binmh\AI group Project\unlabeled_data\test_set_1\frame_20241126_050957.jpg: 384x640 (no detections), 83.3ms
image 7/840 C:\Users\binmh\AI group Project\unlabeled_data\test_set_1\frame_20241126_051036.jpg: 384x640 1 V, 80.0ms
image 8/840 C:\Users\binmh\AI group Project\unlabeled_data\test_set_1\frame_20241126_051114.jpg: 384x640 2 Vs, 1 S, 7

# Evaluation on Augmented Model

In [6]:
# Run the augmented model over the same test sets; outputs land in augmented_results_path.
evaluate_model(
    model=augmented_model,
    unlabeled_data_path=unlabeled_data_path,
    results_path=augmented_results_path,
    class_names=class_names,
)


[INFO] Processing Test Set 1...

image 1/840 C:\Users\binmh\AI group Project\unlabeled_data\test_set_1\frame_20241126_050545.jpg: 384x640 1 S, 124.0ms
image 2/840 C:\Users\binmh\AI group Project\unlabeled_data\test_set_1\frame_20241126_050643.jpg: 384x640 2 Vs, 101.3ms
image 3/840 C:\Users\binmh\AI group Project\unlabeled_data\test_set_1\frame_20241126_050703.jpg: 384x640 1 S, 95.0ms
image 4/840 C:\Users\binmh\AI group Project\unlabeled_data\test_set_1\frame_20241126_050801.jpg: 384x640 3 Vs, 1 C, 1 S, 98.0ms
image 5/840 C:\Users\binmh\AI group Project\unlabeled_data\test_set_1\frame_20241126_050820.jpg: 384x640 3 Vs, 1 C, 1 S, 104.0ms
image 6/840 C:\Users\binmh\AI group Project\unlabeled_data\test_set_1\frame_20241126_050957.jpg: 384x640 (no detections), 128.0ms
image 7/840 C:\Users\binmh\AI group Project\unlabeled_data\test_set_1\frame_20241126_051036.jpg: 384x640 1 V, 103.0ms
image 8/840 C:\Users\binmh\AI group Project\unlabeled_data\test_set_1\frame_20241126_051114.jpg: 384x640 1 

# Model Comparison

In [37]:
# Calculate the Intersection over Union (IoU) of two bounding boxes.
def calculate_iou(box1, box2):
    """
    Intersection over Union of two axis-aligned boxes.

    Parameters:
        box1: Four values unpacked as (x1, y1, x2, y2).
        box2: Four values unpacked as (x1, y1, x2, y2).

    Returns:
        Intersection area divided by union area, or 0 when the union area is
        not positive.
    """
    left_a, top_a, right_a, bottom_a = box1
    left_b, top_b, right_b, bottom_b = box2

    # Overlap extent along each axis, clamped at zero for disjoint boxes.
    overlap_w = max(0, min(right_a, right_b) - max(left_a, left_b))
    overlap_h = max(0, min(bottom_a, bottom_b) - max(top_a, top_b))
    overlap = overlap_w * overlap_h

    area_a = (right_a - left_a) * (bottom_a - top_a)
    area_b = (right_b - left_b) * (bottom_b - top_b)
    union = area_a + area_b - overlap

    if union > 0:
        return overlap / union
    return 0

# Load predictions from a specific test set
def load_predictions(results_path, test_set):
    """
    Read the saved detections for one test set.

    Parameters:
        results_path (str): Folder holding the per-test-set result folders.
        test_set: Identifier used in the ``test_set_<id>`` folder name.

    Returns:
        The parsed content of
        ``<results_path>/test_set_<test_set>/predictions.json``, or an empty
        list when that file does not exist.
    """
    json_file = os.path.join(results_path, f"test_set_{test_set}/predictions.json")

    # Nothing saved for this test set: report "no predictions" to the caller.
    if not os.path.exists(json_file):
        return []

    with open(json_file, "r") as handle:
        return json.load(handle)

# Compare two models
def compare_models(base_results_path, augmented_results_path, comparison_results_path, class_names,
                   test_sets=range(1, 6), iou_threshold=0.5):
    """
    Compare the saved predictions of the base and augmented models.

    For every test set whose predictions exist for both models, writes
    ``test_set_<n>_comparison.txt`` with the agreement ratio and each model's
    average confidence, then writes ``overall_comparison_summary.txt`` with
    the aggregated confidence and class distribution of each model.

    Detections are matched per image: a base detection and an augmented
    detection agree when they belong to the same image, carry the same label
    and overlap with IoU strictly greater than ``iou_threshold``. Each
    detection takes part in at most one match. The agreement ratio is the
    number of matches divided by the larger of the two models' detection
    counts, so it reaches 100% only when every detection of both models is
    matched. Class counts and confidence averages use every detection of each
    model.

    Parameters:
        base_results_path (str): Result folder of the base model.
        augmented_results_path (str): Result folder of the augmented model.
        comparison_results_path (str): Folder the comparison files are
            written to; created when missing.
        class_names (dict): Mapping of class indices to class names.
        test_sets (iterable): Test set identifiers to compare; defaults to 1..5.
        iou_threshold (float): Minimum (exclusive) IoU for two boxes to count
            as the same detection; defaults to 0.5.

    Returns:
        None. All results are written to disk.
    """
    os.makedirs(comparison_results_path, exist_ok=True)

    base_metrics = []
    augmented_metrics = []

    overall_base_class_counts = {}
    overall_augmented_class_counts = {}

    for test_set in test_sets:
        base_predictions = load_predictions(base_results_path, test_set)
        augmented_predictions = load_predictions(augmented_results_path, test_set)

        if not base_predictions or not augmented_predictions:
            print(f"[WARNING] Missing predictions for Test Set {test_set}.")
            continue

        # Per-model statistics over ALL detections of that model. Pairing the
        # two lists index-by-index would silently drop the tail of the longer
        # list and compare detections that belong to different images.
        base_class_counts = _count_classes(base_predictions, class_names)
        augmented_class_counts = _count_classes(augmented_predictions, class_names)
        base_avg_conf = np.mean([pred["confidence"] for pred in base_predictions])
        augmented_avg_conf = np.mean([pred["confidence"] for pred in augmented_predictions])

        # Update overall class counts
        for cls, count in base_class_counts.items():
            overall_base_class_counts[cls] = overall_base_class_counts.get(cls, 0) + count

        for cls, count in augmented_class_counts.items():
            overall_augmented_class_counts[cls] = overall_augmented_class_counts.get(cls, 0) + count

        # Agreement between the models; both lists are non-empty here, so the
        # denominator is never zero.
        agreement_count = _count_agreements(base_predictions, augmented_predictions, iou_threshold)
        total_comparisons = max(len(base_predictions), len(augmented_predictions))
        agreement_ratio = (agreement_count / total_comparisons) * 100

        base_metrics.append({
            "test_set": test_set,
            "class_counts": base_class_counts,
            "avg_confidence": base_avg_conf,
        })

        augmented_metrics.append({
            "test_set": test_set,
            "class_counts": augmented_class_counts,
            "avg_confidence": augmented_avg_conf,
        })

        # Save test set summary
        with open(os.path.join(comparison_results_path, f"test_set_{test_set}_comparison.txt"), "w", encoding="utf-8") as f:
            f.write(f"Test Set {test_set} Comparison:\n")
            f.write(f"  Agreement Ratio: {agreement_ratio:.2f}%\n")
            f.write(f"  Base Model Avg Confidence: {base_avg_conf:.4f}\n")
            f.write(f"  Augmented Model Avg Confidence: {augmented_avg_conf:.4f}\n")

    # Aggregate results across test sets (unweighted mean of the per-set
    # means); fall back to 0 when every test set was skipped, because the
    # mean of an empty list is NaN.
    base_avg_confidence = np.mean([m["avg_confidence"] for m in base_metrics]) if base_metrics else 0
    augmented_avg_confidence = np.mean([m["avg_confidence"] for m in augmented_metrics]) if augmented_metrics else 0

    base_total = sum(overall_base_class_counts.values())
    augmented_total = sum(overall_augmented_class_counts.values())

    base_class_ratios = {cls: (count / base_total) * 100 for cls, count in overall_base_class_counts.items()} if base_total else {}
    augmented_class_ratios = {cls: (count / augmented_total) * 100 for cls, count in overall_augmented_class_counts.items()} if augmented_total else {}

    overall_comparison_path = os.path.join(comparison_results_path, "overall_comparison_summary.txt")
    with open(overall_comparison_path, "w", encoding="utf-8") as f:
        f.write("Overall Comparison Summary:\n")
        f.write(f"  Base Model Avg Confidence: {base_avg_confidence:.4f}\n")
        f.write(f"  Augmented Model Avg Confidence: {augmented_avg_confidence:.4f}\n")
        f.write(f"\nClass Distribution:\n")
        f.write("Base Model:\n")
        for class_name, ratio in base_class_ratios.items():
            f.write(f"  {class_name}: {ratio:.2f}%\n")
        f.write("\nAugmented Model:\n")
        for class_name, ratio in augmented_class_ratios.items():
            f.write(f"  {class_name}: {ratio:.2f}%\n")

    print("Comparison completed. Check results in:", comparison_results_path)


def _count_classes(predictions, class_names):
    """Count detections per class name; labels missing from class_names count as "Unknown"."""
    counts = {}
    for pred in predictions:
        name = class_names.get(pred["label"], "Unknown")
        counts[name] = counts.get(name, 0) + 1
    return counts


def _group_by_image(predictions):
    """Group prediction records by their "image" field, keeping list order."""
    grouped = {}
    for pred in predictions:
        grouped.setdefault(pred["image"], []).append(pred)
    return grouped


def _count_agreements(base_predictions, augmented_predictions, iou_threshold):
    """Count one-to-one same-image, same-label matches with IoU above the threshold."""
    augmented_by_image = _group_by_image(augmented_predictions)

    matched = 0
    for image, base_dets in _group_by_image(base_predictions).items():
        augmented_dets = augmented_by_image.get(image, [])

        # Every same-label pair that overlaps enough is a candidate match.
        candidates = []
        for base_idx, base_det in enumerate(base_dets):
            for aug_idx, aug_det in enumerate(augmented_dets):
                if base_det["label"] != aug_det["label"]:
                    continue
                iou = calculate_iou(base_det["box"], aug_det["box"])
                if iou > iou_threshold:
                    candidates.append((iou, base_idx, aug_idx))

        # Greedy assignment, best overlap first, so a detection is never
        # counted in more than one match.
        candidates.sort(reverse=True)
        used_base = set()
        used_aug = set()
        for _, base_idx, aug_idx in candidates:
            if base_idx in used_base or aug_idx in used_aug:
                continue
            used_base.add(base_idx)
            used_aug.add(aug_idx)
            matched += 1

    return matched


In [38]:
# Compare the two models' saved predictions; reports are written to comparison_results_path.
compare_models(
    base_results_path=base_results_path,
    augmented_results_path=augmented_results_path,
    comparison_results_path=comparison_results_path,
    class_names=class_names,
)

Comparison completed. Check results in: ./unlabeled_test_results\comparison_metrics
