In [1]:
# Ensure Ultralytics is installed in the Kaggle environment
!pip install ultralytics pyyaml tqdm


Collecting ultralytics
  Downloading ultralytics-8.3.115-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cusolver_cu12-11.6

In [17]:
# --- Snippet 1: Setup, Installs, Config, Basic Helpers ---

!pip install ultralytics pyyaml tqdm matplotlib seaborn tidecv # Use latest available tidecv
import os
import cv2
import numpy as np
import yaml
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from ultralytics import YOLO
from ultralytics.utils.metrics import box_iou
from pathlib import Path
from tqdm import tqdm
import shutil
import logging
import json
from tidecv import TIDE, datasets # Import TIDE library

# --- Configuration ---
MODEL_PATH = Path("/kaggle/input/best-model-30/best.pt")
DATASET_YAML_PATH = Path("/kaggle/input/cardetection/car/data.yaml")
OUTPUT_DIR = Path("/kaggle/working/robust_error_analysis") # New output dir name
SPLIT_TO_ANALYZE = 'valid' # Or 'test'
CONF_THRESHOLD = 0.25 # Standard validation confidence
IOU_THRESHOLD = 0.50 # Standard IoU for matching

# --- Analysis Specific Config ---
# Set to a class name like "Green Light" to analyze its FPs, or None to disable
TARGET_FP_ANALYSIS_CLASS_NAME = "Green Light"
# Brightness thresholds for slicing (0-255 scale)
BRIGHTNESS_DARK_THRESHOLD = 70
BRIGHTNESS_BRIGHT_THRESHOLD = 180
# Confidence bins for analysis
CONFIDENCE_BINS = [0.0, 0.3, 0.5, 0.7, 0.9, 1.0]

# --- Setup Logging ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s-%(levelname)s: %(message)s')
logging.getLogger('matplotlib').setLevel(logging.WARNING) # Quieter plots

# --- Basic Helper Functions ---

def setup_output_dirs(base_dir):
    """Creates necessary output directories, removing old ones."""
    logging.info(f"Setting up output directories under {base_dir}")
    if base_dir.exists():
        logging.warning(f"Removing previous analysis directory: {base_dir}")
        shutil.rmtree(base_dir)
    base_dir.mkdir(parents=True, exist_ok=True)

    dirs = {
        "errors_fp": base_dir / "error_images" / "false_positives",
        "errors_fn": base_dir / "error_images" / "false_negatives",
        "errors_misc": base_dir / "error_images" / "misclassified",
        "fp_color_analysis": base_dir / "fp_color_analysis",
        "tide_results": base_dir / "tide_results",
        "slice_analysis": base_dir / "slice_analysis",
        "confidence_analysis": base_dir / "confidence_analysis"
    }
    for d in dirs.values():
        d.mkdir(parents=True, exist_ok=True)
    logging.info("Output directories created.")
    return dirs

def load_dataset_info(yaml_path, split):
    """Loads dataset paths and class names from YAML file (more flexible path handling)."""
    logging.info(f"Attempting to load dataset info from: {yaml_path}")
    if not yaml_path.exists():
        logging.error(f"Dataset YAML file not found at {yaml_path}")
        raise FileNotFoundError(f"Dataset YAML file not found at {yaml_path}")

    logging.debug(f"Reading YAML content...")
    with open(yaml_path, 'r') as f:
        try:
            data = yaml.safe_load(f)
            logging.debug(f"YAML content loaded: {data}")
        except yaml.YAMLError as e:
            logging.error(f"Error parsing YAML file: {e}")
            raise

    if not isinstance(data, dict):
        logging.error(f"YAML content is not a dictionary. Content: {data}")
        raise ValueError("YAML content is not a dictionary.")

    # --- Class Names (Still required from YAML) ---
    if 'names' not in data:
        logging.error(f"'names' key (class names) not found in YAML data.")
        raise ValueError(f"'names' key (class names) not found in {yaml_path}")
    class_names = data['names']
    logging.debug(f"Class names found: {class_names}")

    # Determine the dataset root directory (where the yaml file is located)
    dataset_root = yaml_path.parent
    logging.debug(f"Dataset root directory (location of YAML): {dataset_root}")

    # --- Flexible Path Finding ---
    img_path = None
    label_path = None
    image_path_found = False
    label_path_found = False

    # Method 1: Check if 'split' is a key in YAML (standard Ultralytics way)
    if split in data:
        logging.info(f"Split '{split}' found as a key in YAML. Using defined path.")
        img_path_relative = data[split]
        logging.debug(f"Relative image path for split '{split}' from YAML: {img_path_relative}")
        img_path_attempt = (dataset_root / img_path_relative).resolve()
        logging.info(f"Attempting path from YAML key: {img_path_attempt}")
        if img_path_attempt.exists() and img_path_attempt.is_dir():
            img_path = img_path_attempt
            image_path_found = True
            logging.info(f"Image path found via YAML key: {img_path}")
            # Try finding corresponding label path by replacing 'images'
            label_path_attempt = Path(str(img_path).replace("images", "labels", 1))
            if label_path_attempt.exists() and label_path_attempt.is_dir():
                 label_path = label_path_attempt
                 label_path_found = True
                 logging.info(f"Corresponding label path found: {label_path}")
            else:
                 logging.warning(f"Found image path via YAML, but corresponding label path {label_path_attempt} not found.")
        else:
             logging.warning(f"Path '{img_path_attempt}' defined for key '{split}' in YAML not found or not a directory.")

    # Method 2: If YAML key didn't work OR wasn't present, assume standard structure
    if not image_path_found:
        logging.info(f"Split '{split}' not found as key in YAML or path invalid. Assuming standard directory structure: {dataset_root}/{split}/images")
        img_path_attempt = (dataset_root / split / "images").resolve()
        logging.info(f"Attempting standard path: {img_path_attempt}")
        if img_path_attempt.exists() and img_path_attempt.is_dir():
            img_path = img_path_attempt
            image_path_found = True
            logging.info(f"Image path found via standard structure: {img_path}")
            # Check for corresponding labels dir
            label_path_attempt = (dataset_root / split / "labels").resolve()
            if label_path_attempt.exists() and label_path_attempt.is_dir():
                label_path = label_path_attempt
                label_path_found = True
                logging.info(f"Corresponding label path found: {label_path}")
            else:
                logging.warning(f"Found image path via standard structure, but corresponding label path {label_path_attempt} not found.")
        else:
            logging.error(f"Standard image path {img_path_attempt} also not found or not a directory.")

    # Final Check
    if not image_path_found:
        raise FileNotFoundError(f"Could not locate image directory for split '{split}' using YAML key or standard structure relative to {dataset_root}.")
    if not label_path_found:
        # Allow proceeding without labels for prediction-only tasks, but log error for analysis
        logging.error(f"Could not locate label directory for split '{split}' corresponding to {img_path}. Ground truth cannot be loaded.")
        raise FileNotFoundError(f"Could not locate label directory for split '{split}' corresponding to {img_path}.")


    # Find image files in the located directory
    allowed_extensions = ['.jpg', '.png', '.jpeg', '.bmp', '.tif', '.tiff', '.webp']
    logging.debug(f"Scanning {img_path} for images...")
    image_files = sorted([p for p in img_path.iterdir() if p.suffix.lower() in allowed_extensions])
    logging.info(f"Found {len(image_files)} images in {img_path}")

    if not image_files:
         logging.warning(f"No image files found in the directory: {img_path}. Check extensions and directory contents.")

    logging.info(f"Using Image Path: {img_path}")
    logging.info(f"Using Label Path: {label_path}")
    logging.info(f"Class names ({len(class_names)}): {class_names}")
    logging.info(f"Load_dataset_info finished successfully.")
    return image_files, label_path, class_names
    
print("Snippet 1 (Setup & Basic Helpers) finished.")

Snippet 1 (Setup & Basic Helpers) finished.


In [19]:
# --- Snippet 2: Core Analysis & Visualization Helpers ---
import colorsys # For HSV conversion

# --- Ground Truth & Coordinate Helpers --- (Refined from previous attempts)
def load_ground_truth(label_path, class_count):
    """Loads ground truth boxes [class_id, xc, yc, w, h] from a label file."""
    gt_boxes = []
    if label_path.exists():
        with open(label_path, 'r') as f:
            for line_num, line in enumerate(f):
                parts = line.strip().split()
                if len(parts) == 5:
                    try:
                        class_id = int(parts[0])
                        x_c, y_c, w, h = map(float, parts[1:])
                        if not (0 <= class_id < class_count):
                            logging.warning(f"L{line_num+1}: Invalid class ID {class_id} in {label_path.name}. Max is {class_count-1}. Skipping.")
                            continue
                        if not (0 <= x_c <= 1 and 0 <= y_c <= 1 and 0 < w <= 1 and 0 < h <= 1):
                            logging.warning(f"L{line_num+1}: Invalid coords [{x_c},{y_c},{w},{h}] in {label_path.name}. Skipping.")
                            continue
                        gt_boxes.append([class_id, x_c, y_c, w, h])
                    except ValueError:
                        logging.warning(f"L{line_num+1}: Cannot parse coords in {label_path.name}. Line: '{line.strip()}'. Skipping.")
                elif len(parts) > 0:
                    logging.warning(f"L{line_num+1}: Incorrect format ({len(parts)} parts) in {label_path.name}. Line: '{line.strip()}'. Skipping.")
    return np.array(gt_boxes) # Shape: (N, 5) -> [class_id, xc, yc, w, h]


def yolo_to_xyxy(boxes_yolo, img_width, img_height):
    """Converts YOLO [xc, yc, w, h] (normalized) to [x1, y1, x2, y2] (absolute)."""
    boxes_xyxy = []
    if boxes_yolo is None or len(boxes_yolo) == 0: return np.array(boxes_xyxy)
    for box in boxes_yolo:
        xc, yc, w, h = box # Assumes format [xc, yc, w, h, ...]
        x1 = (xc - w / 2) * img_width
        y1 = (yc - h / 2) * img_height
        x2 = (xc + w / 2) * img_width
        y2 = (yc + h / 2) * img_height
        # Clamp to image bounds
        x1, y1 = max(0.0, x1), max(0.0, y1)
        x2, y2 = min(float(img_width - 1), x2), min(float(img_height - 1), y2)
        if x1 < x2 and y1 < y2: # Ensure valid box after clamping
             boxes_xyxy.append([x1, y1, x2, y2])
        # else: logging.warning(f"Invalid box after clamping: {box} -> {[x1,y1,x2,y2]}")
    return np.array(boxes_xyxy) # Shape: (N, 4) -> [x1, y1, x2, y2]


# --- Image Condition Analysis Helpers ---
def calculate_brightness(image):
    """Calculates average brightness (grayscale value)."""
    try:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return np.mean(gray)
    except cv2.error as e:
        logging.error(f"OpenCV error calculating brightness: {e}")
        return -1 # Indicate error


def calculate_contrast(image):
    """Calculates RMS contrast."""
    try:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return gray.std()
    except cv2.error as e:
        logging.error(f"OpenCV error calculating contrast: {e}")
        return -1 # Indicate error

# --- Color Analysis Helpers ---
def get_box_roi(image, box_xyxy):
    """Safely extracts the Region of Interest for a bounding box."""
    h, w = image.shape[:2]
    x1, y1, x2, y2 = map(int, box_xyxy[:4])
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(w - 1, x2), min(h - 1, y2)
    if x1 >= x2 or y1 >= y2: return None # Invalid box
    return image[y1:y2, x1:x2]

def analyze_box_hsv(roi):
    """Calculates average Hue, Saturation, Value for a ROI."""
    if roi is None or roi.size == 0: return None
    try:
        hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
        # H ranges 0-179, S 0-255, V 0-255 in OpenCV
        avg_h = np.mean(hsv_roi[:, :, 0])
        avg_s = np.mean(hsv_roi[:, :, 1])
        avg_v = np.mean(hsv_roi[:, :, 2])
        return {'H': avg_h, 'S': avg_s, 'V': avg_v}
    except cv2.error as e:
        logging.error(f"OpenCV error analyzing HSV: {e}")
        return None


def plot_color_histogram(roi, save_path, title_prefix=""):
    """Calculates and saves the BGR color histogram for a ROI."""
    if roi is None or roi.size == 0:
        logging.warning(f"Cannot plot histogram for empty ROI. Save path: {save_path}")
        return
    try:
        plt.figure(figsize=(8, 5))
        plt.suptitle(f'{title_prefix} Color Histogram'.strip(), fontsize=12)
        colors = ('b', 'g', 'r')
        max_pixels = 0
        for i, color in enumerate(colors):
            hist = cv2.calcHist([roi], [i], None, [256], [0, 256])
            max_pixels = max(max_pixels, hist.max())
            plt.plot(hist, color=color, label=f'{color.upper()} channel')

        plt.title(f'Box Shape: {roi.shape[1]}x{roi.shape[0]}', fontsize=9, style='italic')
        plt.xlabel('Pixel Value (0-255)')
        plt.ylabel('Number of Pixels')
        plt.xlim([0, 256])
        plt.ylim([0, max_pixels * 1.1]) # Dynamic Y limit
        plt.legend()
        plt.grid(True, linestyle='--', alpha=0.6)
        plt.tight_layout(rect=[0, 0.03, 1, 0.95]) # Adjust layout for suptitle
        plt.savefig(save_path)
        plt.close()
        logging.debug(f"Color histogram saved to {save_path}")
    except Exception as e:
        logging.error(f"Failed to plot/save histogram {save_path}: {e}", exc_info=True)
        plt.close() # Ensure plot is closed even on error


# --- Visualization Helper ---
def draw_analysis_results(image, gt_data, pred_data, class_names):
    """
    Draws GT and Prediction boxes with error type indication.

    Args:
        image: The input image (numpy array).
        gt_data: List of GT box dicts {'box_xyxy':..., 'class_id':..., 'matched_pred_idx':...}
        pred_data: List of Pred box dicts {'box_xyxy':..., 'class_id':..., 'score':..., 'matched_gt_idx':..., 'error_type':...}
        class_names: List of class names.
    """
    img_out = image.copy()
    # Draw GT boxes
    for i, gt in enumerate(gt_data):
        x1, y1, x2, y2 = map(int, gt['box_xyxy'])
        class_id = gt['class_id']
        if not (0 <= class_id < len(class_names)): continue # Skip invalid
        label = class_names[class_id]
        color = (0, 255, 0) # Green (default = FN)
        prefix = "GT:"
        if gt.get('matched_pred_idx') is not None:
              color = (0, 200, 0) # Darker green if matched
              prefix = "GT(M):"

        cv2.rectangle(img_out, (x1, y1), (x2, y2), color, 2)
        cv2.putText(img_out, f"{prefix} {label}", (x1, y1 - 10 if y1>10 else y1+15 ), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    # Draw Prediction boxes
    for i, pred in enumerate(pred_data):
        x1, y1, x2, y2 = map(int, pred['box_xyxy'])
        class_id = pred['class_id']
        score = pred['score']
        error_type = pred.get('error_type', 'Unknown') # Default if somehow missing

        if not (0 <= class_id < len(class_names)): continue # Skip invalid
        pred_cls_name = class_names[class_id]
        pred_label = f"P: {pred_cls_name} ({score:.2f})"

        color = (128, 128, 128) # Default grey for unknown
        if error_type == 'TP':
            color = (255, 0, 0) # Blue
        elif error_type == 'FP':
            color = (0, 0, 255) # Red
        elif error_type == 'Misclassification':
            color = (0, 165, 255) # Orange
            # Optionally add GT class if available
            matched_gt_idx = pred.get('matched_gt_idx')
            if matched_gt_idx is not None and 0 <= matched_gt_idx < len(gt_data):
                gt_cls_id = gt_data[matched_gt_idx]['class_id']
                if 0 <= gt_cls_id < len(class_names):
                    pred_label += f" (GT: {class_names[gt_cls_id]})"

        cv2.rectangle(img_out, (x1, y1), (x2, y2), color, 2)
        cv2.putText(img_out, pred_label, (x1, y2 + 15 if y2 < img_out.shape[0]-15 else y2-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    return img_out

print("Snippet 2 (Core Helpers) finished.")

Snippet 2 (Core Helpers) finished.


In [20]:
# --- Snippet 3: Main Analysis Loop & Data Collection ---

def run_analysis(model, image_files, label_dir, class_names, dirs, conf_thresh, iou_thresh, target_fp_class_name=None):
    """Runs the main error analysis loop."""
    logging.info(f"Starting main analysis loop for {len(image_files)} images...")
    num_classes = len(class_names)
    device = next(model.parameters()).device # Get device model is on

    # Data containers for TIDE
    tide_gt_data = {} # {img_id: [{'bbox': [x1,y1,x2,y2], 'class': class_id}]}
    tide_pred_data = [] # [{'bbox': [x1,y1,x2,y2], 'score': score, 'class': class_id, 'image_id': img_id}]

    # Data containers for detailed analysis
    all_results = [] # Store detailed info per image/box

    # Optional: Target class ID for color analysis
    target_fp_class_id = None
    if target_fp_class_name:
        try: target_fp_class_id = class_names.index(target_fp_class_name)
        except ValueError: logging.warning(f"Target FP class '{target_fp_class_name}' not found. Disabling color analysis.")

    for img_idx, img_path in enumerate(tqdm(image_files, desc="Analyzing Images")):
        image_id = img_path.stem # Use filename stem as image ID
        logging.debug(f"Processing Img {img_idx+1}/{len(image_files)}: {img_path.name}")

        try:
            # --- Load Image & GT ---
            img = cv2.imread(str(img_path))
            if img is None:
                logging.warning(f"Could not read image {img_path}. Skipping.")
                continue
            img_height, img_width = img.shape[:2]

            label_path = label_dir / (img_path.stem + ".txt")
            gt_boxes_yolo = load_ground_truth(label_path, num_classes) # [cls, xc, yc, w, h]
            gt_boxes_xyxy = yolo_to_xyxy(gt_boxes_yolo[:, 1:], img_width, img_height) # Get only coords

            # Prepare GT for TIDE and detailed analysis
            current_gt_list = []
            tide_gt_list = []
            for i, gt_yolo in enumerate(gt_boxes_yolo):
                class_id = int(gt_yolo[0])
                box_xyxy = gt_boxes_xyxy[i]
                gt_entry = {'box_xyxy': box_xyxy.tolist(), 'class_id': class_id, 'matched_pred_idx': None}
                current_gt_list.append(gt_entry)
                tide_gt_list.append({'bbox': box_xyxy.tolist(), 'class': class_id})
            tide_gt_data[image_id] = tide_gt_list


            # --- Image Level Analysis ---
            brightness = calculate_brightness(img)
            contrast = calculate_contrast(img)
            logging.debug(f"Image {image_id}: Brightness={brightness:.2f}, Contrast={contrast:.2f}")

            # --- Prediction ---
            results = model.predict(img_path, conf=conf_thresh, iou=iou_thresh, device=device, verbose=False)
            preds = results[0] # Results for this image

            pred_boxes_xyxy = preds.boxes.xyxy.cpu().numpy()
            pred_scores = preds.boxes.conf.cpu().numpy()
            pred_classes = preds.boxes.cls.cpu().numpy().astype(int)

            # Prepare Predictions for TIDE and detailed analysis
            current_pred_list = []
            for i in range(len(pred_boxes_xyxy)):
                 box_xyxy = pred_boxes_xyxy[i]
                 score = pred_scores[i]
                 class_id = pred_classes[i]
                 pred_entry = {
                     'box_xyxy': box_xyxy.tolist(),
                     'score': float(score),
                     'class_id': int(class_id),
                     'matched_gt_idx': None,
                     'error_type': None # Will be filled during matching
                 }
                 current_pred_list.append(pred_entry)
                 tide_pred_data.append({
                     'bbox': box_xyxy.tolist(),
                     'score': float(score),
                     'class': int(class_id),
                     'image_id': image_id
                 })

            # --- Matching GT and Predictions ---
            num_gt = len(current_gt_list)
            num_pred = len(current_pred_list)
            gt_matched_indices = set() # Indices of GT boxes that are matched
            pred_matched_gt_indices = [None] * num_pred # Index of matched GT for each pred

            if num_gt > 0 and num_pred > 0:
                # Prepare boxes for IoU calculation
                gt_xyxy_np = np.array([gt['box_xyxy'] for gt in current_gt_list])
                pred_xyxy_np = np.array([p['box_xyxy'] for p in current_pred_list])

                # Calculate IoU matrix (GT rows, Pred columns)
                iou_matrix = box_iou(torch.tensor(gt_xyxy_np), torch.tensor(pred_xyxy_np)).numpy()

                # Find potential matches above threshold
                gt_idx_match, pred_idx_match = np.where(iou_matrix >= iou_thresh)
                matches = np.array([gt_idx_match, pred_idx_match, iou_matrix[gt_idx_match, pred_idx_match]]).T

                # Greedy matching based on highest IoU first
                if len(matches) > 0:
                    matches = matches[matches[:, 2].argsort()[::-1]] # Sort by IoU desc
                    used_gt, used_pred = set(), set()
                    for match in matches:
                        gt_idx, pred_idx = int(match[0]), int(match[1])
                        if gt_idx not in used_gt and pred_idx not in used_pred:
                             # Record the match
                             pred_matched_gt_indices[pred_idx] = gt_idx
                             current_pred_list[pred_idx]['matched_gt_idx'] = gt_idx
                             current_gt_list[gt_idx]['matched_pred_idx'] = pred_idx # Link back GT -> Pred
                             gt_matched_indices.add(gt_idx)
                             # Mark as used
                             used_gt.add(gt_idx)
                             used_pred.add(pred_idx)

            # --- Categorize Errors ---
            image_has_errors = False
            # Process Predictions
            for i, pred in enumerate(current_pred_list):
                matched_gt_idx = pred['matched_gt_idx']
                if matched_gt_idx is not None: # Matched prediction
                    gt_class_id = current_gt_list[matched_gt_idx]['class_id']
                    if pred['class_id'] == gt_class_id:
                        pred['error_type'] = 'TP'
                    else:
                        pred['error_type'] = 'Misclassification'
                        image_has_errors = True # Misclassification is an error
                else: # Unmatched prediction
                    pred['error_type'] = 'FP'
                    image_has_errors = True # FP is an error

            # Check for False Negatives (unmatched GT)
            num_fn = 0
            for i, gt in enumerate(current_gt_list):
                 if gt['matched_pred_idx'] is None:
                      num_fn += 1
            if num_fn > 0: image_has_errors = True # FN is an error


            # --- Store Detailed Results ---
            all_results.append({
                'image_id': image_id,
                'image_path': str(img_path),
                'width': img_width,
                'height': img_height,
                'brightness': float(brightness) if brightness != -1 else None,
                'contrast': float(contrast) if contrast != -1 else None,
                'ground_truth': current_gt_list,
                'predictions': current_pred_list,
                'has_errors': image_has_errors,
                'fn_count': num_fn
            })


            # --- Save Error Image Visualization ---
            if image_has_errors:
                logging.debug(f"Saving error image for {image_id}")
                img_to_draw = cv2.imread(str(img_path)) # Re-read to ensure clean image
                drawn_img = draw_analysis_results(img_to_draw, current_gt_list, current_pred_list, class_names)

                # Determine primary error type for directory saving
                has_fn = num_fn > 0
                has_fp = any(p['error_type'] == 'FP' for p in current_pred_list)
                has_misc = any(p['error_type'] == 'Misclassification' for p in current_pred_list)

                if has_fn: save_dir = dirs["errors_fn"]
                elif has_fp: save_dir = dirs["errors_fp"]
                elif has_misc: save_dir = dirs["errors_misc"]
                else: save_dir = OUTPUT_DIR # Fallback, should not happen if image_has_errors is True

                error_flags = f"{'FN' if has_fn else ''}{'FP' if has_fp else ''}{'MISC' if has_misc else ''}"
                save_path = save_dir / f"err_{error_flags}_{image_id}.png"
                try:
                    cv2.imwrite(str(save_path), drawn_img)
                except Exception as write_e:
                    logging.error(f"Failed to write error image {save_path}: {write_e}")

            # --- Targeted FP Analysis (Color/HSV) ---
            if target_fp_class_id is not None:
                for i, pred in enumerate(current_pred_list):
                    if pred['error_type'] == 'FP' and pred['class_id'] == target_fp_class_id:
                        logging.debug(f"Analyzing FP Box {i} (Class {target_fp_class_name}) in {image_id}")
                        fp_roi = get_box_roi(img, pred['box_xyxy'])
                        if fp_roi is not None:
                            # Plot Histogram
                            hist_save_path = dirs["fp_color_analysis"] / f"hist_fp_{image_id}_pred{i}.png"
                            plot_color_histogram(fp_roi, hist_save_path, title_prefix=f"FP {target_fp_class_name}")

                            # Analyze HSV (results could be added to all_results if needed)
                            hsv_data = analyze_box_hsv(fp_roi)
                            if hsv_data:
                                logging.debug(f"  FP Box HSV: H={hsv_data['H']:.1f}, S={hsv_data['S']:.1f}, V={hsv_data['V']:.1f}")
                                # Store this info if needed later: pred['hsv_avg'] = hsv_data

        except Exception as e:
            logging.error(f"FATAL error processing image {img_path.name}: {e}", exc_info=True)
            # Optionally add to a list of failed images

    logging.info("Finished main analysis loop.")
    return all_results, tide_gt_data, tide_pred_data


# --- Placeholder for main execution logic ---
# This snippet only defines the function. The next snippet will call it.
print("Snippet 3 (Main Loop Definition) finished.")

Snippet 3 (Main Loop Definition) finished.


In [21]:
# --- Snippet 4: Execution, Summary, Slicing, Confidence, TIDE Analysis ---

def calculate_slice_metrics(results_slice):
    """Calculates metrics for a subset of results."""
    tp, fp, fn, misc = 0, 0, 0, 0
    total_gt_in_slice = 0

    for img_result in results_slice:
        total_gt_in_slice += len(img_result['ground_truth'])
        fn += img_result.get('fn_count', 0) # FN is per-image
        for pred in img_result['predictions']:
            if pred['error_type'] == 'TP': tp += 1
            elif pred['error_type'] == 'FP': fp += 1
            elif pred['error_type'] == 'Misclassification': misc += 1
            # Note: FNs are counted based on unmatched GT, not directly from pred list

    # Recalculate FN based on TP and Misc for consistency with overall GT count in slice
    # Total GT = TP + FN_missed + FN_misclassified_location(Misc)
    fn = total_gt_in_slice - tp - misc

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn + misc) if (tp + fn + misc) > 0 else 0 # Recall denominator is total GT objects
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    return {'TP': tp, 'FP': fp, 'FN': fn, 'Misc': misc, 'Precision': precision, 'Recall': recall, 'F1': f1, 'Total GT': total_gt_in_slice, 'Num Images': len(results_slice)}

def plot_analysis_results(analysis_dict, title, xlabel, save_path):
    """Plots Precision, Recall, F1, and Support (Num Images) for analysis slices."""
    labels = list(analysis_dict.keys())
    precision = [v['Precision'] for v in analysis_dict.values()]
    recall = [v['Recall'] for v in analysis_dict.values()]
    f1 = [v['F1'] for v in analysis_dict.values()]
    support_gt = [v['Total GT'] for v in analysis_dict.values()]
    support_img = [v['Num Images'] for v in analysis_dict.values()]

    x = np.arange(len(labels))
    width = 0.2

    fig, ax1 = plt.subplots(figsize=(12, 6))
    fig.suptitle(title, fontsize=14)

    rects1 = ax1.bar(x - width, precision, width, label='Precision', color='skyblue')
    rects2 = ax1.bar(x, recall, width, label='Recall', color='lightcoral')
    rects3 = ax1.bar(x + width, f1, width, label='F1 Score', color='lightgreen')

    ax1.set_ylabel('Scores (P, R, F1)', color='black')
    ax1.set_xlabel(xlabel)
    ax1.set_xticks(x)
    ax1.set_xticklabels(labels, rotation=45, ha="right")
    ax1.tick_params(axis='y', labelcolor='black')
    ax1.legend(loc='upper left')
    ax1.grid(True, axis='y', linestyle='--', alpha=0.7)
    ax1.set_ylim(0, 1.05)

    # Add counts on a second y-axis
    ax2 = ax1.twinx()
    ax2.set_ylabel('Support (# GT Boxes / # Images)', color='dimgray')
    # Plot support bars slightly offset or use lines/points
    ax2.plot(x, support_gt, label='# GT Boxes', color='dimgray', linestyle='--', marker='o', markersize=5)
    ax2.plot(x, support_img, label='# Images', color='darkgray', linestyle=':', marker='x', markersize=5)
    ax2.tick_params(axis='y', labelcolor='dimgray')
    ax2.legend(loc='upper right')
    # ax2.set_ylim(0, max(max(support_gt), max(support_img)) * 1.2) # Adjust ylim for support

    fig.tight_layout(rect=[0, 0.03, 1, 0.95]) # Adjust layout for suptitle
    plt.savefig(save_path)
    plt.close(fig)
    logging.info(f"Analysis plot saved to {save_path}")

# --- Main Execution Block ---
if __name__ == "__main__":
    logging.info("Script execution started (main guard).")
    print("DEBUG: Script execution started in __main__.")

    model_exists = MODEL_PATH.exists()
    yaml_exists = DATASET_YAML_PATH.exists()
    print(f"DEBUG: Checking Model Path: {MODEL_PATH} - Exists: {model_exists}")
    print(f"DEBUG: Checking YAML Path: {DATASET_YAML_PATH} - Exists: {yaml_exists}")

    if not model_exists:
        log_msg = f"CRITICAL: Model file not found: {MODEL_PATH}. Exiting."
        logging.error(log_msg); print(log_msg)
    elif not yaml_exists:
        log_msg = f"CRITICAL: Dataset YAML not found: {DATASET_YAML_PATH}. Exiting."
        logging.error(log_msg); print(log_msg)
    else:
        print("DEBUG: Pre-checks passed. Proceeding with analysis setup.")
        try:
            # --- Setup ---
            dirs = setup_output_dirs(OUTPUT_DIR)
            model = YOLO(MODEL_PATH) # Load model here before analysis starts
            image_files, label_dir, class_names = load_dataset_info(DATASET_YAML_PATH, SPLIT_TO_ANALYZE)

            if not image_files:
                 logging.error("No image files found. Aborting analysis.")
            else:
                # --- Run Core Analysis ---
                all_results, tide_gt_data, tide_pred_data = run_analysis(
                    model, image_files, label_dir, class_names, dirs,
                    CONF_THRESHOLD, IOU_THRESHOLD, TARGET_FP_ANALYSIS_CLASS_NAME
                )

                # --- Process Overall Results ---
                logging.info("Calculating overall summary statistics...")
                overall_stats = calculate_slice_metrics(all_results) # Use helper for overall stats too
                summary = {
                    "Total Images": len(all_results),
                    "Total GT Boxes": overall_stats['Total GT'],
                    "Total Predictions": sum(len(r['predictions']) for r in all_results),
                    "TP": overall_stats['TP'],
                    "FP": overall_stats['FP'],
                    "FN (Missed + Misclassified)": overall_stats['FN'] + overall_stats['Misc'], # Total GT not correctly identified
                    "FN (Missed Only)": overall_stats['FN'],
                    "Misclassified": overall_stats['Misc'],
                    "Precision": overall_stats['Precision'],
                    "Recall": overall_stats['Recall'],
                    "F1 Score": overall_stats['F1'],
                }
                print("\n" + "="*40)
                print("--- Overall Analysis Summary ---")
                print("="*40)
                for key, value in summary.items():
                    if isinstance(value, float): print(f"{key:<30}: {value:.4f}")
                    else: print(f"{key:<30}: {value}")
                print("="*40)
                # Save summary
                summary_path = OUTPUT_DIR / "overall_summary.json"
                with open(summary_path, 'w') as f: json.dump(summary, f, indent=4)
                logging.info(f"Overall summary saved to {summary_path}")

                # --- Perform Brightness Slicing Analysis ---
                logging.info("Performing analysis sliced by image brightness...")
                brightness_slices = {
                    f'Dark (<{BRIGHTNESS_DARK_THRESHOLD})': [r for r in all_results if r['brightness'] is not None and r['brightness'] < BRIGHTNESS_DARK_THRESHOLD],
                    f'Medium ({BRIGHTNESS_DARK_THRESHOLD}-{BRIGHTNESS_BRIGHT_THRESHOLD})': [r for r in all_results if r['brightness'] is not None and BRIGHTNESS_DARK_THRESHOLD <= r['brightness'] <= BRIGHTNESS_BRIGHT_THRESHOLD],
                    f'Bright (>{BRIGHTNESS_BRIGHT_THRESHOLD})': [r for r in all_results if r['brightness'] is not None and r['brightness'] > BRIGHTNESS_BRIGHT_THRESHOLD],
                    'Unknown/Failed': [r for r in all_results if r['brightness'] is None]
                }
                brightness_analysis = {}
                for name, results_slice in brightness_slices.items():
                    if results_slice: # Only analyze if slice has images
                        brightness_analysis[name] = calculate_slice_metrics(results_slice)
                        logging.info(f"  Slice '{name}': {brightness_analysis[name]['Num Images']} images, Metrics: P={brightness_analysis[name]['Precision']:.3f}, R={brightness_analysis[name]['Recall']:.3f}, F1={brightness_analysis[name]['F1']:.3f}")
                    else:
                         logging.info(f"  Slice '{name}': 0 images.")

                # Plot brightness results
                if brightness_analysis:
                    plot_path = dirs["slice_analysis"] / "brightness_slice_performance.png"
                    plot_analysis_results(brightness_analysis, "Performance across Image Brightness Slices", "Brightness Condition", plot_path)

                # --- Perform Confidence Bin Analysis ---
                logging.info("Performing analysis sliced by prediction confidence...")
                confidence_analysis = {}
                # Initialize bins
                for i in range(len(CONFIDENCE_BINS) - 1):
                     bin_label = f'{CONFIDENCE_BINS[i]:.1f}-{CONFIDENCE_BINS[i+1]:.1f}'
                     confidence_analysis[bin_label] = {'TP': 0, 'FP': 0, 'Misc': 0, 'count': 0}

                # Accumulate counts per bin
                for img_result in all_results:
                    for pred in img_result['predictions']:
                         score = pred['score']
                         error_type = pred['error_type']
                         # Find correct bin
                         for i in range(len(CONFIDENCE_BINS) - 1):
                             if CONFIDENCE_BINS[i] <= score < CONFIDENCE_BINS[i+1]:
                                 bin_label = f'{CONFIDENCE_BINS[i]:.1f}-{CONFIDENCE_BINS[i+1]:.1f}'
                                 confidence_analysis[bin_label]['count'] += 1
                                 if error_type == 'TP': confidence_analysis[bin_label]['TP'] += 1
                                 elif error_type == 'FP': confidence_analysis[bin_label]['FP'] += 1
                                 elif error_type == 'Misclassification': confidence_analysis[bin_label]['Misc'] += 1
                                 break
                         # Handle score == 1.0 edge case
                         if score == 1.0 and len(CONFIDENCE_BINS) > 1:
                              last_bin_label = f'{CONFIDENCE_BINS[-2]:.1f}-{CONFIDENCE_BINS[-1]:.1f}'
                              confidence_analysis[last_bin_label]['count'] += 1
                              if error_type == 'TP': confidence_analysis[last_bin_label]['TP'] += 1
                              elif error_type == 'FP': confidence_analysis[last_bin_label]['FP'] += 1
                              elif error_type == 'Misclassification': confidence_analysis[last_bin_label]['Misc'] += 1


                # Calculate precision per bin
                bin_labels_plot = []
                bin_precision_plot = []
                bin_support_plot = []
                print("\n--- Confidence Bin Analysis ---")
                print(f"{'Confidence Bin':<15} {'Total Preds':<12} {'TP Rate':<10} {'FP Rate':<10} {'Misc Rate':<10} {'Precision':<10}")
                print("-" * 70)
                for label, data in confidence_analysis.items():
                     count = data['count']
                     if count > 0:
                         tp_rate = data['TP'] / count
                         fp_rate = data['FP'] / count
                         misc_rate = data['Misc'] / count
                         precision = data['TP'] / (data['TP'] + data['FP'] + data['Misc']) if (data['TP'] + data['FP'] + data['Misc']) > 0 else 0
                         print(f"{label:<15} {count:<12} {tp_rate:<10.3f} {fp_rate:<10.3f} {misc_rate:<10.3f} {precision:<10.3f}")
                         bin_labels_plot.append(label)
                         bin_precision_plot.append(precision)
                         bin_support_plot.append(count)
                     else:
                          print(f"{label:<15} {count:<12} {'N/A':<10} {'N/A':<10} {'N/A':<10} {'N/A':<10}")

                # Plot confidence results
                if bin_labels_plot:
                     plt.figure(figsize=(10, 6))
                     plt.plot(bin_labels_plot, bin_precision_plot, marker='o', label='Precision per Bin')
                     plt.xlabel("Confidence Bin")
                     plt.ylabel("Precision")
                     plt.title("Prediction Precision across Confidence Bins")
                     plt.ylim(0, 1.05)
                     plt.grid(True, linestyle='--', alpha=0.7)
                     # Add count as text
                     for i, count in enumerate(bin_support_plot): plt.text(i, bin_precision_plot[i] + 0.02, f'n={count}', ha='center', va='bottom', fontsize=9)
                     plt.tight_layout()
                     plt.savefig(dirs["confidence_analysis"] / "confidence_bin_precision.png")
                     plt.close()
                     logging.info(f"Confidence analysis plot saved.")


                # --- Run TIDE Analysis ---
                logging.info("Preparing and running TIDE analysis...")
                # Convert GT data for TIDE datasets API
                tide_gt = datasets.DetectionResult('GroundTruth', image_ids=list(tide_gt_data.keys()))
                for img_id, annotations in tide_gt_data.items():
                    for ann in annotations:
                        tide_gt.add_annotation(img_id, ann['class'], ann['bbox'])

                # Convert Pred data for TIDE datasets API
                tide_preds = datasets.DetectionResult('Predictions', image_ids=list(tide_gt_data.keys())) # Use GT image IDs
                for pred_ann in tide_pred_data:
                     # Ensure required keys exist
                     if all(k in pred_ann for k in ['image_id', 'class', 'bbox', 'score']):
                           tide_preds.add_prediction(pred_ann['image_id'], pred_ann['class'], pred_ann['bbox'], pred_ann['score'])
                     else:
                           logging.warning(f"Skipping prediction due to missing keys: {pred_ann}")


                tide = TIDE()
                # Note: Adjust TIDE pos_threshold if different from default 0.5 needed
                # Note: background_threshold controls suppression, mode='bbox' for standard detection
                try:
                    tide.evaluate_range(tide_gt, tide_preds, mode=TIDE.BOX)
                    summary_str = tide.get_summary() # Returns string
                    print("\n" + "="*40)
                    print("--- TIDE Analysis Summary ---")
                    print("="*40)
                    print(summary_str)
                    print("="*40)

                    # Save TIDE summary plot and detailed errors
                    tide_plot_path = dirs["tide_results"] / "tide_summary_plot.png"
                    tide.plot(str(OUTPUT_DIR / dirs["tide_results"])) # Pass folder path to save plots
                    logging.info(f"TIDE summary plot potentially saved in {dirs['tide_results']}") # Plot function saves directly
                    # Check if plot actually exists (tide.plot might fail silently sometimes)
                    # if not tide_plot_path.exists():
                    #      logging.warning(f"TIDE plot file was not found at {tide_plot_path}. Plotting might have failed.")

                    # Save detailed TIDE errors to JSON
                    tide_errors = tide.get_all_errors()
                    tide_errors_path = dirs["tide_results"] / "tide_detailed_errors.json"
                    with open(tide_errors_path, 'w') as f: json.dump(tide_errors, f, indent=2)
                    logging.info(f"TIDE detailed errors saved to {tide_errors_path}")

                except Exception as tide_e:
                    logging.error(f"TIDE evaluation failed: {tide_e}", exc_info=True)


        except Exception as e:
             logging.error(f"An unexpected critical error occurred during analysis: {e}", exc_info=True)
             print(f"CRITICAL ERROR during analysis: {e}")

    logging.info("Script execution finished (main guard).")
    print("DEBUG: Script execution finished in __main__.")

DEBUG: Script execution started in __main__.
DEBUG: Checking Model Path: /kaggle/input/best-model-30/best.pt - Exists: True
DEBUG: Checking YAML Path: /kaggle/input/cardetection/car/data.yaml - Exists: True
DEBUG: Pre-checks passed. Proceeding with analysis setup.


Analyzing Images: 100%|██████████| 801/801 [02:01<00:00,  6.61it/s]



--- Overall Analysis Summary ---
Total Images                  : 801
Total GT Boxes                : 944
Total Predictions             : 948
TP                            : 828
FP                            : 87
FN (Missed + Misclassified)   : 116
FN (Missed Only)              : 83
Misclassified                 : 33
Precision                     : 0.9049
Recall                        : 0.8771
F1 Score                      : 0.8908

--- Confidence Bin Analysis ---
Confidence Bin  Total Preds  TP Rate    FP Rate    Misc Rate  Precision 
----------------------------------------------------------------------
0.0-0.3         33           0.333      0.485      0.182      0.333     
0.3-0.5         73           0.233      0.493      0.274      0.233     
0.5-0.7         65           0.600      0.369      0.031      0.600     
0.7-0.9         118          0.873      0.085      0.042      0.873     
0.9-1.0         659          0.998      0.002      0.000      0.998     
CRITICAL ERROR during 

In [22]:
# --- Snippet 4: Execution, Summary, Slicing, Confidence, TIDE Analysis ---

def calculate_slice_metrics(results_slice):
    """Calculates metrics for a subset of results."""
    tp, fp, fn, misc = 0, 0, 0, 0
    total_gt_in_slice = 0

    for img_result in results_slice:
        total_gt_in_slice += len(img_result['ground_truth'])
        # FN count from the image result is the count of GT boxes not matched
        fn += img_result.get('fn_count', 0)
        for pred in img_result['predictions']:
            if pred['error_type'] == 'TP': tp += 1
            elif pred['error_type'] == 'FP': fp += 1
            elif pred['error_type'] == 'Misclassification': misc += 1

    # Validate counts: TP + FN (missed GT) + Misc (wrong class for GT) should equal total GT
    # Note: The FN calculated here based on summing image FNs might differ slightly if
    # the definition used in overall stats needs refinement, but should be close.
    # Let's stick to the definition: Recall = TP / Total Actual Positives
    total_actual_positives = tp + fn + misc # All GT boxes fall into one of these categories relative to preds
    if total_actual_positives != total_gt_in_slice:
         logging.warning(f"Mismatch in GT counts! total_gt_in_slice={total_gt_in_slice}, tp+fn+misc={tp+fn+misc}")
         # Use total_gt_in_slice as the denominator for recall consistency
         total_actual_positives = total_gt_in_slice


    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    # Recall: How many of the actual objects did we find correctly?
    recall = tp / total_actual_positives if total_actual_positives > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    return {'TP': tp, 'FP': fp, 'FN': fn, 'Misc': misc, 'Precision': precision, 'Recall': recall, 'F1': f1, 'Total GT': total_gt_in_slice, 'Num Images': len(results_slice)}

def plot_analysis_results(analysis_dict, title, xlabel, save_path):
    """Plots Precision, Recall, F1, and Support (Num Images) for analysis slices."""
    labels = list(analysis_dict.keys())
    precision = [v['Precision'] for v in analysis_dict.values()]
    recall = [v['Recall'] for v in analysis_dict.values()]
    f1 = [v['F1'] for v in analysis_dict.values()]
    support_gt = [v['Total GT'] for v in analysis_dict.values()]
    support_img = [v['Num Images'] for v in analysis_dict.values()]

    x = np.arange(len(labels))
    width = 0.2

    fig, ax1 = plt.subplots(figsize=(12, 6))
    fig.suptitle(title, fontsize=14)

    rects1 = ax1.bar(x - width, precision, width, label='Precision', color='skyblue')
    rects2 = ax1.bar(x, recall, width, label='Recall', color='lightcoral')
    rects3 = ax1.bar(x + width, f1, width, label='F1 Score', color='lightgreen')

    ax1.set_ylabel('Scores (P, R, F1)', color='black')
    ax1.set_xlabel(xlabel)
    ax1.set_xticks(x)
    ax1.set_xticklabels(labels, rotation=45, ha="right")
    ax1.tick_params(axis='y', labelcolor='black')
    ax1.legend(loc='upper left')
    ax1.grid(True, axis='y', linestyle='--', alpha=0.7)
    ax1.set_ylim(0, 1.05)

    # Add counts on a second y-axis
    ax2 = ax1.twinx()
    ax2.set_ylabel('Support (# GT Boxes / # Images)', color='dimgray')
    line1, = ax2.plot(x, support_gt, label='# GT Boxes', color='dimgray', linestyle='--', marker='o', markersize=5)
    line2, = ax2.plot(x, support_img, label='# Images', color='darkgray', linestyle=':', marker='x', markersize=5)
    ax2.tick_params(axis='y', labelcolor='dimgray')
    # Combine legends
    lines = [rects1[0], rects2[0], rects3[0], line1, line2]
    labels_combined = [l.get_label() for l in lines]
    ax1.legend(lines, labels_combined, loc='best') # Adjust legend location if needed
    # ax2.legend(loc='upper right') # Remove separate legend for ax2


    fig.tight_layout(rect=[0, 0.03, 1, 0.95]) # Adjust layout for suptitle
    plt.savefig(save_path)
    plt.close(fig)
    logging.info(f"Analysis plot saved to {save_path}")

# --- Main Execution Block ---
if __name__ == "__main__":
    logging.info("Script execution started (main guard).")
    print("DEBUG: Script execution started in __main__.")

    model_exists = MODEL_PATH.exists()
    yaml_exists = DATASET_YAML_PATH.exists()
    print(f"DEBUG: Checking Model Path: {MODEL_PATH} - Exists: {model_exists}")
    print(f"DEBUG: Checking YAML Path: {DATASET_YAML_PATH} - Exists: {yaml_exists}")

    if not model_exists:
        log_msg = f"CRITICAL: Model file not found: {MODEL_PATH}. Exiting."
        logging.error(log_msg); print(log_msg)
    elif not yaml_exists:
        log_msg = f"CRITICAL: Dataset YAML not found: {DATASET_YAML_PATH}. Exiting."
        logging.error(log_msg); print(log_msg)
    else:
        print("DEBUG: Pre-checks passed. Proceeding with analysis setup.")
        try:
            # --- Setup ---
            dirs = setup_output_dirs(OUTPUT_DIR)
            print("DEBUG: Loading model...") # Add debug print
            model = YOLO(MODEL_PATH) # Load model here before analysis starts
            print("DEBUG: Loading dataset info...") # Add debug print
            image_files, label_dir, class_names = load_dataset_info(DATASET_YAML_PATH, SPLIT_TO_ANALYZE)

            if not image_files:
                 logging.error("No image files found. Aborting analysis.")
                 print("ERROR: No image files found. Aborting analysis.") # Add debug print
            else:
                # --- Run Core Analysis ---
                print(f"DEBUG: Calling run_analysis for {len(image_files)} images...") # Add debug print
                all_results, tide_gt_data, tide_pred_data = run_analysis(
                    model, image_files, label_dir, class_names, dirs,
                    CONF_THRESHOLD, IOU_THRESHOLD, TARGET_FP_ANALYSIS_CLASS_NAME
                )
                print(f"DEBUG: run_analysis finished. Found {len(all_results)} image results.") # Add debug print

                # --- Process Overall Results ---
                logging.info("Calculating overall summary statistics...")
                print("DEBUG: Calculating overall summary stats...") # Add debug print
                overall_stats = calculate_slice_metrics(all_results) # Use helper for overall stats too
                summary = {
                    "Total Images": len(all_results),
                    "Total GT Boxes": overall_stats['Total GT'],
                    "Total Predictions": sum(len(r['predictions']) for r in all_results),
                    "TP": overall_stats['TP'],
                    "FP": overall_stats['FP'],
                    "FN (Missed GT Only)": overall_stats['FN'],
                    "Misclassified": overall_stats['Misc'],
                    "Total GT Errors (FN + Misc)": overall_stats['FN'] + overall_stats['Misc'],
                    "Precision": overall_stats['Precision'],
                    "Recall": overall_stats['Recall'],
                    "F1 Score": overall_stats['F1'],
                }
                print("\n" + "="*40)
                print("--- Overall Analysis Summary ---")
                print("="*40)
                for key, value in summary.items():
                    if isinstance(value, float): print(f"{key:<30}: {value:.4f}")
                    else: print(f"{key:<30}: {value}")
                print("="*40)
                # Save summary
                summary_path = OUTPUT_DIR / "overall_summary.json"
                with open(summary_path, 'w') as f: json.dump(summary, f, indent=4)
                logging.info(f"Overall summary saved to {summary_path}")

                # --- Perform Brightness Slicing Analysis ---
                logging.info("Performing analysis sliced by image brightness...")
                print("DEBUG: Performing brightness slicing analysis...") # Add debug print
                brightness_slices = {
                    f'Dark (<{BRIGHTNESS_DARK_THRESHOLD})': [r for r in all_results if r['brightness'] is not None and r['brightness'] < BRIGHTNESS_DARK_THRESHOLD],
                    f'Medium ({BRIGHTNESS_DARK_THRESHOLD}-{BRIGHTNESS_BRIGHT_THRESHOLD})': [r for r in all_results if r['brightness'] is not None and BRIGHTNESS_DARK_THRESHOLD <= r['brightness'] <= BRIGHTNESS_BRIGHT_THRESHOLD],
                    f'Bright (>{BRIGHTNESS_BRIGHT_THRESHOLD})': [r for r in all_results if r['brightness'] is not None and r['brightness'] > BRIGHTNESS_BRIGHT_THRESHOLD],
                    'Unknown/Failed': [r for r in all_results if r['brightness'] is None]
                }
                brightness_analysis = {}
                for name, results_slice in brightness_slices.items():
                    if results_slice: # Only analyze if slice has images
                        brightness_analysis[name] = calculate_slice_metrics(results_slice)
                        logging.info(f"  Slice '{name}': {brightness_analysis[name]['Num Images']} images, Metrics: P={brightness_analysis[name]['Precision']:.3f}, R={brightness_analysis[name]['Recall']:.3f}, F1={brightness_analysis[name]['F1']:.3f}")
                    else:
                         logging.info(f"  Slice '{name}': 0 images.")

                # Plot brightness results
                if brightness_analysis:
                    plot_path = dirs["slice_analysis"] / "brightness_slice_performance.png"
                    plot_analysis_results(brightness_analysis, "Performance across Image Brightness Slices", "Brightness Condition", plot_path)
                    print(f"DEBUG: Brightness slice plot saved to {plot_path}") # Add debug print

                # --- Perform Confidence Bin Analysis ---
                logging.info("Performing analysis sliced by prediction confidence...")
                print("DEBUG: Performing confidence bin analysis...") # Add debug print
                confidence_analysis = {}
                # Initialize bins
                for i in range(len(CONFIDENCE_BINS) - 1):
                     bin_label = f'{CONFIDENCE_BINS[i]:.1f}-{CONFIDENCE_BINS[i+1]:.1f}'
                     confidence_analysis[bin_label] = {'TP': 0, 'FP': 0, 'Misc': 0, 'count': 0}

                # Accumulate counts per bin
                for img_result in all_results:
                    for pred in img_result['predictions']:
                         score = pred['score']
                         error_type = pred['error_type']
                         # Find correct bin
                         assigned_bin = False
                         for i in range(len(CONFIDENCE_BINS) - 1):
                             if CONFIDENCE_BINS[i] <= score < CONFIDENCE_BINS[i+1]:
                                 bin_label = f'{CONFIDENCE_BINS[i]:.1f}-{CONFIDENCE_BINS[i+1]:.1f}'
                                 confidence_analysis[bin_label]['count'] += 1
                                 if error_type == 'TP': confidence_analysis[bin_label]['TP'] += 1
                                 elif error_type == 'FP': confidence_analysis[bin_label]['FP'] += 1
                                 elif error_type == 'Misclassification': confidence_analysis[bin_label]['Misc'] += 1
                                 assigned_bin = True
                                 break
                         # Handle score == 1.0 edge case or scores slightly outside range due to float issues
                         if not assigned_bin and score >= CONFIDENCE_BINS[-1] - 1e-6: # Account for float precision near 1.0
                              last_bin_label = f'{CONFIDENCE_BINS[-2]:.1f}-{CONFIDENCE_BINS[-1]:.1f}'
                              confidence_analysis[last_bin_label]['count'] += 1
                              if error_type == 'TP': confidence_analysis[last_bin_label]['TP'] += 1
                              elif error_type == 'FP': confidence_analysis[last_bin_label]['FP'] += 1
                              elif error_type == 'Misclassification': confidence_analysis[last_bin_label]['Misc'] += 1


                # Calculate precision per bin
                bin_labels_plot = []
                bin_precision_plot = []
                bin_support_plot = []
                print("\n--- Confidence Bin Analysis ---")
                print(f"{'Confidence Bin':<15} {'Total Preds':<12} {'TP Rate':<10} {'FP Rate':<10} {'Misc Rate':<10} {'Precision':<10}")
                print("-" * 70)
                for label, data in confidence_analysis.items():
                     count = data['count']
                     if count > 0:
                         tp_rate = data['TP'] / count
                         fp_rate = data['FP'] / count
                         misc_rate = data['Misc'] / count
                         # Precision = TP / (TP + FP + Misc) for this bin
                         precision = data['TP'] / count if count > 0 else 0 # Alt: TP Rate is kinda like precision if TP+FP+Misc = count
                         precision_strict = data['TP'] / (data['TP'] + data['FP'] + data['Misc']) if (data['TP'] + data['FP'] + data['Misc']) > 0 else 0
                         print(f"{label:<15} {count:<12} {tp_rate:<10.3f} {fp_rate:<10.3f} {misc_rate:<10.3f} {precision_strict:<10.3f}")
                         bin_labels_plot.append(label)
                         bin_precision_plot.append(precision_strict) # Plot strict precision
                         bin_support_plot.append(count)
                     else:
                          print(f"{label:<15} {count:<12} {'N/A':<10} {'N/A':<10} {'N/A':<10} {'N/A':<10}")

                # Plot confidence results
                if bin_labels_plot:
                     conf_plot_path = dirs["confidence_analysis"] / "confidence_bin_precision.png"
                     plt.figure(figsize=(10, 6))
                     plt.plot(bin_labels_plot, bin_precision_plot, marker='o', label='Precision per Bin')
                     plt.xlabel("Confidence Bin")
                     plt.ylabel("Precision (TP / (TP+FP+Misc))")
                     plt.title("Prediction Precision across Confidence Bins")
                     plt.ylim(0, 1.05)
                     plt.grid(True, linestyle='--', alpha=0.7)
                     # Add count as text
                     for i, count in enumerate(bin_support_plot): plt.text(i, bin_precision_plot[i] + 0.02, f'n={count}', ha='center', va='bottom', fontsize=9)
                     plt.tight_layout()
                     plt.savefig(conf_plot_path)
                     plt.close()
                     logging.info(f"Confidence analysis plot saved to {conf_plot_path}")
                     print(f"DEBUG: Confidence plot saved to {conf_plot_path}") # Add debug print


                # --- Run TIDE Analysis ---
                logging.info("Preparing and running TIDE analysis...")
                print("DEBUG: Preparing TIDE data...") # Add debug print
                # Convert GT data for TIDE datasets API
                tide_gt = datasets.DetectionResult('GroundTruth', image_ids=list(tide_gt_data.keys()))
                for img_id, annotations in tide_gt_data.items():
                    for ann in annotations:
                        tide_gt.add_annotation(img_id, ann['class'], ann['bbox'])

                # Convert Pred data for TIDE datasets API
                tide_preds = datasets.DetectionResult('Predictions', image_ids=list(tide_gt_data.keys())) # Use GT image IDs
                preds_added_tide = 0
                for pred_ann in tide_pred_data:
                     # Ensure required keys exist
                     if all(k in pred_ann for k in ['image_id', 'class', 'bbox', 'score']):
                           tide_preds.add_prediction(pred_ann['image_id'], pred_ann['class'], pred_ann['bbox'], pred_ann['score'])
                           preds_added_tide += 1
                     else:
                           logging.warning(f"Skipping prediction for TIDE due to missing keys: {pred_ann}")
                print(f"DEBUG: Added {preds_added_tide} predictions to TIDE structure.") # Add debug print

                tide = TIDE()
                # Note: Adjust TIDE pos_threshold if different from default 0.5 needed
                # Note: background_threshold controls suppression, mode='bbox' for standard detection
                try:
                    print("DEBUG: Running TIDE evaluate_range...") # Add debug print
                    tide.evaluate_range(tide_gt, tide_preds, mode=TIDE.BOX)
                    print("DEBUG: TIDE evaluate_range finished.") # Add debug print
                    summary_str = tide.get_summary() # Returns string
                    print("\n" + "="*40)
                    print("--- TIDE Analysis Summary ---")
                    print("="*40)
                    print(summary_str)
                    print("="*40)

                    # Save TIDE summary plot and detailed errors
                    print("DEBUG: Plotting TIDE results...") # Add debug print
                    tide_plot_dir = str(dirs["tide_results"]) # TIDE plot needs a folder path
                    tide.plot(tide_plot_dir)
                    logging.info(f"TIDE summary plot saved in {tide_plot_dir}")
                    print(f"DEBUG: TIDE plot saved in {tide_plot_dir}") # Add debug print

                    tide_errors = tide.get_all_errors()
                    tide_errors_path = dirs["tide_results"] / "tide_detailed_errors.json"
                    with open(tide_errors_path, 'w') as f: json.dump(tide_errors, f, indent=2)
                    logging.info(f"TIDE detailed errors saved to {tide_errors_path}")
                    print(f"DEBUG: TIDE errors saved to {tide_errors_path}") # Add debug print

                except Exception as tide_e:
                    logging.error(f"TIDE evaluation failed: {tide_e}", exc_info=True)
                    print(f"ERROR: TIDE evaluation failed: {tide_e}") # Add debug print


        except Exception as e:
             logging.error(f"An unexpected critical error occurred during analysis setup or execution: {e}", exc_info=True)
             print(f"CRITICAL ERROR during analysis setup/execution: {e}") # Add debug print

    logging.info("Script execution finished (main guard).")
    print("DEBUG: Script execution finished in __main__.")

DEBUG: Script execution started in __main__.
DEBUG: Checking Model Path: /kaggle/input/best-model-30/best.pt - Exists: True
DEBUG: Checking YAML Path: /kaggle/input/cardetection/car/data.yaml - Exists: True
DEBUG: Pre-checks passed. Proceeding with analysis setup.
DEBUG: Loading model...
DEBUG: Loading dataset info...
DEBUG: Calling run_analysis for 801 images...


Analyzing Images: 100%|██████████| 801/801 [01:43<00:00,  7.77it/s]
  ax1.legend(lines, labels_combined, loc='best') # Adjust legend location if needed
  ax1.legend(lines, labels_combined, loc='best') # Adjust legend location if needed
  ax1.legend(lines, labels_combined, loc='best') # Adjust legend location if needed


DEBUG: run_analysis finished. Found 801 image results.
DEBUG: Calculating overall summary stats...

--- Overall Analysis Summary ---
Total Images                  : 801
Total GT Boxes                : 944
Total Predictions             : 948
TP                            : 828
FP                            : 87
FN (Missed GT Only)           : 83
Misclassified                 : 33
Total GT Errors (FN + Misc)   : 116
Precision                     : 0.9049
Recall                        : 0.8771
F1 Score                      : 0.8908
DEBUG: Performing brightness slicing analysis...
DEBUG: Brightness slice plot saved to /kaggle/working/robust_error_analysis/slice_analysis/brightness_slice_performance.png
DEBUG: Performing confidence bin analysis...

--- Confidence Bin Analysis ---
Confidence Bin  Total Preds  TP Rate    FP Rate    Misc Rate  Precision 
----------------------------------------------------------------------
0.0-0.3         33           0.333      0.485      0.182      0.333   

In [23]:
import os
import zipfile
from shutil import make_archive

# Define the directory to be zipped
working_dir = '/kaggle/working'  # This is the default working directory in Kaggle Notebooks

# Create a zip file from the working directory
zip_filename = '/kaggle/working/working_directory.zip'
make_archive(zip_filename.replace('.zip', ''), 'zip', working_dir)

# Now you can download the zip file via Kaggle's interface
print(f"Zip file created: {zip_filename}")


Zip file created: /kaggle/working/working_directory.zip
