In [None]:
import os
import cv2 as cv

def generate_train_frames_and_labels(vidPath, label_txt_path, save_frame_path, save_label_path):
    """
    Dump every frame of a video as a JPEG and write one label file per frame.

    Each frame receives an identical copy of the full text of
    ``label_txt_path``; the labels are not adjusted per frame.

    Args:
        vidPath: Path to the input video.
        label_txt_path: Path to the label text file that is copied for every frame.
        save_frame_path: Directory that receives ``<video>_frame_<NNNNNN>.jpg``.
        save_label_path: Directory that receives ``<video>_frame_<NNNNNN>.txt``.

    Returns:
        None. The function silently does nothing when ``label_txt_path``
        does not exist.
    """
    if not os.path.exists(label_txt_path):
        return

    os.makedirs(save_frame_path, exist_ok=True)
    os.makedirs(save_label_path, exist_ok=True)

    # basename() understands the platform's path separators (a plain split on
    # '/' does not work for Windows-style paths). The output prefix is still
    # everything before the FIRST dot, so existing file names do not change.
    fileName = os.path.basename(vidPath).split('.')[0]

    # Read the label text before opening the video so a failure here cannot
    # leave a capture handle open.
    with open(label_txt_path, 'r') as f:
        label_data = f.read()

    cap = cv.VideoCapture(vidPath)
    try:
        while cap.isOpened():
            # Index of the frame that the next read() will return.
            frame_idx = int(cap.get(cv.CAP_PROP_POS_FRAMES))
            ret, frame = cap.read()
            if not ret:
                break

            stem = f"{fileName}_frame_{frame_idx:06d}"
            cv.imwrite(os.path.join(save_frame_path, stem + ".jpg"), frame)

            with open(os.path.join(save_label_path, stem + ".txt"), 'w') as f:
                f.write(label_data)
    finally:
        # Always free the decoder/file handle, even on error or interrupt.
        cap.release()
            

# Split the sample clip into per-frame images under data/train/images and
# write a copy of IMG_1156.txt for every frame under data/train/labels.
generate_train_frames_and_labels(
    vidPath='./IMG_1156.mov',
    label_txt_path='./IMG_1156.txt',
    save_frame_path='data/train/images',
    save_label_path='data/train/labels'
)

KeyboardInterrupt: 

In [5]:
import numpy as np
from typing import Dict, Tuple
from pathlib import Path

def parse_yolo_label(label_line: str) -> Dict:
    """
    Decode one whitespace-separated YOLO pose annotation.

    The expected layout is ``class x_center y_center width height`` followed
    by any number of ``x y visibility`` triplets.

    Args:
        label_line: e.g. "0 0.538 0.468 0.780 0.643 0.148 0.639 2 ..."

    Returns:
        dict with:
            - class_id: int
            - bbox: np.array shape (4,) [x_center, y_center, width, height]
            - keypoints_xy: np.array shape (K, 2) with the x, y columns
            - visibility: np.array shape (K,) with the third column
    """
    numbers = list(map(float, label_line.strip().split()))

    head = numbers[0]
    box_part = numbers[1:5]
    # Everything after the box is laid out as (x, y, visibility) triplets.
    triplets = np.array(numbers[5:]).reshape(-1, 3)

    return dict(
        class_id=int(head),
        bbox=np.array(box_part),
        keypoints_xy=triplets[:, :2],
        visibility=triplets[:, 2],
    )


def parse_yolo_label_file(label_path: str) -> Dict:
    """
    Parse the first annotation found in a YOLO pose ``.txt`` label file.

    A YOLO label file holds one object per line. Only the first non-empty
    line is parsed; this matches the prediction side of this notebook, which
    also keeps only the first detection. Joining all lines together would
    mix several objects into one record.

    Args:
        label_path: Path to the label file.

    Returns:
        The dict produced by ``parse_yolo_label`` for the first annotation.

    Raises:
        ValueError: if the file contains no annotation at all.
    """
    with open(label_path, 'r') as f:
        rows = [row.strip() for row in f if row.strip()]

    if not rows:
        raise ValueError(f"Label file contains no annotations: {label_path}")

    return parse_yolo_label(rows[0])


def get_yolo_predictions(model, image_path: str, normalize: bool = True) -> Dict:
    """
    Run YOLO inference on one image and return the first detection's keypoints.

    Args:
        model: YOLO model instance (anything exposing ``predict(image_path)``
            that returns Ultralytics-style results).
        image_path: Path to image
        normalize: If True, divide x by the image width and y by the image
            height so keypoints land in the [0, 1] range used by YOLO labels.

    Returns:
        dict with:
            - keypoints_xy: np.array shape (K, 2); all zeros with shape
              (10, 2) when nothing was detected
            - confidence: float (detection confidence; 0.0 when nothing was
              detected, 1.0 when keypoints exist but no box confidence does)
    """
    results = model.predict(image_path)
    result = results[0]

    # No keypoint head output, or no detection at all: return zeros.
    keypoints = result.keypoints
    if keypoints is None or len(keypoints.xy) == 0:
        return {
            'keypoints_xy': np.zeros((10, 2)),
            'confidence': 0.0
        }

    # Get first detection (assuming single box in image)
    pred_keypoints = keypoints.xy[0].cpu().numpy()

    if normalize:
        # The result already carries the original image size as
        # (height, width), so the image does not have to be read from disk a
        # second time just to learn its dimensions.
        img_height, img_width = result.orig_shape[:2]
        pred_keypoints_normalized = pred_keypoints.copy()
        pred_keypoints_normalized[:, 0] = pred_keypoints_normalized[:, 0] / img_width   # Normalize x
        pred_keypoints_normalized[:, 1] = pred_keypoints_normalized[:, 1] / img_height  # Normalize y
    else:
        pred_keypoints_normalized = pred_keypoints

    # Get detection confidence if available
    box_conf = result.boxes.conf
    confidence = float(box_conf[0]) if len(box_conf) > 0 else 1.0

    return {
        'keypoints_xy': pred_keypoints_normalized,
        'confidence': confidence
    }

def calculate_euclidean_distance(pred: np.ndarray, gt: np.ndarray) -> np.ndarray:
    """
    Per-keypoint straight-line distance between prediction and ground truth.

    Args:
        pred: shape (K, 2) - predicted (x, y) for K keypoints
        gt: shape (K, 2) - reference (x, y) for the same keypoints

    Returns:
        np.ndarray of shape (K,), one distance per keypoint
    """
    offset = pred - gt
    # Sum the squared x/y offsets of every row, then take the root.
    return np.sqrt(np.square(offset).sum(axis=1))

def calculate_mse_single(pred: np.ndarray, gt: np.ndarray, 
                        visibility: np.ndarray = None) -> Dict:
    """
    Mean squared keypoint error for one image.

    Args:
        pred: shape (K, 2) - predicted keypoints
        gt: shape (K, 2) - ground truth keypoints
        visibility: shape (K,) - optional flags; when given, only keypoints
            whose flag equals 2 contribute

    Returns:
        dict with:
            - mse_overall: float, mean of the per-keypoint values
              (infinity when a mask is given and nothing is visible)
            - mse_per_keypoint: np.array shape (K,); entries excluded by the
              mask are NaN (all infinity when nothing is visible)
    """
    sq_err = np.square(pred - gt)

    if visibility is None:
        # No mask: every keypoint counts.
        per_point = sq_err.mean(axis=1)
        total = per_point.mean()
    else:
        is_visible = (visibility == 2)

        if not np.any(is_visible):
            # Nothing to score: signal it with infinities.
            worst = float('inf')
            return {
                'mse_overall': worst,
                'mse_per_keypoint': np.full(len(pred), worst)
            }

        # Average x/y per keypoint, blank out the masked ones with NaN and
        # let nanmean ignore them.
        per_point = np.where(is_visible, sq_err.mean(axis=1), np.nan)
        total = np.nanmean(per_point)

    return {
        'mse_overall': float(total),
        'mse_per_keypoint': per_point
    }

def calculate_mae_single(pred: np.ndarray, gt: np.ndarray,
                        visibility: np.ndarray = None) -> Dict:
    """
    Mean absolute keypoint error for one image.

    Args:
        pred: shape (K, 2) - predicted keypoints
        gt: shape (K, 2) - ground truth keypoints
        visibility: shape (K,) - optional flags; when given, only keypoints
            whose flag equals 2 contribute

    Returns:
        dict with:
            - mae_overall: float, mean of the per-keypoint values
              (infinity when a mask is given and nothing is visible)
            - mae_per_keypoint: np.array shape (K,); entries excluded by the
              mask are NaN (all infinity when nothing is visible)
    """
    abs_err = np.abs(pred - gt)

    if visibility is None:
        # No mask: every keypoint counts.
        per_point = abs_err.mean(axis=1)
        total = per_point.mean()
    else:
        is_visible = (visibility == 2)

        if not np.any(is_visible):
            # Nothing to score: signal it with infinities.
            worst = float('inf')
            return {
                'mae_overall': worst,
                'mae_per_keypoint': np.full(len(pred), worst)
            }

        # Masked keypoints become NaN so nanmean skips them.
        per_point = np.where(is_visible, abs_err.mean(axis=1), np.nan)
        total = np.nanmean(per_point)

    return {
        'mae_overall': float(total),
        'mae_per_keypoint': per_point
    }
    
def calculate_pck_single(pred: np.ndarray, gt: np.ndarray, 
                        threshold: float = 0.05) -> Dict:
    """
    PCK (Percentage of Correct Keypoints) for one image.

    A keypoint counts as correct when its Euclidean distance to the ground
    truth is strictly below ``threshold``. All K keypoints are scored; no
    visibility mask is applied here.

    Args:
        pred: shape (K, 2)
        gt: shape (K, 2)
        threshold: distance cut-off in the same units as the coordinates
            (e.g. 0.05 for normalized coordinates)

    Returns:
        dict with:
            - pck: float in [0, 1], fraction of keypoints under the cut-off
            - correct_per_keypoint: np.array shape (K,) of booleans
    """
    within = calculate_euclidean_distance(pred, gt) < threshold

    return {
        'pck': float(within.mean()),
        'correct_per_keypoint': within
    }

def evaluate_single_frame(pred_keypoints: np.ndarray, 
                               gt_keypoints: np.ndarray,
                               visibility: np.ndarray = None,
                               thresholds: list = None) -> Dict:
    """
    Calculate ALL metrics for a single frame

    Args:
        pred_keypoints: shape (K, 2) - predicted keypoints
        gt_keypoints: shape (K, 2) - ground truth keypoints
        visibility: shape (K,) - visibility flags; forwarded to the MSE and
            MAE calculations only (PCK and distances score every keypoint)
        thresholds: PCK thresholds to evaluate; defaults to
            (0.02, 0.05, 0.10) when omitted

    Returns:
        dict with:
            - mse_overall / mse_per_keypoint
            - mae_overall / mae_per_keypoint
            - pck_<t> and pck_<t>_per_keypoint for every threshold ``t``.
              The key is built by formatting the float, so 0.10 yields
              'pck_0.1', not 'pck_0.10'.
            - distances: per-keypoint Euclidean distances
    """
    # A None sentinel avoids sharing one mutable list object across calls.
    if thresholds is None:
        thresholds = (0.02, 0.05, 0.10)

    metrics = {}

    # MSE
    mse_result = calculate_mse_single(pred_keypoints, gt_keypoints, visibility)
    metrics['mse_overall'] = mse_result['mse_overall']
    metrics['mse_per_keypoint'] = mse_result['mse_per_keypoint']

    # MAE
    mae_result = calculate_mae_single(pred_keypoints, gt_keypoints, visibility)
    metrics['mae_overall'] = mae_result['mae_overall']
    metrics['mae_per_keypoint'] = mae_result['mae_per_keypoint']

    # PCK at multiple thresholds
    for thresh in thresholds:
        pck_result = calculate_pck_single(pred_keypoints, gt_keypoints, thresh)
        metrics[f'pck_{thresh}'] = pck_result['pck']
        metrics[f'pck_{thresh}_per_keypoint'] = pck_result['correct_per_keypoint']

    # Per-keypoint Euclidean distances (useful for analysis)
    metrics['distances'] = calculate_euclidean_distance(pred_keypoints, gt_keypoints)

    return metrics

In [6]:
from ultralytics import YOLO

# Walk-through of the evaluation helpers on one frame: load the model, parse
# the ground-truth label, run inference, compute the metrics and print them.
print("SINGLE IMAGE EVALUATION EXAMPLE")

# 1. Load model
model_path = './/best.pt'
model = YOLO(model_path)
print(f"\nLoaded model: {model_path}")

# 2. Load image and label
# NOTE(review): the frame-extraction function in this notebook writes .jpg
# files, while this path expects a .png — confirm where the .png frames come from.
image_path = 'data/train/images/IMG_1156_frame_000001.png'
label_path = 'data/train/labels/IMG_1156_frame_000001.txt'

print(f"Image: {image_path}")
print(f"Label: {label_path}")

# 3. Parse ground truth
gt_data = parse_yolo_label_file(label_path)

print("\nGround Truth Keypoints:")
print(gt_data)
gt_keypoints = gt_data['keypoints_xy']
visibility = gt_data['visibility']

# 4. Get predictions
# normalize=True divides the predicted pixel coordinates by the image size so
# they can be compared with the label coordinates parsed above.
pred_data = get_yolo_predictions(model, image_path, normalize=True)

print(f"pred_data: {pred_data}")
pred_keypoints = pred_data['keypoints_xy']
confidence = pred_data['confidence']

# 5. Calculate metrics
metrics = evaluate_single_frame(
    pred_keypoints, 
    gt_keypoints, 
    visibility,
    thresholds=[0.02, 0.05, 0.10]
)

# 6. Display results
# Metric keys are built by formatting the float threshold, so the 0.10
# threshold is stored under 'pck_0.1'.
print(f"\nOverall Metrics:")
print(f"  MSE: {metrics['mse_overall']:.6f}")
print(f"  MAE: {metrics['mae_overall']:.6f}")
print(f"  PCK@0.02: {metrics['pck_0.02']:.4f}")
print(f"  PCK@0.05: {metrics['pck_0.05']:.4f}")
print(f"  PCK@0.10: {metrics['pck_0.1']:.4f}")

print(f"\nPer-Keypoint Analysis:")
print(f"  {'Keypoint':<12} {'Distance':<12} {'MSE':<12} {'Correct@0.05'}")
print(f"  {'-'*50}")

# Display names for the 10 keypoints, indexed in the same order as the metric
# arrays. Presumably this matches the keypoint order of the label files and
# the model output — TODO confirm against the dataset definition.
keypoint_names = [
    'FrontTopL', 'FrontBotL', 'FrontTopM', 'FrontBotM',
    'FrontTopR', 'FrontBotR', 'BackDivTop', 'FrontDivTop',
    'BackTopL', 'BackTopR'
]

# One table row per keypoint: distance, MSE and whether it passed PCK@0.05.
for i, name in enumerate(keypoint_names):
    dist = metrics['distances'][i]
    mse = metrics['mse_per_keypoint'][i]
    correct = 'Yes' if metrics['pck_0.05_per_keypoint'][i] else 'No'
    print(f"  {name:<12} {dist:<12.6f} {mse:<12.6f} {correct}")


SINGLE IMAGE EVALUATION EXAMPLE

Loaded model: .//best.pt
Image: data/train/images/IMG_1156_frame_000001.png
Label: data/train/labels/IMG_1156_frame_000001.txt

Ground Truth Keypoints:
{'class_id': 0, 'bbox': array([    0.48884,     0.43644,     0.90866,     0.67783]), 'keypoints_xy': array([[   0.034516,     0.57294],
       [   0.064146,     0.77364],
       [    0.49159,     0.55046],
       [    0.49159,     0.77469],
       [    0.94317,     0.57562],
       [    0.91748,     0.77536],
       [    0.49115,    0.097528],
       [    0.48891,     0.28708],
       [    0.17536,     0.28119],
       [    0.80877,      0.2842]]), 'visibility': array([          2,           2,           2,           2,           2,           2,           2,           2,           2,           2])}

image 1/1 d:\cs_projects\CMORE_Box_Keypoints\data\train\images\IMG_1156_frame_000001.png: 288x512 1 box, 13.4ms
Speed: 2.8ms preprocess, 13.4ms inference, 1.8ms postprocess per image at shape (1, 3, 288, 512)

In [25]:
import time
import psutil
import numpy as np
import os
import torch
from typing import Dict, List

def benchmark_resource_usage(model, test_images: List, num_warmup: int = 5, num_iterations: int = 100) -> Dict:
    """
    Benchmark specifically designed for CPU-bound, resource-constrained environments.
    Tracks End-to-End latency (Disk I/O + Inference) and Peak RAM usage.

    Args:
        model: Object exposing ``predict(path, verbose=..., device=...)``.
        test_images: Image paths, or tuples whose first element is the image
            path (e.g. ``[(image_path, label_path), ...]``).
        num_warmup: Number of untimed predictions executed before measuring.
        num_iterations: Upper bound on timed predictions. Each image is timed
            at most once, so the real count is
            ``min(num_iterations, len(test_images))``.

    Returns:
        dict with ``avg_latency_ms``, ``p95_latency_ms``, ``fps``,
        ``baseline_ram_mb``, ``peak_ram_mb``, ``avg_cpu_usage_percent`` and
        ``num_samples``. RAM is sampled once after every prediction, so
        ``peak_ram_mb`` is the highest sampled value, not a true high-water mark.

    Raises:
        ValueError: if there is no image to benchmark.
    """
    # Local import: the garbage collector is only needed in this function.
    import gc

    # Prepare image list (slicing already clamps to the list length).
    benchmark_images = list(test_images[:max(num_iterations, 0)])
    # Handle tuple unpacking if your list is [(path, label), ...]
    if benchmark_images and isinstance(benchmark_images[0], tuple):
        benchmark_images = [item[0] for item in benchmark_images]

    # Fail early with a clear message instead of a ZeroDivisionError in the
    # warmup loop or an error in the statistics below.
    if not benchmark_images:
        raise ValueError(
            "benchmark_resource_usage needs at least one image and num_iterations >= 1"
        )

    process = psutil.Process()

    print("\n" + "="*70)
    print("CPU RESOURCE BENCHMARK (End-to-End)")
    print("="*70)

    # --- BASELINE MEMORY ---
    # Force garbage collection before starting to get a clean baseline
    gc.collect()
    baseline_memory_mb = process.memory_info().rss / (1024 * 1024)
    print(f"Baseline Process RAM (Model Loaded): {baseline_memory_mb:.2f} MB")

    # --- WARMUP ---
    # Essential to cache file system reads if OS caching is active.
    # Warm up on the same device as the timed loop so the first timed
    # iteration does not pay a one-off device switch.
    print(f"\nWarming up ({num_warmup} iterations)...")
    for i in range(num_warmup):
        _ = model.predict(str(benchmark_images[i % len(benchmark_images)]), verbose=False, device='cpu')

    # --- BENCHMARK ---
    print(f"Benchmarking ({len(benchmark_images)} iterations)...")

    inference_times = []
    peak_ram_mb = baseline_memory_mb
    cpu_percentages = []

    for img_path in benchmark_images:
        # Track CPU usage during this specific interval
        process.cpu_percent(interval=None) # Reset counter

        # High precision timer
        start_time = time.perf_counter()

        # Run End-to-End (Load -> Preprocess -> Infer -> Postprocess)
        _ = model.predict(str(img_path), verbose=False, device='cpu')

        end_time = time.perf_counter()

        # Measure metrics
        current_mem = process.memory_info().rss / (1024 * 1024)
        peak_ram_mb = max(peak_ram_mb, current_mem)

        inference_times.append((end_time - start_time) * 1000) # ms

        # With interval=None, cpu_percent() reports usage since the previous
        # call, i.e. since the reset above, which covers the prediction.
        cpu_percentages.append(process.cpu_percent(interval=None))

    # --- STATISTICS ---
    inference_times = np.array(inference_times)
    mean_latency_ms = float(np.mean(inference_times))

    results = {
        # Timing
        'avg_latency_ms': mean_latency_ms,
        'p95_latency_ms': float(np.percentile(inference_times, 95)),
        'fps': float(1000.0 / mean_latency_ms),

        # Resource Constraints Metrics
        'baseline_ram_mb': float(baseline_memory_mb),
        'peak_ram_mb': float(peak_ram_mb),
        'avg_cpu_usage_percent': float(np.mean(cpu_percentages)),

        # Info
        'num_samples': len(benchmark_images)
    }

    # --- REPORT ---
    # The separators must repeat a positive number of times: "-" * -70 is "".
    print("\n" + "-"*70)
    print("SUMMARY RESULTS")
    print("-"*70)
    print(f"Avg Latency (E2E):  {results['avg_latency_ms']:.2f} ms")
    print(f"FPS:                {results['fps']:.2f}")
    print(f"CPU Usage (Avg):    {results['avg_cpu_usage_percent']:.1f}%")
    print("-"*70)
    print(f"Baseline RAM:       {results['baseline_ram_mb']:.2f} MB")
    print(f"Peak RAM:           {results['peak_ram_mb']:.2f} MB")
    return results

In [24]:
from ultralytics import YOLO
import json
from datetime import datetime
    
# Banner marking the start of the full-dataset evaluation output.
print("EVALUATE ALL TEST IMAGES")

def evaluate_dataset(model_path: str, test_images_dir: str, test_labels_dir: str,
                    save_results: bool = True, output_path: str = None, benchmark_resources: bool = True,
                    num_benchmark_iterations: int = 100, max_images: int = 50) -> Dict:
    """
    Evaluate model on entire test dataset

    Args:
        model_path: Path to YOLO model (.pt file)
        test_images_dir: Directory with test images
        test_labels_dir: Directory with test labels (.txt files, matched to
            images by file stem)
        save_results: Whether to save results to JSON
        output_path: Where to save results (optional; a timestamped file name
            in the working directory is used when omitted)
        benchmark_resources: Whether to run resource usage benchmark
        num_benchmark_iterations: Number of iterations for resource benchmark
        max_images: Maximum number of files taken from ``test_images_dir``
            (in sorted name order) before label matching. ``None`` evaluates
            everything. Defaults to 50.
    Returns:
        dict with aggregated metrics across all images

    Raises:
        ValueError: if no image could be evaluated (no labelled images found,
            or every image failed).
    """

    print(f"Loading model from: {model_path}")
    model = YOLO(model_path)

    test_images_dir = Path(test_images_dir)
    test_labels_dir = Path(test_labels_dir)

    # Sort so the selected subset is the same on every run and file system;
    # the order returned by glob() is not guaranteed.
    test_images = sorted(p for p in test_images_dir.glob('*') if p.is_file())
    if max_images is not None:
        test_images = test_images[:max_images]
    print(f"Found {len(test_images)} test images.")

    images_with_labels = []
    for img_path in test_images:
        label_path = test_labels_dir / (img_path.stem + '.txt')
        if label_path.exists():
            images_with_labels.append((img_path, label_path))
        else:
            print(f"Warning: No label file for image {img_path.name}, skipping.")

    print(f"Evaluating {len(images_with_labels)} images with labels.")

    test_images = images_with_labels

    thresholds = [0.02, 0.05, 0.10]
    # Same key format the per-frame metrics use: 'pck_0.02', 'pck_0.05', 'pck_0.1'.
    pck_keys = [f'pck_{t}' for t in thresholds]

    all_metrics = []
    all_distances = []
    all_mse_per_kpt = []
    all_pck_per_kpt = {key: [] for key in pck_keys}

    failed_images = []

    for idx, (img_path, label_path) in enumerate(test_images):
        if (idx + 1) % 25 == 0:
            print(f"  Progress: {idx + 1}/{len(test_images)}")

        try:
            gt_data = parse_yolo_label_file(str(label_path))

            pred_data = get_yolo_predictions(model, str(img_path), normalize=True)

            metrics = evaluate_single_frame(
                pred_data['keypoints_xy'],
                gt_data['keypoints_xy'],
                gt_data['visibility'],
                thresholds=thresholds
            )

            frame_summary = {
                'image': img_path.name,
                'mse': metrics['mse_overall'],
                'mae': metrics['mae_overall'],
            }
            for key in pck_keys:
                frame_summary[key] = metrics.get(key, 0.0)
            frame_summary['confidence'] = pred_data['confidence']

            all_metrics.append(frame_summary)
            all_distances.append(metrics['distances'])
            all_mse_per_kpt.append(metrics['mse_per_keypoint'])
            for key in pck_keys:
                all_pck_per_kpt[key].append(metrics[f'{key}_per_keypoint'])

        except Exception as e:
            # Best effort: record the failure and continue with the rest.
            print(f"  WARNING: Failed on {img_path.name}: {e}")
            failed_images.append(str(img_path.name))

    print(f"Evaluated {len(all_metrics)} images.")

    if failed_images:
        print(f"Failed on {len(failed_images)} images: {failed_images}")

    # Averaging nothing would only produce NaNs and an opaque TypeError in
    # the per-keypoint report below, so stop here with a clear message.
    if not all_metrics:
        raise ValueError(
            f"No images could be evaluated (images dir: {test_images_dir}, "
            f"labels dir: {test_labels_dir}, failed: {len(failed_images)})"
        )

    # Aggregate overall metrics
    all_distances = np.array(all_distances)
    all_mse_per_kpt = np.array(all_mse_per_kpt)

    overall_metrics = {
        'mse_overall': float(np.mean([m['mse'] for m in all_metrics])),
        'mae_overall': float(np.mean([m['mae'] for m in all_metrics])),
    }
    for key in pck_keys:
        overall_metrics[key] = float(np.mean([m[key] for m in all_metrics]))

    overall_metrics.update({
        'avg_distances_per_keypoint': np.mean(all_distances, axis=0).tolist(),
        # Per-frame MSE marks non-visible keypoints with NaN; nanmean keeps a
        # single hidden keypoint from turning the whole average into NaN.
        'avg_mse_per_keypoint': np.nanmean(all_mse_per_kpt, axis=0).tolist(),
        'avg_pck_per_keypoint': {
            key: np.mean(all_pck_per_kpt[key], axis=0).tolist() for key in pck_keys
        },
        'num_images': len(all_metrics),
        'num_failed': len(failed_images),
        'model_path': str(model_path),
        'test_images_dir': str(test_images_dir),
        'test_labels_dir': str(test_labels_dir),
        'timestamp': datetime.now().isoformat(),
    })

    print("\nOverall Dataset Metrics:")
    print(f"Total Images Evaluated: {overall_metrics['num_images']}")
    print(f"Failed Images: {overall_metrics['num_failed']}")
    print(f"  MSE: {overall_metrics['mse_overall']:.6f}")
    print(f"  MAE: {overall_metrics['mae_overall']:.6f}")
    print(f"  PCK@0.02: {overall_metrics['pck_0.02']:.4f}")
    print(f"  PCK@0.05: {overall_metrics['pck_0.05']:.4f}")
    print(f"  PCK@0.10: {overall_metrics['pck_0.1']:.4f}")

    print("\nAverage Per-Keypoint Metrics:")
    keypoint_names = [
        'FrontTopL', 'FrontBotL', 'FrontTopM', 'FrontBotM',
        'FrontTopR', 'FrontBotR', 'BackDivTop', 'FrontDivTop',
        'BackTopL', 'BackTopR'
    ]
    print(f"  {'Keypoint':<12} {'Avg Distance':<15} {'Avg MSE':<15} {'PCK@0.05'}")
    print(f"  {'-'*60}")
    # Iterate over the keypoints actually measured; fall back to a generic
    # name when there are more keypoints than display names.
    for i in range(len(overall_metrics['avg_distances_per_keypoint'])):
        name = keypoint_names[i] if i < len(keypoint_names) else f"kpt_{i}"
        avg_dist = overall_metrics['avg_distances_per_keypoint'][i]
        avg_mse = overall_metrics['avg_mse_per_keypoint'][i]
        pck_05 = overall_metrics['avg_pck_per_keypoint']['pck_0.05'][i]
        print(f"  {name:<12} {avg_dist:<15.6f} {avg_mse:<15.6f} {pck_05:.4f}")

    if benchmark_resources:
        # test_images holds (image, label) tuples; the benchmark unpacks them.
        resource_metrics = benchmark_resource_usage(
            model,
            test_images,
            num_warmup=5,
            num_iterations=min(num_benchmark_iterations, len(test_images))
        )
        overall_metrics['resource_usage'] = resource_metrics

    # Save results to JSON
    if save_results:
        if output_path is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_path = f"evaluation_results_{timestamp}.json"

        with open(output_path, 'w') as f:
            json.dump(overall_metrics, f, indent=4)

        print(f"\nSaved evaluation results to: {output_path}")

    return overall_metrics

# Example usage: evaluate the checkpoint and save the aggregated metrics to a
# timestamped JSON file in the working directory (no output_path is given).
# NOTE(review): both directories point at data/train; if the model was trained
# on these frames the reported metrics will be optimistic — confirm.
evaluate_dataset(
    model_path='.//best.pt', # model is from https://github.com/AlundorZhu/CMORE/blob/24d8e02045004bb753b986f82fdf974e70d14637/playground/keypoints/runs/pose/train/weights/best.pt
    test_images_dir='data/train/images',
    test_labels_dir='data/train/labels',
    save_results=True
)

EVALUATE ALL TEST IMAGES
Loading model from: .//best.pt
Found 50 test images.
Evaluating 50 images with labels.

image 1/1 d:\cs_projects\CMORE_Box_Keypoints\data\train\images\IMG_1156_frame_000000.png: 288x512 1 box, 13.3ms
Speed: 0.8ms preprocess, 13.3ms inference, 1.8ms postprocess per image at shape (1, 3, 288, 512)

image 1/1 d:\cs_projects\CMORE_Box_Keypoints\data\train\images\IMG_1156_frame_000001.png: 288x512 1 box, 12.5ms
Speed: 0.9ms preprocess, 12.5ms inference, 1.5ms postprocess per image at shape (1, 3, 288, 512)

image 1/1 d:\cs_projects\CMORE_Box_Keypoints\data\train\images\IMG_1156_frame_000002.png: 288x512 1 box, 13.1ms
Speed: 0.8ms preprocess, 13.1ms inference, 1.6ms postprocess per image at shape (1, 3, 288, 512)

image 1/1 d:\cs_projects\CMORE_Box_Keypoints\data\train\images\IMG_1156_frame_000003.png: 288x512 1 box, 12.6ms
Speed: 0.8ms preprocess, 12.6ms inference, 1.6ms postprocess per image at shape (1, 3, 288, 512)

image 1/1 d:\cs_projects\CMORE_Box_Keypoints\da

{'mse_overall': 0.00016508917068535245,
 'mae_overall': 0.00939194630614042,
 'pck_0.02': 0.7020000000000002,
 'pck_0.05': 1.0,
 'pck_0.1': 1.0,
 'avg_distances_per_keypoint': [0.003339289216485081,
  0.010793506824754858,
  0.011895563066887868,
  0.007026570620736547,
  0.009676984471908788,
  0.02318293226973189,
  0.012837331289792664,
  0.022050583448786833,
  0.018228111477448093,
  0.036939205618616015],
 'avg_mse_per_keypoint': [5.616007876053586e-06,
  5.828013948180556e-05,
  7.081844355377459e-05,
  2.470319262406303e-05,
  4.734053116351326e-05,
  0.00026880501941945877,
  8.294876261324715e-05,
  0.00024333805431136417,
  0.00016667773906525632,
  0.0006823638167449878],
 'avg_pck_per_keypoint': {'pck_0.02': [1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   0.0,
   1.0,
   0.02,
   1.0,
   0.0],
  'pck_0.05': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
  'pck_0.1': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]},
 'num_images': 50,
 'num_failed': 0,
 'model_path': './/b