In [1]:
import torch
from PIL import Image
import time
import numpy as np
import os
import json
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from typing import List, Dict
from torchvision.ops import nms
# Configuration
CONFIDENCE_THRESHOLD = 0.5
IMG_SIZE = 640
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
DATASET_PATH = r"C:\Users\SIU856526097\datasets\coco-pose-subset"
MAX_IMAGES = 32  # Only process first 32 images
BATCH_SIZES = [1, 4, 8]  # Batch sizes to evaluate

class COCODataset(Dataset):
    def __init__(self, root_path, split='val2017', max_images=MAX_IMAGES):
        """Initialize COCO dataset with limited images"""
        self.root_path = root_path
        self.image_dir = os.path.join(root_path, 'images', split)
        self.max_images = max_images
        
        # Check for annotation files
        self.annotation_file = os.path.join(root_path, 'annotations', f'person_keypoints_{split}.json')
        if not os.path.exists(self.annotation_file):
            self.annotation_file = os.path.join(root_path, 'annotations', f'instances_{split}.json')
            if not os.path.exists(self.annotation_file):
                raise FileNotFoundError(f"Neither person_keypoints_{split}.json nor instances_{split}.json found")
        
        print(f"Using annotation file: {self.annotation_file}")
        
        # Load annotations
        with open(self.annotation_file) as f:
            self.annotations = json.load(f)
        
        # Create image id to annotations mapping
        self.image_info = {img['id']: img for img in self.annotations['images']}
        self.annotations_per_image = {}
        
        for ann in self.annotations['annotations']:
            if ann['image_id'] not in self.annotations_per_image:
                self.annotations_per_image[ann['image_id']] = []
            self.annotations_per_image[ann['image_id']].append(ann)
        
        # Filter images with annotations and limit to max_images
        self.valid_image_ids = [img_id for img_id in self.image_info.keys() 
                              if img_id in self.annotations_per_image][:max_images]
        
        print(f"Selected {len(self.valid_image_ids)} images for evaluation")
        
        # Transform for input images
        self.transform = T.Compose([
            T.Resize((IMG_SIZE, IMG_SIZE)),
            T.ToTensor(),
        ])
    
    def __len__(self):
        return len(self.valid_image_ids)
    
    def __getitem__(self, idx):
        img_id = self.valid_image_ids[idx]
        img_info = self.image_info[img_id]
        img_path = os.path.join(self.image_dir, img_info['file_name'])
        
        # Load image
        try:
            img = Image.open(img_path).convert('RGB')
        except:
            print(f"Error loading image: {img_path}")
            return {
                'image': torch.zeros((3, IMG_SIZE, IMG_SIZE)),
                'original_image': Image.new('RGB', (IMG_SIZE, IMG_SIZE)),
                'boxes': torch.zeros((0, 4)),
                'labels': torch.zeros(0, dtype=torch.int64),
                'original_size': (IMG_SIZE, IMG_SIZE),
                'image_id': img_id,
                'image_path': img_path
            }
        
        original_size = img.size  # (width, height)
        
        # Get annotations for this image
        annotations = self.annotations_per_image[img_id]
        
        # Prepare ground truth boxes and labels
        boxes = []
        labels = []
        
        for ann in annotations:
            x, y, w, h = ann['bbox']
            boxes.append([x, y, x + w, y + h])
            labels.append(0)  # 0 is for person class in COCO
        
        boxes = torch.tensor(boxes, dtype=torch.float32) if boxes else torch.zeros((0, 4))
        labels = torch.tensor(labels, dtype=torch.int64) if labels else torch.zeros(0, dtype=torch.int64)
        
        # Apply transformations
        img_tensor = self.transform(img)
        
        return {
            'image': img_tensor,
            'original_image': img,
            'boxes': boxes,
            'labels': labels,
            'original_size': original_size,
            'image_id': img_id,
            'image_path': img_path
        }

from torchvision.ops import nms

class YOLOv5Evaluator:
    def __init__(self):
        """Initialize YOLOv5 model with evaluation capabilities"""
        print(f"PyTorch version: {torch.__version__}")
        print(f"Using device: {DEVICE}")
        
        import warnings
        warnings.filterwarnings("ignore", category=FutureWarning)
        
        # Load model
        self.model = torch.hub.load('ultralytics/yolov5', 'yolov5m', pretrained=True, autoshape=True, force_reload=True)
        self.model.to(DEVICE)
        self.model.eval()
        
        # Initialize metrics
        self.metric = MeanAveragePrecision(
            box_format='xyxy',
            iou_type='bbox',
            iou_thresholds=[0.5],
            rec_thresholds=[0.0, 0.1, 0.3, 0.5, 0.7, 0.9],
            max_detection_thresholds=[1, 10, 100],
            class_metrics=True
        )

    def evaluate_batch(self, batch: Dict) -> List[Dict]:
        """Evaluate batch of images with raw tensor handling"""
        images = batch['image'].to(DEVICE)
        original_sizes = batch['original_size']
        image_paths = batch['image_path']
        batch_size = images.shape[0]
        
        # Time inference
        start_time = time.time()
        with torch.no_grad():
            results = self.model(images, size=IMG_SIZE)
        inference_time = time.time() - start_time
        
        # Debug
        print(f"Results type: {type(results)}")
        if isinstance(results, torch.Tensor):
            print(f"Raw results shape: {results.shape}")
        else:
            print(f"Results attributes: {dir(results)}")
        
        # Ensure results is a tensor
        if not isinstance(results, torch.Tensor):
            raise ValueError(f"Expected torch.Tensor, got {type(results)}")
        if results.dim() != 3 or results.shape[0] != batch_size:
            raise ValueError(f"Unexpected results shape: {results.shape}. Expected [batch_size, num_detections, 85]")
        
        batch_results = []
        for i in range(batch_size):
            pred = results[i]  # [num_detections, 85]
            print(f"Image {i} predictions shape: {pred.shape}")
            
            # Extract components
            if pred.shape[0] == 0:
                boxes = np.zeros((0, 4))
                scores = np.zeros(0)
                labels = np.zeros(0, dtype=np.int64)
            else:
                # Convert to xyxy format
                x_center = pred[:, 0]
                y_center = pred[:, 1]
                w = pred[:, 2]
                h = pred[:, 3]
                conf = pred[:, 4]
                class_scores = pred[:, 5:]
                
                x1 = x_center - w / 2
                y1 = y_center - h / 2
                x2 = x_center + w / 2
                y2 = y_center + h / 2
                boxes = torch.stack([x1, y1, x2, y2], dim=1)  # [N, 4]
                scores = conf  # [N]
                labels = torch.argmax(class_scores, dim=1)  # [N]
                
                # Apply confidence filter first
                mask = scores >= CONFIDENCE_THRESHOLD
                boxes = boxes[mask]
                scores = scores[mask]
                labels = labels[mask]
                print(f"After confidence filter: {boxes.shape[0]} detections")
                
                # Apply NMS
                if boxes.shape[0] > 0:
                    keep = nms(boxes, scores, iou_threshold=0.45)  # IoU threshold for NMS
                    boxes = boxes[keep].cpu().numpy()
                    scores = scores[keep].cpu().numpy()
                    labels = labels[keep].cpu().numpy()
                    print(f"After NMS: {boxes.shape[0]} detections")
                else:
                    boxes = np.zeros((0, 4))
                    scores = np.zeros(0)
                    labels = np.zeros(0, dtype=np.int64)
            
            # Scale boxes back to original image size
            orig_w, orig_h = original_sizes[i]
            scale_x = orig_w / IMG_SIZE
            scale_y = orig_h / IMG_SIZE
            
            if len(boxes) > 0:
                boxes[:, 0] *= scale_x  # x1
                boxes[:, 1] *= scale_y  # y1
                boxes[:, 2] *= scale_x  # x2
                boxes[:, 3] *= scale_y  # y2
            
            # Convert to tensors for metrics
            boxes_tensor = torch.tensor(boxes, dtype=torch.float32)
            scores_tensor = torch.tensor(scores, dtype=torch.float32)
            labels_tensor = torch.tensor(labels, dtype=torch.int64)
            
            # Ensure correct shape
            if boxes_tensor.dim() == 1 and len(boxes_tensor) > 0:
                boxes_tensor = boxes_tensor.unsqueeze(0)
            if scores_tensor.dim() == 0 and len(scores_tensor) > 0:
                scores_tensor = scores_tensor.unsqueeze(0)
            if labels_tensor.dim() == 0 and len(labels_tensor) > 0:
                labels_tensor = labels_tensor.unsqueeze(0)
            
            # Prepare predictions for metrics
            pred_metrics = [{
                'boxes': boxes_tensor,
                'scores': scores_tensor,
                'labels': labels_tensor
            }]
            
            # Prepare targets
            target_boxes = batch['boxes'][i].cpu().float()
            target_labels = batch['labels'][i].cpu().long()
            
            if target_boxes.dim() == 1 and len(target_boxes) > 0:
                target_boxes = target_boxes.unsqueeze(0)
            if target_labels.dim() == 0 and len(target_labels) > 0:
                target_labels = target_labels.unsqueeze(0)
            
            targets = [{
                'boxes': target_boxes,
                'labels': target_labels
            }]
            
            # Update metrics
            try:
                self.metric.update(pred_metrics, targets)
            except Exception as e:
                print(f"Error updating metrics for image {image_paths[i]}: {e}")
                print(f"Prediction boxes shape: {boxes_tensor.shape}")
                print(f"Target boxes shape: {target_boxes.shape}")
                print(f"Sample prediction boxes: {boxes_tensor[:2]}")
                print(f"Sample target boxes: {target_boxes[:2]}")
                continue
            
            # Store results
            batch_results.append({
                'image_path': image_paths[i],
                'boxes': boxes,
                'scores': scores,
                'labels': [self.model.names[int(x)] for x in labels] if len(labels) > 0 else [],
                'time': inference_time / batch_size  # Average time per image
            })
        
        return batch_results
if __name__ == "__main__":
    try:
        dataset = COCODataset(DATASET_PATH)
    except Exception as e:
        print(f"Error loading dataset: {str(e)}")
        print("Please verify the dataset path and files exist")
        raise
    
    # Lists to store metrics for plotting
    map_50_values = []
    inference_times = []
    batch_sizes = BATCH_SIZES

    for batch_size in BATCH_SIZES:
        print(f"\n{'='*50}")
        print(f"Evaluating with batch size: {batch_size}")
        print(f"Processing {len(dataset)} images")
        print(f"{'='*50}")
        
        data_loader = DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=0,
            collate_fn=lambda x: x
        )
        
        evaluator = YOLOv5Evaluator()
        total_images = 0
        total_time = 0
        results = []
        
        for batch_idx, batch in enumerate(data_loader):
            batch_dict = {
                'image': torch.stack([item['image'] for item in batch]),
                'original_image': [item['original_image'] for item in batch],
                'boxes': [item['boxes'] for item in batch],
                'labels': [item['labels'] for item in batch],
                'original_size': [item['original_size'] for item in batch],
                'image_id': [item['image_id'] for item in batch],
                'image_path': [item['image_path'] for item in batch]
            }
            
            batch_results = evaluator.evaluate_batch(batch_dict)
            results.extend(batch_results)
            total_images += len(batch_results)
            total_time += sum(r['time'] for r in batch_results)
            
            print(f"Processed batch {batch_idx+1} with {len(batch)} images")
        
        metrics = evaluator.metric.compute()
        
        print("\nEvaluation Metrics:")
        print(f"mAP@0.5: {metrics['map_50'].item():.3f}")
        print(f"mAP@0.5-0.95: {metrics['map'].item():.3f}")
        print(f"Recall@100: {metrics['mar_100'].item():.3f}")
        
        print(f"\nTotal Images Processed: {total_images}")
        print(f"Total Inference Time: {total_time:.4f}s")
        print(f"Average Inference Time: {total_time/total_images:.4f}s per image")
        
        # Store metrics for plotting
        map_50_values.append(metrics['map_50'].item())
        inference_times.append(total_time / total_images)
        
        # Save visualizations
        os.makedirs(f"visualizations_batch_{batch_size}", exist_ok=True)
        
        for idx, result in enumerate(results):
            try:
                img = Image.open(result['image_path'])
                fig, ax = plt.subplots(1, figsize=(12, 8))
                ax.imshow(img)
                
                for box, label, score in zip(result['boxes'], result['labels'], result['scores']):
                    x1, y1, x2, y2 = box
                    rect = patches.Rectangle(
                        (x1, y1), x2-x1, y2-y1,
                        linewidth=2, edgecolor='red', facecolor='none'
                    )
                    ax.add_patch(rect)
                    ax.text(
                        x1, y1-10, 
                        f"{label} {score:.2f}",
                        color='white', fontsize=10,
                        bbox=dict(facecolor='red', alpha=0.8, pad=2)
                    )
                
                plt.axis('off')
                save_path = os.path.join(f"visualizations_batch_{batch_size}", f"result_{idx}.png")
                plt.savefig(save_path, bbox_inches='tight', pad_inches=0.1, dpi=300)
                plt.close()
            except Exception as e:
                print(f"Could not visualize {result['image_path']}: {str(e)}")
        
        print(f"Saved visualizations to visualizations_batch_{batch_size} folder")
    
    # Plot mAP vs Inference Time
    fig, ax1 = plt.subplots(figsize=(10, 6))

    # Plot mAP on left y-axis
    ax1.plot(batch_sizes, map_50_values, 'b-', marker='o', label='mAP@0.5')
    ax1.set_xlabel('Batch Size')
    ax1.set_ylabel('mAP@0.5', color='b')
    ax1.tick_params(axis='y', labelcolor='b')
    ax1.grid(True, linestyle='--', alpha=0.7)

    # Create right y-axis for inference time
    ax2 = ax1.twinx()
    ax2.plot(batch_sizes, inference_times, 'r-', marker='s', label='Inference Time (s)')
    ax2.set_ylabel('Inference Time (s)', color='r')
    ax2.tick_params(axis='y', labelcolor='r')

    # Title and layout
    plt.title('mAP@0.5 vs Inference Time for Different Batch Sizes')
    fig.tight_layout()

    # Combine legends
    lines1, labels1 = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax1.legend(lines1 + lines2, labels1 + labels2, loc='center right')

    # Save and show plot
    plt.savefig('map_vs_inference_time.png', dpi=300)
    plt.show()

  from .autonotebook import tqdm as notebook_tqdm


Using annotation file: C:\Users\SIU856526097\datasets\coco-pose-subset\annotations\instances_val2017.json
Selected 32 images for evaluation

Evaluating with batch size: 1
Processing 32 images
PyTorch version: 1.13.1+cpu
Using device: cpu


Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to C:\Users\SIU856526097/.cache\torch\hub\master.zip
YOLOv5  2025-4-13 Python-3.9.21 torch-1.13.1+cpu CPU

Fusing layers... 
YOLOv5m summary: 290 layers, 21172173 parameters, 0 gradients
Adding AutoShape... 


Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([1, 25200, 85])
Image 0 predictions shape: torch.Size([25200, 85])
After confidence filter: 79 detections
After NMS: 8 detections
Processed batch 1 with 1 images
Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([1, 25200, 85])
Image 0 predictions shape: torch.Size([25200, 85])
After confidence filter: 56 detections
After NMS: 5 detections
Processed batch 2 with 1 images
Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([1, 25200, 85])
Image 0 predictions shape: torch.Size([25200, 85])
After confidence filter: 108 detections
After NMS: 9 detections
Processed batch 3 with 1 images
Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([1, 25200, 85])
Image 0 predictions shape: torch.Size([25200, 85])
After confidence filter: 73 detections
After NMS: 6 detections
Processed batch 4 with 1 images
Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([1, 25200, 85])


Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to C:\Users\SIU856526097/.cache\torch\hub\master.zip
YOLOv5  2025-4-13 Python-3.9.21 torch-1.13.1+cpu CPU

Fusing layers... 
YOLOv5m summary: 290 layers, 21172173 parameters, 0 gradients
Adding AutoShape... 


Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([4, 25200, 85])
Image 0 predictions shape: torch.Size([25200, 85])
After confidence filter: 79 detections
After NMS: 8 detections
Image 1 predictions shape: torch.Size([25200, 85])
After confidence filter: 56 detections
After NMS: 5 detections
Image 2 predictions shape: torch.Size([25200, 85])
After confidence filter: 108 detections
After NMS: 9 detections
Image 3 predictions shape: torch.Size([25200, 85])
After confidence filter: 73 detections
After NMS: 6 detections
Processed batch 1 with 4 images
Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([4, 25200, 85])
Image 0 predictions shape: torch.Size([25200, 85])
After confidence filter: 93 detections
After NMS: 9 detections
Image 1 predictions shape: torch.Size([25200, 85])
After confidence filter: 34 detections
After NMS: 3 detections
Image 2 predictions shape: torch.Size([25200, 85])
After confidence filter: 38 detections
After NMS: 4 detections
Ima

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to C:\Users\SIU856526097/.cache\torch\hub\master.zip
YOLOv5  2025-4-13 Python-3.9.21 torch-1.13.1+cpu CPU

Fusing layers... 
YOLOv5m summary: 290 layers, 21172173 parameters, 0 gradients
Adding AutoShape... 


Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([8, 25200, 85])
Image 0 predictions shape: torch.Size([25200, 85])
After confidence filter: 79 detections
After NMS: 8 detections
Image 1 predictions shape: torch.Size([25200, 85])
After confidence filter: 56 detections
After NMS: 5 detections
Image 2 predictions shape: torch.Size([25200, 85])
After confidence filter: 108 detections
After NMS: 9 detections
Image 3 predictions shape: torch.Size([25200, 85])
After confidence filter: 73 detections
After NMS: 6 detections
Image 4 predictions shape: torch.Size([25200, 85])
After confidence filter: 93 detections
After NMS: 9 detections
Image 5 predictions shape: torch.Size([25200, 85])
After confidence filter: 34 detections
After NMS: 3 detections
Image 6 predictions shape: torch.Size([25200, 85])
After confidence filter: 38 detections
After NMS: 4 detections
Image 7 predictions shape: torch.Size([25200, 85])
After confidence filter: 26 detections
After NMS: 5 detections
Proc

In [2]:
pip install pycocotools

Collecting pycocotools
  Using cached pycocotools-2.0.8-cp39-cp39-win_amd64.whl.metadata (1.1 kB)
Using cached pycocotools-2.0.8-cp39-cp39-win_amd64.whl (85 kB)
Installing collected packages: pycocotools
Successfully installed pycocotools-2.0.8
Note: you may need to restart the kernel to use updated packages.


In [2]:
import torch
from PIL import Image
import time
import numpy as np
import os
import json
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from typing import List, Dict
from torchvision.ops import nms
# Configuration
CONFIDENCE_THRESHOLD = 0.5
IMG_SIZE = 640
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
DATASET_PATH = r"C:\Users\SIU856526097\datasets\coco-pose-subset"
MAX_IMAGES = 32  # Only process first 32 images
BATCH_SIZES = [1, 4, 8]  # Batch sizes to evaluate

class COCODataset(Dataset):
    def __init__(self, root_path, split='val2017', max_images=MAX_IMAGES):
        """Initialize COCO dataset with limited images"""
        self.root_path = root_path
        self.image_dir = os.path.join(root_path, 'images', split)
        self.max_images = max_images
        
        # Check for annotation files
        self.annotation_file = os.path.join(root_path, 'annotations', f'person_keypoints_{split}.json')
        if not os.path.exists(self.annotation_file):
            self.annotation_file = os.path.join(root_path, 'annotations', f'instances_{split}.json')
            if not os.path.exists(self.annotation_file):
                raise FileNotFoundError(f"Neither person_keypoints_{split}.json nor instances_{split}.json found")
        
        print(f"Using annotation file: {self.annotation_file}")
        
        # Load annotations
        with open(self.annotation_file) as f:
            self.annotations = json.load(f)
        
        # Create image id to annotations mapping
        self.image_info = {img['id']: img for img in self.annotations['images']}
        self.annotations_per_image = {}
        
        for ann in self.annotations['annotations']:
            if ann['image_id'] not in self.annotations_per_image:
                self.annotations_per_image[ann['image_id']] = []
            self.annotations_per_image[ann['image_id']].append(ann)
        
        # Filter images with annotations and limit to max_images
        self.valid_image_ids = [img_id for img_id in self.image_info.keys() 
                              if img_id in self.annotations_per_image][:max_images]
        
        print(f"Selected {len(self.valid_image_ids)} images for evaluation")
        
        # Transform for input images
        self.transform = T.Compose([
            T.Resize((IMG_SIZE, IMG_SIZE)),
            T.ToTensor(),
        ])
    
    def __len__(self):
        return len(self.valid_image_ids)
    
    def __getitem__(self, idx):
        img_id = self.valid_image_ids[idx]
        img_info = self.image_info[img_id]
        img_path = os.path.join(self.image_dir, img_info['file_name'])
        
        # Load image
        try:
            img = Image.open(img_path).convert('RGB')
        except:
            print(f"Error loading image: {img_path}")
            return {
                'image': torch.zeros((3, IMG_SIZE, IMG_SIZE)),
                'original_image': Image.new('RGB', (IMG_SIZE, IMG_SIZE)),
                'boxes': torch.zeros((0, 4)),
                'labels': torch.zeros(0, dtype=torch.int64),
                'original_size': (IMG_SIZE, IMG_SIZE),
                'image_id': img_id,
                'image_path': img_path
            }
        
        original_size = img.size  # (width, height)
        
        # Get annotations for this image
        annotations = self.annotations_per_image[img_id]
        
        # Prepare ground truth boxes and labels
        boxes = []
        labels = []
        
        for ann in annotations:
            x, y, w, h = ann['bbox']
            boxes.append([x, y, x + w, y + h])
            labels.append(0)  # 0 is for person class in COCO
        
        boxes = torch.tensor(boxes, dtype=torch.float32) if boxes else torch.zeros((0, 4))
        labels = torch.tensor(labels, dtype=torch.int64) if labels else torch.zeros(0, dtype=torch.int64)
        
        # Apply transformations
        img_tensor = self.transform(img)
        
        return {
            'image': img_tensor,
            'original_image': img,
            'boxes': boxes,
            'labels': labels,
            'original_size': original_size,
            'image_id': img_id,
            'image_path': img_path
        }

from torchvision.ops import nms

class YOLOv5Evaluator:
    def __init__(self):
        """Initialize YOLOv5 model with evaluation capabilities"""
        print(f"PyTorch version: {torch.__version__}")
        print(f"Using device: {DEVICE}")
        
        import warnings
        warnings.filterwarnings("ignore", category=FutureWarning)
        
        # Load model
        self.model = torch.hub.load('ultralytics/yolov5', 'yolov5m', pretrained=True, autoshape=True, force_reload=True)
        self.model.to(DEVICE)
        self.model.eval()
        
        # Initialize metrics
        self.metric = MeanAveragePrecision(
            box_format='xyxy',
            iou_type='bbox',
            iou_thresholds=[0.5],
            rec_thresholds=[0.0, 0.1, 0.3, 0.5, 0.7, 0.9],
            max_detection_thresholds=[1, 10, 100],
            class_metrics=True
        )

    def evaluate_batch(self, batch: Dict) -> List[Dict]:
        """Evaluate batch of images with raw tensor handling"""
        images = batch['image'].to(DEVICE)
        original_sizes = batch['original_size']
        image_paths = batch['image_path']
        batch_size = images.shape[0]
        
        # Time inference
        start_time = time.time()
        with torch.no_grad():
            results = self.model(images, size=IMG_SIZE)
        inference_time = time.time() - start_time
        
        # Debug
        print(f"Results type: {type(results)}")
        if isinstance(results, torch.Tensor):
            print(f"Raw results shape: {results.shape}")
        else:
            print(f"Results attributes: {dir(results)}")
        
        # Ensure results is a tensor
        if not isinstance(results, torch.Tensor):
            raise ValueError(f"Expected torch.Tensor, got {type(results)}")
        if results.dim() != 3 or results.shape[0] != batch_size:
            raise ValueError(f"Unexpected results shape: {results.shape}. Expected [batch_size, num_detections, 85]")
        
        batch_results = []
        for i in range(batch_size):
            pred = results[i]  # [num_detections, 85]
            print(f"Image {i} predictions shape: {pred.shape}")
            
            # Extract components
            if pred.shape[0] == 0:
                boxes = np.zeros((0, 4))
                scores = np.zeros(0)
                labels = np.zeros(0, dtype=np.int64)
            else:
                # Convert to xyxy format
                x_center = pred[:, 0]
                y_center = pred[:, 1]
                w = pred[:, 2]
                h = pred[:, 3]
                conf = pred[:, 4]
                class_scores = pred[:, 5:]
                
                x1 = x_center - w / 2
                y1 = y_center - h / 2
                x2 = x_center + w / 2
                y2 = y_center + h / 2
                boxes = torch.stack([x1, y1, x2, y2], dim=1)  # [N, 4]
                scores = conf  # [N]
                labels = torch.argmax(class_scores, dim=1)  # [N]
                
                # Apply confidence filter first
                mask = scores >= CONFIDENCE_THRESHOLD
                boxes = boxes[mask]
                scores = scores[mask]
                labels = labels[mask]
                print(f"After confidence filter: {boxes.shape[0]} detections")
                
                # Apply NMS
                if boxes.shape[0] > 0:
                    keep = nms(boxes, scores, iou_threshold=0.45)  # IoU threshold for NMS
                    boxes = boxes[keep].cpu().numpy()
                    scores = scores[keep].cpu().numpy()
                    labels = labels[keep].cpu().numpy()
                    print(f"After NMS: {boxes.shape[0]} detections")
                else:
                    boxes = np.zeros((0, 4))
                    scores = np.zeros(0)
                    labels = np.zeros(0, dtype=np.int64)
            
            # Scale boxes back to original image size
            orig_w, orig_h = original_sizes[i]
            scale_x = orig_w / IMG_SIZE
            scale_y = orig_h / IMG_SIZE
            
            if len(boxes) > 0:
                boxes[:, 0] *= scale_x  # x1
                boxes[:, 1] *= scale_y  # y1
                boxes[:, 2] *= scale_x  # x2
                boxes[:, 3] *= scale_y  # y2
            
            # Convert to tensors for metrics
            boxes_tensor = torch.tensor(boxes, dtype=torch.float32)
            scores_tensor = torch.tensor(scores, dtype=torch.float32)
            labels_tensor = torch.tensor(labels, dtype=torch.int64)
            
            # Ensure correct shape
            if boxes_tensor.dim() == 1 and len(boxes_tensor) > 0:
                boxes_tensor = boxes_tensor.unsqueeze(0)
            if scores_tensor.dim() == 0 and len(scores_tensor) > 0:
                scores_tensor = scores_tensor.unsqueeze(0)
            if labels_tensor.dim() == 0 and len(labels_tensor) > 0:
                labels_tensor = labels_tensor.unsqueeze(0)
            
            # Prepare predictions for metrics
            pred_metrics = [{
                'boxes': boxes_tensor,
                'scores': scores_tensor,
                'labels': labels_tensor
            }]
            
            # Prepare targets
            target_boxes = batch['boxes'][i].cpu().float()
            target_labels = batch['labels'][i].cpu().long()
            
            if target_boxes.dim() == 1 and len(target_boxes) > 0:
                target_boxes = target_boxes.unsqueeze(0)
            if target_labels.dim() == 0 and len(target_labels) > 0:
                target_labels = target_labels.unsqueeze(0)
            
            targets = [{
                'boxes': target_boxes,
                'labels': target_labels
            }]
            
            # Update metrics
            try:
                self.metric.update(pred_metrics, targets)
            except Exception as e:
                print(f"Error updating metrics for image {image_paths[i]}: {e}")
                print(f"Prediction boxes shape: {boxes_tensor.shape}")
                print(f"Target boxes shape: {target_boxes.shape}")
                print(f"Sample prediction boxes: {boxes_tensor[:2]}")
                print(f"Sample target boxes: {target_boxes[:2]}")
                continue
            
            # Store results
            batch_results.append({
                'image_path': image_paths[i],
                'boxes': boxes,
                'scores': scores,
                'labels': [self.model.names[int(x)] for x in labels] if len(labels) > 0 else [],
                'time': inference_time / batch_size  # Average time per image
            })
        
        return batch_results
if __name__ == "__main__":
    try:
        dataset = COCODataset(DATASET_PATH)
    except Exception as e:
        print(f"Error loading dataset: {str(e)}")
        print("Please verify the dataset path and files exist")
        raise
    
    # Lists to store metrics for plotting
    map_50_values = []
    map_values = []
    mar_100_values = []
    inference_times_per_image = []
    total_inference_times = []  # For total inference time
    batch_sizes = BATCH_SIZES

    for batch_size in BATCH_SIZES:
        print(f"\n{'='*50}")
        print(f"Evaluating with batch size: {batch_size}")
        print(f"Processing {len(dataset)} images")
        print(f"{'='*50}")
        
        data_loader = DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=0,
            collate_fn=lambda x: x
        )
        
        evaluator = YOLOv5Evaluator()
        total_images = 0
        total_time = 0
        results = []
        
        for batch_idx, batch in enumerate(data_loader):
            batch_dict = {
                'image': torch.stack([item['image'] for item in batch]),
                'original_image': [item['original_image'] for item in batch],
                'boxes': [item['boxes'] for item in batch],
                'labels': [item['labels'] for item in batch],
                'original_size': [item['original_size'] for item in batch],
                'image_id': [item['image_id'] for item in batch],
                'image_path': [item['image_path'] for item in batch]
            }
            
            batch_results = evaluator.evaluate_batch(batch_dict)
            results.extend(batch_results)
            total_images += len(batch_results)
            total_time += sum(r['time'] for r in batch_results)
            
            print(f"Processed batch {batch_idx+1} with {len(batch)} images")
        
        metrics = evaluator.metric.compute()
        
        print("\nEvaluation Metrics:")
        print(f"mAP@0.5: {metrics['map_50'].item():.3f}")
        print(f"mAP@0.5-0.95: {metrics['map'].item():.3f}")
        print(f"Recall@100: {metrics['mar_100'].item():.3f}")
        
        print(f"\nTotal Images Processed: {total_images}")
        print(f"Total Inference Time: {total_time:.4f}s")
        print(f"Average Inference Time: {total_time/total_images:.4f}s per image")
        
        # Store metrics for plotting
        map_50_values.append(metrics['map_50'].item())
        map_values.append(metrics['map'].item())
        mar_100_values.append(metrics['mar_100'].item())
        inference_times_per_image.append(total_time / total_images)
        total_inference_times.append(total_time)
        
        # Save visualizations
        os.makedirs(f"visualizations_batch_{batch_size}", exist_ok=True)
        
        for idx, result in enumerate(results):
            try:
                img = Image.open(result['image_path'])
                fig, ax = plt.subplots(1, figsize=(12, 8))
                ax.imshow(img)
                
                for box, label, score in zip(result['boxes'], result['labels'], result['scores']):
                    x1, y1, x2, y2 = box
                    rect = patches.Rectangle(
                        (x1, y1), x2-x1, y2-y1,
                        linewidth=2, edgecolor='red', facecolor='none'
                    )
                    ax.add_patch(rect)
                    ax.text(
                        x1, y1-10, 
                        f"{label} {score:.2f}",
                        color='white', fontsize=10,
                        bbox=dict(facecolor='red', alpha=0.8, pad=2)
                    )
                
                plt.axis('off')
                save_path = os.path.join(f"visualizations_batch_{batch_size}", f"result_{idx}.png")
                plt.savefig(save_path, bbox_inches='tight', pad_inches=0.1, dpi=300)
                plt.close()
            except Exception as e:
                print(f"Could not visualize {result['image_path']}: {str(e)}")
        
        print(f"Saved visualizations to visualizations_batch_{batch_size} folder")
    
    # Plot multiple graphs
    def plot_dual_axis(metric_values, metric_name, time_values, time_label, filename):
        fig, ax1 = plt.subplots(figsize=(10, 6))

        # Plot metric on left y-axis
        ax1.plot(batch_sizes, metric_values, 'b-', marker='o', label=metric_name)
        ax1.set_xlabel('Batch Size')
        ax1.set_ylabel(metric_name, color='b')
        ax1.tick_params(axis='y', labelcolor='b')
        ax1.grid(True, linestyle='--', alpha=0.7)

        # Create right y-axis for time
        ax2 = ax1.twinx()
        ax2.plot(batch_sizes, time_values, 'r-', marker='s', label=time_label)
        ax2.set_ylabel(time_label, color='r')
        ax2.tick_params(axis='y', labelcolor='r')

        # Title and layout
        plt.title(f'{metric_name} vs {time_label} for Different Batch Sizes')
        fig.tight_layout()

        # Combine legends
        lines1, labels1 = ax1.get_legend_handles_labels()
        lines2, labels2 = ax2.get_legend_handles_labels()
        ax1.legend(lines1 + lines2, labels1 + labels2, loc='center right')

        # Save and show plot
        plt.savefig(filename, dpi=300)
        plt.show()
        plt.close()

    # Generate plots
    # Plot 1: mAP@0.5 vs Inference Time (per image)
    plot_dual_axis(
        metric_values=map_50_values,
        metric_name='mAP@0.5',
        time_values=inference_times_per_image,
        time_label='Inference Time (s/image)',
        filename='map50_vs_inference_time.png'
    )
    
    # Plot 2: mAP@0.5:0.95 vs Total Inference Time
    plot_dual_axis(
        metric_values=map_values,
        metric_name='mAP@0.5:0.95',
        time_values=total_inference_times,
        time_label='Total Inference Time (s)',
        filename='map_vs_total_inference_time.png'
    )
    
    # Plot 3: Recall@100 vs Inference Time (per image)
    plot_dual_axis(
        metric_values=mar_100_values,
        metric_name='Recall@100',
        time_values=inference_times_per_image,
        time_label='Inference Time (s/image)',
        filename='mar100_vs_inference_time.png'
    )

Using annotation file: C:\Users\SIU856526097\datasets\coco-pose-subset\annotations\instances_val2017.json
Selected 32 images for evaluation

Evaluating with batch size: 1
Processing 32 images
PyTorch version: 1.13.1+cpu
Using device: cpu


Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to C:\Users\SIU856526097/.cache\torch\hub\master.zip
YOLOv5  2025-4-13 Python-3.9.21 torch-1.13.1+cpu CPU

Fusing layers... 
YOLOv5m summary: 290 layers, 21172173 parameters, 0 gradients
Adding AutoShape... 


Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([1, 25200, 85])
Image 0 predictions shape: torch.Size([25200, 85])
After confidence filter: 79 detections
After NMS: 8 detections
Processed batch 1 with 1 images
Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([1, 25200, 85])
Image 0 predictions shape: torch.Size([25200, 85])
After confidence filter: 56 detections
After NMS: 5 detections
Processed batch 2 with 1 images
Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([1, 25200, 85])
Image 0 predictions shape: torch.Size([25200, 85])
After confidence filter: 108 detections
After NMS: 9 detections
Processed batch 3 with 1 images
Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([1, 25200, 85])
Image 0 predictions shape: torch.Size([25200, 85])
After confidence filter: 73 detections
After NMS: 6 detections
Processed batch 4 with 1 images
Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([1, 25200, 85])


Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to C:\Users\SIU856526097/.cache\torch\hub\master.zip
YOLOv5  2025-4-13 Python-3.9.21 torch-1.13.1+cpu CPU

Fusing layers... 
YOLOv5m summary: 290 layers, 21172173 parameters, 0 gradients
Adding AutoShape... 


Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([4, 25200, 85])
Image 0 predictions shape: torch.Size([25200, 85])
After confidence filter: 79 detections
After NMS: 8 detections
Image 1 predictions shape: torch.Size([25200, 85])
After confidence filter: 56 detections
After NMS: 5 detections
Image 2 predictions shape: torch.Size([25200, 85])
After confidence filter: 108 detections
After NMS: 9 detections
Image 3 predictions shape: torch.Size([25200, 85])
After confidence filter: 73 detections
After NMS: 6 detections
Processed batch 1 with 4 images
Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([4, 25200, 85])
Image 0 predictions shape: torch.Size([25200, 85])
After confidence filter: 93 detections
After NMS: 9 detections
Image 1 predictions shape: torch.Size([25200, 85])
After confidence filter: 34 detections
After NMS: 3 detections
Image 2 predictions shape: torch.Size([25200, 85])
After confidence filter: 38 detections
After NMS: 4 detections
Ima

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to C:\Users\SIU856526097/.cache\torch\hub\master.zip
YOLOv5  2025-4-13 Python-3.9.21 torch-1.13.1+cpu CPU

Fusing layers... 
YOLOv5m summary: 290 layers, 21172173 parameters, 0 gradients
Adding AutoShape... 


Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([8, 25200, 85])
Image 0 predictions shape: torch.Size([25200, 85])
After confidence filter: 79 detections
After NMS: 8 detections
Image 1 predictions shape: torch.Size([25200, 85])
After confidence filter: 56 detections
After NMS: 5 detections
Image 2 predictions shape: torch.Size([25200, 85])
After confidence filter: 108 detections
After NMS: 9 detections
Image 3 predictions shape: torch.Size([25200, 85])
After confidence filter: 73 detections
After NMS: 6 detections
Image 4 predictions shape: torch.Size([25200, 85])
After confidence filter: 93 detections
After NMS: 9 detections
Image 5 predictions shape: torch.Size([25200, 85])
After confidence filter: 34 detections
After NMS: 3 detections
Image 6 predictions shape: torch.Size([25200, 85])
After confidence filter: 38 detections
After NMS: 4 detections
Image 7 predictions shape: torch.Size([25200, 85])
After confidence filter: 26 detections
After NMS: 5 detections
Proc

In [3]:
import torch
from PIL import Image
import time
import numpy as np
import os
import json
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from typing import List, Dict
from torchvision.ops import nms
# Configuration
CONFIDENCE_THRESHOLD = 0.5
IMG_SIZE = 640
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
DATASET_PATH = r"C:\Users\SIU856526097\datasets\coco-pose-subset"
MAX_IMAGES = 32  # Only process first 32 images
BATCH_SIZES = [1, 4, 8]  # Batch sizes to evaluate

class COCODataset(Dataset):
    def __init__(self, root_path, split='val2017', max_images=MAX_IMAGES):
        """Initialize COCO dataset with limited images"""
        self.root_path = root_path
        self.image_dir = os.path.join(root_path, 'images', split)
        self.max_images = max_images
        
        # Check for annotation files
        self.annotation_file = os.path.join(root_path, 'annotations', f'person_keypoints_{split}.json')
        if not os.path.exists(self.annotation_file):
            self.annotation_file = os.path.join(root_path, 'annotations', f'instances_{split}.json')
            if not os.path.exists(self.annotation_file):
                raise FileNotFoundError(f"Neither person_keypoints_{split}.json nor instances_{split}.json found")
        
        print(f"Using annotation file: {self.annotation_file}")
        
        # Load annotations
        with open(self.annotation_file) as f:
            self.annotations = json.load(f)
        
        # Create image id to annotations mapping
        self.image_info = {img['id']: img for img in self.annotations['images']}
        self.annotations_per_image = {}
        
        for ann in self.annotations['annotations']:
            if ann['image_id'] not in self.annotations_per_image:
                self.annotations_per_image[ann['image_id']] = []
            self.annotations_per_image[ann['image_id']].append(ann)
        
        # Filter images with annotations and limit to max_images
        self.valid_image_ids = [img_id for img_id in self.image_info.keys() 
                              if img_id in self.annotations_per_image][:max_images]
        
        print(f"Selected {len(self.valid_image_ids)} images for evaluation")
        
        # Transform for input images
        self.transform = T.Compose([
            T.Resize((IMG_SIZE, IMG_SIZE)),
            T.ToTensor(),
        ])
    
    def __len__(self):
        return len(self.valid_image_ids)
    
    def __getitem__(self, idx):
        img_id = self.valid_image_ids[idx]
        img_info = self.image_info[img_id]
        img_path = os.path.join(self.image_dir, img_info['file_name'])
        
        # Load image
        try:
            img = Image.open(img_path).convert('RGB')
        except:
            print(f"Error loading image: {img_path}")
            return {
                'image': torch.zeros((3, IMG_SIZE, IMG_SIZE)),
                'original_image': Image.new('RGB', (IMG_SIZE, IMG_SIZE)),
                'boxes': torch.zeros((0, 4)),
                'labels': torch.zeros(0, dtype=torch.int64),
                'original_size': (IMG_SIZE, IMG_SIZE),
                'image_id': img_id,
                'image_path': img_path
            }
        
        original_size = img.size  # (width, height)
        
        # Get annotations for this image
        annotations = self.annotations_per_image[img_id]
        
        # Prepare ground truth boxes and labels
        boxes = []
        labels = []
        
        for ann in annotations:
            x, y, w, h = ann['bbox']
            boxes.append([x, y, x + w, y + h])
            labels.append(0)  # 0 is for person class in COCO
        
        boxes = torch.tensor(boxes, dtype=torch.float32) if boxes else torch.zeros((0, 4))
        labels = torch.tensor(labels, dtype=torch.int64) if labels else torch.zeros(0, dtype=torch.int64)
        
        # Apply transformations
        img_tensor = self.transform(img)
        
        return {
            'image': img_tensor,
            'original_image': img,
            'boxes': boxes,
            'labels': labels,
            'original_size': original_size,
            'image_id': img_id,
            'image_path': img_path
        }

from torchvision.ops import nms

class YOLOv5Evaluator:
    def __init__(self):
        """Initialize YOLOv5 model with evaluation capabilities"""
        print(f"PyTorch version: {torch.__version__}")
        print(f"Using device: {DEVICE}")
        
        import warnings
        warnings.filterwarnings("ignore", category=FutureWarning)
        
        # Load model
        self.model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, autoshape=True, force_reload=True)
        self.model.to(DEVICE)
        self.model.eval()
        
        # Initialize metrics
        self.metric = MeanAveragePrecision(
            box_format='xyxy',
            iou_type='bbox',
            iou_thresholds=[0.5],
            rec_thresholds=[0.0, 0.1, 0.3, 0.5, 0.7, 0.9],
            max_detection_thresholds=[1, 10, 100],
            class_metrics=True
        )

    def evaluate_batch(self, batch: Dict) -> List[Dict]:
        """Evaluate batch of images with raw tensor handling"""
        images = batch['image'].to(DEVICE)
        original_sizes = batch['original_size']
        image_paths = batch['image_path']
        batch_size = images.shape[0]
        
        # Time inference
        start_time = time.time()
        with torch.no_grad():
            results = self.model(images, size=IMG_SIZE)
        inference_time = time.time() - start_time
        
        # Debug
        print(f"Results type: {type(results)}")
        if isinstance(results, torch.Tensor):
            print(f"Raw results shape: {results.shape}")
        else:
            print(f"Results attributes: {dir(results)}")
        
        # Ensure results is a tensor
        if not isinstance(results, torch.Tensor):
            raise ValueError(f"Expected torch.Tensor, got {type(results)}")
        if results.dim() != 3 or results.shape[0] != batch_size:
            raise ValueError(f"Unexpected results shape: {results.shape}. Expected [batch_size, num_detections, 85]")
        
        batch_results = []
        for i in range(batch_size):
            pred = results[i]  # [num_detections, 85]
            print(f"Image {i} predictions shape: {pred.shape}")
            
            # Extract components
            if pred.shape[0] == 0:
                boxes = np.zeros((0, 4))
                scores = np.zeros(0)
                labels = np.zeros(0, dtype=np.int64)
            else:
                # Convert to xyxy format
                x_center = pred[:, 0]
                y_center = pred[:, 1]
                w = pred[:, 2]
                h = pred[:, 3]
                conf = pred[:, 4]
                class_scores = pred[:, 5:]
                
                x1 = x_center - w / 2
                y1 = y_center - h / 2
                x2 = x_center + w / 2
                y2 = y_center + h / 2
                boxes = torch.stack([x1, y1, x2, y2], dim=1)  # [N, 4]
                scores = conf  # [N]
                labels = torch.argmax(class_scores, dim=1)  # [N]
                
                # Apply confidence filter first
                mask = scores >= CONFIDENCE_THRESHOLD
                boxes = boxes[mask]
                scores = scores[mask]
                labels = labels[mask]
                print(f"After confidence filter: {boxes.shape[0]} detections")
                
                # Apply NMS
                if boxes.shape[0] > 0:
                    keep = nms(boxes, scores, iou_threshold=0.45)  # IoU threshold for NMS
                    boxes = boxes[keep].cpu().numpy()
                    scores = scores[keep].cpu().numpy()
                    labels = labels[keep].cpu().numpy()
                    print(f"After NMS: {boxes.shape[0]} detections")
                else:
                    boxes = np.zeros((0, 4))
                    scores = np.zeros(0)
                    labels = np.zeros(0, dtype=np.int64)
            
            # Scale boxes back to original image size
            orig_w, orig_h = original_sizes[i]
            scale_x = orig_w / IMG_SIZE
            scale_y = orig_h / IMG_SIZE
            
            if len(boxes) > 0:
                boxes[:, 0] *= scale_x  # x1
                boxes[:, 1] *= scale_y  # y1
                boxes[:, 2] *= scale_x  # x2
                boxes[:, 3] *= scale_y  # y2
            
            # Convert to tensors for metrics
            boxes_tensor = torch.tensor(boxes, dtype=torch.float32)
            scores_tensor = torch.tensor(scores, dtype=torch.float32)
            labels_tensor = torch.tensor(labels, dtype=torch.int64)
            
            # Ensure correct shape
            if boxes_tensor.dim() == 1 and len(boxes_tensor) > 0:
                boxes_tensor = boxes_tensor.unsqueeze(0)
            if scores_tensor.dim() == 0 and len(scores_tensor) > 0:
                scores_tensor = scores_tensor.unsqueeze(0)
            if labels_tensor.dim() == 0 and len(labels_tensor) > 0:
                labels_tensor = labels_tensor.unsqueeze(0)
            
            # Prepare predictions for metrics
            pred_metrics = [{
                'boxes': boxes_tensor,
                'scores': scores_tensor,
                'labels': labels_tensor
            }]
            
            # Prepare targets
            target_boxes = batch['boxes'][i].cpu().float()
            target_labels = batch['labels'][i].cpu().long()
            
            if target_boxes.dim() == 1 and len(target_boxes) > 0:
                target_boxes = target_boxes.unsqueeze(0)
            if target_labels.dim() == 0 and len(target_labels) > 0:
                target_labels = target_labels.unsqueeze(0)
            
            targets = [{
                'boxes': target_boxes,
                'labels': target_labels
            }]
            
            # Update metrics
            try:
                self.metric.update(pred_metrics, targets)
            except Exception as e:
                print(f"Error updating metrics for image {image_paths[i]}: {e}")
                print(f"Prediction boxes shape: {boxes_tensor.shape}")
                print(f"Target boxes shape: {target_boxes.shape}")
                print(f"Sample prediction boxes: {boxes_tensor[:2]}")
                print(f"Sample target boxes: {target_boxes[:2]}")
                continue
            
            # Store results
            batch_results.append({
                'image_path': image_paths[i],
                'boxes': boxes,
                'scores': scores,
                'labels': [self.model.names[int(x)] for x in labels] if len(labels) > 0 else [],
                'time': inference_time / batch_size  # Average time per image
            })
        
        return batch_results
if __name__ == "__main__":
    try:
        dataset = COCODataset(DATASET_PATH)
    except Exception as e:
        print(f"Error loading dataset: {str(e)}")
        print("Please verify the dataset path and files exist")
        raise
    
    # Lists to store metrics for plotting
    map_50_values = []
    map_values = []
    mar_100_values = []
    inference_times_per_image = []
    total_inference_times = []  # For total inference time
    batch_sizes = BATCH_SIZES

    for batch_size in BATCH_SIZES:
        print(f"\n{'='*50}")
        print(f"Evaluating with batch size: {batch_size}")
        print(f"Processing {len(dataset)} images")
        print(f"{'='*50}")
        
        data_loader = DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=0,
            collate_fn=lambda x: x
        )
        
        evaluator = YOLOv5Evaluator()
        total_images = 0
        total_time = 0
        results = []
        
        for batch_idx, batch in enumerate(data_loader):
            batch_dict = {
                'image': torch.stack([item['image'] for item in batch]),
                'original_image': [item['original_image'] for item in batch],
                'boxes': [item['boxes'] for item in batch],
                'labels': [item['labels'] for item in batch],
                'original_size': [item['original_size'] for item in batch],
                'image_id': [item['image_id'] for item in batch],
                'image_path': [item['image_path'] for item in batch]
            }
            
            batch_results = evaluator.evaluate_batch(batch_dict)
            results.extend(batch_results)
            total_images += len(batch_results)
            total_time += sum(r['time'] for r in batch_results)
            
            print(f"Processed batch {batch_idx+1} with {len(batch)} images")
        
        metrics = evaluator.metric.compute()
        
        print("\nEvaluation Metrics:")
        print(f"mAP@0.5: {metrics['map_50'].item():.3f}")
        print(f"mAP@0.5-0.95: {metrics['map'].item():.3f}")
        print(f"Recall@100: {metrics['mar_100'].item():.3f}")
        
        print(f"\nTotal Images Processed: {total_images}")
        print(f"Total Inference Time: {total_time:.4f}s")
        print(f"Average Inference Time: {total_time/total_images:.4f}s per image")
        
        # Store metrics for plotting
        map_50_values.append(metrics['map_50'].item())
        map_values.append(metrics['map'].item())
        mar_100_values.append(metrics['mar_100'].item())
        inference_times_per_image.append(total_time / total_images)
        total_inference_times.append(total_time)
        
        # Save visualizations
        os.makedirs(f"visualizations_batch_{batch_size}", exist_ok=True)
        
        for idx, result in enumerate(results):
            try:
                img = Image.open(result['image_path'])
                fig, ax = plt.subplots(1, figsize=(12, 8))
                ax.imshow(img)
                
                for box, label, score in zip(result['boxes'], result['labels'], result['scores']):
                    x1, y1, x2, y2 = box
                    rect = patches.Rectangle(
                        (x1, y1), x2-x1, y2-y1,
                        linewidth=2, edgecolor='red', facecolor='none'
                    )
                    ax.add_patch(rect)
                    ax.text(
                        x1, y1-10, 
                        f"{label} {score:.2f}",
                        color='white', fontsize=10,
                        bbox=dict(facecolor='red', alpha=0.8, pad=2)
                    )
                
                plt.axis('off')
                save_path = os.path.join(f"visualizations_batch_{batch_size}", f"result_{idx}.png")
                plt.savefig(save_path, bbox_inches='tight', pad_inches=0.1, dpi=300)
                plt.close()
            except Exception as e:
                print(f"Could not visualize {result['image_path']}: {str(e)}")
        
        print(f"Saved visualizations to visualizations_batch_{batch_size} folder")
    
    # Plot multiple graphs
    def plot_dual_axis(metric_values, metric_name, time_values, time_label, filename):
        fig, ax1 = plt.subplots(figsize=(10, 6))

        # Plot metric on left y-axis
        ax1.plot(batch_sizes, metric_values, 'b-', marker='o', label=metric_name)
        ax1.set_xlabel('Batch Size')
        ax1.set_ylabel(metric_name, color='b')
        ax1.tick_params(axis='y', labelcolor='b')
        ax1.grid(True, linestyle='--', alpha=0.7)

        # Create right y-axis for time
        ax2 = ax1.twinx()
        ax2.plot(batch_sizes, time_values, 'r-', marker='s', label=time_label)
        ax2.set_ylabel(time_label, color='r')
        ax2.tick_params(axis='y', labelcolor='r')

        # Title and layout
        plt.title(f'{metric_name} vs {time_label} for Different Batch Sizes')
        fig.tight_layout()

        # Combine legends
        lines1, labels1 = ax1.get_legend_handles_labels()
        lines2, labels2 = ax2.get_legend_handles_labels()
        ax1.legend(lines1 + lines2, labels1 + labels2, loc='center right')

        # Save and show plot
        plt.savefig(filename, dpi=300)
        plt.show()
        plt.close()

    # Generate plots
    # Plot 1: mAP@0.5 vs Inference Time (per image)
    plot_dual_axis(
        metric_values=map_50_values,
        metric_name='mAP@0.5',
        time_values=inference_times_per_image,
        time_label='Inference Time (s/image)',
        filename='map50_vs_inference_time.png'
    )
    
    # Plot 2: mAP@0.5:0.95 vs Total Inference Time
    plot_dual_axis(
        metric_values=map_values,
        metric_name='mAP@0.5:0.95',
        time_values=total_inference_times,
        time_label='Total Inference Time (s)',
        filename='map_vs_total_inference_time.png'
    )
    
    # Plot 3: Recall@100 vs Inference Time (per image)
    plot_dual_axis(
        metric_values=mar_100_values,
        metric_name='Recall@100',
        time_values=inference_times_per_image,
        time_label='Inference Time (s/image)',
        filename='mar100_vs_inference_time.png'
    )

Using annotation file: C:\Users\SIU856526097\datasets\coco-pose-subset\annotations\instances_val2017.json
Selected 32 images for evaluation

Evaluating with batch size: 1
Processing 32 images
PyTorch version: 1.13.1+cpu
Using device: cpu


Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to C:\Users\SIU856526097/.cache\torch\hub\master.zip
YOLOv5  2025-4-13 Python-3.9.21 torch-1.13.1+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([1, 25200, 85])
Image 0 predictions shape: torch.Size([25200, 85])
After confidence filter: 45 detections
After NMS: 9 detections
Processed batch 1 with 1 images
Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([1, 25200, 85])
Image 0 predictions shape: torch.Size([25200, 85])
After confidence filter: 34 detections
After NMS: 4 detections
Processed batch 2 with 1 images
Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([1, 25200, 85])
Image 0 predictions shape: torch.Size([25200, 85])
After confidence filter: 75 detections
After NMS: 8 detections
Processed batch 3 with 1 images
Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([1, 25200, 85])
Image 0 predictions shape: torch.Size([25200, 85])
After confidence filter: 50 detections
After NMS: 5 detections
Processed batch 4 with 1 images
Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([1, 25200, 85])
I

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to C:\Users\SIU856526097/.cache\torch\hub\master.zip
YOLOv5  2025-4-13 Python-3.9.21 torch-1.13.1+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([4, 25200, 85])
Image 0 predictions shape: torch.Size([25200, 85])
After confidence filter: 45 detections
After NMS: 9 detections
Image 1 predictions shape: torch.Size([25200, 85])
After confidence filter: 34 detections
After NMS: 4 detections
Image 2 predictions shape: torch.Size([25200, 85])
After confidence filter: 75 detections
After NMS: 8 detections
Image 3 predictions shape: torch.Size([25200, 85])
After confidence filter: 50 detections
After NMS: 5 detections
Processed batch 1 with 4 images
Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([4, 25200, 85])
Image 0 predictions shape: torch.Size([25200, 85])
After confidence filter: 81 detections
After NMS: 10 detections
Image 1 predictions shape: torch.Size([25200, 85])
After confidence filter: 25 detections
After NMS: 4 detections
Image 2 predictions shape: torch.Size([25200, 85])
After confidence filter: 13 detections
After NMS: 1 detections
Ima

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to C:\Users\SIU856526097/.cache\torch\hub\master.zip
YOLOv5  2025-4-13 Python-3.9.21 torch-1.13.1+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


Results type: <class 'torch.Tensor'>
Raw results shape: torch.Size([8, 25200, 85])
Image 0 predictions shape: torch.Size([25200, 85])
After confidence filter: 45 detections
After NMS: 9 detections
Image 1 predictions shape: torch.Size([25200, 85])
After confidence filter: 34 detections
After NMS: 4 detections
Image 2 predictions shape: torch.Size([25200, 85])
After confidence filter: 75 detections
After NMS: 8 detections
Image 3 predictions shape: torch.Size([25200, 85])
After confidence filter: 50 detections
After NMS: 5 detections
Image 4 predictions shape: torch.Size([25200, 85])
After confidence filter: 81 detections
After NMS: 10 detections
Image 5 predictions shape: torch.Size([25200, 85])
After confidence filter: 25 detections
After NMS: 4 detections
Image 6 predictions shape: torch.Size([25200, 85])
After confidence filter: 13 detections
After NMS: 1 detections
Image 7 predictions shape: torch.Size([25200, 85])
After confidence filter: 20 detections
After NMS: 4 detections
Proc