In [5]:
import cv2
import numpy as np
from typing import List, Tuple, Dict
import time
import os

Ensure the aruco module is imported

In [6]:
from cv2 import aruco

In [7]:
class BoxDetector:
    def __init__(self, camera_matrix=None, dist_coeffs=None):
        """
        Initialize the box detector with camera calibration parameters.
        
        Args:
            camera_matrix: 3x3 camera intrinsic matrix
            dist_coeffs: Distortion coefficients
        """
        self.camera_matrix = camera_matrix if camera_matrix is not None else np.eye(3)
        self.dist_coeffs = dist_coeffs if dist_coeffs is not None else np.zeros((1, 5))
        
        # ArUco dictionary for marker detection
        self.aruco_dict = aruco.Dictionary_get(aruco.DICT_6X6_250)
        self.aruco_params = aruco.DetectorParameters_create()
        
        # Parameters for box detection
        self.min_box_area = 1000  # Minimum area to consider as a box
        self.canny_thresholds = (50, 150)
        
        # Box counting statistics
        self.total_boxes = 0
        self.boxes_with_aruco = 0
        
    def preprocess_image(self, image: np.ndarray) -> np.ndarray:
        """
        Preprocess the image for better box detection.
        
        Args:
            image: Input image
            
        Returns:
            Preprocessed image
        """
        # Convert to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        
        # Apply Gaussian blur to reduce noise
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        
        # Apply adaptive thresholding
        thresh = cv2.adaptiveThreshold(
            blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, 
            cv2.THRESH_BINARY, 11, 2
        )
        
        return thresh
    
    def detect_boxes(self, image: np.ndarray) -> Tuple[List[Dict], float, Dict]:
        """
        Detect boxes in the image and return their properties.
        
        Args:
            image: Input image
            
        Returns:
            Tuple containing:
            - List of dictionaries containing box properties
            - Processing time
            - Dictionary with box statistics
        """
        start_time = time.time()
        
        # Preprocess the image
        processed = self.preprocess_image(image)
        
        # Find contours
        edges = cv2.Canny(processed, *self.canny_thresholds)
        contours, _ = cv2.findContours(
            edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        
        boxes = []
        self.total_boxes = 0
        self.boxes_with_aruco = 0
        
        for contour in contours:
            # Filter small contours
            if cv2.contourArea(contour) < self.min_box_area:
                continue
                
            # Approximate the contour to a polygon
            epsilon = 0.02 * cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, epsilon, True)
            
            # Check if the polygon has 4 vertices (is rectangular)
            if len(approx) == 4:
                self.total_boxes += 1
                
                # Get box properties
                rect = cv2.minAreaRect(contour)
                box_center = rect[0]
                box_size = rect[1]
                box_angle = rect[2]
                
                # Get 3D position (assuming known box dimensions and camera parameters)
                x, y = box_center
                z = self.estimate_depth(box_size)
                
                boxes.append({
                    'position': (x, y, z),
                    'orientation': box_angle,
                    'size': box_size,
                    'contour': contour
                })
        
        # Detect ArUco markers
        corners, ids, _ = aruco.detectMarkers(
            image, self.aruco_dict, parameters=self.aruco_params
        )
        
        # Associate ArUco markers with detected boxes
        if ids is not None:
            self._associate_aruco_with_boxes(boxes, corners, ids)
        
        processing_time = time.time() - start_time
        
        # Compile box statistics
        stats = {
            'total_boxes': self.total_boxes,
            'boxes_with_aruco': self.boxes_with_aruco,
            'boxes_without_aruco': self.total_boxes - self.boxes_with_aruco
        }
        
        return boxes, processing_time, stats
    def estimate_depth(self, box_size: Tuple[float, float]) -> float:
        """
        Estimate the depth (Z coordinate) of a box based on its apparent size.
        This is a simplified calculation - in practice, you'd need proper 3D reconstruction.
        
        Args:
            box_size: Apparent size of the box in the image
            
        Returns:
            Estimated depth in centimeters
        """
        # This is a simplified calculation assuming a known real box size
        KNOWN_BOX_WIDTH = 50  # cm
        focal_length = self.camera_matrix[0, 0]
        
        # Use similar triangles to estimate depth
        depth = (KNOWN_BOX_WIDTH * focal_length) / box_size[0]
        return depth
    
    def _associate_aruco_with_boxes(
        self, boxes: List[Dict], 
        aruco_corners: List[np.ndarray], 
        aruco_ids: np.ndarray
    ) -> None:
        """
        Associate detected ArUco markers with boxes.
        
        Args:
            boxes: List of detected boxes
            aruco_corners: Corners of detected ArUco markers
            aruco_ids: IDs of detected ArUco markers
        """
        for box in boxes:
            box_contour = box['contour']
            for corners, id_ in zip(aruco_corners, aruco_ids):
                marker_center = np.mean(corners[0], axis=0)
                if cv2.pointPolygonTest(box_contour, tuple(marker_center), False) >= 0:
                    box['aruco_id'] = id_[0]
                    self.boxes_with_aruco += 1
                    break
    
    def visualize_results(
        self, image: np.ndarray, 
        boxes: List[Dict], 
        processing_time: float,
        stats: Dict,
        image_name: str
    ) -> np.ndarray:
        """
        Visualize the detection results on the image.
        
        Args:
            image: Original image
            boxes: List of detected boxes
            processing_time: Time taken for detection
            stats: Dictionary containing box statistics
            image_name: Name of the input image file
            
        Returns:
            Annotated image
        """
        result = image.copy()
        
        # Draw image name
        cv2.putText(
            result, f"Image: {image_name}", 
            (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2
        )
        
        # Draw box statistics
        cv2.putText(
            result, f"Total Boxes: {stats['total_boxes']}", 
            (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2
        )
        cv2.putText(
            result, f"With ArUco: {stats['boxes_with_aruco']}", 
            (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2
        )
        cv2.putText(
            result, f"Without ArUco: {stats['boxes_without_aruco']}", 
            (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2
        )
        
        for box in boxes:
            # Draw box contour
            cv2.drawContours(result, [box['contour']], 0, (0, 255, 0), 2)
            
            # Draw position and orientation
            x, y, z = box['position']
            cv2.putText(
                result, f"({x:.1f}, {y:.1f}, {z:.1f}cm)", 
                (int(x), int(y)), 
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2
            )
            
            # Draw ArUco ID if available
            if 'aruco_id' in box:
                cv2.putText(
                    result, f"ID: {box['aruco_id']}", 
                    (int(x), int(y) + 20), 
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2
                )
        
        # Add processing time
        cv2.putText(
            result, f"Processing time: {processing_time:.3f}s", 
            (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2
        )
        
        return result

In [8]:
def main():
    # Initialize detector
    detector = BoxDetector()
    
    # List of input images
    image_files = ['obj1.jpg', 'obj2.jpg', 'obj3.jpg', 'obj4.jpg']
    
    for image_name in image_files:
        # Check if file exists
        if not os.path.exists(image_name):
            print(f"Warning: Image file {image_name} not found")
            continue
            
        # Read image
        image = cv2.imread(image_name)
        if image is None:
            print(f"Error: Could not read image {image_name}")
            continue
            
        # Detect boxes
        boxes, processing_time, stats = detector.detect_boxes(image)
        
        # Visualize results
        result = detector.visualize_results(image, boxes, processing_time, stats, image_name)
        
        # Display results
        cv2.imshow(f'Box Detection - {image_name}', result)
        
        # Save results
        output_name = f'result_{image_name}'
        cv2.imwrite(output_name, result)
        print(f"Results for {image_name}:")
        print(f"Total boxes: {stats['total_boxes']}")
        print(f"Boxes with ArUco: {stats['boxes_with_aruco']}")
        print(f"Boxes without ArUco: {stats['boxes_without_aruco']}")
        print(f"Processing time: {processing_time:.3f}s")
        print("-" * 50)
        
    # Wait for key press and close windows
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [9]:
if __name__ == "__main__":
    main()

AttributeError: module 'cv2.aruco' has no attribute 'Dictionary_get'