# In [None]:  (Jupyter notebook cell prompt left over from export)
import cv2
import numpy as np
from collections import deque
from scipy.optimize import linear_sum_assignment
from skimage.feature import hog
from skimage.color import rgb2gray
import joblib
import os

class RobustObjectTracker:
    """IoU-based multi-object tracker with center-distance duplicate suppression.

    Each track lives in ``self.tracked_objects[obj_id]`` as a dict with keys:
        'last_box'    - most recent matched box (x1, y1, x2, y2)
        'history'     - up to ``history_length`` most recent boxes (oldest first)
        'disappeared' - consecutive updates in which the track was not matched
        'hits'        - total number of updates in which the track was observed
        'label'       - free slot for the caller (initialised to None)
        'confidence'  - free slot for the caller (initialised to 0)
    """

    def __init__(self, history_length=10, iou_threshold=0.3, min_persistence=5,
                 max_disappeared=10, min_distance=50):
        """
        Args:
            history_length: Max. number of recent boxes kept per track (at least 1)
            iou_threshold: Minimum overlap to consider same object
            min_persistence: No. of frames to be observed before confirming an object
            max_disappeared: No. of frames after which an object is removed from consideration
            min_distance: Minimum distance to consider objects separate (in pixels)
        """
        self.tracked_objects = {}
        self.next_id = 0
        # Clamp so that every track always retains at least its latest box.
        self.history_length = max(1, int(history_length))
        self.iou_threshold = iou_threshold
        self.min_persistence = min_persistence
        self.max_disappeared = max_disappeared
        self.min_distance = min_distance

    def calculate_iou(self, box1, box2):
        """Calculate Intersection over Union between two (x1, y1, x2, y2) boxes.

        The ``+ 1`` terms treat the coordinates as inclusive pixel indices, so a
        box with x1 == x2 is one pixel wide.

        Returns:
            IoU in [0, 1]; 0 when the union is empty.
        """
        x1 = max(box1[0], box2[0])
        y1 = max(box1[1], box2[1])
        x2 = min(box1[2], box2[2])
        y2 = min(box1[3], box2[3])

        # max(0, ...) clamps non-overlapping boxes to zero intersection area
        inter = max(0, x2 - x1 + 1) * max(0, y2 - y1 + 1)
        area1 = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1)
        area2 = (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1)
        union = area1 + area2 - inter

        return inter / union if union > 0 else 0

    def non_max_suppression(self, boxes):
        """Drop boxes whose center lies closer than ``min_distance`` to a larger kept box.

        Boxes are visited from largest to smallest area (ties: the later input
        index first). A box is kept only if its center is at least
        ``min_distance`` away from the center of every box kept so far, so a box
        that was itself suppressed never suppresses another one.

        Args:
            boxes: Sequence of (x1, y1, x2, y2) boxes.

        Returns:
            List of the surviving input boxes, unchanged and in input order.
        """
        # If there is only one or no box, no suppression is needed
        if len(boxes) <= 1:
            return list(boxes)

        coords = np.asarray(boxes, dtype=float)
        centers = (coords[:, :2] + coords[:, 2:4]) / 2.0
        areas = (coords[:, 2] - coords[:, 0]) * (coords[:, 3] - coords[:, 1])

        # Largest first; among equal areas the later detection wins.
        order = sorted(range(len(coords)), key=lambda k: (-areas[k], -k))

        kept = []
        for idx in order:
            far_from_all_kept = all(
                np.linalg.norm(centers[idx] - centers[k]) >= self.min_distance
                for k in kept
            )
            if far_from_all_kept:
                kept.append(idx)

        kept_lookup = set(kept)
        # Return the caller's own box objects so element types stay consistent
        # regardless of how many boxes were passed in.
        return [box for i, box in enumerate(boxes) if i in kept_lookup]

    def update_tracker(self, current_boxes):
        """Main tracking update method: matches current detections to existing tracks,
        updates existing tracks, removes stale ones, and initializes new tracks.

        Args:
            current_boxes: Sequence of (x1, y1, x2, y2) detections for this frame.

        Returns:
            Dict ``{obj_id: track_dict}`` of tracks observed in at least
            ``min_persistence`` updates. The values are the live track dicts, so
            changes the caller makes (e.g. to 'label') persist across updates.
        """
        # Step 1: Suppress duplicate detections whose centers are too close
        current_boxes = self.non_max_suppression(current_boxes)

        # Snapshot of the tracks that existed before this update; the row order
        # of the IoU matrix follows this list.
        track_ids = list(self.tracked_objects)
        matched_detections = set()  # indexes of current_boxes that were matched
        matched_tracks = set()      # object IDs of tracks that were matched

        # Step 2: IoU-based matching if we have both detections and existing tracks
        if len(current_boxes) > 0 and track_ids:
            # rows = existing tracks, columns = current detections
            iou_matrix = np.zeros((len(track_ids), len(current_boxes)))
            for i, obj_id in enumerate(track_ids):
                last_box = self.tracked_objects[obj_id]['last_box']
                for j, det_box in enumerate(current_boxes):
                    iou_matrix[i, j] = self.calculate_iou(last_box, det_box)

            # Hungarian algorithm minimises cost, so negate to maximise total IoU
            row_ind, col_ind = linear_sum_assignment(-iou_matrix)

            # Step 3: Accept only assignments above the IoU threshold and refresh
            # the matched track with the new observation
            for i, j in zip(row_ind, col_ind):
                if iou_matrix[i, j] > self.iou_threshold:
                    obj_id = track_ids[i]
                    box = current_boxes[j]
                    track = self.tracked_objects[obj_id]
                    track['last_box'] = box
                    track['disappeared'] = 0
                    track['hits'] = track.get('hits', len(track['history'])) + 1
                    history = track['history']
                    history.append(box)
                    # Keep memory bounded: retain only the newest entries
                    excess = len(history) - self.history_length
                    if excess > 0:
                        del history[:excess]
                    matched_tracks.add(obj_id)
                    matched_detections.add(int(j))

        # Step 4: Age pre-existing tracks that were not matched and drop stale
        # ones. This runs before new tracks are created so that a track born in
        # this update is not counted as "disappeared" in the same update.
        for obj_id in track_ids:
            if obj_id in matched_tracks:
                continue
            track = self.tracked_objects[obj_id]
            track['disappeared'] += 1
            if track['disappeared'] > self.max_disappeared:
                del self.tracked_objects[obj_id]

        # Step 5: For unmatched detections, create new tracks with a new ID
        for j, box in enumerate(current_boxes):
            if j in matched_detections:
                continue
            self.tracked_objects[self.next_id] = {
                'last_box': box,
                'history': [box],
                'disappeared': 0,
                'hits': 1,
                'label': None,
                'confidence': 0
            }
            self.next_id += 1

        # Step 6: Return only those tracks that have been observed often enough.
        # 'hits' is used instead of len(history) because history is bounded.
        return {
            obj_id: obj_data
            for obj_id, obj_data in self.tracked_objects.items()
            if obj_data.get('hits', len(obj_data['history'])) >= self.min_persistence
        }

def auto_canny(image, sigma=0.2):
    """Run Canny edge detection with thresholds derived from the image median.

    The two hysteresis thresholds are placed ``sigma`` (as a fraction) below
    and above the median intensity, clamped to the range 0..255.

    Args:
        image: Input image passed to ``cv2.Canny``.
        sigma: Fractional half-width of the threshold band around the median.

    Returns:
        The edge map produced by ``cv2.Canny``.
    """
    median_intensity = np.median(image)
    low_threshold = int(max(0, (1.0 - sigma) * median_intensity))
    high_threshold = int(min(255, (1.0 + sigma) * median_intensity))
    edge_map = cv2.Canny(image, low_threshold, high_threshold)
    return edge_map

def detect_objects(frame):
    """Find candidate object boxes in a BGR frame via edges and contours.

    Pipeline: grayscale -> CLAHE contrast equalisation -> Gaussian blur ->
    automatic Canny -> morphological closing -> external contours. Contours
    are then filtered by area, aspect ratio and distance from the frame border.

    Args:
        frame: Colour image; converted with ``cv2.COLOR_BGR2GRAY``.

    Returns:
        List of ``[x1, y1, x2, y2]`` boxes, where x2 = x + w and y2 = y + h of
        the contour's bounding rectangle.
    """
    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    equalized = cv2.createCLAHE(clipLimit=5.0, tileGridSize=(8, 8)).apply(grayscale)

    frame_h, frame_w = grayscale.shape
    short_side = min(frame_h, frame_w)

    # Kernel sizes scale with the frame; "| 1" forces them to be odd
    blur_size = max(3, int(short_side * 0.002)) | 1
    smoothed = cv2.GaussianBlur(equalized, (blur_size, blur_size), 0)

    edge_map = auto_canny(smoothed)

    close_size = max(3, int(short_side * 0.005)) | 1
    structuring = np.ones((close_size, close_size), np.uint8)
    sealed = cv2.morphologyEx(edge_map, cv2.MORPH_CLOSE, structuring)

    contours, _ = cv2.findContours(sealed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    smallest_area = max(500, frame_h * frame_w * 0.0005)
    largest_area = frame_h * frame_w * 0.9
    detections = []

    for contour in contours:
        contour_area = cv2.contourArea(contour)
        left, top, width, height = cv2.boundingRect(contour)
        shape_ratio = width / float(height)

        # Reject contours that are too small or too large
        if not (smallest_area < contour_area < largest_area):
            continue
        # Reject extremely elongated shapes
        if not (0.2 < shape_ratio < 5):
            continue
        # Reject boxes within 5 px of the top/left frame border
        if not (left > 5 and top > 5):
            continue
        # Reject boxes within 5 px of the right/bottom frame border
        if not (left + width < frame_w - 5 and top + height < frame_h - 5):
            continue

        detections.append([left, top, left + width, top + height])

    return detections

def extract_hog_features(image):
    """Compute the HOG descriptor of a colour image for the classifier.

    The image is converted to grayscale with ``rgb2gray`` and described with
    9 orientation bins, 8x8-pixel cells, 2x2-cell blocks and L2-Hys block
    normalisation.

    Args:
        image: Colour image patch.

    Returns:
        The feature vector returned by ``skimage.feature.hog``.
    """
    # NOTE(review): rgb2gray assumes RGB channel order while main() passes
    # OpenCV patches (presumably BGR) - confirm this matches how the
    # classifier was trained before changing it.
    hog_settings = {
        'orientations': 9,
        'pixels_per_cell': (8, 8),
        'cells_per_block': (2, 2),
        'block_norm': 'L2-Hys',
        'visualize': False,
    }
    return hog(rgb2gray(image), **hog_settings)

def main():
    """Run the live webcam demo: detect, track, classify and draw objects.

    Loads the classifier from ``classifier.pkl``, reads frames from the default
    camera and shows the annotated result in a window until 'q' is pressed or a
    frame cannot be read. Pressing 's' saves the raw frame as
    ``final_samples/final_<n>.jpg``. The camera and all windows are released
    even if the loop is interrupted (e.g. by Ctrl+C).
    """
    # Load pre-trained object classification model from disk.
    # NOTE: joblib.load unpickles the file, so only load model files you trust.
    model = joblib.load("classifier.pkl")

    # Initialize video capture from default webcam
    video = cv2.VideoCapture(0)

    # min_distance=1 means only detections with (almost) coincident centers
    # are treated as duplicates, so nearby objects stay separate.
    tracker = RobustObjectTracker(min_distance=1)

    # Counter for saved image snapshots
    counter = 0

    # Create the directory snapshots are actually written to; cv2.imwrite does
    # not create missing directories and just reports failure.
    snapshot_dir = 'final_samples'
    os.makedirs(snapshot_dir, exist_ok=True)

    try:
        while True:
            # Capture a single frame from the video stream
            ret, frame = video.read()
            if not ret:
                break  # Exit loop if frame could not be read

            # Detect object bounding boxes from current frame
            current_boxes = detect_objects(frame)

            # Update object tracker with newly detected boxes
            tracked_objects = tracker.update_tracker(current_boxes)

            # Draw on a copy so that snapshots and crops use the clean frame
            result = frame.copy()

            # Loop over each confirmed tracked object
            for obj_id, obj_data in tracked_objects.items():
                x1, y1, x2, y2 = obj_data['last_box']  # Most recent bounding box

                # Crop the object region from the frame
                cropped_patch = frame[y1:y2, x1:x2]
                if cropped_patch.size == 0:
                    continue  # Skip empty patches

                # If the object label is not yet confident, classify it again;
                # otherwise reuse the label cached on the track.
                if obj_data['confidence'] < 0.7:
                    # Resize object patch to fixed size required by HOG + classifier
                    patch = cv2.resize(cropped_patch, (64, 128))
                    features = extract_hog_features(patch)
                    pred_label = model.predict([features])[0]
                    confidence = model.predict_proba([features])[0].max()
                    obj_data['label'] = pred_label
                    obj_data['confidence'] = confidence
                else:
                    pred_label = obj_data['label']
                    confidence = obj_data['confidence']

                # Pick the box color (BGR) from the predicted label and confidence
                if confidence < 0.40:
                    pred_label = "unknown"       # Low confidence fallback (display only)
                    color = (0, 0, 255)          # Red for unknown
                elif pred_label == "book":
                    color = (255, 0, 0)          # Blue for book
                elif pred_label == "background":
                    color = (0, 0, 0)            # Black for background
                else:
                    color = (0, 255, 0)          # Green for other known objects

                # Draw the bounding box and label text
                label_text = f"ID:{obj_id} {pred_label} ({confidence:.2f})"
                cv2.rectangle(result, (x1, y1), (x2, y2), color, 2)
                cv2.putText(result, label_text, (x1, y1 - 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

                # Draw motion trail through the centers of the last 5 recorded boxes
                history = obj_data['history'][-5:]
                for prev, curr in zip(history, history[1:]):
                    cv2.line(result,
                             ((prev[0] + prev[2]) // 2, (prev[1] + prev[3]) // 2),
                             ((curr[0] + curr[2]) // 2, (curr[1] + curr[3]) // 2),
                             color, 2)

            # Display the processed result frame in a window
            cv2.imshow('Robust Object Tracking', result)

            # Keyboard interaction
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break  # Quit program
            elif key == ord('s'):
                # Save the current (un-annotated) frame as a snapshot
                print("Saving snapshot...")
                snapshot_path = os.path.join(snapshot_dir, f'final_{counter}.jpg')
                if cv2.imwrite(snapshot_path, frame):
                    counter += 1
                else:
                    # imwrite signals failure through its return value only
                    print(f"Failed to save snapshot to {snapshot_path}")
    finally:
        # Always release the camera and close windows, even on Ctrl+C or errors
        video.release()
        cv2.destroyAllWindows()

# Start the live demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

# KeyboardInterrupt:  (notebook cell output: the run was stopped manually)

# :