In [1]:
import os
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F  # Add this import
import torchvision.transforms as transforms
from torch.hub import load_state_dict_from_url
import numpy as np
from ultralytics import YOLO
import torchreid
from datetime import datetime
import threading
from collections import defaultdict
import time
import queue
from pathlib import Path
import logging
import csv
import math
from bytetracker import BYTETracker
import torch.nn as nn  # Add this import for neural network components
import logging
from typing import Dict, List
from skimage.feature import local_binary_pattern
from sklearn.cluster import KMeans

: 

# Improved Multi-Camera Person Tracker

In [119]:
import numpy as np
import cv2
from collections import defaultdict
import logging
from bytetracker import BYTETracker
from dataclasses import dataclass
from typing import List, Tuple, Dict, Optional
import torch
import torch.nn as nn
import torch.nn.functional as F
from ultralytics import YOLO


@dataclass
class Detection:
    """Container for one detection: bounding box, confidence and feature vector.

    NOTE(review): ImprovedMultiCameraTracker.process_frame in this file stores
    detections as plain dicts with keys 'bbox', 'features' and 'conf' rather
    than instances of this class — confirm whether this class is still used.
    """
    bbox: np.ndarray  # [x1, y1, x2, y2]
    confidence: float  # detector confidence score
    feature: np.ndarray  # feature vector for the detection — presumably the ReID embedding; confirm


@dataclass
class Track:
    """Per-camera track record created and extended by
    ImprovedMultiCameraTracker.process_frame.
    """
    id: int  # track id reported by the per-camera tracker
    camera_id: str  # camera key the track belongs to, e.g. 'camera1'
    # NOTE(review): annotated as List[Detection], but process_frame appends
    # dicts with keys 'bbox', 'features' and 'conf'.
    detections: List[Detection]
    timestamps: List[float]  # one timestamp appended per stored detection
    last_seen: float  # timestamp of the most recent update
    first_seen: float  # timestamp at which the track was created
    # Raised by 0.1 (capped at 1.0) when consecutive updates are < 1 s apart,
    # multiplied by 0.9 otherwise (see process_frame).
    track_quality: float = 1.0


class ImprovedMultiCameraTracker:
    def __init__(self):
        """Set up logging, per-camera trackers, models and bookkeeping state.

        Creates one BYTETracker per camera ('camera1', 'camera2'), loads the
        YOLO detector and the ReID network, and initialises empty track
        storage, cross-camera matching parameters and the door-zone
        rectangles drawn by ``draw_info``.
        """
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger("ImprovedMultiCameraTracker")

        camera_ids = ('camera1', 'camera2')

        # Identical BYTETracker settings for every camera
        byte_track_settings = dict(
            track_thresh=0.5,   # Detection confidence threshold
            track_buffer=30,    # Track buffer size
            match_thresh=0.8,   # IoU threshold for association
            frame_rate=6,       # Video frame rate
        )
        self.trackers = {
            cam: BYTETracker(**byte_track_settings) for cam in camera_ids
        }

        # Detector first, then the ReID network (which logs via self.logger)
        self.yolo_model = self.initialize_yolo()
        self.reid_model = self.initialize_reid()

        # camera id -> {track id -> Track}
        self.tracks: Dict[str, Dict[int, Track]] = {cam: {} for cam in camera_ids}

        # Cross-camera matching parameters
        self.reid_threshold = 0.7   # minimum similarity for a match
        self.min_track_length = 5   # detections required on the camera 1 side
        self.max_time_gap = 600     # Maximum seconds between camera appearances
        self.min_time_gap = 30      # Minimum seconds between camera appearances

        # Results: every track id ever seen per camera, plus match records
        self.camera1_tracks = set()
        self.camera2_tracks = set()
        self.cross_camera_matches = []

        # Door zones as [(x1, y1), (x2, y2)] (adjust based on your camera setup)
        self.door_zones = {
            'camera1': [(1030, 0), (1700, 560)],
            'camera2': [(400, 0), (800, 470)],
        }

    def initialize_yolo(self):
        """Load the YOLOv8x weights and attach detection thresholds to the model.

        NOTE(review): Ultralytics documents ``conf`` and ``iou`` as predict-time
        arguments; confirm that assigning them as attributes here actually
        influences inference. ``process_frame`` applies its own 0.5 confidence
        filter regardless.

        Returns:
            The loaded ``YOLO`` model object.
        """
        detector = YOLO("yolov8x.pt")
        # Higher confidence threshold for more reliable detections, and a
        # higher IOU threshold for better overlap handling.
        detector.conf, detector.iou = 0.5, 0.7
        return detector

    def initialize_reid(self):
        """Initialize ReID model using OSNet architecture"""
        try:
            class OSNet(nn.Module):
                def __init__(self):
                    super().__init__()
                    # Base convolution layers
                    self.conv1 = nn.Conv2d(
                        3, 64, kernel_size=7, stride=2, padding=3)
                    self.bn1 = nn.BatchNorm2d(64)
                    self.relu = nn.ReLU(inplace=True)
                    self.maxpool = nn.MaxPool2d(
                        kernel_size=3, stride=2, padding=1)

                    # Feature layers
                    self.conv2 = nn.Conv2d(
                        64, 256, kernel_size=3, stride=2, padding=1)
                    self.bn2 = nn.BatchNorm2d(256)
                    self.conv3 = nn.Conv2d(
                        256, 512, kernel_size=3, stride=2, padding=1)
                    self.bn3 = nn.BatchNorm2d(512)

                    # Global pooling
                    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
                    self.feat_dim = 512

                def forward(self, x):
                    x = self.conv1(x)
                    x = self.bn1(x)
                    x = self.relu(x)
                    x = self.maxpool(x)

                    x = self.conv2(x)
                    x = self.bn2(x)
                    x = self.relu(x)

                    x = self.conv3(x)
                    x = self.bn3(x)
                    x = self.relu(x)

                    x = self.avgpool(x)
                    return x.view(x.size(0), -1)

            model = OSNet()
            model.eval()

            if torch.cuda.is_available():
                model = model.cuda()
                self.logger.info("ReID model moved to GPU")
            else:
                self.logger.info("Running ReID model on CPU")

            return model

        except Exception as e:
            self.logger.error(f"Error loading ReID model: {e}")
            raise

    def extract_reid_features(self, frame: np.ndarray, bbox: np.ndarray) -> Optional[np.ndarray]:
        """Crop a detection out of ``frame`` and embed it with the ReID network.

        Args:
            frame: BGR image (it is converted with ``COLOR_BGR2RGB``).
            bbox: ``[x1, y1, x2, y2]``; values are truncated to ``int``.

        Returns:
            L2-normalised feature array of shape ``(1, feature_dim)``, or
            ``None`` when the box has a negative origin, is empty or inverted,
            or when any step raises (the error is logged).
        """
        try:
            left, top, right, bottom = (int(v) for v in bbox)
            box_is_valid = left >= 0 and top >= 0 and right > left and bottom > top
            if not box_is_valid:
                return None

            crop = frame[top:bottom, left:right]
            if crop.size == 0:
                return None

            # Resize to 128x256 (width x height) and switch to RGB channel order
            rgb = cv2.cvtColor(cv2.resize(crop, (128, 256)), cv2.COLOR_BGR2RGB)

            # Scale to [0, 1], then standardise each channel
            scaled = rgb.astype(np.float32) / 255.0
            channel_mean = np.array([0.485, 0.456, 0.406])
            channel_std = np.array([0.229, 0.224, 0.225])
            standardised = (scaled - channel_mean) / channel_std

            # HWC -> CHW, add a batch dimension, cast to float32
            batch = torch.from_numpy(standardised).permute(2, 0, 1).unsqueeze(0).float()
            if torch.cuda.is_available():
                batch = batch.cuda()

            with torch.no_grad():
                embedding = F.normalize(self.reid_model(batch), p=2, dim=1)
                return embedding.cpu().numpy()

        except Exception as e:
            self.logger.error(f"Error extracting ReID features: {e}")
            return None

    def process_frame(self, frame: np.ndarray, camera_id: str, timestamp: float) -> Optional[np.ndarray]:
        """Detect, track and annotate the people visible in one camera frame.

        Pipeline: YOLO person detections above 0.5 confidence get a ReID
        embedding and are passed to the camera's BYTETracker. Every returned
        target that overlaps a current detection (see
        ``find_matching_detection``) creates or extends a ``Track`` record.
        Afterwards ``clean_old_tracks`` is called, cross-camera matching runs
        for 'camera2', and ``draw_info`` adds the overlay.

        NOTE(review): the ``update(boxes, scores, scores)`` call and the
        ``track_id`` / ``tlbr`` attributes read from its results assume a
        specific BYTETracker API — confirm against the installed
        ``bytetracker`` package. The tracker is not updated on frames
        without usable detections.

        Args:
            frame: BGR image for this camera, or ``None``.
            camera_id: Key into ``self.trackers`` / ``self.tracks``.
            timestamp: Time of the frame in seconds.

        Returns:
            An annotated copy of the frame; the unmodified input frame if
            processing raised (the error is logged); ``None`` when ``frame``
            is ``None``.
        """
        if frame is None:
            return None

        processed_frame = frame.copy()

        try:
            # 1. Get YOLO detections
            yolo_results = self.yolo_model(frame)[0]
            current_detections = []
            online_targets = []

            # 2. Keep confident person detections that yield a ReID feature
            if len(yolo_results.boxes) > 0:
                dets_xyxy = []
                det_scores = []

                for det in yolo_results.boxes.data:
                    if int(det[5]) != 0:  # person class only
                        continue

                    bbox = det[:4].cpu().numpy()
                    conf = float(det[4].cpu().numpy())
                    if conf <= 0.5:  # confidence threshold
                        continue

                    features = self.extract_reid_features(frame, bbox)
                    if features is None:
                        continue
                    if isinstance(features, torch.Tensor):
                        features = features.cpu().numpy()

                    current_detections.append({
                        'bbox': bbox,
                        'features': features,
                        'conf': conf
                    })
                    dets_xyxy.append(bbox)
                    det_scores.append(conf)

                # 3. Update the tracker with the surviving detections
                if dets_xyxy:
                    dets_xyxy = np.array(dets_xyxy)
                    det_scores = np.array(det_scores)

                    online_targets = self.trackers[camera_id].update(
                        # numpy array with shape (N, 4), in format (x1, y1, x2, y2)
                        dets_xyxy,
                        # detection scores, numpy array with shape (N,)
                        det_scores,
                        det_scores     # classification scores, same as detection scores for this case
                    )

            # 4. Update stored tracks from the tracker output
            active_tracks = set()

            for target in online_targets:
                try:
                    track_id = int(target.track_id)
                    bbox = target.tlbr  # Already in [x1, y1, x2, y2] format
                    active_tracks.add(track_id)

                    # Only targets backed by a current detection are recorded
                    det_idx = self.find_matching_detection(
                        bbox, current_detections)
                    if det_idx is None:
                        continue
                    det = current_detections[det_idx]

                    camera_tracks = self.tracks[camera_id]
                    if track_id not in camera_tracks:
                        # Initialize new track
                        camera_tracks[track_id] = Track(
                            id=track_id,
                            camera_id=camera_id,
                            detections=[det],
                            timestamps=[timestamp],
                            last_seen=timestamp,
                            first_seen=timestamp
                        )

                        # Remember every id ever seen on this camera
                        if camera_id == 'camera1':
                            self.camera1_tracks.add(track_id)
                        else:
                            self.camera2_tracks.add(track_id)
                    else:
                        # Update existing track (kept in its own name so the
                        # tracker target is not shadowed)
                        record = camera_tracks[track_id]
                        record.detections.append(det)
                        record.timestamps.append(timestamp)
                        record.last_seen = timestamp

                        # Quality rises with continuous tracking and decays
                        # when the gap to the previous update is >= 1 s
                        if len(record.timestamps) > 1:
                            time_gap = timestamp - record.timestamps[-2]
                        else:
                            time_gap = 0
                        if time_gap < 1.0:
                            record.track_quality = min(
                                1.0, record.track_quality + 0.1)
                        else:
                            record.track_quality *= 0.9

                    # Draw bounding box and ID
                    cv2.rectangle(processed_frame,
                                  (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])),
                                  (0, 255, 0), 2)
                    cv2.putText(processed_frame, f"ID: {track_id}",
                                (int(bbox[0]), int(bbox[1]-10)),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

                except Exception as e:
                    self.logger.error(f"Error processing track: {e}")
                    continue

            # 5. Clean up old tracks
            self.clean_old_tracks(camera_id, timestamp, active_tracks)

            # 6. Perform cross-camera matching for Camera 2
            if camera_id == 'camera2':
                self.match_across_cameras(timestamp)

            # 7. Draw additional information
            self.draw_info(processed_frame, camera_id)

            return processed_frame

        except Exception as e:
            self.logger.error(f"Error processing frame: {e}")
            return frame

    def find_matching_detection(self, bbox: np.ndarray, detections: List[Dict]) -> Optional[int]:
        """Find the detection that best matches the tracked bbox"""
        best_iou = 0.4  # Minimum IOU threshold
        best_idx = None

        for i, det in enumerate(detections):
            iou = self.calculate_iou(bbox, det['bbox'])
            if iou > best_iou:
                best_iou = iou
                best_idx = i

        return best_idx

    def calculate_iou(self, box1: np.ndarray, box2: np.ndarray) -> float:
        """Calculate IOU between two bounding boxes"""
        x1 = max(box1[0], box2[0])
        y1 = max(box1[1], box2[1])
        x2 = min(box1[2], box2[2])
        y2 = min(box1[3], box2[3])

        intersection = max(0, x2 - x1) * max(0, y2 - y1)
        area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
        area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
        union = area1 + area2 - intersection

        return intersection / union if union > 0 else 0

    def match_across_cameras(self, current_time: float):
        """Match tracks between cameras with improved reliability"""
        # Get candidate tracks from Camera 1
        camera1_candidates = []
        for track_id, track in self.tracks['camera1'].items():
            if (track.track_quality > 0.7 and
                    len(track.detections) >= self.min_track_length):

                time_gap = current_time - track.last_seen
                if self.min_time_gap <= time_gap <= self.max_time_gap:
                    camera1_candidates.append(track)

        # Get current Camera 2 tracks
        for track_id, track in self.tracks['camera2'].items():
            if track.id not in [m['camera2_id'] for m in self.cross_camera_matches]:
                best_match = None
                best_score = self.reid_threshold

                # Compare with Camera 1 candidates
                for c1_track in camera1_candidates:
                    reid_score = self.compute_reid_similarity(
                        c1_track.detections[-self.min_track_length:],
                        track.detections[-self.min_track_length:]
                    )

                    if reid_score > best_score:
                        best_score = reid_score
                        best_match = c1_track

                if best_match is not None:
                    self.cross_camera_matches.append({
                        'camera1_id': best_match.id,
                        'camera2_id': track.id,
                        'match_time': current_time,
                        'match_score': best_score,
                        'transit_time': current_time - best_match.last_seen
                    })

    def compute_reid_similarity(self, detections1: List[Dict], detections2: List[Dict]) -> float:
        """Compute ReID similarity between two sets of detections"""
        if not detections1 or not detections2:
            return 0.0

        # Compare all pairs of features with temporal weighting
        similarities = []
        for i, det1 in enumerate(detections1):
            for j, det2 in enumerate(detections2):
                sim = np.dot(det1['features'].flatten(),
                             det2['features'].flatten())
                # Weight recent features more heavily
                # Higher weights for recent detections
                weight = (i + 1) * (j + 1)
                similarities.append((sim, weight))

        if not similarities:
            return 0.0

        # Compute weighted average
        total_weight = sum(w for _, w in similarities)
        weighted_sim = sum(s * w for s, w in similarities) / total_weight

        return weighted_sim

    def clean_old_tracks(self, camera_id: str, current_time: float, active_tracks: set):
        """Remove old tracks that are no longer active"""
        for track_id in list(self.tracks[camera_id].keys()):
            track = self.tracks[camera_id][track_id]
            if (track_id not in active_tracks and
                    current_time - track.last_seen > 5.0):  # 5 seconds threshold
                del self.tracks[camera_id][track_id]

    def draw_info(self, frame: np.ndarray, camera_id: str):
        """Draw the door zone, summary counters and latest matches onto ``frame``.

        The frame is modified in place. The last three cross-camera matches
        are listed only on 'camera2' frames and only when matches exist.
        """
        font = cv2.FONT_HERSHEY_SIMPLEX
        line_height = 25

        # Door zone rectangle
        zone = self.door_zones[camera_id]
        top_left, bottom_right = zone[0], zone[1]
        cv2.rectangle(frame,
                      (int(top_left[0]), int(top_left[1])),
                      (int(bottom_right[0]), int(bottom_right[1])),
                      (255, 0, 255), 2)

        # Summary lines, stacked downwards from y = 30
        if camera_id == 'camera1':
            seen_ids = self.camera1_tracks
        else:
            seen_ids = self.camera2_tracks
        stats = (
            f"Camera: {camera_id}",
            f"Active Tracks: {len(self.tracks[camera_id])}",
            f"Total Tracks: {len(seen_ids)}",
            f"Cross-Camera Matches: {len(self.cross_camera_matches)}",
        )
        for row, stat in enumerate(stats):
            cv2.putText(frame, stat, (10, 30 + row * line_height),
                        font, 0.6, (255, 255, 255), 2)

        if camera_id != 'camera2' or not self.cross_camera_matches:
            return

        # Recent cross-camera matches: header at y = 150, entries below it
        header_y = 150
        cv2.putText(frame, "Recent Matches:", (10, header_y),
                    font, 0.6, (255, 255, 0), 2)
        for row, match in enumerate(self.cross_camera_matches[-3:], start=1):
            text = f"C1-{match['camera1_id']} -> C2-{match['camera2_id']} ({match['match_score']:.2f})"
            cv2.putText(frame, text, (10, header_y + row * line_height),
                        font, 0.6, (255, 255, 0), 2)

    def get_tracking_results(self) -> dict:
        """Get comprehensive tracking results"""
        results = {
            'unique_camera1': len(self.camera1_tracks),
            'unique_camera2': len(self.camera2_tracks),
            'cross_camera_matches': len(self.cross_camera_matches),
            'cross_camera_details': self.cross_camera_matches
        }

        # Calculate transit times
        if self.cross_camera_matches:
            transit_times = [m['transit_time']
                             for m in self.cross_camera_matches]
            results.update({
                'average_transit_time': np.mean(transit_times),
                'min_transit_time': np.min(transit_times),
                'max_transit_time': np.max(transit_times)
            })

            # Add quality metrics
            camera1_track_qualities = [
                t.track_quality for t in self.tracks['camera1'].values()]
            camera2_track_qualities = [
                t.track_quality for t in self.tracks['camera2'].values()]

            if camera1_track_qualities:
                results['camera1_avg_quality'] = np.mean(
                    camera1_track_qualities)
            if camera2_track_qualities:
                results['camera2_avg_quality'] = np.mean(
                    camera2_track_qualities)

        return results

    def reset(self):
        """Discard all tracks, seen-id sets and matches, and recreate the trackers.

        The BYTETracker instances are rebuilt with the same settings used in
        ``__init__``; the YOLO and ReID models are left untouched.
        """
        camera_ids = ('camera1', 'camera2')

        self.tracks = {cam: {} for cam in camera_ids}
        self.camera1_tracks = set()
        self.camera2_tracks = set()
        self.cross_camera_matches = []

        # Fresh BYTETrackers so no tracker-internal state survives the reset
        byte_track_settings = dict(
            track_thresh=0.5,
            track_buffer=30,
            match_thresh=0.8,
            frame_rate=6,
        )
        self.trackers = {
            cam: BYTETracker(**byte_track_settings) for cam in camera_ids
        }

# PersonTracker Class

In [126]:
class PersonTracker:
    def __init__(self):
        """Create the tracker: zones, detector and ReID models, empty tracking state."""
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger("PersonTracker")

        # Door rectangles as [(x1, y1), (x2, y2)] per camera (see is_in_door_area)
        self.doors = dict(
            camera1=[(1030, 0), (1700, 560)],
            camera2=[(400, 0), (800, 470)],
        )
        # Counting zones — presumably the same corner layout as the doors; confirm
        self.counting_zones = dict(
            camera1=[(1030, 200), (1700, 300)],
            camera2=[(400, 200), (800, 300)],
        )

        # Models: YOLO detector and the ReID feature extractor
        self.yolo_model = YOLO("yolov8x.pt")
        self.reid_model = self.initialize_reid()

        # Tracking state
        self.tracks: Dict[int, Dict] = {}
        self.next_id = 0
        self.track_features = defaultdict(list)
        self.tracked_individuals = {}
        self.completed_tracks = set()
        self.entry_count = 0
        self.camera1_entries = set()
        self.camera1_to_camera2 = set()

        # Tracking parameters
        self.max_age = 30           # frames to keep track (5 seconds at 6fps)
        self.min_hits = 3           # minimum detections before track confirmation
        self.iou_threshold = 0.3
        self.reid_threshold = 0.7   # minimum combined score in match_detections
        self.max_feature_history = 5

        self.logger.info("Person tracker initialized successfully")

    def initialize_reid(self):
        """Initialize ReID model using OSNet architecture"""
        try:
            class OSNet(nn.Module):
                def __init__(self):
                    super().__init__()
                    # Base convolution layers
                    self.conv1 = nn.Conv2d(
                        3, 64, kernel_size=7, stride=2, padding=3)
                    self.bn1 = nn.BatchNorm2d(64)
                    self.relu = nn.ReLU(inplace=True)
                    self.maxpool = nn.MaxPool2d(
                        kernel_size=3, stride=2, padding=1)

                    # Feature layers
                    self.conv2 = nn.Conv2d(
                        64, 256, kernel_size=3, stride=2, padding=1)
                    self.bn2 = nn.BatchNorm2d(256)
                    self.conv3 = nn.Conv2d(
                        256, 512, kernel_size=3, stride=2, padding=1)
                    self.bn3 = nn.BatchNorm2d(512)

                    # Global pooling
                    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
                    self.feat_dim = 512

                def forward(self, x):
                    x = self.conv1(x)
                    x = self.bn1(x)
                    x = self.relu(x)
                    x = self.maxpool(x)

                    x = self.conv2(x)
                    x = self.bn2(x)
                    x = self.relu(x)

                    x = self.conv3(x)
                    x = self.bn3(x)
                    x = self.relu(x)

                    x = self.avgpool(x)
                    return x.view(x.size(0), -1)

            model = OSNet()
            model.eval()

            if torch.cuda.is_available():
                model = model.cuda()
                self.logger.info("ReID model moved to GPU")
            else:
                self.logger.info("Running ReID model on CPU")

            return model

        except Exception as e:
            self.logger.error(f"Error loading ReID model: {e}")
            raise

    def extract_reid_features(self, frame, bbox, camera_id):
        """Extract an averaged ReID embedding for one detection.

        The crop is brightness/contrast adjusted depending on the camera,
        then four variants (original, horizontal flip, bottom 20 rows
        removed, top 20 rows removed) are embedded and averaged.

        Args:
            frame: BGR image (it is converted with ``COLOR_BGR2RGB``).
            bbox: ``(x1, y1, x2, y2)``; values are truncated to ``int`` and
                the top-left corner is clamped to the frame origin, because
                float values cannot be used as slice bounds and negative
                ones would wrap around to the far side of the image.
            camera_id: ``'camera1'`` selects gain 1.1 / offset +10, any other
                value gain 0.9 / offset -10.

        Returns:
            Array of shape ``(1, feature_dim)``: the mean of the L2-normalised
            per-crop embeddings (the mean itself is not re-normalised).
            ``None`` if the crop is empty or smaller than 20 px in either
            dimension, or if any step raises (the error is logged).
        """
        try:
            x1, y1, x2, y2 = (int(v) for v in bbox)
            x1, y1 = max(x1, 0), max(y1, 0)
            if x2 <= x1 or y2 <= y1:
                return None

            person_img = frame[y1:y2, x1:x2]
            if person_img.size == 0 or person_img.shape[0] < 20 or person_img.shape[1] < 20:
                return None

            # Camera-specific color normalization
            if camera_id == 'camera1':
                person_img = cv2.addWeighted(
                    person_img, 1.1, person_img, 0, 10)  # Enhance contrast
            else:
                person_img = cv2.addWeighted(
                    person_img, 0.9, person_img, 0, -10)  # Reduce contrast

            # Multiple crops for robustness
            crops = [
                person_img,  # Original
                cv2.flip(person_img, 1),  # Horizontal flip
                person_img[:-20, :],  # Bottom 20 rows removed
                person_img[20:, :],   # Top 20 rows removed
            ]

            # Loop-invariant preprocessing constants
            mean = np.array([0.485, 0.456, 0.406])
            std = np.array([0.229, 0.224, 0.225])
            use_cuda = torch.cuda.is_available()

            features_list = []
            for crop in crops:
                # A crop exactly 20 px tall has nothing left after trimming
                # 20 rows; skip it instead of failing inside cv2.resize
                if crop.size == 0:
                    continue

                # Preprocess
                img = cv2.resize(crop, (128, 256))
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # Normalize
                img = img.astype(np.float32) / 255.0
                img = (img - mean) / std

                # Convert to tensor (HWC -> CHW, add batch dimension)
                img = torch.from_numpy(img).permute(
                    2, 0, 1).unsqueeze(0).float()

                if use_cuda:
                    img = img.cuda()

                with torch.no_grad():
                    feat = self.reid_model(img)
                    feat = torch.nn.functional.normalize(feat, p=2, dim=1)
                    features_list.append(feat.cpu().numpy())

            # Average all features
            features = np.mean(features_list, axis=0)
            return features

        except Exception as e:
            self.logger.error(f"Error extracting ReID features: {e}")
            return None

    def is_in_door_area(self, bbox, camera_id):
        """Check if detection is in door area"""
        x1, y1, x2, y2 = bbox
        door_coords = self.doors[camera_id]

        center_x = (x1 + x2) / 2
        center_y = (y1 + y2) / 2

        door_x1, door_y1 = door_coords[0]
        door_x2, door_y2 = door_coords[1]

        return (door_x1 <= center_x <= door_x2 and
                door_y1 <= center_y <= door_y2)

    def calculate_iou(self, box1, box2):
        """Calculate IoU between two bounding boxes"""
        x1 = max(box1[0], box2[0])
        y1 = max(box1[1], box2[1])
        x2 = min(box1[2], box2[2])
        y2 = min(box1[3], box2[3])

        intersection = max(0, x2 - x1) * max(0, y2 - y1)
        area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
        area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
        union = area1 + area2 - intersection

        return intersection / union if union > 0 else 0

    def extract_appearance_features(self, frame, bbox):
        """Extract clothing colour and texture features for one detection.

        The crop is split by height into an upper-body band (from 1/4 to 2/3
        of the crop) and a lower-body band (the remaining third). For each
        non-empty band the result holds:

        * ``'<region>_color'``: concatenated, normalised H (30 bins),
          S (32 bins) and V (32 bins) histograms;
        * ``'<region>_dominant_colors'``: 3 KMeans cluster centres of the BGR
          pixels (only when the band has at least 3 pixels; seeded so the
          same crop always yields the same centres);
        * ``'<region>_texture'``: normalised uniform-LBP histogram
          (radius 3, 24 points).

        Args:
            frame: BGR image.
            bbox: ``(x1, y1, x2, y2)``; values are truncated to ``int`` and
                the top-left corner is clamped to the frame origin, because
                float values cannot be used as slice bounds and negative
                ones would wrap around.

        Returns:
            Dict keyed as described with ``region`` in ``{'upper', 'lower'}``,
            or ``None`` when the crop is empty or any step raises (the error
            is logged).
        """
        try:
            # Imported once per call rather than once per region
            from sklearn.cluster import KMeans
            from skimage.feature import local_binary_pattern

            x1, y1, x2, y2 = (int(v) for v in bbox)
            x1, y1 = max(x1, 0), max(y1, 0)
            person_img = frame[y1:y2, x1:x2]
            if person_img.size == 0:
                return None

            # Split by the height of the actual crop: a box reaching past the
            # frame yields a crop shorter than y2 - y1
            height = person_img.shape[0]
            upper_body = person_img[height//4:2*height//3]
            lower_body = person_img[2*height//3:]

            # LBP settings shared by both regions
            radius = 3
            n_points = 8 * radius

            features = {}

            for region_name, region in [('upper', upper_body), ('lower', lower_body)]:
                if region.size == 0:
                    continue

                # Convert to HSV for better color analysis
                hsv = cv2.cvtColor(region, cv2.COLOR_BGR2HSV)

                # Calculate color histogram
                hist_h = cv2.calcHist([hsv], [0], None, [30], [0, 180])
                hist_s = cv2.calcHist([hsv], [1], None, [32], [0, 256])
                hist_v = cv2.calcHist([hsv], [2], None, [32], [0, 256])

                # Normalize histograms
                hist_h = cv2.normalize(hist_h, hist_h).flatten()
                hist_s = cv2.normalize(hist_s, hist_s).flatten()
                hist_v = cv2.normalize(hist_v, hist_v).flatten()

                features[f'{region_name}_color'] = np.concatenate(
                    [hist_h, hist_s, hist_v])

                # Dominant colors; KMeans needs at least as many samples as
                # clusters, so tiny regions simply omit this feature
                pixels = region.reshape(-1, 3)
                if len(pixels) >= 3:
                    kmeans = KMeans(n_clusters=3, n_init=1, random_state=0)
                    kmeans.fit(pixels)
                    features[f'{region_name}_dominant_colors'] = kmeans.cluster_centers_

                # Texture features using Local Binary Patterns
                gray = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)
                lbp = local_binary_pattern(
                    gray, n_points, radius, method='uniform')
                hist_lbp = np.histogram(
                    lbp, bins=n_points+2, range=(0, n_points+2))[0]
                hist_lbp = hist_lbp.astype('float32') / hist_lbp.sum()
                features[f'{region_name}_texture'] = hist_lbp

            return features

        except Exception as e:
            self.logger.error(f"Error extracting appearance features: {e}")
            return None

    def compare_appearance_features(self, features1, features2):
        """Compare appearance features between two detections"""
        if not features1 or not features2:
            return 0.0

        scores = []

        # Compare color histograms
        for region in ['upper', 'lower']:
            color_key = f'{region}_color'
            if color_key in features1 and color_key in features2:
                color_sim = cv2.compareHist(
                    features1[color_key].reshape(-1, 1),
                    features2[color_key].reshape(-1, 1),
                    cv2.HISTCMP_CORREL
                )
                scores.append(max(0, color_sim))  # Ensure non-negative

            # Compare dominant colors
            dom_key = f'{region}_dominant_colors'
            if dom_key in features1 and dom_key in features2:
                # Calculate minimum color differences between dominant colors
                colors1 = features1[dom_key]
                colors2 = features2[dom_key]
                color_diffs = []
                for c1 in colors1:
                    min_diff = min(np.linalg.norm(c1 - c2) for c2 in colors2)
                    color_diffs.append(min_diff)
                color_sim = np.exp(-np.mean(color_diffs) / 100)
                scores.append(color_sim)

            # Compare texture features
            tex_key = f'{region}_texture'
            if tex_key in features1 and tex_key in features2:
                texture_sim = np.sum(np.minimum(
                    features1[tex_key], features2[tex_key]))
                scores.append(texture_sim)

        return np.mean(scores) if scores else 0.0

    def match_detections(self, detections, timestamp, camera_id=None, frame=None):
        """Assign a track ID to every detection of the current frame.

        Each detection is scored against every known track by combining ReID,
        appearance and spatial similarity. Detections and tracks are then
        paired greedily, best score first, and every track is used at most
        once per frame. Detections whose best score does not exceed
        ``self.reid_threshold`` receive a fresh ID taken from ``self.next_id``.

        Args:
            detections: List of dicts with at least ``'bbox'`` and
                ``'features'`` entries.
            timestamp: Time of the current frame; forwarded to
                ``calculate_spatial_similarity``.
            camera_id: Camera the frame came from. When omitted, every track
                is scored with the cross-camera weights (no spatial term).
            frame: Image the detections were found in. It is only needed to
                extract appearance features; without it the appearance term
                stays 0.

        Returns:
            Dict mapping detection index to track ID.
        """
        if not detections:
            return {}

        if not self.tracks:
            # Reserve the IDs that are handed out here, otherwise the next
            # new track would reuse them.
            first_id = self.next_id
            self.next_id += len(detections)
            return {i: first_id + i for i in range(len(detections))}

        track_ids = list(self.tracks.keys())

        # -inf marks pairs that must never be matched (no PersonInfo yet).
        scores = np.full((len(detections), len(track_ids)), -np.inf)

        for i, det in enumerate(detections):
            det_appearance = None
            if frame is not None:
                det_appearance = self.extract_appearance_features(
                    frame, det['bbox'])

            for j, track_id in enumerate(track_ids):
                # Tracks without a PersonInfo record cannot be scored.
                person_info = self.tracked_individuals.get(track_id)
                if person_info is None:
                    continue

                reid_score = self.calculate_reid_similarity(
                    det['features'], person_info)

                appearance_score = 0.0
                if (det_appearance is not None and
                        hasattr(person_info, 'appearance_features')):
                    appearance_score = self.compare_appearance_features(
                        det_appearance, person_info.appearance_features)

                spatial_score = 0.0
                if person_info.last_position is not None:
                    spatial_score = self.calculate_spatial_similarity(
                        det['bbox'], person_info, timestamp)

                same_camera = (camera_id is not None and
                               person_info.last_camera == camera_id)
                if same_camera:
                    # Same camera: balance all features
                    score = (0.4 * reid_score +
                             0.3 * appearance_score +
                             0.3 * spatial_score)
                else:
                    # Cross-camera: positions are not comparable, rely on
                    # ReID and appearance only
                    score = (0.5 * reid_score +
                             0.5 * appearance_score)

                # A NaN score would poison argmax below; treat it as "no match".
                if not np.isnan(score):
                    scores[i, j] = score

        # Greedy one-to-one assignment: repeatedly take the best remaining
        # pair and retire both its detection (row) and its track (column).
        matched_track_ids = {}
        while True:
            i, j = divmod(int(np.argmax(scores)), len(track_ids))
            if not scores[i, j] > self.reid_threshold:
                break
            matched_track_ids[i] = track_ids[j]
            scores[i, :] = -np.inf
            scores[:, j] = -np.inf

        # Create new tracks for remaining detections
        for det_idx in range(len(detections)):
            if det_idx not in matched_track_ids:
                matched_track_ids[det_idx] = self.next_id
                self.next_id += 1

        return matched_track_ids

    def is_valid_transition(self, person_info, current_time):
        """Tell whether the time since the camera 1 entry is a plausible transit.

        Returns False for people that never entered camera 1; otherwise True
        when the elapsed time lies between 30 and 600 seconds inclusive.
        """
        if person_info.entered_camera1:
            elapsed = current_time - person_info.camera1_entry_time
            # Accept transits of 30 s up to 10 min
            return elapsed >= 30 and elapsed <= 600

        return False

    def clean_old_tracks(self, current_time, active_tracks):
        """Drop tracks that were not matched this frame and exceeded max_age.

        A track is removed, together with its feature history, when it is not
        in ``active_tracks`` and its ``'last_seen'`` value lies more than
        ``self.max_age`` before ``current_time``.
        """
        # Iterate over a snapshot because entries are deleted along the way.
        for tid, info in list(self.tracks.items()):
            if tid in active_tracks:
                continue

            age = current_time - info['last_seen']
            if not (age > self.max_age):
                continue

            del self.tracks[tid]
            self.track_features.pop(tid, None)

    def process_frame(self, frame, camera_id, timestamp):
        """Detect, track and annotate the people visible in one frame.

        Steps: run YOLO, keep person detections above 0.5 confidence that have
        ReID features, match them to tracks, update track and person state,
        record door entries, prune stale tracks and draw the overlays.

        Args:
            frame: Image to process. ``None`` is tolerated.
            camera_id: Key of the camera the frame came from (used for the
                door area, the entry logic and the per-camera bookkeeping).
            timestamp: Time of the frame.

        Returns:
            An annotated copy of ``frame``, or ``None`` when ``frame`` is None.
        """
        if frame is None:
            return None

        processed_frame = frame.copy()
        current_detections = []

        # Run YOLO detection. verbose=False keeps ultralytics from printing a
        # summary for every single frame.
        detections = self.yolo_model(frame, verbose=False)[0]

        # Process YOLO detections
        for det in detections.boxes.data:
            if int(det[5]) != 0:  # keep only the person class
                continue

            conf = float(det[4].cpu().numpy())
            if conf <= 0.5:  # confidence threshold
                continue

            bbox = det[:4].cpu().numpy().astype(int)
            features = self.extract_reid_features(frame, bbox)
            if features is None:
                continue
            if isinstance(features, torch.Tensor):
                features = features.detach().cpu().numpy()

            current_detections.append({
                'bbox': bbox,
                'features': features,
                'conf': conf
            })

        # Match detections with existing tracks. camera_id is passed so the
        # matcher can tell same-camera from cross-camera candidates.
        matched_track_ids = self.match_detections(
            current_detections, timestamp, camera_id)

        active_tracks = set()
        for det_idx, track_id in matched_track_ids.items():
            det = current_detections[det_idx]
            bbox = det['bbox']
            active_tracks.add(track_id)

            # Update track info
            self.tracks[track_id] = {
                'bbox': bbox,
                'last_seen': timestamp,
                'hits': self.tracks.get(track_id, {}).get('hits', 0) + 1
            }

            # Update feature history (bounded to max_feature_history entries)
            history = self.track_features.setdefault(track_id, [])
            history.append(det['features'])
            if len(history) > self.max_feature_history:
                history.pop(0)

            # Only process confirmed tracks
            if self.tracks[track_id]['hits'] < self.min_hits:
                continue

            # Plain ints keep the OpenCV drawing calls independent of the
            # numpy integer type of the box.
            x1, y1, x2, y2 = (int(v) for v in bbox)
            cv2.rectangle(processed_frame, (x1, y1), (x2, y2),
                          (0, 255, 0), 2)
            cv2.putText(processed_frame, f"ID: {track_id}",
                        (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            # Update person info
            if track_id not in self.tracked_individuals:
                self.tracked_individuals[track_id] = PersonInfo(track_id)
            person_info = self.tracked_individuals[track_id]
            person_info.update_position(bbox, timestamp)
            person_info.update_features(det['features'])

            # Record where and when the person was seen. The matcher reads
            # last_camera, and the statistics / CSV export read camera_times.
            person_info.last_camera = camera_id
            seen = person_info.camera_times.setdefault(
                camera_id, {'first': timestamp, 'last': timestamp})
            seen['last'] = timestamp

            # Process entries if in door area
            if (self.is_in_door_area(bbox, camera_id)
                    and not person_info.entry_recorded):
                if camera_id == 'camera1':
                    if track_id not in self.camera1_entries:
                        self.camera1_entries.add(track_id)
                        person_info.entered_camera1 = True
                        person_info.camera1_entry_time = timestamp
                elif camera_id == 'camera2' and person_info.entered_camera1:
                    if track_id not in self.camera1_to_camera2:
                        self.camera1_to_camera2.add(track_id)
                person_info.entry_recorded = True
                self.entry_count += 1

        # Clean up old tracks
        self.clean_old_tracks(timestamp, active_tracks)

        # Draw door areas
        door_coords = self.doors[camera_id]
        cv2.rectangle(processed_frame,
                      (int(door_coords[0][0]), int(door_coords[0][1])),
                      (int(door_coords[1][0]), int(door_coords[1][1])),
                      (255, 0, 255), 2)

        # Draw entry count
        cv2.putText(processed_frame, f"Valid Entries: {self.entry_count}",
                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        return processed_frame

    def process_videos(self, video_dir, output_dir=None):
        """Process every ``Camera_*_*.mp4`` in ``video_dir``, grouped by date.

        Tracking state is reset before each date. The videos of a date are
        processed in sorted order and shown in a window while running; press
        ``q`` to stop all processing or ``p`` to pause. Results of a date are
        saved when at least one of its frames was processed, including the
        partially processed date of an interrupted run.

        Args:
            video_dir: Directory containing the recordings.
            output_dir: Directory for the CSV results. Defaults to
                ``<video_dir>/tracking_results``.

        Returns:
            The dict produced by ``self.analyze_tracks()`` for the state left
            after the last processed date.
        """
        if output_dir is None:
            output_dir = os.path.join(video_dir, 'tracking_results')

        interrupted = False

        try:
            # Group videos by date
            videos_by_date = defaultdict(list)
            for video_file in Path(video_dir).glob("Camera_*_*.mp4"):
                date = self.extract_date_from_filename(video_file)
                if date:
                    videos_by_date[date].append(video_file)

            # Process each date's videos separately
            for date, video_files in sorted(videos_by_date.items()):
                self.reset_tracking()
                self.logger.info(f"\nProcessing videos for date: {date}")

                # Counted per date so the save decision below does not depend
                # on whichever video happened to be opened last.
                frames_for_date = 0

                # Sort to ensure Camera_1 processes first
                for video_file in sorted(video_files):
                    camera_id = "camera1" if "Camera_1" in str(
                        video_file) else "camera2"
                    self.logger.info(f"Processing {video_file}")

                    cap = cv2.VideoCapture(str(video_file))
                    if not cap.isOpened():
                        self.logger.error(
                            f"Error opening video file: {video_file}")
                        cap.release()
                        continue

                    frame_count = 0
                    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                    window_name = f"Camera {camera_id}"
                    window_open = False

                    try:
                        while cap.isOpened():
                            ret, frame = cap.read()
                            if not ret:
                                break

                            frame_count += 1
                            timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
                            processed_frame = self.process_frame(
                                frame, camera_id, timestamp)

                            if processed_frame is not None:
                                cv2.imshow(window_name, processed_frame)
                                window_open = True
                                key = cv2.waitKey(1) & 0xFF
                                if key == ord('q'):
                                    self.logger.info(
                                        "Processing interrupted by user")
                                    interrupted = True
                                    break
                                elif key == ord('p'):  # Pause functionality
                                    self.logger.info(
                                        "Playback paused - press any key to continue")
                                    cv2.waitKey(0)

                            # Log progress
                            if frame_count % 100 == 0:
                                if total_frames > 0:
                                    progress = (
                                        frame_count / total_frames) * 100
                                    self.logger.info(
                                        f"Processed {frame_count}/{total_frames} frames ({progress:.1f}%) from {camera_id}")
                                else:
                                    # Some containers report no frame count;
                                    # avoid dividing by zero.
                                    self.logger.info(
                                        f"Processed {frame_count} frames from {camera_id}")

                    except KeyboardInterrupt:
                        self.logger.info("Processing interrupted by user")
                        interrupted = True
                    except Exception as e:
                        self.logger.error(f"Error processing frame: {e}")
                    finally:
                        cap.release()
                        frames_for_date += frame_count
                        if window_open:
                            try:
                                cv2.destroyWindow(window_name)
                            except cv2.error:
                                # The window may already have been closed.
                                pass

                    if interrupted:
                        break

                # Save results for this date if we have processed frames
                if frames_for_date > 0 and output_dir:
                    self.save_tracking_data(output_dir, date)

                # A user interrupt stops the whole run, not just this date.
                if interrupted:
                    break

        except KeyboardInterrupt:
            self.logger.info("Processing interrupted by user")
        except Exception as e:
            self.logger.error(f"Error during video processing: {e}")
        finally:
            cv2.destroyAllWindows()
            self.logger.info("Video processing completed or interrupted")

        return self.analyze_tracks()

    def reset_tracking(self):
        """Return the tracker to a blank state before a new date is processed."""
        # Empty every per-date container in place.
        for container_name in ('tracks', 'track_features',
                               'tracked_individuals', 'completed_tracks',
                               'camera1_entries', 'camera1_to_camera2'):
            getattr(self, container_name).clear()

        # The entry counter and the ID allocator start over as well.
        self.entry_count, self.next_id = 0, 0
        self.logger.info("Reset tracking state for new date")

    def extract_date_from_filename(self, filename):
        """Extract the date from a file named ``Camera_X_YYYYMMDD.<ext>``.

        Only the file name is inspected, so underscores or dots in parent
        directories cannot influence the result. The text after the last
        underscore (up to the first dot) must be a real calendar date written
        as eight digits.

        Args:
            filename: Path or string naming the video file.

        Returns:
            The date as a ``'YYYYMMDD'`` string, or ``None`` (after logging an
            error) when the name does not end in a valid date.
        """
        # Local import keeps the method independent of notebook cell order.
        from datetime import datetime

        name = Path(str(filename)).name
        date_str = name.rsplit('_', 1)[-1].split('.')[0]

        if len(date_str) == 8 and date_str.isascii() and date_str.isdigit():
            try:
                # Rejects impossible dates such as month 13.
                datetime.strptime(date_str, '%Y%m%d')
                return date_str
            except ValueError:
                pass

        self.logger.error(
            f"Error extracting date from filename: no YYYYMMDD date in {name!r}")
        return None

    def analyze_tracks(self):
        """Summarise the current tracking state as a dictionary of statistics.

        The result holds head counts, entry counts, the IDs that moved from
        camera 1 to camera 2 and a nested ``'tracking_quality'`` dict. The two
        averages inside ``'tracking_quality'`` are only present when at least
        one person has a non-empty position history.
        """
        people = self.tracked_individuals

        # People with a 'camera2' entry in their camera_times
        seen_on_camera2 = {
            pid for pid, info in people.items()
            if hasattr(info, 'camera_times') and 'camera2' in info.camera_times
        }

        quality = {
            'total_tracks': len(people),
            'completed_tracks': len(self.completed_tracks),
            'active_tracks': len(self.tracks)
        }

        results = {
            'total_unique_individuals': len(people),
            'total_entries': self.entry_count,
            'camera1_entries': len(self.camera1_entries),
            'camera2_entries': len(seen_on_camera2),
            'camera1_to_camera2_count': len(self.camera1_to_camera2),
            'camera1_to_camera2_ids': list(self.camera1_to_camera2),
            'transitions': [],
            'tracking_quality': quality
        }

        # Averages consider only people that have a position history
        with_history = [
            info for info in people.values()
            if hasattr(info, 'prev_positions') and info.prev_positions
        ]

        if with_history:
            lengths = [len(info.prev_positions) for info in with_history]
            qualities = [info.track_quality for info in with_history]
            quality['average_track_length'] = sum(lengths) / len(lengths)
            quality['average_track_quality'] = sum(
                qualities) / len(qualities)

        return results

    def save_tracking_data(self, output_dir, date):
        """Write the per-person entries and the summary statistics as CSV.

        Two files are created in ``output_dir`` (which is created if needed):

        * ``entries_<date>.csv`` - one row per person and camera with the
          first and last time recorded in ``camera_times``.
        * ``tracking_summary_<date>.csv`` - one row per numeric metric of
          ``self.analyze_tracks()``. Metrics inside a nested dict are written
          as ``<group>.<metric>``.

        Args:
            output_dir: Target directory.
            date: Label used in the file names and in the ``Date`` column.
        """
        os.makedirs(output_dir, exist_ok=True)

        def format_time(value):
            # Only a missing value becomes an empty cell; 0.0 (the first
            # frame of a video) is a legitimate time and must be written.
            if value is None or value == '':
                return ''
            return f"{value:.2f}"

        def is_number(value):
            return isinstance(value, (int, float))

        # Save entries data
        entries_file = os.path.join(output_dir, f'entries_{date}.csv')
        with open(entries_file, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['Date', 'Person_ID', 'Camera_ID',
                            'Entry_Time', 'Exit_Time'])

            for person_id, person_info in self.tracked_individuals.items():
                if not hasattr(person_info, 'camera_times'):
                    continue
                for camera_id, times in person_info.camera_times.items():
                    writer.writerow([
                        date,
                        person_id,
                        camera_id,
                        format_time(times.get('first')),
                        format_time(times.get('last'))
                    ])

        # Save summary statistics
        summary_file = os.path.join(output_dir, f'tracking_summary_{date}.csv')
        tracking_results = self.analyze_tracks()
        with open(summary_file, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['Date', 'Metric', 'Value'])
            for metric, value in tracking_results.items():
                if is_number(value):
                    writer.writerow([date, metric, value])
                elif isinstance(value, dict):
                    # Flatten one level so grouped metrics are not dropped.
                    for sub_metric, sub_value in value.items():
                        if is_number(sub_value):
                            writer.writerow(
                                [date, f'{metric}.{sub_metric}', sub_value])

        self.logger.info(f"Saved tracking data to {output_dir}")

    def update_person_info(self, person_id, frame, bbox, camera_id, timestamp, features):
        """Refresh one person's record with a new observation.

        Creates the ``PersonInfo`` on first sight, stores the new features and
        position, applies the camera-specific entry / transition rules and
        finally updates the first/last seen times for ``camera_id``.
        ``frame`` is accepted but not used by this method.
        """
        if person_id not in self.tracked_individuals:
            self.tracked_individuals[person_id] = PersonInfo(person_id)
        person_info = self.tracked_individuals[person_id]

        person_info.update_features(features)
        person_info.update_position(bbox, timestamp)

        if camera_id == 'camera1':
            # First time this person is seen inside the camera 1 door area
            first_entry = (not person_info.entered_camera1
                           and self.is_in_door_area(bbox, camera_id))
            if first_entry:
                person_info.entered_camera1 = True
                person_info.camera1_entry_time = timestamp
                self.camera1_entries.add(person_id)
                self.logger.info(f"New entry in Camera 1: ID {person_id}")

        elif camera_id == 'camera2' and person_info.entered_camera1:
            transit_time = timestamp - person_info.camera1_entry_time
            # Accept transits of 30 s up to 5 min.
            # NOTE(review): is_valid_transition allows up to 600 s - confirm
            # which window is intended.
            plausible = 30 <= transit_time <= 300
            if plausible and person_id not in self.camera1_to_camera2:
                self.camera1_to_camera2.add(person_id)
                self.logger.info(
                    f"Valid transition to Camera 2: ID {person_id}")

        # Keep the first and the most recent time per camera
        if camera_id in person_info.camera_times:
            person_info.camera_times[camera_id]['last'] = timestamp
        else:
            person_info.camera_times[camera_id] = {
                'first': timestamp,
                'last': timestamp
            }

# PersonInfo

In [124]:
class PersonInfo:
    """Per-person tracking record: appearance, motion and entry state.

    The record keeps short, bounded histories of ReID features, image patches
    and positions, plus the flags and timestamps used to decide whether the
    person entered through camera 1 and later showed up on camera 2.
    """

    # Bounds of the rolling histories kept per person.
    MAX_FEATURES = 5
    MAX_APPEARANCES = 5
    MAX_POSITIONS = 60  # 10 seconds at 6fps

    def __init__(self, person_id):
        # Basic identification
        self.person_id = person_id

        # Appearance tracking
        self.features = []  # ReID features history
        self.appearances = []  # Image patches history

        # Position tracking
        self.prev_positions = []  # List of (position, timestamp) tuples
        self.last_position = None
        self.last_seen = None
        self.last_camera = None
        self.last_bbox = None
        self.track_quality = 1.0
        self.consecutive_misses = 0

        # Entry/Exit tracking
        self.entry_recorded = False
        self.exit_recorded = False
        self.entered_camera1 = False
        self.has_exited_camera1 = False
        self.camera1_entry_time = None
        self.camera1_exit_time = None

        # Camera timestamps
        self.camera_times = {}  # {camera_id: {'first': timestamp, 'last': timestamp}}

        # Track status
        self.hits = 0  # Number of times detected
        self.time_since_update = 0

    def update_features(self, new_features):
        """Append an L2-normalised copy of ``new_features`` to the history.

        Vectors that cannot be normalised (empty, all zeros, or containing
        NaN/inf) are ignored, because dividing by their norm would store NaNs
        that corrupt every later similarity computed from the history. Only
        the most recent ``MAX_FEATURES`` vectors are kept.
        """
        feat = np.array(new_features).flatten()
        norm = np.linalg.norm(feat)
        if not np.isfinite(norm) or norm == 0:
            return

        self.features.append(feat / norm)
        if len(self.features) > self.MAX_FEATURES:
            self.features.pop(0)

    def update_appearance(self, image):
        """Store a copy of an image patch; ``None`` and empty patches are ignored.

        Only the most recent ``MAX_APPEARANCES`` patches are kept.
        """
        if image is None or image.size == 0:
            return

        self.appearances.append(image.copy())
        if len(self.appearances) > self.MAX_APPEARANCES:
            self.appearances.pop(0)

    def update_position(self, bbox, timestamp):
        """Record a new observation, or a miss when ``bbox`` is None.

        A miss increments ``consecutive_misses`` and multiplies
        ``track_quality`` by 0.9. A hit stores the box centre, resets the miss
        counter and raises ``track_quality`` by 0.1 (capped at 1.0).

        Returns:
            True when a position was recorded, False for a miss.
        """
        if bbox is None:
            self.consecutive_misses += 1
            self.track_quality *= 0.9
            return False

        center = ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)

        # Store position history
        self.prev_positions.append((center, timestamp))
        if len(self.prev_positions) > self.MAX_POSITIONS:
            self.prev_positions.pop(0)

        # Update tracking info
        self.last_position = center
        self.last_bbox = bbox
        self.last_seen = timestamp
        self.hits += 1
        self.consecutive_misses = 0

        # Update track quality
        self.track_quality = min(1.0, self.track_quality + 0.1)
        self.time_since_update = 0

        return True

    def get_velocity(self):
        """Return ``(vx, vy)`` from the two most recent positions.

        Returns None when fewer than two positions are stored or when the two
        positions do not have increasing timestamps.
        """
        if len(self.prev_positions) < 2:
            return None

        (prev_center, prev_time), (curr_center, curr_time) = \
            self.prev_positions[-2:]
        time_diff = curr_time - prev_time
        if time_diff <= 0:
            return None

        return ((curr_center[0] - prev_center[0]) / time_diff,
                (curr_center[1] - prev_center[1]) / time_diff)

    def predict_position(self, timestamp):
        """Extrapolate the position at ``timestamp`` with constant velocity.

        Returns None when the person has never been seen, and the last known
        position when no velocity can be computed.
        """
        if self.last_position is None or self.last_seen is None:
            return None

        velocity = self.get_velocity()
        if velocity is None:
            return self.last_position

        time_gap = timestamp - self.last_seen
        predicted_x = self.last_position[0] + velocity[0] * time_gap
        predicted_y = self.last_position[1] + velocity[1] * time_gap

        return (predicted_x, predicted_y)

    def get_track_status(self):
        """Return a snapshot dict of the track's identity, quality and flags."""
        return {
            'id': self.person_id,
            'quality': self.track_quality,
            'hits': self.hits,
            'misses': self.consecutive_misses,
            'last_camera': self.last_camera,
            'time_since_update': self.time_since_update,
            'entered_camera1': self.entered_camera1,
            'has_exited_camera1': self.has_exited_camera1
        }

    def is_track_valid(self):
        """Return True while quality > 0.3, hits >= 3 and misses <= 10."""
        return (self.track_quality > 0.3 and
                self.hits >= 3 and
                self.consecutive_misses <= 10)

# Main

In [120]:
def main(video_files=None):
    """Run the multi-camera tracker over a list of videos and show the frames.

    Args:
        video_files: Iterable of video paths. Files whose path contains
            ``Camera_1`` are treated as camera 1, all others as camera 2.
            When omitted, a notebook-global ``video_files`` is used if one
            exists (the behaviour this function originally relied on);
            otherwise nothing is processed.

    Returns:
        Whatever ``tracker.get_tracking_results()`` returns.
    """
    if video_files is None:
        video_files = globals().get('video_files') or []

    # Initialize tracker
    tracker = ImprovedMultiCameraTracker()
    stop_requested = False

    try:
        # Process videos
        for video_file in video_files:
            camera_id = 'camera1' if 'Camera_1' in str(video_file) else 'camera2'
            cap = cv2.VideoCapture(str(video_file))

            try:
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break

                    timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
                    processed_frame = tracker.process_frame(
                        frame, camera_id, timestamp)

                    if processed_frame is not None:
                        cv2.imshow(f"Camera {camera_id}", processed_frame)
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            stop_requested = True
                            break
            finally:
                # Release the capture even when processing a frame fails.
                cap.release()

            # 'q' quits the whole run instead of skipping to the next video.
            if stop_requested:
                break
    finally:
        cv2.destroyAllWindows()

    # Get results
    results = tracker.get_tracking_results()
    return results

In [None]:
# Invoke main() only when this code runs as the top-level module, not when it
# is imported.
if __name__ == "__main__":
    main()

In [127]:
# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

try:
    # Initialize tracker
    tracker = ImprovedPersonTracker()
    logger.info("Person tracker initialized successfully")

    # Location of the recordings. Set the TRACKING_VIDEO_DIR environment
    # variable to run on another machine; the original path is the fallback.
    video_dir = os.environ.get(
        'TRACKING_VIDEO_DIR',
        os.path.join('C:\\Users', 'mc1159', 'OneDrive - University of Exeter',
                     'Documents', 'VISIONARY', 'Durham Experiment', 'test_data'))

    # Fail early: creating the output directory below would otherwise create
    # a missing video directory and the run would silently report zeros.
    if not os.path.isdir(video_dir):
        raise FileNotFoundError(video_dir)

    output_dir = os.path.join(video_dir, 'tracking_results')
    os.makedirs(output_dir, exist_ok=True)

    # Process videos; process_videos returns the analyze_tracks() summary
    results = tracker.process_videos(video_dir, output_dir)

    # Print tracking results
    print("\nTracking Results:")
    print(f"Total unique individuals: {results['total_unique_individuals']}")
    print(f"Total entries in Camera 1: {results['camera1_entries']}")
    print(f"Total entries in Camera 2: {results['camera2_entries']}")
    print(
        f"People moving from Camera 1 to Camera 2: {results['camera1_to_camera2_count']}")

    if 'average_transit_time' in results:
        print(
            f"Average transit time between cameras: {results['average_transit_time']:.2f} seconds")

    # Log completion
    logger.info("Video processing completed successfully")

except FileNotFoundError as e:
    logger.error(f"Error: Video directory or files not found - {e}")
except Exception as e:
    logger.error(f"Error during tracking: {e}")
    raise

2025-01-02 22:56:25,039 - INFO - ReID model moved to GPU
2025-01-02 22:56:25,039 - INFO - Person tracker initialized successfully
2025-01-02 22:56:25,039 - INFO - Person tracker initialized successfully
2025-01-02 22:56:25,049 - INFO - Reset tracking state for new date
2025-01-02 22:56:25,050 - INFO - 
Processing videos for date: 20241101
2025-01-02 22:56:25,051 - INFO - Processing C:\Users\mc1159\OneDrive - University of Exeter\Documents\VISIONARY\Durham Experiment\test_data\Camera_1_20241101.mp4



0: 384x640 6 persons, 3 chairs, 2 potted plants, 1 dining table, 1 tv, 3 laptops, 1 keyboard, 100.6ms
Speed: 0.0ms preprocess, 100.6ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 2 chairs, 1 potted plant, 2 tvs, 3 laptops, 1 keyboard, 98.0ms
Speed: 3.0ms preprocess, 98.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 4 chairs, 2 potted plants, 2 dining tables, 2 tvs, 4 laptops, 1 keyboard, 93.8ms
Speed: 3.0ms preprocess, 93.8ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 3 chairs, 2 potted plants, 2 dining tables, 1 tv, 4 laptops, 1 keyboard, 83.5ms
Speed: 0.0ms preprocess, 83.5ms inference, 16.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 4 chairs, 2 potted plants, 2 dining tables, 1 tv, 5 laptops, 1 keyboard, 95.5ms
Speed: 3.4ms preprocess, 95.5ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 

2025-01-02 22:56:42,397 - INFO - Processed 100/3596 frames (2.8%) from camera1



0: 384x640 10 persons, 5 chairs, 1 potted plant, 2 dining tables, 3 tvs, 1 laptop, 121.1ms
Speed: 0.0ms preprocess, 121.1ms inference, 3.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 4 chairs, 1 potted plant, 1 dining table, 3 tvs, 1 laptop, 110.4ms
Speed: 4.1ms preprocess, 110.4ms inference, 5.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 persons, 3 chairs, 1 potted plant, 2 dining tables, 3 tvs, 2 laptops, 103.3ms
Speed: 4.0ms preprocess, 103.3ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 3 chairs, 1 potted plant, 1 dining table, 3 tvs, 2 laptops, 104.4ms
Speed: 1.7ms preprocess, 104.4ms inference, 2.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 5 chairs, 1 potted plant, 2 dining tables, 2 tvs, 3 laptops, 93.6ms
Speed: 4.0ms preprocess, 93.6ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 1 bottle, 3 chairs, 1 potted pl

2025-01-02 22:57:00,978 - INFO - Processed 200/3596 frames (5.6%) from camera1



0: 384x640 9 persons, 3 chairs, 2 dining tables, 1 tv, 1 laptop, 106.1ms
Speed: 0.0ms preprocess, 106.1ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 persons, 3 chairs, 3 dining tables, 1 tv, 1 laptop, 94.9ms
Speed: 12.0ms preprocess, 94.9ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 3 chairs, 1 tv, 1 laptop, 99.9ms
Speed: 0.0ms preprocess, 99.9ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 3 chairs, 1 potted plant, 2 tvs, 3 laptops, 100.2ms
Speed: 0.0ms preprocess, 100.2ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 2 chairs, 1 potted plant, 1 tv, 3 laptops, 88.3ms
Speed: 3.0ms preprocess, 88.3ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 3 chairs, 1 potted plant, 1 dining table, 1 tv, 3 laptops, 86.9ms
Speed: 5.0ms preprocess, 86.9ms inference, 0.0ms postprocess per image 

2025-01-02 22:57:18,157 - INFO - Processed 300/3596 frames (8.3%) from camera1



0: 384x640 11 persons, 5 chairs, 1 potted plant, 1 dining table, 3 tvs, 2 laptops, 100.2ms
Speed: 0.0ms preprocess, 100.2ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 persons, 5 chairs, 1 potted plant, 1 dining table, 3 tvs, 1 laptop, 94.4ms
Speed: 3.0ms preprocess, 94.4ms inference, 5.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 5 chairs, 1 potted plant, 2 dining tables, 1 tv, 93.9ms
Speed: 1.9ms preprocess, 93.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 persons, 5 chairs, 1 potted plant, 1 dining table, 1 tv, 1 laptop, 103.9ms
Speed: 0.0ms preprocess, 103.9ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 persons, 5 chairs, 1 potted plant, 1 dining table, 1 tv, 1 laptop, 87.9ms
Speed: 3.0ms preprocess, 87.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 persons, 4 chairs, 1 potted plant, 1 dining table, 2 tvs, 

2025-01-02 22:57:35,818 - INFO - Processed 400/3596 frames (11.1%) from camera1



0: 384x640 8 persons, 3 chairs, 1 dining table, 3 tvs, 3 laptops, 94.7ms
Speed: 15.6ms preprocess, 94.7ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 3 chairs, 1 dining table, 2 tvs, 3 laptops, 96.3ms
Speed: 4.0ms preprocess, 96.3ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 4 chairs, 1 dining table, 2 tvs, 3 laptops, 94.6ms
Speed: 8.3ms preprocess, 94.6ms inference, 3.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 4 chairs, 1 dining table, 2 tvs, 2 laptops, 94.2ms
Speed: 3.0ms preprocess, 94.2ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 4 chairs, 1 dining table, 2 tvs, 2 laptops, 81.5ms
Speed: 8.5ms preprocess, 81.5ms inference, 18.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 persons, 3 chairs, 1 dining table, 2 tvs, 1 laptop, 94.0ms
Speed: 3.0ms preprocess, 94.0ms inference, 0.0ms postprocess per imag

2025-01-02 22:57:53,741 - INFO - Processed 500/3596 frames (13.9%) from camera1



0: 384x640 9 persons, 2 chairs, 1 potted plant, 2 dining tables, 2 tvs, 2 laptops, 1 remote, 83.3ms
Speed: 16.6ms preprocess, 83.3ms inference, 15.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 2 chairs, 1 potted plant, 2 dining tables, 1 tv, 2 laptops, 1 remote, 95.1ms
Speed: 4.0ms preprocess, 95.1ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 2 chairs, 1 potted plant, 2 dining tables, 1 tv, 2 laptops, 1 remote, 97.3ms
Speed: 4.0ms preprocess, 97.3ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 2 chairs, 1 potted plant, 2 dining tables, 1 tv, 2 laptops, 1 remote, 97.0ms
Speed: 11.6ms preprocess, 97.0ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 2 chairs, 1 potted plant, 2 dining tables, 1 tv, 1 laptop, 96.3ms
Speed: 2.1ms preprocess, 96.3ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons,

2025-01-02 22:58:11,342 - INFO - Processed 600/3596 frames (16.7%) from camera1



0: 384x640 11 persons, 3 chairs, 1 potted plant, 1 tv, 3 laptops, 197.9ms
Speed: 4.0ms preprocess, 197.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 persons, 2 chairs, 1 potted plant, 1 tv, 3 laptops, 99.9ms
Speed: 4.0ms preprocess, 99.9ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 2 chairs, 1 potted plant, 1 dining table, 1 tv, 1 laptop, 1 remote, 97.0ms
Speed: 6.0ms preprocess, 97.0ms inference, 5.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 6 chairs, 1 potted plant, 1 dining table, 3 tvs, 3 laptops, 97.3ms
Speed: 4.0ms preprocess, 97.3ms inference, 3.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 persons, 5 chairs, 3 tvs, 1 laptop, 1 remote, 99.1ms
Speed: 4.0ms preprocess, 99.1ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 persons, 8 chairs, 1 potted plant, 2 dining tables, 3 tvs, 5 laptops, 98.8ms
Speed: 5.0ms prep

2025-01-02 22:58:30,258 - INFO - Processed 700/3596 frames (19.5%) from camera1



0: 384x640 8 persons, 5 chairs, 3 dining tables, 1 tv, 91.3ms
Speed: 1.5ms preprocess, 91.3ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 5 chairs, 1 dining table, 1 tv, 1 laptop, 91.3ms
Speed: 5.0ms preprocess, 91.3ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 5 chairs, 1 dining table, 1 tv, 1 laptop, 92.4ms
Speed: 3.0ms preprocess, 92.4ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 4 chairs, 1 potted plant, 1 dining table, 2 tvs, 95.6ms
Speed: 3.7ms preprocess, 95.6ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 1 bowl, 5 chairs, 2 potted plants, 1 dining table, 2 tvs, 1 laptop, 94.0ms
Speed: 5.0ms preprocess, 94.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 6 chairs, 2 potted plants, 1 dining table, 2 tvs, 1 laptop, 93.2ms
Speed: 4.9ms preprocess, 93.2ms inference

2025-01-02 22:58:41,821 - INFO - Processing interrupted by user
2025-01-02 22:58:41,861 - INFO - Saved tracking data to C:\Users\mc1159\OneDrive - University of Exeter\Documents\VISIONARY\Durham Experiment\test_data\tracking_results
2025-01-02 22:58:41,866 - INFO - Video processing completed or interrupted
2025-01-02 22:58:41,872 - INFO - Video processing completed successfully



Tracking Results:
Total unique individuals: 10
Total entries in Camera 1: 4
Total entries in Camera 2: 0
People moving from Camera 1 to Camera 2: 0
