# CCTV Analysis
This notebook demonstrates the capabilities of our CCTV analysis system for multi-camera person tracking and analysis.


## Initial Setup

In [1]:
import os
import cv2
import numpy as np
from ultralytics import YOLO
import torchreid
import torch
from collections import defaultdict
import os
import time
from datetime import datetime
from scipy.spatial.distance import cdist
from scipy.optimize import linear_sum_assignment
import scipy.spatial.distance as distance
from pathlib import Path


Run 'pip install torchvision==0.19' to fix torchvision or 'pip install -U torch torchvision' to update both.
For a full compatibility table see https://github.com/pytorch/vision#installation


  Referenced from: <CAF361F5-1CAC-3EBE-9FC4-4B823D275CAA> /Users/chenm/miniconda3/envs/reid-env/lib/python3.8/site-packages/torchvision/image.so
  warn(


In [2]:
# Set the working directory
working_directory = os.path.join(os.path.expanduser("~"), "Library", "CloudStorage", 
                              "OneDrive-UniversityofExeter", "Documents", "VISIONARY", 
                              "Durham Experiment", "test_data")
# working_directory = os.path.join(os.path.expanduser("~"), "Library", "CloudStorage", 
#                               "OneDrive-UniversityofExeter", "Documents", "VISIONARY", 
#                               "Durham Experiment", "processed_data_3")
os.chdir(working_directory)

# Verify the current working directory
print(f"Current working directory: {os.getcwd()}")

Current working directory: /Users/chenm/Library/CloudStorage/OneDrive-UniversityofExeter/Documents/VISIONARY/Durham Experiment/test_data


## Preprocessing videos

In [3]:
# Get the .mp4 files in the folder
mp4_files = list(Path(working_directory).glob("*.mp4"))
# Print the .mp4 files without showing the parent directories
print([file.name for file in mp4_files])

['Camera_2_20241101.mp4', 'Camera_1_20241101.mp4']


In [4]:
# Filter the files for Camera_1 and Camera_2
camera_1_files = [file for file in mp4_files if file.name.startswith("Camera_1_")]
camera_2_files = [file for file in mp4_files if file.name.startswith("Camera_2_")]

# Sort the files by date extracted from the filename
camera_1_files_sorted = sorted(camera_1_files, key=lambda x: x.stem.split('_')[-1])
camera_2_files_sorted = sorted(camera_2_files, key=lambda x: x.stem.split('_')[-1])

print("Camera 1 files sorted by date:", [file.name for file in camera_1_files_sorted])
print("Camera 2 files sorted by date:", [file.name for file in camera_2_files_sorted])

del mp4_files, camera_1_files, camera_2_files

Camera 1 files sorted by date: ['Camera_1_20241101.mp4']
Camera 2 files sorted by date: ['Camera_2_20241101.mp4']


In [5]:
video_path = camera_1_files_sorted[0]

## Initate Video Analysis

## Samples

In [6]:
class TrackingState:
    ACTIVE = 'active'          # Fully visible
    OCCLUDED = 'occluded'      # Temporarily hidden
    TENTATIVE = 'tentative'    # New track
    LOST = 'lost'              # Missing too long

In [16]:
class TrackingState:
    ACTIVE = 'active'          # Fully visible
    OCCLUDED = 'occluded'      # Temporarily hidden
    TENTATIVE = 'tentative'    # New track
    LOST = 'lost'              # Missing too long

class PersonTracker:
    def __init__(self, video_path, output_dir="tracked_persons"):
        # Initialize YOLO model
        self.detector = YOLO("yolo11x.pt")
        
        # Initialize ReID model
        self.reid_model = torchreid.models.build_model(
            name='osnet_x1_0',
            num_classes=1000,
            pretrained=True
        )
        self.reid_model = self.reid_model.cuda() if torch.cuda.is_available() else self.reid_model
        self.reid_model.eval()
        
        # Initialize video capture
        self.cap = cv2.VideoCapture(video_path)
        self.frame_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.frame_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.fps = int(self.cap.get(cv2.CAP_PROP_FPS))
        
        # Create output directory
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)
        
        # Initialize tracking variables
        self.active_tracks = {}  # Currently active tracks
        self.person_features = {}  # Historical features for each ID
        self.person_timestamps = {}  # Timestamp information
        self.next_id = 0
        
        # Tracking parameters
        self.similarity_threshold = 0.7
        self.max_disappeared = self.fps * 2  # Max frames to keep track without detection
        self.min_detection_confidence = 0.5
        self.feature_weight = 0.4   # Weight for ReID features in matching
        self.position_weight = 0.3  # Weight for absolute position (IoU)
        self.motion_weight = 0.3    # Weight for relative motion prediction
        
    def extract_features(self, person_crop):
        """Extract ReID features from person crop"""
        try:
            # Preprocess image for ReID
            img = cv2.resize(person_crop, (128, 256))
            img = torch.from_numpy(img).float()
            img = img.permute(2, 0, 1).unsqueeze(0)
            if torch.cuda.is_available():
                img = img.cuda()
                
            # Extract features
            with torch.no_grad():
                features = self.reid_model(img)
            return features.cpu().numpy()
        except Exception as e:
            print(f"Error extracting features: {e}")
            return None
            
    def calculate_box_center(self, box):
        """Calculate center point of a bounding box"""
        return [(box[0] + box[2]) / 2, (box[1] + box[3]) / 2]

    def calculate_velocity(self, current_box, previous_box):
        """Calculate velocity vector between two boxes"""
        current_center = self.calculate_box_center(current_box)
        previous_center = self.calculate_box_center(previous_box)
        return [current_center[0] - previous_center[0], 
                current_center[1] - previous_center[1]]

    def predict_next_position(self, box, velocity):
        """Predict next position based on current position and velocity"""
        center = self.calculate_box_center(box)
        predicted_center = [center[0] + velocity[0], center[1] + velocity[1]]
        width = box[2] - box[0]
        height = box[3] - box[1]
        return [predicted_center[0] - width/2, predicted_center[1] - height/2,
                predicted_center[0] + width/2, predicted_center[1] + height/2]

    def calculate_motion_similarity(self, current_boxes, tracked_boxes, tracked_velocities):
        """Calculate motion-based similarity"""
        n_detections = len(current_boxes)
        n_tracks = len(tracked_boxes)
        motion_sim = np.zeros((n_detections, n_tracks))
        
        for i, current_box in enumerate(current_boxes):
            current_center = self.calculate_box_center(current_box)
            for j, (tracked_box, velocity) in enumerate(zip(tracked_boxes, tracked_velocities)):
                # Predict where the tracked box should be
                predicted_box = self.predict_next_position(tracked_box, velocity)
                predicted_center = self.calculate_box_center(predicted_box)
                
                # Calculate distance between prediction and actual position
                distance = np.sqrt(
                    (current_center[0] - predicted_center[0])**2 +
                    (current_center[1] - predicted_center[1])**2
                )
                # Convert distance to similarity (closer = more similar)
                motion_sim[i, j] = np.exp(-distance / 100.0)  # 100 is a scaling factor
                
        return motion_sim

    def detect_occlusion(self, box1, box2):
        """
        Detect if box1 is occluded by box2.
        Returns: 
            - is_occluded (bool): True if box1 is occluded by box2
            - occlusion_score (float): Degree of occlusion (0 to 1)
        """
        # Calculate IoU
        iou = self.calculate_iou(box1, box2)
        
        # Calculate centers and areas
        center1 = self.calculate_box_center(box1)
        center2 = self.calculate_box_center(box2)
        area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
        area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
        
        # Calculate vertical position (y-coordinate)
        y1 = box1[3]  # bottom of box1
        y2 = box2[3]  # bottom of box2
        
        # Factors that suggest box1 is behind box2:
        # 1. Significant overlap
        overlap_factor = 1.0 if iou > 0.3 else 0.0
        
        # 2. Box2 is closer to camera (generally larger and lower in frame)
        size_factor = 1.0 if area2 > area1 else 0.0
        position_factor = 1.0 if y2 > y1 else 0.0
        
        # 3. Box1 is partially contained within box2
        contained_horizontally = (
            (box1[0] > box2[0] and box1[0] < box2[2]) or
            (box1[2] > box2[0] and box1[2] < box2[2])
        )
        contained_vertically = (
            (box1[1] > box2[1] and box1[1] < box2[3]) or
            (box1[3] > box2[1] and box1[3] < box2[3])
        )
        containment_factor = 1.0 if (contained_horizontally and contained_vertically) else 0.0
        
        # Calculate occlusion score (weighted combination of factors)
        occlusion_score = (
            0.4 * overlap_factor +
            0.2 * size_factor +
            0.2 * position_factor +
            0.2 * containment_factor
        )
        
        # Determine if occluded based on score threshold
        is_occluded = occlusion_score > 0.5
        
        return is_occluded, occlusion_score

    def calculate_similarity_matrix(self, current_features, current_boxes, tracked_features, tracked_boxes):
        """Calculate similarity matrix combining appearance, position, and motion"""
        n_detections = len(current_features)
        n_tracks = len(tracked_features)
        
        if n_detections == 0 or n_tracks == 0:
            return np.array([])
            
        # Calculate appearance similarity
        appearance_sim = 1 - distance.cdist(
            np.array([f.flatten() for f in current_features]), 
            np.array([f.flatten() for f in tracked_features]), 
            metric='cosine'
        )
        
        # Calculate position similarity using IoU
        position_sim = np.zeros((n_detections, n_tracks))
        for i, box1 in enumerate(current_boxes):
            for j, box2 in enumerate(tracked_boxes):
                position_sim[i, j] = self.calculate_iou(box1, box2)
        
        # Calculate velocities for tracked objects
        tracked_velocities = []
        for track_id in list(self.active_tracks.keys())[:n_tracks]:
            if 'previous_box' in self.active_tracks[track_id]:
                velocity = self.calculate_velocity(
                    self.active_tracks[track_id]['box'],
                    self.active_tracks[track_id]['previous_box']
                )
            else:
                velocity = [0, 0]  # No velocity for new tracks
            tracked_velocities.append(velocity)
        
        # Calculate motion similarity
        motion_sim = self.calculate_motion_similarity(current_boxes, tracked_boxes, tracked_velocities)
        
        # Combine all similarities
        similarity_matrix = (
            self.feature_weight * appearance_sim + 
            self.position_weight * position_sim +
            self.motion_weight * motion_sim
        )
        
        return similarity_matrix
    
    @staticmethod
    def calculate_iou(box1, box2):
        """Calculate IoU between two boxes"""
        x1 = max(box1[0], box2[0])
        y1 = max(box1[1], box2[1])
        x2 = min(box1[2], box2[2])
        y2 = min(box1[3], box2[3])
        
        intersection = max(0, x2 - x1) * max(0, y2 - y1)
        area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
        area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
        union = area1 + area2 - intersection
        
        return intersection / (union + 1e-6)
    
    def update_feature_history(self, track_id, features):
        """Maintain rolling window of recent features"""
        self.appearance_history[track_id].append(features)
        if len(self.appearance_history[track_id]) > self.max_history_length:
            self.appearance_history[track_id].pop(0)
            
        # Update feature representation using exponential moving average
        if track_id in self.person_features:
            alpha = 0.7  # Weight for historical features
            current_features = self.person_features[track_id]
            updated_features = alpha * current_features + (1 - alpha) * features
            self.person_features[track_id] = updated_features
        else:
            self.person_features[track_id] = features

    def recover_lost_tracklet(self, features, current_box, frame_time):
        """Attempt to recover lost tracks"""
        best_match_id = None
        best_match_score = 0
        
        # Check recently lost tracks
        lost_tracks_to_remove = []
        for lost_id, lost_info in self.lost_tracks.items():
            # Skip if lost track is too old
            if frame_time - lost_info['last_seen'] > self.max_lost_age:
                lost_tracks_to_remove.append(lost_id)
                continue
                
            # Calculate appearance similarity
            lost_features = lost_info['features']
            appearance_sim = 1 - distance.cosine(features.flatten(), lost_features.flatten())
            
            # Calculate position similarity based on predicted movement
            predicted_box = self.predict_next_position(
                lost_info['box'],
                lost_info['velocity']
            )
            position_sim = self.calculate_iou(current_box, predicted_box)
            
            # Combine similarities
            match_score = (
                self.feature_weight * appearance_sim +
                self.position_weight * position_sim
            )
            
            # Check temporal consistency
            if match_score > 0.6 and match_score > best_match_score:
                best_match_score = match_score
                best_match_id = lost_id
        
        # Clean up old lost tracks
        for lost_id in lost_tracks_to_remove:
            del self.lost_tracks[lost_id]
            
        return best_match_id if best_match_score > 0.6 else None

    def update_tracks(self, frame, detections, frame_time):
        """Update tracks with new detections"""
        current_boxes = []
        current_features = []
        
        # Process new detections
        for box, conf in detections:
            if conf < self.min_detection_confidence:
                continue
                
            x1, y1, x2, y2 = map(int, box)
            person_crop = frame[y1:y2, x1:x2]
            if person_crop.size == 0:
                continue
                
            features = self.extract_features(person_crop)
            if features is not None:
                current_boxes.append([x1, y1, x2, y2])
                current_features.append(features)
        
        # Get tracked boxes and features
        tracked_boxes = []
        tracked_features = []
        tracked_ids = []
        
        for track_id, track_info in self.active_tracks.items():
            tracked_boxes.append(track_info['box'])
            tracked_features.append(track_info['features'])
            tracked_ids.append(track_id)
        
        # Calculate similarity matrix
        similarity_matrix = self.calculate_similarity_matrix(
            current_features, current_boxes,
            tracked_features, tracked_boxes
        )
        
        # Perform matching
        matched_indices = []
        if similarity_matrix.size > 0:
            row_ind, col_ind = linear_sum_assignment(-similarity_matrix)
            matched_indices = list(zip(row_ind, col_ind))
        
        # Process matches
        unmatched_detections = []
        matched_track_ids = set()
        
        for detection_idx, track_idx in matched_indices:
            similarity = similarity_matrix[detection_idx, track_idx]
            if similarity >= self.similarity_threshold:
                track_id = tracked_ids[track_idx]
                matched_track_ids.add(track_id)
                
                # Update track
                self.active_tracks[track_id].update({
                    'previous_box': self.active_tracks[track_id]['box'],
                    'box': current_boxes[detection_idx],
                    'features': current_features[detection_idx],
                    'last_seen': frame_time,
                    'disappeared': 0
                })
                
                # Update timestamps
                self.person_timestamps[track_id]['last_appearance'] = frame_time
                
                # Save person image
                self.save_person_image(track_id, 
                    frame[current_boxes[detection_idx][1]:current_boxes[detection_idx][3],
                          current_boxes[detection_idx][0]:current_boxes[detection_idx][2]])
            else:
                unmatched_detections.append(detection_idx)
        
        # Add unmatched detections as new tracks
        for detection_idx in range(len(current_features)):
            if not any(detection_idx == m[0] for m in matched_indices):
                new_id = self.next_id
                self.next_id += 1
                
                self.active_tracks[new_id] = {
                    'state': TrackingState.TENTATIVE,
                    'occlusion_counter': 0,
                    'box': current_boxes[detection_idx],
                    'features': current_features[detection_idx],
                    'last_seen': frame_time,
                    'disappeared': 0,
                    'velocity': [0, 0]  # Initialize velocity for new tracks
                }
                
                self.person_features[new_id] = [current_features[detection_idx]]
                self.person_timestamps[new_id] = {
                    'first_appearance': frame_time,
                    'last_appearance': frame_time
                }
                
                # Save person image
                self.save_person_image(new_id, 
                    frame[current_boxes[detection_idx][1]:current_boxes[detection_idx][3],
                          current_boxes[detection_idx][0]:current_boxes[detection_idx][2]])
        
        # Update disappeared tracks
        current_time = frame_time
        tracks_to_remove = []
        
        for track_id in self.active_tracks:
            if track_id not in matched_track_ids:
                self.active_tracks[track_id]['disappeared'] += 1
                if self.active_tracks[track_id]['disappeared'] > self.max_disappeared:
                    tracks_to_remove.append(track_id)
        
        # Remove old tracks
        for track_id in tracks_to_remove:
            del self.active_tracks[track_id]
    
    def save_person_image(self, person_id, frame):
        """Save person image to output directory"""
        person_dir = os.path.join(self.output_dir, f"person_{person_id}")
        os.makedirs(person_dir, exist_ok=True)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        cv2.imwrite(os.path.join(person_dir, f"{timestamp}.jpg"), frame)
    
    def process_video(self):
        frame_count = 0
        
        while True:
            ret, frame = self.cap.read()
            if not ret:
                break
                
            frame_time = frame_count / self.fps
            frame_count += 1
            
            # Detect persons using YOLO
            results = self.detector(frame, classes=[0])  # class 0 is person
            
            # Process detections
            detections = []
            for result in results:
                boxes = result.boxes.cpu().numpy()
                for box in boxes:
                    detections.append((box.xyxy[0], box.conf[0]))
            
            # Update tracking
            self.update_tracks(frame, detections, frame_time)
            
            # Visualize results
            for track_id, track_info in self.active_tracks.items():
                box = track_info['box']
                cv2.rectangle(frame, (int(box[0]), int(box[1])), 
                            (int(box[2]), int(box[3])), (0, 255, 0), 2)
                cv2.putText(frame, f"ID: {track_id}", 
                          (int(box[0]), int(box[1])-10),
                          cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
            
            # Display frame
            cv2.imshow('Tracking', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        
        self.cap.release()
        cv2.destroyAllWindows()
        
        return self.generate_report()
    
    def generate_report(self):
        """Generate tracking report"""
        report = {
            'total_unique_persons': self.next_id,
            'person_details': {}
        }
        
        for person_id in self.person_timestamps.keys():
            report['person_details'][person_id] = {
                'first_appearance': self.person_timestamps[person_id]['first_appearance'],
                'last_appearance': self.person_timestamps[person_id]['last_appearance'],
                'duration': self.person_timestamps[person_id]['last_appearance'] - 
                          self.person_timestamps[person_id]['first_appearance'],
                'image_path': os.path.join(self.output_dir, f"person_{person_id}")
            }
            
        return report

In [17]:
video_path = video_path  # Replace with your video path
tracker = PersonTracker(video_path)
report = tracker.process_video()
print(f"Total unique persons detected: {report['total_unique_persons']}")
for person_id, details in report['person_details'].items():
    print(f"\nPerson ID: {person_id}")
    print(f"First appearance: {details['first_appearance']:.2f}s")
    print(f"Last appearance: {details['last_appearance']:.2f}s")
    print(f"Duration in video: {details['duration']:.2f}s")
    print(f"Images saved in: {details['image_path']}")

Successfully loaded imagenet pretrained weights from "/Users/chenm/.cache/torch/checkpoints/osnet_x1_0_imagenet.pth"

0: 384x640 8 persons, 186.6ms
Speed: 1.7ms preprocess, 186.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 190.8ms
Speed: 1.5ms preprocess, 190.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 197.0ms
Speed: 1.5ms preprocess, 197.0ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)



2024-12-28 14:56:27.974 python[3686:15990045] +[IMKClient subclass]: chose IMKClient_Modern
2024-12-28 14:56:27.974 python[3686:15990045] +[IMKInputSession subclass]: chose IMKInputSession_Modern


0: 384x640 7 persons, 193.8ms
Speed: 1.4ms preprocess, 193.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 192.4ms
Speed: 1.4ms preprocess, 192.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 199.2ms
Speed: 1.4ms preprocess, 199.2ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 178.8ms
Speed: 1.3ms preprocess, 178.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 182.7ms
Speed: 1.4ms preprocess, 182.7ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 176.4ms
Speed: 1.4ms preprocess, 176.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 181.7ms
Speed: 1.3ms preprocess, 181.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 182.0ms
Speed: 1.4ms preprocess, 182.0ms inference, 1.0ms postprocess per i

KeyboardInterrupt: 