# CCTV Cross-Camera Person Tracking and Analysis
# ============================================

This notebook implements cross-camera person tracking and re-identification using:

- YOLOX for person detection
- ByteTracker for single-camera tracking
- Deep Person ReID for cross-camera person re-identification
- InsightFace for demographic analysis

## Setup and Imports

In [1]:
from cctv_analysis.matcher import PersonMatcher
from cctv_analysis.demographics import DemographicAnalyzer
from cctv_analysis.reid import PersonReID
from cctv_analysis.detector import PersonDetector
import os
import sys
import cv2
import numpy as np
import torch
from datetime import datetime
import pandas as pd
from tqdm.notebook import tqdm

# Add project root to path
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

  check_for_updates()


## Configuration

In [3]:
# Check GPU availability
if torch.cuda.is_available():
    print(f"Using GPU: {torch.cuda.get_device_name()}")
    device = 'cuda'
else:
    print("GPU not available, using CPU")
    device = 'cpu'

# Initialize models
detector = PersonDetector(
    model_path='../models/detector/yolox_l.pth',
    model_size='l',
    device=device
)

reid_model = PersonReID(
    model_path='../models/reid/osnet_x1_0.pth',
    device=device
)

demographic_analyzer = DemographicAnalyzer(device=device)

# Initialize matcher
matcher = PersonMatcher(similarity_threshold=0.75)

Using GPU: NVIDIA T1000 8GB
Using GPU: NVIDIA T1000 8GB
GPU Memory Available: 8.00 GB
Successfully loaded YOLOX-L model
Using GPU: NVIDIA T1000 8GB
GPU Memory Available: 8.00 GB
Successfully loaded pretrained weights from ../models/reid/osnet_x1_0.pth
Applied providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'], with options: {'CUDAExecutionProvider': {'device_id': '0', 'has_user_compute_stream': '0', 'cudnn_conv1d_pad_to_nc1d': '0', 'user_compute_stream': '0', 'gpu_external_alloc': '0', 'gpu_mem_limit': '18446744073709551615', 'enable_cuda_graph': '0', 'gpu_external_free': '0', 'gpu_external_empty_cache': '0', 'arena_extend_strategy': 'kNextPowerOfTwo', 'cudnn_conv_algo_search': 'EXHAUSTIVE', 'do_copy_in_default_stream': '1', 'cudnn_conv_use_max_workspace': '1', 'tunable_op_enable': '0', 'tunable_op_tuning_enable': '0', 'tunable_op_max_tuning_duration_ms': '0', 'enable_skip_layer_norm_strict_mode': '0', 'prefer_nhwc': '0', 'use_ep_level_unified_stream': '0', 'use_tf32': '1', '

## Analyse the CCTV footage

In [4]:
def process_video(video_path, camera_id, start_time):
    """Process video and add detections to matcher"""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error opening video: {video_path}")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Process frames with progress bar
    for frame_idx in tqdm(range(frame_count), desc=f"Processing Camera {camera_id}"):
        ret, frame = cap.read()
        if not ret:
            break

        # Calculate timestamp
        timestamp = start_time + pd.Timedelta(seconds=frame_idx/fps)

        # Detect persons
        detections = detector.detect(frame, conf_thresh=0.5)

        for i, det in enumerate(detections):
            x1, y1, x2, y2, conf = det
            x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])

            # Extract person crop
            person_crop = frame[y1:y2, x1:x2]
            if person_crop.size == 0:
                continue

            # Extract ReID features
            features = reid_model.extract_features(person_crop)

            # Analyze demographics
            demographics = demographic_analyzer.analyze(person_crop)

            # Add to matcher
            matcher.add_person(
                camera_id=camera_id,
                person_id=len(matcher.camera1_persons if camera_id ==
                              1 else matcher.camera2_persons),
                timestamp=timestamp,
                features=features,
                demographics=demographics[0] if demographics else None
            )

        # Clear GPU memory periodically
        if device == 'cuda' and frame_idx % 100 == 0:
            torch.cuda.empty_cache()

    cap.release()

In [5]:
# Process videos
video1_path = '../data/videos/video1.mp4'
video2_path = '../data/videos/video2.mp4'

# Assuming videos start at these times (adjust as needed)
video1_start = pd.Timestamp('2024-01-01 09:00:00')
video2_start = pd.Timestamp('2024-01-01 09:00:00')

# Process both videos
process_video(video1_path, camera_id=1, start_time=video1_start)
process_video(video2_path, camera_id=2, start_time=video2_start)

Processing Camera 1:   0%|          | 0/617 [00:00<?, ?it/s]

Processing Camera 2:   0%|          | 0/219 [00:00<?, ?it/s]

In [8]:
# Test on a single frame
cap = cv2.VideoCapture('../data/videos/video1.mp4')
ret, frame = cap.read()
cap.release()

if ret:
    # Try different confidence thresholds
    for conf in [0.1, 0.2, 0.3, 0.4, 0.5]:
        detections = detector.detect(frame, conf_thresh=conf)
        print(f"\nConfidence threshold: {conf}")
        print(f"Number of detections: {detections}")


Confidence threshold: 0.1
Number of detections: []

Confidence threshold: 0.2
Number of detections: []

Confidence threshold: 0.3
Number of detections: []

Confidence threshold: 0.4
Number of detections: []

Confidence threshold: 0.5
Number of detections: []


In [None]:
# Initialize detector with more sensitive settings
detector = PersonDetector(
    model_path='../models/detector/yolox_l.pth',
    model_size='l',
    device='cuda'
)


def process_frame(frame, timestamp, camera_id):
    """Process a single frame"""
    # Use lower confidence threshold for detection
    detections = detector.detect(frame, conf_thresh=0.3)  # Lowered from 0.5

    for det in detections:
        x1, y1, x2, y2, conf = det
        x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])

        # Extract person crop
        person_crop = frame[y1:y2, x1:x2]
        if person_crop.size == 0:
            continue

        # Get ReID features and demographics
        features = reid_model.extract_features(person_crop)
        demographics = demographic_analyzer.analyze(person_crop)

        # Add to matcher
        matcher.add_person(
            camera_id=camera_id,
            person_id=len(matcher.camera1_persons if camera_id == 1
                          else matcher.camera2_persons),
            timestamp=timestamp,
            features=features,
            demographics=demographics[0] if demographics else None
        )

    return len(detections)

# Optional: Add visualization to check detections


def visualize_detections(video_path, output_path, max_frames=None):
    """Visualize detections in video"""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error opening video: {video_path}")
        return

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    writer = cv2.VideoWriter(
        output_path,
        cv2.VideoWriter_fourcc(*'mp4v'),
        fps,
        (width, height)
    )

    frame_count = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        if max_frames and frame_count >= max_frames:
            break

        # Detect persons
        detections = detector.detect(frame, conf_thresh=0.3)

        # Draw detections
        frame_viz = detector.draw_detections(frame, detections)

        writer.write(frame_viz)
        frame_count += 1

        if frame_count % 100 == 0:
            print(f"Processed {frame_count} frames")

    cap.release()
    writer.release()
    print(f"Visualization saved to {output_path}")


# Visualize first few frames to check detection quality
visualize_detections(
    '../data/videos/video1.mp4',
    '../data/detection_test_cam1.mp4',
    max_frames=1000  # Adjust as needed
)

Using GPU: NVIDIA T1000 8GB
GPU Memory Available: 8.00 GB
Successfully loaded YOLOX-L model


AttributeError: module 'cv2' has no attribute 'CAP_PROP_HEIGHT'

In [8]:
# Initialize matcher with more lenient parameters
matcher = PersonMatcher(
    similarity_threshold=0.5,  # Lower threshold for more matches
    max_time_diff=3600  # Maximum 1 hour time difference
)

# After processing videos, print detailed statistics
matcher.print_matching_stats()

# Visualize some example matches
matcher.visualize_matches(n_samples=5)

# Get matches and analyze results
matches = matcher.get_matches()
df_matches = pd.DataFrame(matches)

if not df_matches.empty:
    print("\nMatching Results:")
    print(f"Total matches found: {len(df_matches)}")

    # Show similarity score distribution
    print("\nSimilarity Score Statistics:")
    print(df_matches['similarity_score'].describe())

    # Show time difference distribution
    print("\nTime Difference Statistics (seconds):")
    print(df_matches['time_difference'].describe())

    # Demographics analysis
    demographics_df = pd.DataFrame(
        [m['demographics'] for m in matches if m['demographics']])
    if not demographics_df.empty:
        print("\nDemographic breakdown:")
        print("\nGender distribution:")
        print(demographics_df['gender'].value_counts())
        print("\nAge group distribution:")
        print(demographics_df['age_group'].value_counts())

    # Save detailed results
    output_path = '../data/analysis_results.csv'
    df_matches.to_csv(output_path, index=False)
    print(f"\nDetailed results saved to {output_path}")
else:
    print("No matches found")


Matching Statistics:
Number of people detected in Camera 1: 0
Number of people detected in Camera 2: 0
Number of matches found: 0
No matches to visualize
No matches found


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

if not df_matches.empty:
    # Create a figure with two subplots
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    # Plot similarity score distribution
    sns.histplot(df_matches['similarity_score'], ax=ax1)
    ax1.set_title('Distribution of Similarity Scores')
    ax1.set_xlabel('Similarity Score')

    # Plot time difference distribution
    sns.histplot(df_matches['time_difference'], ax=ax2)
    ax2.set_title('Distribution of Time Differences')
    ax2.set_xlabel('Time Difference (seconds)')

    plt.tight_layout()
    plt.show()

In [10]:
if not df_matches.empty:
    output_path = '../data/analysis_results.csv'
    df_matches.to_csv(output_path, index=False)
    print(f"Results saved to {output_path}")

In [None]:
# Function to create detailed detection information
import seaborn as sns
import matplotlib.pyplot as plt
import json


def create_detailed_detections(persons, camera_id):
    detections = []
    for person in persons:
        detection = {
            'camera_id': camera_id,
            'person_id': person.id,
            'timestamp': person.timestamp,
            'formatted_time': person.timestamp.strftime('%Y-%m-%d %H:%M:%S'),
            'gender': person.demographics.get('gender') if person.demographics else None,
            'age_group': person.demographics.get('age_group') if person.demographics else None,
            'confidence': person.demographics.get('confidence') if person.demographics else None,
            'feature_vector_mean': np.mean(person.features) if person.features is not None else None,
            'feature_vector_std': np.std(person.features) if person.features is not None else None,
            'feature_vector_norm': np.linalg.norm(person.features) if person.features is not None else None
        }
        detections.append(detection)
    return detections


# Create detailed detections
camera1_detailed = create_detailed_detections(
    matcher.camera1_persons, camera_id=1)
camera2_detailed = create_detailed_detections(
    matcher.camera2_persons, camera_id=2)
all_detailed = camera1_detailed + camera2_detailed

# Convert to DataFrame
df_detailed = pd.DataFrame(all_detailed)

# Basic statistics
print("Detection Statistics:")
print("-" * 50)
print(f"Total detections: {len(df_detailed)}")
print(f"Camera 1 detections: {len(camera1_detailed)}")
print(f"Camera 2 detections: {len(camera2_detailed)}")

# Time analysis
print("\nTime Analysis:")
print("-" * 50)
if not df_detailed.empty:
    time_range = df_detailed['timestamp'].max() - \
        df_detailed['timestamp'].min()
    print(f"Time range covered: {time_range}")

    # Detections per minute
    duration_minutes = time_range.total_seconds() / 60
    detections_per_minute = len(df_detailed) / duration_minutes
    print(f"Average detections per minute: {detections_per_minute:.2f}")

# Demographic analysis
print("\nDemographic Analysis:")
print("-" * 50)
if 'gender' in df_detailed.columns:
    print("\nGender distribution by camera:")
    print(pd.crosstab(df_detailed['camera_id'],
          df_detailed['gender'], margins=True))

if 'age_group' in df_detailed.columns:
    print("\nAge group distribution by camera:")
    print(pd.crosstab(df_detailed['camera_id'],
          df_detailed['age_group'], margins=True))

# Feature vector analysis
print("\nFeature Vector Analysis:")
print("-" * 50)
numeric_cols = ['feature_vector_mean',
                'feature_vector_std', 'feature_vector_norm']
print(df_detailed[numeric_cols].describe())

# Export to CSV
output_path = '../data/detailed_detections.csv'
df_detailed.to_csv(output_path, index=False)
print(f"\nDetailed detections saved to {output_path}")

# Optional: Export feature vectors to a separate file
feature_vectors = {
    'camera1': {str(p.id): p.features.tolist() if p.features is not None else None
                for p in matcher.camera1_persons},
    'camera2': {str(p.id): p.features.tolist() if p.features is not None else None
                for p in matcher.camera2_persons}
}

feature_path = '../data/feature_vectors.json'
with open(feature_path, 'w') as f:
    json.dump(feature_vectors, f)
print(f"Feature vectors saved to {feature_path}")

# Visualizations

# Create visualizations
plt.figure(figsize=(15, 10))

# 1. Detections over time
plt.subplot(2, 2, 1)
df_detailed['timestamp'].hist(bins=50)
plt.title('Detections Over Time')
plt.xlabel('Time')
plt.ylabel('Number of Detections')

# 2. Gender distribution
plt.subplot(2, 2, 2)
if 'gender' in df_detailed.columns:
    df_detailed['gender'].value_counts().plot(kind='bar')
    plt.title('Gender Distribution')
    plt.xlabel('Gender')
    plt.ylabel('Count')

# 3. Age group distribution
plt.subplot(2, 2, 3)
if 'age_group' in df_detailed.columns:
    df_detailed['age_group'].value_counts().plot(kind='bar')
    plt.title('Age Group Distribution')
    plt.xlabel('Age Group')
    plt.ylabel('Count')

# 4. Feature vector norms
plt.subplot(2, 2, 4)
if 'feature_vector_norm' in df_detailed.columns:
    df_detailed['feature_vector_norm'].hist(bins=50)
    plt.title('Feature Vector Norms')
    plt.xlabel('Norm')
    plt.ylabel('Count')

plt.tight_layout()
plt.show()

# Display first few rows of the detailed data
print("\nFirst few detailed detections:")
display(df_detailed.head())

Detection Statistics:
--------------------------------------------------
Total detections: 0
Camera 1 detections: 0
Camera 2 detections: 0

Time Analysis:
--------------------------------------------------

Demographic Analysis:
--------------------------------------------------

Feature Vector Analysis:
--------------------------------------------------


KeyError: "None of [Index(['feature_vector_mean', 'feature_vector_std', 'feature_vector_norm'], dtype='object')] are in the [columns]"