In [15]:
# Install required packages (if not already installed)
!pip install torch torchvision opencv-python matplotlib ultralytics supervision



In [16]:

import cv2
import numpy as np
import torch
import torch.nn as nn
from torchvision import models
from pathlib import Path
from matplotlib import cm
from collections import defaultdict
from ultralytics import YOLO
import supervision as sv
import time
import sys

# ---------------------------------------------------------------------------
# Runtime configuration
# ---------------------------------------------------------------------------
# Absolute location of the trained CSRNet checkpoint. This is a path on the
# original author's Windows machine - edit it before running elsewhere.
MODEL_PATH = (
    r"C:\Users\ACEPC\Desktop\DeepVision Crowd Monitoring"
    r"\models\best_crowd_counter_unified.pth"
)

# An alert is raised when the per-frame detection count exceeds this value.
ALERT_THRESHOLD = 30

# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Per-channel (R, G, B) ImageNet statistics used to normalise CSRNet input.
IMAGENET_MEAN = np.asarray((0.485, 0.456, 0.406), dtype=np.float32)
IMAGENET_STD = np.asarray((0.229, 0.224, 0.225), dtype=np.float32)

print(f"Using device: {DEVICE}")

Using device: cpu


In [17]:
# ============================================================================
# MODEL ARCHITECTURE
# ============================================================================

def create_csrnet(pretrained=True):
    """Build the CSRNet network: a VGG-16 front end plus a dilated-conv back end.

    Args:
        pretrained: When True (the default, matching the original behaviour)
            the VGG-16 front end is initialised from torchvision's
            IMAGENET1K_V1 weights, which torchvision may need to download.
            Pass False when a complete checkpoint is about to be loaded over
            the model, so no ImageNet weights are fetched just to be
            discarded.

    Returns:
        An ``nn.Sequential(frontend, backend)``. The last back-end layer is a
        1x1 convolution with a single output channel (the density map).
    """
    print("Building CSRNet architecture...")

    vgg_weights = models.VGG16_Weights.IMAGENET1K_V1 if pretrained else None
    vgg = models.vgg16(weights=vgg_weights)
    features = list(vgg.features.children())

    # Front end: the first 23 modules of VGG-16's feature extractor.
    frontend = nn.Sequential(*features[0:23])

    # Back end: dilated 3x3 convolutions (padding == dilation keeps the
    # spatial size) followed by a 1x1 projection to one channel.
    # The layer order must not change: checkpoint keys are index-based.
    backend = nn.Sequential(
        nn.Conv2d(512, 512, kernel_size=3, padding=1, dilation=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(512, 512, kernel_size=3, padding=2, dilation=2),
        nn.ReLU(inplace=True),
        nn.Conv2d(512, 512, kernel_size=3, padding=4, dilation=4),
        nn.ReLU(inplace=True),
        nn.Conv2d(512, 256, kernel_size=3, padding=1, dilation=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 128, kernel_size=3, padding=1, dilation=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(128, 1, kernel_size=1, padding=0),
    )

    model = nn.Sequential(frontend, backend)
    print("‚úì CSRNet architecture created")

    return model


def load_trained_model(model_path, device=DEVICE):
    """Build CSRNet, load trained weights from ``model_path`` and prepare it for inference.

    The checkpoint may be either a bare state dict or a dict holding the
    state dict under ``'model_state'`` (optionally with an ``'epoch'`` entry,
    which is only used for the log message).

    Args:
        model_path: Path to the ``.pth`` checkpoint file.
        device: Device the tensors are mapped to and the model is moved to.

    Returns:
        The model on ``device``, switched to eval mode.

    Raises:
        Whatever ``torch.load`` / ``load_state_dict`` raise for a missing
        file or for a state dict whose keys do not match the architecture.
    """
    print(f"Loading CSRNet from: {model_path}")

    # load_state_dict below is strict, so a successful load replaces every
    # parameter; skip the ImageNet initialisation (and its download).
    model = create_csrnet(pretrained=False)

    # NOTE(security): torch.load unpickles the file. Only load checkpoints
    # from a trusted source; consider weights_only=True where the installed
    # PyTorch version and the checkpoint contents allow it.
    checkpoint = torch.load(model_path, map_location=device)

    if 'model_state' in checkpoint:
        model.load_state_dict(checkpoint['model_state'])
        print(f"‚úì Loaded checkpoint from epoch {checkpoint.get('epoch', 'unknown')}")
    else:
        model.load_state_dict(checkpoint)
        print("‚úì Loaded model weights")

    model.to(device)
    model.eval()

    print(f"‚úì CSRNet ready on {device}")
    return model


In [18]:
# ============================================================================
# PERSON TRACKER
# ============================================================================

class PersonTracker:
    """Wrap supervision's ByteTrack and remember every track ID it has produced.

    The set of IDs seen so far gives a "unique people" count that does not
    grow when the same track is reported again on later frames.

    Attributes:
        tracker: The underlying ``sv.ByteTrack`` instance.
        unique_ids: Set of integer track IDs observed since the last reset.
        track_history: ``defaultdict(list)`` reserved for per-track history.
            This class never writes to it; it is only cleared on reset.
    """

    def __init__(self, track_activation_threshold=0.25, lost_track_buffer=30,
                 minimum_matching_threshold=0.8, frame_rate=30):
        """Create the tracker.

        All arguments are forwarded unchanged to ``sv.ByteTrack``; the
        defaults are the values this class previously hard-coded.
        """
        # Stored once so reset() rebuilds the tracker with exactly the same
        # settings instead of repeating (and possibly diverging from) them.
        self._tracker_kwargs = dict(
            track_activation_threshold=track_activation_threshold,
            lost_track_buffer=lost_track_buffer,
            minimum_matching_threshold=minimum_matching_threshold,
            frame_rate=frame_rate,
        )
        self.track_history = defaultdict(list)
        self.reset()

    def _build_tracker(self):
        """Return a fresh ByteTrack configured with the stored settings."""
        return sv.ByteTrack(**self._tracker_kwargs)

    def update(self, detections):
        """Feed one frame's detections to the tracker and record the IDs it returns.

        Args:
            detections: Detections object accepted by
                ``ByteTrack.update_with_detections``.

        Returns:
            The tracked detections returned by ByteTrack.
        """
        tracked_detections = self.tracker.update_with_detections(detections)

        track_ids = tracked_detections.tracker_id
        if track_ids is not None:
            # int() normalises the ID type so set membership is consistent.
            self.unique_ids.update(int(track_id) for track_id in track_ids)

        return tracked_detections

    def get_total_unique_count(self):
        """Return how many distinct track IDs have been seen since the last reset."""
        return len(self.unique_ids)

    def reset(self):
        """Discard all tracking state: new ByteTrack, empty ID set, cleared history."""
        self.tracker = self._build_tracker()
        self.unique_ids = set()
        self.track_history.clear()


In [19]:
# ============================================================================
# ENHANCED CROWD COUNTER
# ============================================================================

class EnhancedCrowdCounter:
    """Per-frame crowd analysis: YOLOv8 detection, ByteTrack IDs and a CSRNet density map.

    Frames are handled in OpenCV's BGR channel order throughout
    (``preprocess_frame`` converts BGR -> RGB before normalising, and the
    heatmap is converted to BGR before it is blended onto a frame).
    """

    def __init__(self, csrnet_model, device=DEVICE, csrnet_input_size=(512, 512),
                 yolo_weights='yolov8n.pt'):
        """Set up the density model, the YOLO detector, the tracker and the annotators.

        Args:
            csrnet_model: Density network, called as ``model(tensor)``. The
                input tensor is moved to ``device`` first, so the model is
                expected to be on that device already.
            device: Torch device used for the CSRNet input tensor.
            csrnet_input_size: ``(height, width)`` every frame is resized to
                before it is given to CSRNet.
            yolo_weights: Weights name/path passed to ``ultralytics.YOLO``.
                Defaults to the previously hard-coded ``'yolov8n.pt'``.
        """
        self.csrnet = csrnet_model
        self.device = device
        self.mean = IMAGENET_MEAN
        self.std = IMAGENET_STD

        # CSRNet input size (Height, Width) - should match the size used
        # during training (the notebook's training used 512x512).
        self.csrnet_input_size = csrnet_input_size
        print(f"‚úì CSRNet will process images at: {csrnet_input_size[0]}x{csrnet_input_size[1]}")
        print("  (Matches training TARGET_SIZE)")

        # Person detector
        print("Loading YOLOv8 person detector...")
        self.yolo = YOLO(yolo_weights)
        print("‚úì YOLOv8 loaded")

        # Tracker that assigns persistent IDs to detections
        self.tracker = PersonTracker()

        # supervision annotators for boxes and motion traces
        self.box_annotator = sv.BoxAnnotator(thickness=2)
        self.trace_annotator = sv.TraceAnnotator(thickness=2, trace_length=50)

    def preprocess_frame(self, frame):
        """Turn a BGR frame into a normalised ``(1, 3, H, W)`` float32 tensor on ``self.device``.

        H and W are taken from ``self.csrnet_input_size``.
        """
        # cv2.resize takes (width, height), hence the swapped indices.
        frame_resized = cv2.resize(frame,
                                   (self.csrnet_input_size[1], self.csrnet_input_size[0]),
                                   interpolation=cv2.INTER_LINEAR)

        # OpenCV delivers BGR; the normalisation constants are in RGB order.
        img_rgb = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2RGB)

        # Scale to [0, 1], then standardise per channel.
        img_normalized = img_rgb.astype(np.float32) / 255.0
        img_normalized = (img_normalized - self.mean) / self.std

        # HWC -> CHW and add the batch dimension.
        img_tensor = torch.from_numpy(img_normalized).permute(2, 0, 1).unsqueeze(0)
        img_tensor = img_tensor.to(self.device, dtype=torch.float32)

        return img_tensor

    def predict_density(self, frame):
        """Run CSRNet on a frame.

        Returns:
            ``(density_np, count)``: the squeezed model output as a NumPy
            array and the sum of that array as a Python float.
        """
        with torch.no_grad():
            img_tensor = self.preprocess_frame(frame)
            density_map = self.csrnet(img_tensor)
            density_np = density_map.squeeze().cpu().numpy()
            count = float(density_np.sum())
        return density_np, count

    def detect_people(self, frame):
        """Detect people with YOLO (class 0 only) and return ``sv.Detections``."""
        results = self.yolo(frame, classes=[0], verbose=False)[0]
        detections = sv.Detections.from_ultralytics(results)
        return detections

    def _draw_id_labels(self, frame, tracked_detections):
        """Draw a filled green ``ID:<n>`` tag at the top-left corner of each tracked box, in place."""
        for bbox, track_id in zip(tracked_detections.xyxy, tracked_detections.tracker_id):
            x1, y1 = int(bbox[0]), int(bbox[1])
            label = f"ID:{track_id}"

            text_w, text_h = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)[0]

            # Normally the tag sits directly above the box. When the box
            # touches the top of the frame, push the tag down so it is not
            # drawn at negative y and cut off.
            tag_bottom = max(y1, text_h + 10)

            # Label background
            cv2.rectangle(frame,
                          (x1, tag_bottom - text_h - 10),
                          (x1 + text_w + 10, tag_bottom),
                          (0, 255, 0), -1)

            # Label text
            cv2.putText(frame, label,
                        (x1 + 5, tag_bottom - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)

    def predict_with_tracking(self, frame):
        """Run detection, tracking and density estimation on one frame.

        Returns:
            ``(annotated_frame, density_map, detection_count, unique_count,
            density_count)`` where ``annotated_frame`` is a copy of ``frame``
            with boxes, traces and ID labels drawn, ``detection_count`` is the
            number of tracked detections in this frame, ``unique_count`` is
            the tracker's running total of distinct IDs, and ``density_map`` /
            ``density_count`` come from ``predict_density``.
        """
        # Detect, then associate detections with persistent track IDs.
        detections = self.detect_people(frame)
        tracked_detections = self.tracker.update(detections)

        detection_count = len(tracked_detections)
        unique_count = self.tracker.get_total_unique_count()

        # Density estimate is computed from the untouched input frame.
        density_map, density_count = self.predict_density(frame)

        # Draw on a copy so the caller's frame is left unmodified.
        annotated_frame = frame.copy()

        if len(tracked_detections) > 0:
            annotated_frame = self.box_annotator.annotate(
                scene=annotated_frame,
                detections=tracked_detections
            )

            annotated_frame = self.trace_annotator.annotate(
                scene=annotated_frame,
                detections=tracked_detections
            )

            if tracked_detections.tracker_id is not None:
                self._draw_id_labels(annotated_frame, tracked_detections)

        return annotated_frame, density_map, detection_count, unique_count, density_count

    def create_heatmap_overlay(self, density_map, original_frame, alpha=0.4):
        """Blend a jet-coloured rendering of ``density_map`` onto ``original_frame``.

        Args:
            density_map: 2-D density array (any resolution).
            original_frame: BGR uint8 image the heatmap is blended onto.
            alpha: Weight of the heatmap; the frame gets ``1 - alpha``.

        Returns:
            A new BGR image of the same size as ``original_frame``.
        """
        h, w = original_frame.shape[:2]
        density_resized = cv2.resize(density_map, (w, h), interpolation=cv2.INTER_CUBIC)

        # Scale so the densest pixel maps to the top of the colormap; leave
        # the map untouched when there is no positive density to scale by.
        density_max = density_resized.max()
        if density_max > 0:
            density_normalized = density_resized / density_max
        else:
            density_normalized = density_resized

        # matplotlib returns RGBA floats in [0, 1]: drop alpha, go to 8-bit.
        heatmap_rgb = (cm.jet(density_normalized)[:, :, :3] * 255).astype(np.uint8)

        # The frame is BGR, so reverse the channel order before blending;
        # otherwise red and blue are swapped and dense regions render blue.
        # ascontiguousarray: hand OpenCV a plain, positively-strided buffer.
        heatmap_bgr = np.ascontiguousarray(heatmap_rgb[:, :, ::-1])

        overlay = cv2.addWeighted(original_frame, 1-alpha, heatmap_bgr, alpha, 0)

        return overlay


In [None]:
# ============================================================================
# LIVE WEBCAM PROCESSING
# ============================================================================

def draw_info_panel(frame, det_count, unique_count, density_count, fps, is_alert):
    """Render the status header and key hints onto ``frame`` (modified in place).

    Args:
        frame: Image to draw on.
        det_count: Number of people detected in the current frame.
        unique_count: Running number of distinct tracked people.
        density_count: Density-based estimate; shown truncated to an int.
        fps: Frames per second, shown with one decimal.
        is_alert: Selects the red alert line instead of the green normal line.

    Returns:
        The same ``frame`` object that was passed in.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    white = (255, 255, 255)
    height, width = frame.shape[:2]

    # Darken the top 180 px: paint the strip black on a copy, then blend the
    # copy back into the frame (60 % copy, 40 % original), writing into frame.
    shaded = frame.copy()
    cv2.rectangle(shaded, (0, 0), (width, 180), (0, 0, 0), -1)
    cv2.addWeighted(shaded, 0.6, frame, 0.4, 0, frame)

    # Heading
    cv2.putText(frame, "CSRNet Crowd Counter - Live Feed",
                (10, 30), font, 0.8, white, 2)

    # Status line: red when alerting, green otherwise (colours are BGR)
    if not is_alert:
        status_text = f"Normal - Count: {det_count}"
        status_color = (0, 255, 0)
    else:
        status_text = f"ALERT! Count: {det_count} > Threshold: {ALERT_THRESHOLD}"
        status_color = (0, 0, 255)
    cv2.putText(frame, status_text, (10, 65), font, 0.7, status_color, 2)

    # Statistics block: one line every 25 px, starting at y = 100
    stat_lines = (
        f"Detected: {det_count} people",
        f"Unique Tracked: {unique_count} people",
        f"Density Estimate: {int(density_count)}",
        f"FPS: {fps:.1f}",
    )
    baseline_y = 100
    for line in stat_lines:
        cv2.putText(frame, line, (10, baseline_y), font, 0.5, white, 1)
        baseline_y += 25

    # Key hints along the bottom edge
    cv2.putText(frame, "Press 'Q' to quit | 'R' to reset tracking | 'S' to save frame",
                (10, height - 10), font, 0.5, white, 1)

    return frame


def run_webcam_detection(counter, camera_index=0):
    """Run the live detection loop on a webcam until the user quits.

    Each frame is passed through ``counter.predict_with_tracking``, overlaid
    with the density heatmap and the info panel, and shown in an OpenCV
    window. Keys: Q quits, R resets the tracker, S saves the displayed frame
    as ``crowd_capture_<frame number>.jpg`` in the working directory.
    Progress is printed every 30 frames and a summary is printed on exit.

    Args:
        counter: Object providing ``predict_with_tracking(frame)``,
            ``create_heatmap_overlay(density_map, frame, alpha=...)`` and a
            ``tracker`` attribute with ``reset()``.
        camera_index: Index passed to ``cv2.VideoCapture``.

    Returns:
        None. Returns early (after printing troubleshooting hints) when the
        camera cannot be opened.
    """
    print("\n" + "="*60)
    print("STARTING LIVE WEBCAM DETECTION")
    print("="*60)
    print(f"Camera Index: {camera_index}")
    print(f"Alert Threshold: {ALERT_THRESHOLD}")
    print("\nControls:")
    print("  Q - Quit")
    print("  R - Reset tracking")
    print("  S - Save current frame")
    print("="*60 + "\n")

    # Open webcam
    cap = cv2.VideoCapture(camera_index)

    if not cap.isOpened():
        print("‚ùå Error: Cannot access webcam!")
        print("\nTroubleshooting:")
        print("  1. Check if another application is using the webcam")
        print("  2. Try different camera_index: 0, 1, 2...")
        print("  3. Check camera permissions")
        cap.release()
        return

    # Request a 1280x720 capture size
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

    print("‚úì Webcam opened successfully")
    print("‚úì Starting detection loop...\n")

    # Statistics tracking
    frame_count = 0
    # Initialised here because the summary in `finally` reads it even when
    # the loop ends before a single frame has been processed.
    unique_count = 0
    detection_counts = []   # rolling window, last 100 frames
    fps_list = []           # rolling window, last 30 frames

    try:
        while True:
            # perf_counter is monotonic and high-resolution
            start_time = time.perf_counter()

            # Read frame
            ret, frame = cap.read()
            if not ret:
                print("‚ùå Failed to grab frame")
                break

            frame_count += 1

            # Process frame
            annotated, density_map, det_count, unique_count, dens_count = counter.predict_with_tracking(frame)

            # Create heatmap overlay
            overlay = counter.create_heatmap_overlay(density_map, annotated, alpha=0.3)

            # Update statistics, keeping only the last 100 frames
            detection_counts.append(det_count)
            if len(detection_counts) > 100:
                detection_counts.pop(0)

            # FPS of the processing step, smoothed over the last 30 frames.
            # Guard the division in case the clock reports no elapsed time.
            elapsed = time.perf_counter() - start_time
            fps = 1.0 / max(elapsed, 1e-6)
            fps_list.append(fps)
            if len(fps_list) > 30:
                fps_list.pop(0)
            avg_fps = np.mean(fps_list)

            # Check alert
            is_alert = det_count > ALERT_THRESHOLD

            # Draw info panel
            display_frame = draw_info_panel(overlay, det_count, unique_count,
                                            dens_count, avg_fps, is_alert)

            # Display
            cv2.imshow('CSRNet Crowd Counter - Live', display_frame)

            # Keyboard controls
            key = cv2.waitKey(1) & 0xFF

            if key == ord('q') or key == ord('Q'):
                print("\n‚úì Quitting...")
                break

            elif key == ord('r') or key == ord('R'):
                print("\nüîÑ Resetting tracker...")
                counter.tracker.reset()
                detection_counts.clear()
                # Keep the local in step with the freshly reset tracker so
                # the progress line and the summary do not show a stale total.
                unique_count = 0
                print("‚úì Tracker reset complete")

            elif key == ord('s') or key == ord('S'):
                filename = f"crowd_capture_{frame_count}.jpg"
                cv2.imwrite(filename, display_frame)
                print(f"‚úì Frame saved: {filename}")

            # Print stats every 30 frames
            if frame_count % 30 == 0:
                avg_det = np.mean(detection_counts) if detection_counts else 0
                max_det = max(detection_counts) if detection_counts else 0
                print(f"Frame {frame_count} | Det: {det_count} | Unique: {unique_count} | "
                      f"Avg: {avg_det:.1f} | Max: {max_det} | FPS: {avg_fps:.1f}")

    except KeyboardInterrupt:
        print("\n‚úì Interrupted by user")

    finally:
        # Always free the camera and close the window, however the loop ended
        cap.release()
        cv2.destroyAllWindows()

        # Final statistics
        print("\n" + "="*60)
        print("SESSION SUMMARY")
        print("="*60)
        print(f"Total Frames Processed: {frame_count}")
        print(f"Total Unique People: {unique_count}")
        if detection_counts:
            print(f"Average Detection: {np.mean(detection_counts):.1f}")
            print(f"Max Detection: {max(detection_counts)}")
        if fps_list:
            print(f"Average FPS: {np.mean(fps_list):.1f}")
        print("="*60)

In [20]:

# Entry point: verify the checkpoint exists, load both models, then start the
# live webcam loop. Exits with status 1 when the checkpoint is missing or
# when loading / running raises.
if __name__ == "__main__":
    print("="*60)
    print("üöÄ CSRNet CROWD COUNTER - WEBCAM TESTING")
    print("="*60)

    # Check if model exists
    model_path = Path(MODEL_PATH)

    if not model_path.exists():
        print("\n" + "="*60)
        print("‚ö†Ô∏è  MODEL FILE NOT FOUND")
        print("="*60)
        print(f"Expected path: {MODEL_PATH}")
        # Only claim the directory exists when it actually does, so the
        # hint points at the real problem (missing file vs. missing folder).
        if model_path.parent.is_dir():
            print("\nThe models directory exists at:")
            print(f"  {model_path.parent}")
            print("\nBut the model file doesn't exist yet.")
        else:
            print("\nThe models directory does not exist either:")
            print(f"  {model_path.parent}")
        print("\nüìù NEXT STEPS:")
        print("  1. Train your CSRNet model first")
        print("  2. Save the trained model to the path above")
        print("  3. The saved file should be named: best_crowd_counter_unified.pth")
        print("  4. Then run this script again")
        print("\nüí° TIP: Make sure your training script saves the model like this:")
        print("     torch.save(model.state_dict(), MODEL_PATH)")
        print("     or")
        print("     torch.save({'model_state': model.state_dict(), 'epoch': epoch}, MODEL_PATH)")
        print("="*60)
        sys.exit(1)

    print(f"\n‚úì Model file found: {model_path.name}")
    print(f"‚úì File size: {model_path.stat().st_size / (1024*1024):.2f} MB")

    try:
        # Load CSRNet
        print("\n" + "="*60)
        print("LOADING MODELS")
        print("="*60)
        csrnet = load_trained_model(MODEL_PATH)

        # Create enhanced counter
        counter = EnhancedCrowdCounter(
            csrnet,
            csrnet_input_size=(512, 512)  # (H, W) - the size used for training
        )

        print("\n‚úì All models loaded successfully!")
        print("‚úì Ready to start webcam detection\n")

        # Run webcam detection
        run_webcam_detection(counter, camera_index=0)

    except Exception as e:
        # Report the failure with a full traceback, then exit non-zero.
        print(f"\n‚ùå Error occurred: {str(e)}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

    print("\n‚úì Program terminated successfully")

üöÄ CSRNet CROWD COUNTER - WEBCAM TESTING

‚úì Model file found: best_crowd_counter_unified.pth
‚úì File size: 185.33 MB

LOADING MODELS
Loading CSRNet from: C:\Users\ACEPC\Desktop\DeepVision Crowd Monitoring\models\best_crowd_counter_unified.pth
Building CSRNet architecture...
‚úì CSRNet architecture created
‚úì Loaded checkpoint from epoch 191
‚úì CSRNet ready on cpu
‚úì CSRNet will process images at: 512x512
  (Matches training TARGET_SIZE)
Loading YOLOv8 person detector...
‚úì YOLOv8 loaded

‚úì All models loaded successfully!
‚úì Ready to start webcam detection


STARTING LIVE WEBCAM DETECTION
Camera Index: 0
Alert Threshold: 30

Controls:
  Q - Quit
  R - Reset tracking
  S - Save current frame

‚úì Webcam opened successfully
‚úì Starting detection loop...


‚úì Interrupted by user

SESSION SUMMARY
Total Frames Processed: 26
Total Unique People: 2
Average Detection: 1.2
Max Detection: 2
Average FPS: 0.2

‚úì Program terminated successfully
