In [1]:
import cv2
import torch
import numpy as np
from pathlib import Path
from deep_sort_realtime.deepsort_tracker import DeepSort
from collections import deque
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import img_to_array
from PIL import Image
from transformers import pipeline
import datetime
import os
from langchain.llms import Ollama

# Suppress the FutureWarning
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# YOLOv5 model
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
model.classes = [0]  # Only detect persons (class 0 in COCO dataset)
model.conf = 0.5  # Confidence threshold
model.cpu()  # Ensure model runs on CPU

# Load gender classification model
gender_model = load_model('cctv_gender_classifier.h5')
gender_classes = ['man', 'woman']

# Initialize DeepSort
tracker = DeepSort(max_age=10, 
                   n_init=3,
                   nms_max_overlap=1.0,
                   max_cosine_distance=0.3,
                   nn_budget=None,
                   override_track_class=None,
                   embedder="mobilenet",
                   half=True,
                   bgr=True,
                   embedder_gpu=False,
                   embedder_model_name=None,
                   embedder_wts=None,
                   polygon=False,
                   today=None)

# Load the action recognition pipeline
action_pipe = pipeline("image-classification", model="rvv-karma/Human-Action-Recognition-VIT-Base-patch16-224", framework="tf")

def preprocess_body(body_crop, target_size=(126, 126)):
    try:
        body_crop = cv2.resize(body_crop, target_size)
        body_crop = body_crop.astype("float32") / 255.0
        body_crop = img_to_array(body_crop)
        body_crop = np.expand_dims(body_crop, axis=0)
        return body_crop
    except Exception as e:
        print(f"Error in preprocess_body: {e}")
        return None

# Initialize variables
total_persons = 0
gender_counts = {'man': 0, 'woman': 0}
frame_skip = 2
processing_times = []
fighting_count = 0
lone_woman_flag = False
surrounded_woman_flag = False
sos_events = []
alerts = []
warnings = []
gender_counts_over_time = []

# New variables for performance metrics
total_frames = 0
correct_detections = 0
false_positives = 0
false_negatives = 0
gender_correct = 0
gender_total = 0

# Open video file
video = cv2.VideoCapture(r"C:\Users\ybadr\OneDrive\Desktop\SIH VIDS\Horrifying CCTV footage shows girl abducted, molested in Bengaluru.mp4")
frame_count = 0

# Rest of your existing code...

while video.isOpened():
    ret, frame = video.read()
    if not ret:
        break
    
    total_frames += 1
    frame_count += 1
    if frame_count % frame_skip != 0:
        continue
    
    # Resize frame for faster processing
    frame = cv2.resize(frame, (640, 480))
    
    # YOLOv5 detection
    results = model(frame)
    detections = results.xyxy[0].cpu().numpy()
    
    # Prepare detections for DeepSort
    deepsort_detections = []
    for det in detections:
        x1, y1, x2, y2, conf, cls = det
        deepsort_detections.append(([x1, y1, x2 - x1, y2 - y1], conf, int(cls)))
    
    # Update DeepSort
    tracks = tracker.update_tracks(deepsort_detections, frame=frame)
    
    # Reset gender counts for each frame
    gender_counts = {'man': 0, 'woman': 0}
    
    for track in tracks:
        if not track.is_confirmed():
            continue
        
        track_id = track.track_id
        ltrb = track.to_ltrb()
        x1, y1, x2, y2 = map(int, ltrb)
        
        # Perform gender classification if not already done
        if not hasattr(track, 'gender'):
            body_crop = frame[y1:y2, x1:x2]
            preprocessed_body = preprocess_body(body_crop)
            if preprocessed_body is not None:
                try:
                    gender_conf = gender_model.predict(preprocessed_body)[0]
                    gender_idx = np.argmax(gender_conf)
                    
                    if gender_conf[gender_idx] > 0.7:  # Confidence threshold
                        gender_label = gender_classes[gender_idx]
                        track.gender = gender_label
                        track.gender_confidence = gender_conf[gender_idx]
                        total_persons += 1
                        
                        # Update gender classification metrics
                        gender_total += 1
                        # Assume ground truth is available (you may need to modify this)
                        if gender_label == "actual_gender":  # Replace with actual ground truth
                            gender_correct += 1
                except Exception as e:
                    print(f"Error in gender classification: {e}")
        
        # Count genders in current frame
        if hasattr(track, 'gender'):
            gender_counts[track.gender] += 1
        
        # Draw bounding box and label
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        label = f"ID {track_id}"
        if hasattr(track, 'gender'):
            label += f" ({track.gender})"
        cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        
        # Update YOLO metrics (assuming ground truth is available)
        correct_detections += 1  # This should be based on IoU with ground truth
    
    # Perform action recognition
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray(rgb_frame)
    action_result = action_pipe(pil_image)
    
    # Check for fighting action
    sos_detected = False
    for item in action_result:
        if item['label'] == 'Fighting' and item['score'] > 0.97:
            sos_detected = True
            fighting_count += 1
            # Rest of your SOS detection code...
    
    # Rest of your existing code...

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Calculate performance metrics
yolo_precision = correct_detections / (correct_detections + false_positives) if (correct_detections + false_positives) > 0 else 0
yolo_recall = correct_detections / (correct_detections + false_negatives) if (correct_detections + false_negatives) > 0 else 0
yolo_f1 = 2 * (yolo_precision * yolo_recall) / (yolo_precision + yolo_recall) if (yolo_precision + yolo_recall) > 0 else 0

gender_accuracy = gender_correct / gender_total if gender_total > 0 else 0

fighting_accuracy = fighting_count / total_frames  # This is a simplified metric

# Print performance metrics
print(f"YOLO Detection Metrics:")
print(f"Precision: {yolo_precision:.4f}")
print(f"Recall: {yolo_recall:.4f}")
print(f"F1 Score: {yolo_f1:.4f}")
print(f"\nGender Classification Accuracy: {gender_accuracy:.4f}")
print(f"\nFighting Detection Rate: {fighting_accuracy:.4f}")

# Calculate modified accuracy (example: weighted average of YOLO and gender accuracy)
modified_accuracy = 0.6 * yolo_f1 + 0.3 * gender_accuracy + 0.1 * fighting_accuracy
print(f"\nModified Accuracy: {modified_accuracy:.4f}")



Using cache found in C:\Users\ybadr/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-9-6 Python-3.12.3 torch-2.4.0 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 












All PyTorch model weights were used when initializing TFViTForImageClassification.

All the weights of TFViTForImageClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTForImageClassification for predictions without further training.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 294ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3

In [2]:
# Close the video file
video.release()
cv2.destroyAllWindows()