In [11]:
import cv2
import numpy as np
from ultralytics import YOLO
from tensorflow.keras.models import load_model

# Parameters
FRAME_SIZE = (224, 224)  
MAX_SEQ_LENGTH = 20      
MAX_DETECTIONS = 2
CONFIDENCE_THRESHOLD = 0.8
cnn_rnn_model = load_model('suspicious_detection5.keras')  
yolo_model = YOLO('yolov8n.pt')

def preprocess_frame_with_yolo(frame, yolo_model, max_detections=MAX_DETECTIONS):
    frame_resized = cv2.resize(frame, FRAME_SIZE)
    results = yolo_model.predict(frame_resized, verbose=False)

    features = np.zeros((4096,), dtype=np.float32)  # Initialize a zero vector of size 4096
    boxes = []

    if results[0].boxes:
        detections = results[0].boxes.data.cpu().numpy()  # Extract detections
        for det in detections[:max_detections]:
            x1, y1, x2, y2, conf, cls = det
            boxes.append((int(x1), int(y1), int(x2), int(y2)))

        # Flatten the detections and truncate/pad to fit the feature vector size
        detection_features = detections.flatten()
        feature_length = min(len(detection_features), 4096)
        features[:feature_length] = detection_features[:feature_length]  # Fill available data

    return features, boxes

# Real-time prediction function
def predict_suspicious_behavior_realtime(yolo_model, cnn_rnn_model):
    video_capture = cv2.VideoCapture(0)  # Webcam or video feed
    frame_buffer = []  # Initialize frame buffer

    while True:
        ret, frame = video_capture.read()
        if not ret:
            break

        # Preprocess frame with YOLO
        frame_features, boxes = preprocess_frame_with_yolo(frame, yolo_model)

        # Add features to buffer
        frame_buffer.append(frame_features)
        if len(frame_buffer) > MAX_SEQ_LENGTH:
            frame_buffer.pop(0)  # Remove oldest frame if buffer exceeds length

        # Initialize default label
        label = "Normal"  # Default label when no prediction is made
        prediction_value = 0.0  # Default prediction value

        # Prediction when buffer is ready
        if len(frame_buffer) == MAX_SEQ_LENGTH:
            input_sequence = np.expand_dims(frame_buffer, axis=0)  # Add batch dimension
            input_sequence = input_sequence.reshape((1, MAX_SEQ_LENGTH, 4096))  # Ensure correct shape
            prediction = cnn_rnn_model.predict(input_sequence, verbose=0)
            prediction_value = prediction[0][0] if prediction.ndim > 1 else prediction[0]

            # Determine label
            label = "Suspicious" if prediction_value > CONFIDENCE_THRESHOLD else "Normal"

        # Draw bounding boxes and label on the frame
        for (x1, y1, x2, y2) in boxes:
            color = (0, 0, 255) if label == "Suspicious" else (0, 255, 0)
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, f'{label} ({prediction_value:.2f})', (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        # Show frame
        cv2.imshow('Real-time Suspicious Behavior Detection', frame)

        # Exit on 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    cv2.destroyAllWindows()

# Run real-time prediction
predict_suspicious_behavior_realtime(yolo_model, cnn_rnn_model)

In [109]:
def preprocess_frame_with_yolo(frame, yolo_model, max_detections=MAX_DETECTIONS):
    frame_resized = cv2.resize(frame, FRAME_SIZE)
    results = yolo_model.predict(frame_resized, verbose=False)

    features = np.zeros((4096,), dtype=np.float32)  # Initialize a zero vector of size 4096
    boxes = []

    if results[0].boxes:
        detections = results[0].boxes.data.cpu().numpy()  # Extract detections
        for det in detections[:max_detections]:
            x1, y1, x2, y2, conf, cls = det
            boxes.append((int(x1), int(y1), int(x2), int(y2)))

        # Flatten the detections and truncate/pad to fit the feature vector size
        detection_features = detections.flatten()
        feature_length = min(len(detection_features), 4096)
        features[:feature_length] = detection_features[:feature_length]  # Fill available data

    return features, boxes


# Behavior labels map
labels_map = {
    'V_': [1, 0, 0, 0, 0, 0, 0, 0, 0],
    'RO': [0, 1, 0, 0, 0, 0, 0, 0, 0],
    'SH': [0, 0, 1, 0, 0, 0, 0, 0, 0],
    'VA': [0, 0, 0, 1, 0, 0, 0, 0, 0],   
    'CL': [0, 0, 0, 0, 1, 0, 0, 0, 0],
    'ME': [0, 0, 0, 0, 0, 1, 0, 0, 0],  
    'SI': [0, 0, 0, 0, 0, 0, 1, 0, 0],   
    'ST': [0, 0, 0, 0, 0, 0, 0, 1, 0],  
    'WA': [0, 0, 0, 0, 0, 0, 0, 0, 1]
}

# List of suspicious behaviors
suspicious_behaviors = {'V_', 'RO', 'SH', 'VA'}

# Real-time prediction function with behavior labeling
def predict_suspicious_behavior_realtime(yolo_model, cnn_rnn_model):
    video_capture = cv2.VideoCapture(0)  # Webcam or video feed
    frame_buffer = []  # Initialize frame buffer

    while True:
        ret, frame = video_capture.read()
        if not ret:
            break

        # Preprocess frame with YOLO
        frame_features, boxes = preprocess_frame_with_yolo(frame, yolo_model)

        # Add features to buffer
        frame_buffer.append(frame_features)
        if len(frame_buffer) > MAX_SEQ_LENGTH:
            frame_buffer.pop(0)  # Remove oldest frame if buffer exceeds length

        # Initialize default label
        label = "Normal"
        behavior = "NV"  # Default behavior for Non-Violence
        prediction_value = 0.0  # Default prediction value

        # Prediction when buffer is ready
        if len(frame_buffer) == MAX_SEQ_LENGTH:
            input_sequence = np.expand_dims(frame_buffer, axis=0)  # Add batch dimension
            input_sequence = input_sequence.reshape((1, MAX_SEQ_LENGTH, 4096))  # Ensure correct shape
            predictions = cnn_rnn_model.predict(input_sequence, verbose=0)
            
            # Extract behavior and confidence
            behavior_index = np.argmax(predictions[0])  # Get the predicted label index
            behavior = list(labels_map.keys())[behavior_index]  # Map index to behavior
            prediction_value = predictions[0][behavior_index]

            # Determine if the behavior is suspicious based on threshold
            if behavior in suspicious_behaviors and prediction_value > CONFIDENCE_THRESHOLD:
                label = "Suspicious"
            else:
                label = "Normal"

        # Draw bounding boxes and label on the frame
        for (x1, y1, x2, y2) in boxes:
            color = (0, 0, 255) if label == "Suspicious" else (0, 255, 0)
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, f'{behavior} ({label}: {prediction_value:.2f})', (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        # Show frame
        cv2.imshow('Real-time Suspicious Behavior Detection', frame)

        # Exit on 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    cv2.destroyAllWindows()

# Run real-time prediction
predict_suspicious_behavior_realtime(yolo_model, cnn_rnn_model)

In [39]:
import cv2
import numpy as np
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input

# Initialize VGG16 model for feature extraction
vgg16_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
feature_extractor = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Define frame size and other constants
FRAME_SIZE = (640, 480)  # Frame resize for YOLO
VGG_FRAME_SIZE = (224, 224)  # Frame size for VGG16 input
MAX_SEQ_LENGTH = 20  # Buffer size for sequential frames
CONFIDENCE_THRESHOLD = 0.6  # Prediction confidence threshold

# Behavior labels map
labels_map = {
    'V_': [1, 0, 0, 0, 0, 0, 0, 0, 0],
    'RO': [0, 1, 0, 0, 0, 0, 0, 0, 0],
    'SH': [0, 0, 1, 0, 0, 0, 0, 0, 0],
    'VA': [0, 0, 0, 1, 0, 0, 0, 0, 0],   
    'CL': [0, 0, 0, 0, 1, 0, 0, 0, 0],
    'ME': [0, 0, 0, 0, 0, 1, 0, 0, 0],  
    'SI': [0, 0, 0, 0, 0, 0, 1, 0, 0],   
    'ST': [0, 0, 0, 0, 0, 0, 0, 1, 0],  
    'WA': [0, 0, 0, 0, 0, 0, 0, 0, 1]
}

# List of suspicious behaviors
suspicious_behaviors = {'V_', 'RO', 'SH', 'VA'}

# Function to preprocess frame with VGG16
def preprocess_frame_with_vgg16(frame):
    frame_resized = cv2.resize(frame, VGG_FRAME_SIZE)  # Resize frame to VGG16 input size
    frame_preprocessed = preprocess_input(np.expand_dims(frame_resized, axis=0))  # Preprocess frame
    feature_map = feature_extractor.predict(frame_preprocessed)  # Extract VGG16 features
    
    # Flatten the feature map to a 1D vector
    flattened_features = feature_map.flatten()  # Flatten to a 1D vector
    
    # Optionally, truncate or pad the vector to a fixed size (4096 in this case)
    feature_length = min(len(flattened_features), 4096)
    flattened_features = flattened_features[:feature_length]
    
    # Ensure the feature vector is exactly 4096 in length
    return flattened_features

# YOLOv8 Human Detection function
def preprocess_frame_with_yolo(frame, yolo_model, max_detections=2):
    frame_resized = cv2.resize(frame, FRAME_SIZE)
    results = yolo_model.predict(frame_resized, verbose=False)

    boxes = []
    if results[0].boxes:
        detections = results[0].boxes.data.cpu().numpy()  # Extract detections
        for det in detections:
            x1, y1, x2, y2, conf, cls = det
            # Only consider detections classified as 'human' (if class IDs are available)
            if int(cls) == 0:  # Assuming 0 is the 'human' class ID in YOLO
                if conf >= 0.6:
                    boxes.append((int(x1), int(y1), int(x2), int(y2),conf))

    # Sort boxes first by area (width * height) and then by confidence
    boxes_sorted = sorted(boxes, key=lambda b: ((b[2] - b[0]) * (b[3] - b[1]), b[4]), reverse=True)

    # Return the top `max_detections` bounding boxes (default to 1 if not specified)
    return boxes_sorted[:max_detections]

# Real-time prediction function with behavior labeling using both VGG16 and YOLOv8
def predict_suspicious_behavior_realtime(yolo_model, cnn_rnn_model):
    video_capture = cv2.VideoCapture(0)  # Webcam or video feed
    video_capture.set(cv2.CAP_PROP_FPS, 60)
    frame_buffer = []  # Initialize frame buffer
    
    while True:
        ret, frame = video_capture.read()
        if not ret:
            break

        # 1. Preprocess frame with YOLO (for human detection)
        boxes = preprocess_frame_with_yolo(frame, yolo_model)

        # 2. Preprocess frame with VGG16 (for behavior prediction)
        frame_features = preprocess_frame_with_vgg16(frame)

        # Add features to buffer
        frame_buffer.append(frame_features)
        if len(frame_buffer) > MAX_SEQ_LENGTH:
            frame_buffer.pop(0)  # Remove oldest frame if buffer exceeds length

        # Initialize default label
        label = "Normal"
        behavior = "ST"  # Default behavior for Non-Violence
        prediction_value = 0.0  # Default prediction value

        # Prediction when buffer is ready
        if len(frame_buffer) == MAX_SEQ_LENGTH:
            input_sequence = np.expand_dims(frame_buffer, axis=0)  # Add batch dimension
            input_sequence = input_sequence.reshape((1, MAX_SEQ_LENGTH, 4096))  # Ensure correct shape
            predictions = cnn_rnn_model.predict(input_sequence, verbose=0)
            
            # Extract behavior and confidence
            behavior_index = np.argmax(predictions[0])  # Get the predicted label index
            behavior = list(labels_map.keys())[behavior_index]  # Map index to behavior
            prediction_value = predictions[0][behavior_index]

            # Determine if the behavior is suspicious based on threshold
            if behavior in suspicious_behaviors and prediction_value > CONFIDENCE_THRESHOLD:
                label = "Suspicious"
            else:
                label = "Normal"

        # 3. Draw bounding boxes and label on the frame
        for (x1, y1, x2, y2, conf) in boxes:
            color = (0, 0, 255) if label == "Suspicious" else (0, 255, 0)
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, f'{behavior} ({label}: {prediction_value:.2f})', 
                        (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        # Show frame with bounding boxes and labels
        cv2.imshow('Real-time Suspicious Behavior Detection', frame)
        fps = video_capture.get(cv2.CAP_PROP_FPS)
        print(f"Actual FPS: {fps}")
        # Exit on 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    cv2.destroyAllWindows()

# Run real-time prediction
predict_suspicious_behavior_realtime(yolo_model, cnn_rnn_model)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 549ms/step
Actual FPS: 30.0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 224ms/step
Actual FPS: 30.0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 223ms/step
Actual FPS: 30.0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 218ms/step
Actual FPS: 30.0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 212ms/step
Actual FPS: 30.0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 206ms/step
Actual FPS: 30.0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 224ms/step
Actual FPS: 30.0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 215ms/step
Actual FPS: 30.0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 207ms/step
Actual FPS: 30.0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 211ms/step
Actual FPS: 30.0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 210ms/step
Actual FPS: 30.0
[1m1/1[0

In [36]:
cnn_rnn_model.summary()