# Social Distancing and Mask Detection

In [1]:
# Import necessary libraries
import cv2
import torch
import numpy as np 
import face_detection
from tensorflow.keras.models import load_model
from focal_loss import BinaryFocalLoss
from scipy.spatial import distance as dist
from tensorflow.keras.applications.mobilenet import preprocess_input as pre_mobile
from face_detection.dsfd.detect import DSFDDetector
from ultralytics import YOLO




In [2]:
# Keras model used later to classify face crops (loaded with default
# custom objects and compiled on load)
mask_detector = load_model(
    "models/mask_detector_model.keras",
    custom_objects=None,
    compile=True,
)

# YOLO detector, weights read from models/yolov8n.pt
model = YOLO("models/yolov8n.pt")

# Text file listing the class names, one name per line
classesFile = "other/coco.names"

# Slurp the file, drop trailing newlines, and split into one entry per line
classes = None
with open(classesFile, 'rt') as names_file:
    raw_names = names_file.read()
    classes = raw_names.rstrip('\n').split('\n')

In [1]:
# Initialize the DSFD face detector; it runs on the GPU when CUDA is
# available and falls back to the CPU otherwise.
detector = DSFDDetector(confidence_threshold=0.6,
                        nms_iou_threshold=0.5,
                        device='cuda' if torch.cuda.is_available() else 'cpu',
                        max_resolution=1080,
                        fp16_inference=False,
                        clip_boxes=True)

# Open the video file and fail loudly if it cannot be read; otherwise the
# processing loop would be skipped silently and an empty output written.
input_video_path = "./media/testvideo.mp4"
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    raise OSError(f"Could not open input video: {input_video_path}")

# Read the input stream properties.
# The frame rate is kept as a float: truncating e.g. 29.97 to 29 would make
# the written video play at a different speed than the source.
video_fps = cap.get(cv2.CAP_PROP_FPS)
if video_fps <= 0:
    # Some containers/backends report 0 FPS; use a sane default instead of
    # handing an invalid rate to the writer.
    video_fps = 30.0
video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
video_n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

# Initialize Output Video Stream.
# 'mp4v' is used because the output container is .mp4; the 'XVID' tag
# belongs to AVI containers.
output_video_path = "./media/video_2_processed.mp4"
out_stream = cv2.VideoWriter(
    output_video_path,
    cv2.VideoWriter_fourcc(*"mp4v"),
    video_fps,
    (video_width, video_height))
if not out_stream.isOpened():
    # Do not leave the capture open when the writer cannot be created
    cap.release()
    raise OSError(f"Could not open output video for writing: {output_video_path}")

# Define input dimensions
# NOTE(review): these two values are not referenced by the processing loop
# in this notebook; kept in case another cell relies on them.
input_width = 1920
input_height = 1080

# Process the video frame by frame.
#
# For every frame:
#   1. detect people with YOLO (class id 0, rounded confidence >= 0.4),
#   2. flag every pair whose centroid distance is below either person's
#      safe distance (derived from the bounding-box height),
#   3. run the face detector on each flagged person and keep the largest face,
#   4. run the mask classifier on that face and mark the person as violating
#      when the first model output exceeds 0.89.
#
# All annotations are drawn on `img2`; `img` stays untouched so that every
# crop handed to the detectors/classifier is free of drawn rectangles.

# NOTE: the third positional argument of cv2.GaussianBlur is sigmaX, not the
# border type. A sigma of 4 (the numeric value of cv2.BORDER_DEFAULT) is kept
# so the blur strength this pipeline has been running with does not change.
blur_sigma = 4

while cap.isOpened():

    # Read a frame from the video capture
    ret, img = cap.read()

    # Stop at the end of the video or when a frame could not be decoded
    if not ret or img is None:
        break

    # Copy of the frame that receives all drawings
    img2 = img.copy()

    # Per-frame state
    violate = []             # [top_left, bottom_right] of people too close and flagged by the classifier
    centroids = []           # (x, y) centre of each detected person
    coordinates_people = []  # [top_left, bottom_right, safe_distance] per person
    people_too_close = []    # [top_left, bottom_right] of people closer than the safe distance

    # Use YOLOv8 to detect people in the frame
    results = model.predict(img)
    result = results[0]

    for box in result.boxes:
        class_id = box.cls[0].item()
        # The confidence is rounded to two decimals before thresholding, so
        # values marginally below 0.4 may round up and still pass.
        conf = round(box.conf[0].item(), 2)

        # Keep only sufficiently confident detections of class 0 (person)
        if conf < 0.4 or class_id != 0:
            continue

        # Bounding box corners, rounded to integer pixel coordinates
        x0, y0, x1, y1 = [round(v) for v in box.xyxy[0].tolist()]

        # Safe distance in pixels, scaled from the bounding-box height
        height = int(y1 - y0)
        safe_distance = int((2 * height) / 1.7)

        coordinates_people.append([(x0, y0), (x1, y1), safe_distance])
        centroids.append((int((x0 + x1) / 2), int((y0 + y1) / 2)))

    # Distances only make sense with at least two people in the frame
    if len(coordinates_people) >= 2:

        # Pairwise Euclidean distances between all person centroids
        euclidean_distance = dist.cdist(centroids, centroids, metric="euclidean")

        # A pair is too close when the distance is below either person's
        # safe distance; each person is recorded once.
        for i in range(len(coordinates_people)):
            for j in range(i + 1, len(coordinates_people)):
                pair_distance = euclidean_distance[i, j]
                if pair_distance < coordinates_people[i][2] or pair_distance < coordinates_people[j][2]:
                    for k in (i, j):
                        person_box = [coordinates_people[k][0], coordinates_people[k][1]]
                        if person_box not in people_too_close:
                            people_too_close.append(person_box)

        # Detect faces only in people who are too close to each other
        for person in people_too_close:
            (px0, py0), (px1, py1) = person

            # Mark the person as "too close" on the output frame
            cv2.rectangle(img2, person[0], person[1], (0, 255, 255), 3)

            # Clamp the ROI origin: a negative index would wrap around in
            # numpy slicing and select the wrong region.
            roi_x0 = max(px0, 0)
            roi_y0 = max(py0, 0)
            person_bgr = img[roi_y0:py1, roi_x0:px1]
            if person_bgr.size == 0:
                # Degenerate box, nothing to search for faces in
                continue

            # Blur the ROI to reduce noise before face detection
            person_bgr = cv2.GaussianBlur(person_bgr, (5, 5), blur_sigma)

            # The DSFD detector takes RGB input, while OpenCV frames are BGR.
            # cvtColor also returns a contiguous array.
            person_rgb = cv2.cvtColor(person_bgr, cv2.COLOR_BGR2RGB)

            # Detect faces within the ROI
            detections = detector.detect(person_rgb, shrink=1.0)

            # Track the largest face found on this person
            biggest_face = None
            biggest_area = 0

            for raw_detection in detections:
                detection = np.array(raw_detection)

                # Replace any negative values in the detection with 0
                detection = np.where(detection < 0, 0, detection)

                # Face box translated from ROI coordinates to frame coordinates
                face_x0 = roi_x0 + int(detection[0])
                face_y0 = roi_y0 + int(detection[1])
                face_x1 = roi_x0 + int(detection[2])
                face_y1 = roi_y0 + int(detection[3])
                face_area = (face_x1 - face_x0) * (face_y1 - face_y0)

                # Draw every detected face on the annotation copy only, so the
                # crops taken from `img` below stay clean.
                cv2.rectangle(img2, (face_x0, face_y0), (face_x1, face_y1), (55, 0, 255), 3)

                # Strict comparison keeps the first face on ties
                if face_area > biggest_area:
                    biggest_area = face_area
                    biggest_face = ((face_x0, face_y0), (face_x1, face_y1))

            # No usable face on this person: nothing to classify
            if biggest_face is None:
                continue

            (fx0, fy0), (fx1, fy1) = biggest_face

            # Highlight the face that will be classified
            cv2.rectangle(img2, biggest_face[0], biggest_face[1], (255, 255, 0), 3)

            # Crop the face from the untouched frame and flip BGR -> RGB
            face_rgb = img[fy0:fy1, fx0:fx1, ::-1]
            if face_rgb.size == 0:
                continue

            try:
                # Resize to the 224x224 crop fed to the model, apply the
                # MobileNet preprocessing, blur, and add the batch dimension
                face_arr = cv2.resize(face_rgb, (224, 224), interpolation=cv2.INTER_NEAREST)
                face_arr = pre_mobile(face_arr)
                face_arr = cv2.GaussianBlur(face_arr, (5, 5), blur_sigma)
                face_arr = np.expand_dims(face_arr, axis=0)

                # Predict if the person is wearing a mask
                check_if_mask = mask_detector.predict(face_arr)
            except (cv2.error, ValueError) as exc:
                # Best effort: a crop that cannot be processed is skipped,
                # but the reason is reported instead of being swallowed.
                print(f"Skipping face crop {biggest_face}: {exc}")
                continue

            # The first model output is presumably the "no mask" score;
            # confirm against the label order used to train the model.
            if check_if_mask[0][0] > 0.89:
                violate.append(person)

                # Mark the top-left corner of the offending face
                cv2.circle(img2, biggest_face[0], 3, (0, 255, 0), 2)

    # Draw red bounding box if violating the rules
    for top_left, bottom_right in violate:
        cv2.rectangle(img2, top_left, bottom_right, (0, 0, 255), 3)

    # Write the annotated frame to the output video stream
    out_stream.write(img2)

    # Display the processed frame in a window named "Video"
    cv2.imshow("Video", img2)

    # If 'q' was pressed, exit the loop (mask to the low byte because some
    # platforms set additional high bits in the key code)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
        
# Shut down: release the video capture first, then the output video stream
for stream in (cap, out_stream):
    stream.release()

# Finally close every OpenCV window that was opened for display
cv2.destroyAllWindows()

