# S3FD Face Recognition

This is the file that runs S3FD for the face detection model. The model is implemented with Caffe.

In [33]:
import os
import cv2
import numpy as np

In [None]:

# Load model
prototxt_path = "../ssic_image-corpus/scripts/benchmark/sfd_models/models/VGGNet/WIDER_FACE/SFD_trained/deploy.prototxt"
caffemodel_path = "../ssic_image-corpus/scripts/benchmark/sfd_models/models/VGGNet/WIDER_FACE/SFD_trained/SFD.caffemodel"
net = cv2.dnn.readNetFromCaffe(prototxt_path, caffemodel_path)


image = cv2.imread("sample_image.png")                          # Read image (BGR)
(h, w) = image.shape[:2]                                # Original dimensions
blob = cv2.dnn.blobFromImage(
    image, 
    scalefactor=1.0,                     # No scaling (pixel range [0,255])
    size=(640, 640),                     # Input size defined in .prototxt
    mean=(104.0, 177.0, 123.0),         # Mean subtraction (BGR)
    swapRB=False,                        # OpenCV loads as BGR, no swap needed
    crop=False                           # Don't crop (resize while preserving aspect ratio)
)


net.setInput(blob)
detections = net.forward()          


for i in range(detections.shape[2]):
    confidence = detections[0, 0, i, 2]  # Confidence score
    if confidence > 0.5:                
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])  # Scale to original image
        (x1, y1, x2, y2) = box.astype("int")
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2) # Draw box


cv2.imshow("Output", image)
cv2.waitKey(0)

-1

In [13]:
for det in detections:
    print(det)

[[[0.         1.         0.99896395 ... 0.15981525 0.7683409  0.35251033]
  [0.         1.         0.05801576 ... 0.52500653 0.08713106 0.6077765 ]
  [0.         1.         0.05171308 ... 0.21051191 0.3296745  0.2721469 ]
  ...
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]]]


In [None]:
import numpy as np

def convert_to_yolo_format(detections, image_width, image_height, class_id=0, outputPath=None):

    yolo_detections = []
    
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > 0.5:  # Apply confidence threshold
            # Get normalized box coordinates (x1, y1, x2, y2)
            box = detections[0, 0, i, 3:7]
            
            # Convert to (x_center, y_center, width, height) and normalize
            x_center = (box[0] + box[2]) / 2.0  # (x1 + x2)/2
            y_center = (box[1] + box[3]) / 2.0  # (y1 + y2)/2
            width = box[2] - box[0]             # x2 - x1
            height = box[3] - box[1]            # y2 - y1
            
            # Append YOLO-format detection
            yolo_detections.append([
                class_id,
                x_center,      
                y_center,      
                height,        
            ])
    
    # Save YOLO-format detections to file
    # with open(outputPath, "w") as file:
    #     for detection in yolo_detections:
    #         file.write(" ".join([str(x) for x in detection]) + "\n")

    return yolo_detections

In [None]:

yolo_results = convert_to_yolo_format(detections, image.shape[1], image.shape[0])

# Example output for one face:
# [[0, 0.45, 0.6, 0.1, 0.15, 0.98]]  # [class, x_center, y_center, w, h, conf]

print(yolo_results)

[[0, 0.7115015983581543, 0.2561627924442291, 0.113678575, 0.19269508, 0.99896395]]


In [None]:
def yolo_to_bbox(yolo_det, img_width, img_height):

    x_center, y_center, w, h = yolo_det[1], yolo_det[2], yolo_det[3], yolo_det[4]
    x1 = int((x_center - w / 2) * img_width)
    y1 = int((y_center - h / 2) * img_height)
    x2 = int((x_center + w / 2) * img_width)
    y2 = int((y_center + h / 2) * img_height)
    return (x1, y1, x2, y2)

# Check if the converted box matches OpenCV's original detection
for det in yolo_results:
    bbox = yolo_to_bbox(det, image.shape[1], image.shape[0])
    print("YOLO-to-BBox:", bbox)

YOLO-to-BBox: (314, 57, 368, 126)


In [None]:
import cv2
import numpy as np

def yolo_to_bbox_and_plot(yolo_detections, image, color=(0, 255, 255), thickness=2):

    img_height, img_width = image.shape[:2]
    output_image = image.copy()
    
    for det in yolo_detections:
        class_id, x_center, y_center, w, h, confidence = det
        
        # Convert YOLO to pixel coordinates
        x1 = int((x_center - w / 2) * img_width)
        y1 = int((y_center - h / 2) * img_height)
        x2 = int((x_center + w / 2) * img_width)
        y2 = int((y_center + h / 2) * img_height)
        
        # Draw the bounding box
        cv2.rectangle(output_image, (x1, y1), (x2, y2), color, thickness)
        
        # Label with confidence
        label = f"Face: {confidence:.2f}"
        cv2.putText(output_image, label, (x1, y1 - 10), 
                   cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, thickness)
    
    return output_image

# Plot original OpenCV detections (green boxes)
opencv_image = image.copy()
for i in range(detections.shape[2]):
    confidence = detections[0, 0, i, 2]
    if confidence > 0.5:
        box = detections[0, 0, i, 3:7] * np.array([image.shape[1], image.shape[0], image.shape[1], image.shape[0]])
        (x1, y1, x2, y2) = box.astype("int")
        cv2.rectangle(opencv_image, (x1, y1), (x2, y2), (0, 255, 0), 2)  # Green

# Plot YOLO-format detections (yellow boxes)
yolo_image = yolo_to_bbox_and_plot(yolo_results, image)

# Combine side-by-side for comparison
combined_image = np.hstack([opencv_image, yolo_image])

# Display
cv2.imshow("Comparison: OpenCV (Green) vs YOLO (Yellow)", combined_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

### combining all work together

In [None]:

# Load model
prototxt_path = "../ssic_image-corpus/scripts/benchmark/sfd_models/models/VGGNet/WIDER_FACE/SFD_trained/deploy.prototxt"
caffemodel_path = "../ssic_image-corpus/scripts/benchmark/sfd_models/models/VGGNet/WIDER_FACE/SFD_trained/SFD.caffemodel"
net = cv2.dnn.readNetFromCaffe(prototxt_path, caffemodel_path)

In [None]:
def save_yolo_labels(yolo_results, txt_output_path):

    with open(txt_output_path, "w") as file:
        for detection in yolo_results:
            file.write(" ".join([str(x) for x in detection]) + "\n")

In [None]:
def runSingleFile(imagePath, outputPath, net):
    # --- 2. Load and Preprocess Image ---
    image = cv2.imread(imagePath)                     
    (h, w) = image.shape[:2]                           
    blob = cv2.dnn.blobFromImage(
        image, 
        scalefactor=1.0,                
        size=(640, 640),              
        mean=(104.0, 177.0, 123.0),        
        swapRB=False,                       
        crop=False                          
    )


    net.setInput(blob)

    yolo_results = convert_to_yolo_format(detections, image.shape[1], image.shape[0], class_id=0, outputPath=outputPath)

    # optionally plotting

    # opencv_image = image.copy()
    # for i in range(detections.shape[2]):
    #     confidence = detections[0, 0, i, 2]
    #     if confidence > 0.5:
    #         box = detections[0, 0, i, 3:7] * np.array([image.shape[1], image.shape[0], image.shape[1], image.shape[0]])
    #         (x1, y1, x2, y2) = box.astype("int")
    #         cv2.rectangle(opencv_image, (x1, y1), (x2, y2), (0, 255, 0), 2)  # Green

    # yolo_image = yolo_to_bbox_and_plot(yolo_results, image)

    # combined_image = np.hstack([opencv_image, yolo_image])

    # # Display
    # cv2.imshow("Comparison: OpenCV (Green) vs YOLO (Yellow)", combined_image)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()

In [None]:
# Initialize the network (do this once globally to avoid reloading for each image)
prototxt_path = "../ssic_image-corpus/scripts/benchmark/sfd_models/models/VGGNet/WIDER_FACE/SFD_trained/deploy.prototxt"
caffemodel_path = "../ssic_image-corpus/scripts/benchmark/sfd_models/models/VGGNet/WIDER_FACE/SFD_trained/SFD.caffemodel"
net = cv2.dnn.readNetFromCaffe(prototxt_path, caffemodel_path)


In [None]:

def convert_to_yolo_format(detections, image_width, image_height, class_id=0, outputPath=None):

    yolo_detections = []
    
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > 0.5:  # Apply confidence threshold
            # Get normalized box coordinates (x1, y1, x2, y2)
            box = detections[0, 0, i, 3:7]
            
            # Convert to (x_center, y_center, width, height) and normalize
            x_center = (box[0] + box[2]) / 2.0  # (x1 + x2)/2
            y_center = (box[1] + box[3]) / 2.0  # (y1 + y2)/2
            width = box[2] - box[0]             # x2 - x1
            height = box[3] - box[1]            # y2 - y1
            
            # Append YOLO-format detection
            yolo_detections.append([
                class_id,
                x_center,        
                y_center,        
                width,           
                height,          
                confidence      
            ])
    
    # Save YOLO-format detections to file
    # with open(outputPath, "w") as file:
    #     for detection in yolo_detections:
    #         file.write(" ".join([str(x) for x in detection]) + "\n")

    return yolo_detections

def save_yolo_labels(yolo_results, txt_output_path):

    with open(txt_output_path, "w") as file:
        for detection in yolo_results:
            file.write(" ".join([str(x) for x in detection]) + "\n")

def runSingleFile(imagePath, outputPath, net):
    try:
        image = cv2.imread(imagePath)
        if image is None:
            print(f"Warning: Could not read image {imagePath}")
            return None
            
        (h, w) = image.shape[:2]
        blob = cv2.dnn.blobFromImage(
            image, 
            scalefactor=1.0,
            size=(640, 640),
            mean=(104.0, 177.0, 123.0),
            swapRB=False,
            crop=False
        )

        net.setInput(blob)
        detections = net.forward()
        
        # Get base filename without extension
        base_name = os.path.splitext(os.path.basename(imagePath))[0]
        # txt_output_path = os.path.join(outputPath, f"{base_name}.txt")
        
        yolo_results = convert_to_yolo_format(detections, w, h)

        print(outputPath)

        save_yolo_labels(yolo_results, outputPath)
        
        return f"Processed {imagePath} -> {outputPath}"
        
    except Exception as e:
        return f"Error processing {imagePath}: {str(e)}"

def process_folder(input_folder, output_folder, num_processes=None):
    allImage = os.listdir(input_folder)
    # print(allImage)
    
    for x in allImage:
        originalName, ext = os.path.splitext(x)
        imagePath = os.path.join(input_folder, x)
        outputPath = os.path.join(output_folder, originalName + ".txt")
        runSingleFile(imagePath, outputPath, net)
        # print(f"Processed {x}")

In [None]:
input_folder = "../ssic_image-corpus/data_phase-4_consensus/face/image_only/"
output_folder = "../ssic_image-corpus/scripts/benchmark/s3fd_model_output"
process_folder(input_folder, output_folder)