In [1]:
import cv2
import numpy as np
from imutils.video import VideoStream
import imutils
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model

In [2]:
# Load the YOLOv3 person detector; both files are resolved relative to the
# current working directory.
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
layer_names = net.getLayerNames()
# Ask OpenCV for the output layer names directly instead of indexing
# `layer_names` with the values from getUnconnectedOutLayers(): those values
# are plain ints in some OpenCV releases and 1-element arrays in others, so
# manual `layer_names[i - 1]` indexing is version-dependent.
output_layers = list(net.getUnconnectedOutLayersNames())

# Load COCO class names (one label per line; position in the list is the class id
# used to look up YOLO detections).
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]

# Load the ID card detector model from disk.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run elsewhere until it is pointed at a local copy of the model.
idCardNet = load_model(r"C:\Users\Deepak\Downloads\id_card detector\id_card_detect.model")

In [8]:
def detect_and_predict_id_card(frame, net, idCardNet):
    """Detect people in ``frame`` and annotate each one with an ID-card prediction.

    The frame is passed through the YOLO network ``net``; every detection whose
    best class is "person" with a score above 0.5 is kept, its box is shortened
    to the top 90% of its height, and overlapping boxes are removed with
    non-maximum suppression. Each surviving region is cropped, converted from
    BGR to RGB, resized to 224x224 and scored by ``idCardNet``.

    Args:
        frame: Image array with height and width as its first two dimensions.
            It is treated as BGR (the crop is converted with COLOR_BGR2RGB).
        net: OpenCV DNN network used for person detection.
        idCardNet: Model whose ``predict`` result is read as ``[0][0]`` and
            compared against 0.5 ("ID Card" when greater).

    Returns:
        The same ``frame`` object, with a labelled rectangle drawn in place for
        every person region that was classified.

    Note:
        Relies on the module-level ``classes`` list to map class ids to names.
    """
    # Only height and width are needed; the channel count is never used.
    height, width = frame.shape[:2]

    # Positional args: scale factor (~1/255), network input size, mean, swapRB.
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    # Query the output layers from the network that was passed in, so the
    # function does not depend on a global computed for a possibly different net.
    outs = net.forward(net.getUnconnectedOutLayersNames())

    boxes = []
    confidences = []
    for out in outs:
        for detection in out:
            # Entries 0-3 are the normalised box (cx, cy, w, h); 5+ are class scores.
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = float(scores[class_id])
            if confidence <= 0.5 or classes[class_id] != "person":
                continue

            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)

            # Keep only the top 90% of the box: from the head to just above the legs.
            boxes.append([x, y, w, int(0.9 * h)])
            confidences.append(confidence)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    # Flatten the result so it can be iterated the same way whatever shape
    # NMSBoxes returns (empty tuple, 1-D array or N x 1 array). Sorting keeps
    # the original ascending-index processing order.
    kept = sorted(int(i) for i in np.asarray(indexes).reshape(-1))

    for i in kept:
        x, y, w, h = boxes[i]

        # Clip the box to the frame instead of discarding it, so people who are
        # partly outside the image (common at the frame edges) are still checked.
        x0, y0 = max(x, 0), max(y, 0)
        x1, y1 = min(x + w, width), min(y + h, height)
        if x1 <= x0 or y1 <= y0:
            continue  # nothing of the box lies inside the frame

        # Extract the upper body region and prepare it for the classifier.
        upper_body = frame[y0:y1, x0:x1]
        upper_body_rgb = cv2.cvtColor(upper_body, cv2.COLOR_BGR2RGB)
        upper_body_resized = cv2.resize(upper_body_rgb, (224, 224))
        upper_body_array = img_to_array(upper_body_resized)
        upper_body_array = np.expand_dims(upper_body_array, axis=0)  # add batch axis

        # Assumes the model outputs a single value indicating the presence of an ID card.
        prediction = idCardNet.predict(upper_body_array)[0][0]

        has_id_card = prediction > 0.5
        label = "ID Card" if has_id_card else "No ID Card"
        color = (0, 255, 0) if has_id_card else (0, 0, 255)

        # The percentage is the raw model output, for either label.
        label = "{}: {:.2f}%".format(label, prediction * 100)
        # Keep the caption inside the image when the box touches the top edge.
        text_y = max(y0 - 10, 15)
        cv2.putText(frame, label, (x0, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
        cv2.rectangle(frame, (x0, y0), (x1, y1), color, 2)

    return frame

In [9]:
# Initialize the video stream
print("[INFO] starting video stream...")
vs = VideoStream(src=0).start()

# Run the capture/annotate/display loop inside try/finally so the camera and
# the preview window are released even if a frame raises part-way through.
try:
    while True:
        frame = vs.read()
        # The stream yields None when no frame could be grabbed (camera missing,
        # busy or disconnected); resizing None would raise, so stop cleanly.
        if frame is None:
            print("[INFO] no frame received from video stream, stopping...")
            break

        frame = imutils.resize(frame, width=400)
        frame = detect_and_predict_id_card(frame, net, idCardNet)

        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF

        # Quit on "q" or when the user closes the preview window.
        if key == ord("q") or cv2.getWindowProperty("Frame", cv2.WND_PROP_VISIBLE) < 1:
            break
finally:
    cv2.destroyAllWindows()
    # Stop the stream before releasing the capture it wraps.
    # NOTE(review): presumably stop() halts the background reader — confirm
    # against the installed imutils version.
    vs.stop()
    vs.stream.release()

[INFO] starting video stream...
