In [3]:
!export LD_LIBRARY_PATH=/usr/local/lib64:$LD_LIBRARY_PATH

In [1]:
import numpy as np
import cv2
import face_recognition

In [2]:
#Read image
image_path = "/home/samuel/Downloads/istockphoto-1166584467-612x612.jpg"
weights = "/home/samuel/Documents/Bloverse/bloverse-projects/IDK/object_detection/object_detection_app/yolov3_files/yolov3.weights"
config = "/home/samuel/Documents/Bloverse/bloverse-projects/IDK/object_detection/object_detection_app/yolov3_files/yolov3.cfg"

image = cv2.imread(image_path)

width = image.shape[1]
height = image.shape[0]
scale = 0.00392

#read class names from text file
classes = "/home/samuel/Documents/Bloverse/bloverse-projects/IDK/object_detection/object_detection_app/yolov3_files/yolov3.txt"
with open(classes, 'r') as f:
    classes = [line.strip() for line in f.readlines()]

# Generate different BBox colors for different classes
COLORS = np.random.uniform(
    0, 255, size=(len(classes), 3))

# Read pretrained model and config file
net = cv2.dnn.readNet(weights, config)

# Creatr input blob
blob = cv2.dnn.blobFromImage(
    image, scale, (416, 416), (0, 0, 0),
    True, crop=False)

# Set input blob for the network
net.setInput(blob)

In [3]:
def get_output_layers(net):
    # Get the output layer names in the achitecture

    layer_names = net.getLayerNames()
    output_layers = [layer_names[i-1] for i in net.getUnconnectedOutLayers()]

    return output_layers

def draw_bounding_box(
    img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    # Function to draw bounding box on the detected object 
    # with the class name

    label = str(classes[class_id])
    color = COLORS[class_id]
    cv2.rectangle(img, (x,y), (x_plus_w, y_plus_h), color, 2)
    cv2.putText(
        img, label, (x, y), cv2.FONT_HERSHEY_SIMPLEX,
        0.5, color, 2)

# Run inference through the network and get predictions from output layer
outs = net.forward(get_output_layers(net))

In [4]:
# Initialization
class_ids = []
confidences = []
boxes = []
conf_threshold = 0.7
nums_threshold = 0.4

# From each detection from the output layer get the 
# confidence, class_id, bounding_box params

for out in outs:
    for detection in out:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > conf_threshold:
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)

            w = int(detection[2] * width)
            h = int(detection[3] * height)
            x = center_x - w/2
            y = center_y - h/2
            class_ids.append(class_id)
            confidences.append(float(confidence))
            boxes.append([x, y, w, h])


In [11]:
len(class_ids)

14

In [5]:
# Applying Non-max Suppresion
indices = cv2.dnn.NMSBoxes(
    boxes, confidences, conf_threshold, nums_threshold)

labels_class = []
labels_confidence = []
labels_bbox = []
for i in indices:
    #i = i[0]
    box = boxes[i]
    x = box[0]
    y = box[1]
    w = box[2]
    h = box[3]
    
    labels_confidence.append(confidences[i])
    labels_bbox.append([x, y, w, h])
    labels_class.append(str(classes[class_ids[i]]))
    
    draw_bounding_box(
        image, class_ids[i], confidences[i], round(x),
        round(y), round(x+w), round(y+h))

In [21]:
import face_recognition
image = face_recognition.load_image_file(image_path)
face_locations = face_recognition.face_locations(image)

In [25]:
if len(labels_class) != len(face_locations):    
    # i.e there are some objects except from a person without face
    diff = len(labels_class) - len(face_locations)
    n = np.append([], np.repeat("Null", diff))
    face_locations = [*face_locations, *n]

all_image_metadata = {}
count = 1
for i in zip(labels_class, labels_confidence, labels_bbox, face_locations):
    label = i[0]
    confidence = i[1]
    object_bbox = i[2]
    face_bbox = i[3]
    image_metadata = {
        "label": label,
        "confidence": confidence,
        "object_bbox": object_bbox,
    }
    face_metadata = {
        "face_bbox": face_bbox
    }
    if face_bbox != np.nan:
        all_image_metadata.update(
            {
                "object_metadata_" + str(count) : image_metadata,
                "face_dict": face_metadata
            })
        count += 1
    else:
        all_image_metadata.update(
            {"object_metadata_" + str(count) : image_metadata})

In [24]:
for i in face_locations:
    if i == "Null":
        print (i)

Null
Null


In [26]:
all_image_metadata

{'object_metadata_1': {'label': 'person',
  'confidence': 0.9998630285263062,
  'object_bbox': [262.5, 34.0, 219, 380]},
 'face_dict': {'face_bbox': 'Null'},
 'object_metadata_2': {'label': 'person',
  'confidence': 0.9986932873725891,
  'object_bbox': [95.0, 93.5, 194, 309]},
 'object_metadata_3': {'label': 'cell phone',
  'confidence': 0.9773884415626526,
  'object_bbox': [351.0, 249.0, 32, 26]},
 'object_metadata_4': {'label': 'cell phone',
  'confidence': 0.7303444743156433,
  'object_bbox': [161.0, 253.0, 34, 20]}}

In [9]:
if len(labels_class) != len(face_locations):    
    # i.e there are some objects except from a person without face
    diff = len(labels_class) - len(face_locations)
    n = np.append([], np.repeat(np.nan, diff))
    face_locations = [*face_locations, *n]

count = 1
all_image_metadata = {}
for i in zip(labels_class, labels_confidence, labels_bbox, face_locations):
    label = i[0]
    confidence = i[1]
    object_bbox = i[2]
    face_bbox = i[3]

    image_metadata = {
        "label": label,
        "confidence": confidence,
        "object_bbox": object_bbox,
        "face_dict": {
            "face_bbox": face_bbox
        }
    }

    all_image_metadata.update(
        {"object_metadata_" + str(count) : image_metadata})

    count += 1


In [10]:
all_image_metadata

{'object_metadata_1': {'label': 'person',
  'confidence': 0.9998630285263062,
  'object_bbox': [262.5, 34.0, 219, 380],
  'face_dict': {'face_bbox': (129, 246, 191, 183)}},
 'object_metadata_2': {'label': 'person',
  'confidence': 0.9986932873725891,
  'object_bbox': [95.0, 93.5, 194, 309],
  'face_dict': {'face_bbox': (88, 370, 163, 295)}},
 'object_metadata_3': {'label': 'cell phone',
  'confidence': 0.9773884415626526,
  'object_bbox': [351.0, 249.0, 32, 26],
  'face_dict': {'face_bbox': nan}},
 'object_metadata_4': {'label': 'cell phone',
  'confidence': 0.7303444743156433,
  'object_bbox': [161.0, 253.0, 34, 20],
  'face_dict': {'face_bbox': nan}}}

In [27]:
{'type': 'primary_object',
  'label': 'Person',
  'confidence': 0.8641915321350098,
  'object_bbox': [329, 79, 1032, 663],
  'obj_perc': 0.45, # percentage of the image covered by the object
  'face_dict': {'face_bbox': [585, 93, 814, 359],'face_perc': 0.07}}

['person', 'person', 'cell phone', 'cell phone']


In [19]:
cv2.imwrite("obj_detect.jpg", image)

True

In [3]:

""" face_loc = []
for i in range(0, len(face_locations)):
    _, _, width, height= face_locations[i]
    # These number are choosen based on teh
    if (width < 1000) & (height < 1000):
        real_face = face_locations[i]
        top, right, buttom, left = real_face """


' face_loc = []\nfor i in range(0, len(face_locations)):\n    _, _, width, height= face_locations[i]\n    # These number are choosen based on teh\n    if (width < 1000) & (height < 1000):\n        real_face = face_locations[i]\n        top, right, buttom, left = real_face '

In [4]:
face_locations

[(129, 246, 191, 183), (88, 370, 163, 295)]

In [5]:
import requests

url = "http://192.168.43.159:5000/detect"

payload={}
files=[
  ('image',('istockphoto-1166584467-612x612.jpg',open('/home/samuel/Downloads/istockphoto-1166584467-612x612.jpg','rb'),'image/jpeg'))
]
headers = {}

response = requests.request("POST", url, headers=headers, data=payload, files=files)

print(response.text)


In [7]:
img_count

248327

YOLO-V5

In [14]:
import cv2
import numpy as np
import face_recognition

In [2]:
# Constants.
INPUT_WIDTH = 640
INPUT_HEIGHT = 640
SCORE_THRESHOLD = 0.5
NMS_THRESHOLD = 0.45
CONFIDENCE_THRESHOLD = 0.45
# Text parameters.
FONT_FACE = cv2.FONT_HERSHEY_SIMPLEX
FONT_SCALE = 0.7
THICKNESS = 1
# Colors.
BLACK  = (0,0,0)
BLUE   = (255,178,50)
YELLOW = (0,255,255)


In [3]:
def draw_label(im, label, x, y):
    """Draw text onto image at location."""
    # Get text size.
    text_size = cv2.getTextSize(label, FONT_FACE, FONT_SCALE, THICKNESS)
    dim, baseline = text_size[0], text_size[1]
    # Use text size to create a BLACK rectangle.
    cv2.rectangle(im, (x,y), (x + dim[0], y + dim[1] + baseline), (0,0,0), cv2.FILLED);
    # Display text inside the rectangle.
    cv2.putText(im, label, (x, y + dim[1]), FONT_FACE, FONT_SCALE, YELLOW, THICKNESS, cv2.LINE_AA)

In [4]:
def pre_process(input_image, net):
      # Create a 4D blob from a frame.
      blob = cv2.dnn.blobFromImage(input_image, 1/255,  (INPUT_WIDTH, INPUT_HEIGHT), [0,0,0], 1, crop=False)

      # Sets the input to the network.
      net.setInput(blob)

      # Run the forward pass to get output of the output layers.
      outputs = net.forward(net.getUnconnectedOutLayersNames())
      return outputs

In [7]:
def post_process(input_image, outputs):
    # Lists to hold respective values while unwrapping.
    labels_class = []
    labels_confidence = []
    labels_bbox = []
    class_ids = []
    confidences = []
    boxes = []
    # Rows.
    rows = outputs[0].shape[1]
    image_height, image_width = input_image.shape[:2]
    # Resizing factor.
    x_factor = image_width / INPUT_WIDTH
    y_factor =  image_height / INPUT_HEIGHT
    # Iterate through detections.
    for r in range(rows):
        row = outputs[0][0][r]
        confidence = row[4]
        # Discard bad detections and continue.
        if confidence >= CONFIDENCE_THRESHOLD:
              classes_scores = row[5:]
              # Get the index of max class score.
              class_id = np.argmax(classes_scores)
              #  Continue if the class score is above threshold.
              if (classes_scores[class_id] > SCORE_THRESHOLD):
                    confidences.append(confidence)
                    class_ids.append(class_id)
                    cx, cy, w, h = row[0], row[1], row[2], row[3]
                    left = int((cx - w/2) * x_factor)
                    top = int((cy - h/2) * y_factor)
                    width = int(w * x_factor)
                    height = int(h * y_factor)
                    box = np.array([left, top, width, height])
                    boxes.append(box)

    # Perform non maximum suppression to eliminate redundant, overlapping boxes with lower confidences.
    indices = cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)
    for i in indices:
          box = boxes[i]
          left = box[0]
          top = box[1]
          width = box[2]
          height = box[3]             
          # Draw bounding box.             
          cv2.rectangle(input_image, (left, top), (left + width, top + height), BLUE, 3*THICKNESS)
          # Save class labels and confidence
          labels_confidence.append(confidences[i])
          labels_bbox.append([left, top, width, height])
          labels_class.append(str(classes[class_ids[i]]))
          # Class label.              
          label = "{}:{:.2f}".format(classes[class_ids[i]], confidences[i])             
          # Draw label.             
          draw_label(input_image, label, left, top)
    return labels_class, labels_confidence, labels_bbox



In [51]:
def run_all(input_image_path, weights_path):

    count = 1
    all_image_metadata = {}
    classesFile = "coco.names"
    classes = None
    with open(classesFile, 'rt') as f:
        classes = f.read().rstrip('\n').split('\n')

    frame = cv2.imread(input_image_path)
    modelWeights = weights_path #"/home/samuel/Downloads/yolov5m.onnx"
    net = cv2.dnn.readNet(modelWeights)
    
    # Process image.
    detections = pre_process(frame, net)
    img_detections = post_process(frame.copy(), detections)

    # Perform face recognition to get face locations
    image = face_recognition.load_image_file(input_image_path)
    face_locations = face_recognition.face_locations(image)
    
    # Get detection data from model
    labels_class, labels_confidence, labels_bbox = img_detections

    if len(labels_class) != len(face_locations):    
        # i.e there are some objects except from a person without face
        diff = len(labels_class) - len(face_locations)
        n = np.append([], np.repeat("Null", diff))
        face_locations = [*face_locations, *n]

    print (face_locations)
    for i in zip(labels_class, labels_confidence, labels_bbox, face_locations):
        label = i[0]
        confidence = i[1]
        object_bbox = i[2]
        face_bbox = i[3]

        image_metadata = {
            "label": label,
            "confidence": confidence,
            "object_bbox": object_bbox,
        }

        face_metadata = {
            "face_bbox": face_bbox
        }

        
        if face_bbox != "Null":
            all_image_metadata.update(
                {
                    "object_metadata_" + str(count) : image_metadata,
                    "face_dict_" + str(count) : face_metadata
                })

           #print (all_image_metadata)
            count += 1
        else:
            all_image_metadata.update(
                {"object_metadata_" + str(count) : image_metadata})
            count += 1

    return all_image_metadata
    

In [52]:
f_metadata = run_all(
    input_image_path="/home/samuel/Downloads/istockphoto-638494402-612x612.jpg",
    weights_path="/home/samuel/Downloads/yolov5m.onnx"
)

[(66, 322, 129, 259), (73, 460, 135, 398), (72, 220, 146, 146)]


In [53]:
f_metadata

{'object_metadata_1': {'label': 'person',
  'confidence': 0.9281701,
  'object_bbox': [369, 46, 222, 361]},
 'face_dict_1': {'face_bbox': (66, 322, 129, 259)},
 'object_metadata_2': {'label': 'person',
  'confidence': 0.9007219,
  'object_bbox': [29, 44, 197, 362]},
 'face_dict_2': {'face_bbox': (73, 460, 135, 398)},
 'object_metadata_3': {'label': 'person',
  'confidence': 0.84347767,
  'object_bbox': [182, 43, 227, 363]},
 'face_dict_3': {'face_bbox': (72, 220, 146, 146)}}

In [26]:
input_image_path="/home/samuel/Downloads/istockphoto-638494402-612x612.jpg"
image = face_recognition.load_image_file(input_image_path)
face_locations = face_recognition.face_locations(image)

In [27]:
face_locations

[(66, 322, 129, 259), (73, 460, 135, 398), (72, 220, 146, 146)]

In [8]:
#if __name__ == '__main__':
# Load class names.

"""
Put efficiency information. The function getPerfProfile returns       the overall time for inference(t) 
and the timings for each of the layers(in layersTimes).
"""
""" t, _ = net.getPerfProfile()
label = 'Inference time: %.2f ms' % (t * 1000.0 /  cv2.getTickFrequency())
print(label)
cv2.putText(img, label, (20, 40), FONT_FACE, FONT_SCALE,  (0, 0, 255), THICKNESS, cv2.LINE_AA)
cv2.imshow('Output', img)
cv2.waitKey(0) """


" t, _ = net.getPerfProfile()\nlabel = 'Inference time: %.2f ms' % (t * 1000.0 /  cv2.getTickFrequency())\nprint(label)\ncv2.putText(img, label, (20, 40), FONT_FACE, FONT_SCALE,  (0, 0, 255), THICKNESS, cv2.LINE_AA)\ncv2.imshow('Output', img)\ncv2.waitKey(0) "

In [None]:
    image = face_recognition.load_image_file(input_image_path)
    face_locations = face_recognition.face_locations(image)
    # Get detection data from model
    labels_class, labels_confidence, labels_bbox = run_object_detection(
        input_image_path = input_image_path)



    if len(labels_class) != len(face_locations):    
        # i.e there are some objects except from a person without face
        diff = len(labels_class) - len(face_locations)
        n = np.append([], np.repeat("Null", diff))
        face_locations = [*face_locations, *n]

    for i in zip(labels_class, labels_confidence, labels_bbox, face_locations):
        label = i[0]
        confidence = i[1]
        object_bbox = i[2]
        face_bbox = i[3]

        image_metadata = {
            "label": label,
            "confidence": confidence,
            "object_bbox": object_bbox,
        }

        face_metadata = {
            "face_bbox": face_bbox
        }

        if face_bbox != "Null":
            all_image_metadata.update(
                {
                    "object_metadata_" + str(count) : image_metadata,
                    "face_dict": face_metadata
                })
            count += 1
        else:
            all_image_metadata.update(
                {"object_metadata_" + str(count) : image_metadata})
            count += 1

In [16]:
a, b, c = img

In [19]:
print (c)

[[369, 46, 222, 361], [29, 44, 197, 362], [182, 43, 227, 363]]
