In [1]:
import cv2
import numpy as np 
import time
from utils import *

from keras.models import Model
from numpy import array
from keras.models import load_model
import PIL
import Utils_SRGAN, Utils_model_SRGAN
from Utils_model_SRGAN import VGG_LOSS

Using TensorFlow backend.


## SRGAN

In [2]:
def load_VGG_SRGAN():
    """Load the saved SRGAN generator model from disk.

    The model is loaded with a custom loss registered under the name
    ``'vgg_loss'``; that loss comes from a ``VGG_LOSS`` instance built for an
    image shape of ``(96, 96, 3)``.

    Returns:
        The model returned by ``load_model`` for
        ``./model-weights/gen_model3000.h5``.
    """
    weights_path = './model-weights/gen_model3000.h5'
    # Keras needs the custom loss callable to deserialize the saved model.
    vgg_loss_fn = VGG_LOSS(image_shape=(96, 96, 3)).vgg_loss
    return load_model(weights_path, custom_objects={'vgg_loss': vgg_loss_fn})

def test_model_for_lr_images(input_low_res, model):
    """Run ``model`` on a low-resolution input through the ``Utils_SRGAN`` helpers.

    Args:
        input_low_res: Low-resolution image data handed to
            ``Utils_SRGAN.LOAD_DATA_TEST``.
        model: Model forwarded to ``Utils_SRGAN.plot_test_generated_images``.

    Returns:
        The value returned by ``Utils_SRGAN.plot_test_generated_images``
        (presumably the upscaled image -- confirm in ``Utils_SRGAN``).
    """
    return Utils_SRGAN.plot_test_generated_images(
        model,
        Utils_SRGAN.LOAD_DATA_TEST(input_low_res),
    )

## Functions used to detect faces in an image
**- load_yolo_face** loads the config and weight files for Yolo-v3 trained for Face Detection

**- face_detection** detects faces, passes each face crop through the SRGAN model, and stores the result in 'image_data/'

In [3]:
def load_yolo_face():
    """Build the YOLO-v3 face-detection network with OpenCV's DNN module.

    Reads the Darknet config ``./cfg/yolov3-face.cfg`` together with the
    weights ``./model-weights/yolov3-wider_16000.weights`` and selects the
    OpenCV backend with the CPU target.

    Returns:
        The configured network returned by ``cv2.dnn.readNetFromDarknet``.
    """
    face_net = cv2.dnn.readNetFromDarknet(
        './cfg/yolov3-face.cfg',
        './model-weights/yolov3-wider_16000.weights',
    )
    # Inference runs through OpenCV's own backend on the CPU.
    face_net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
    face_net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
    return face_net

def face_detection(net, frame, current_frame, current_person, model_vgg):
    """Detect faces in ``frame``, upscale each face crop and save it as a JPEG.

    Args:
        net: Face-detection network (see ``load_yolo_face``).
        frame: Image array to search; indexed as ``frame[rows, cols]``.
        current_frame: Frame index used in the output file name.
        current_person: Starting person index used in the output file name;
            it is incremented locally after every face that is saved.
        model_vgg: Model passed to ``test_model_for_lr_images`` to upscale
            each face crop.

    Side effects:
        Writes ``./image_data/frame<current_frame>_p<index>.jpg`` for each
        usable face and prints progress messages. Returns ``None``.
    """
    # An empty crop (e.g. a degenerate person box) cannot be turned into a blob.
    if frame is None or frame.size == 0:
        return

    # Create a 4D blob from the frame.
    blob = cv2.dnn.blobFromImage(frame, 1 / 255, (IMG_WIDTH, IMG_HEIGHT), [0, 0, 0], 1, crop=False)

    # Set the input to the network and run the forward pass on its output layers.
    net.setInput(blob)
    outs = net.forward(get_outputs_names(net))

    # Remove the bounding boxes with low confidence.
    faces = post_process(frame, outs, CONF_THRESHOLD, NMS_THRESHOLD)
    print('[i] ==> # detected faces: {}'.format(len(faces)))
    print('#' * 60)

    frame_height, frame_width = frame.shape[:2]

    # Crop out every detected face in the image.
    for x, y, w, h in faces:
        # Shrink the box by 2 px per side, then clamp it to the frame so that
        # a negative coordinate cannot wrap around in the slice.
        top = max(y + 2, 0)
        left = max(x + 2, 0)
        bottom = min(y + h - 2, frame_height)
        right = min(x + w - 2, frame_width)
        if bottom <= top or right <= left:
            # Nothing left to upscale or save for this detection.
            continue

        sub_face = frame[top:bottom, left:right]
        final_img = test_model_for_lr_images(sub_face, model_vgg)

        # Save the upscaled face of the current frame as a jpg file.
        name = './image_data/frame' + str(current_frame) + '_p' + str(current_person) + '.jpg'
        print('Creating...' + name)
        cv2.imwrite(name, final_img)

        # Advance the index so the next face gets a different file name.
        current_person += 1

## Functions used to detect objects in an image
**- load_yolo** is used to load the config and weight files used to detect objects using COCO dataset

**- start_webcam** is used to start the webcam using OpenCV

**- detect_objects** is used to detect different objects in an image

**- get_box_dimensions** is used to extract the bounding box, confidence score and class of each detected object

**- draw_labels** is used to draw labels above the bounding box of the detected object

**- webcam_detect** is used to call all the above functions to perform the whole process

In [12]:
#Load yolo
def load_yolo():
    """Load the YOLO-v3 object detector together with its label metadata.

    Returns:
        tuple: ``(net, classes, colors, output_layers)`` where ``net`` is the
        network read from ``model-weights/yolov3.weights`` and
        ``cfg/yolov3.cfg``, ``classes`` is the list of stripped lines from
        ``cfg/coco.names``, ``colors`` is a ``(len(classes), 3)`` array of
        random values drawn from ``[0, 255)`` (one row per class), and
        ``output_layers`` is the list of unconnected output layer names.
    """
    net = cv2.dnn.readNet("model-weights/yolov3.weights", "cfg/yolov3.cfg")
    with open("cfg/coco.names", "r") as f:
        classes = [line.strip() for line in f]

    layers_names = net.getLayerNames()
    # getUnconnectedOutLayers() yields an Nx1 array on older OpenCV builds and
    # a flat array on newer ones; flattening supports both. Ids are 1-based.
    out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
    output_layers = [layers_names[int(layer_id) - 1] for layer_id in out_layer_ids]
    colors = np.random.uniform(0, 255, size=(len(classes), 3))
    return net, classes, colors, output_layers


def start_webcam():
    """Open capture device index 0 with OpenCV and return the capture object."""
    return cv2.VideoCapture(0)


def detect_objects(img, net, outputLayers):
    """Run one forward pass of ``net`` on ``img``.

    Args:
        img: Input image handed to ``cv2.dnn.blobFromImage``.
        net: Network object exposing ``setInput`` and ``forward``.
        outputLayers: Layer names whose outputs are requested from ``net``.

    Returns:
        tuple: ``(blob, outputs)`` -- the input blob built at size 320x320 and
        the result of ``net.forward(outputLayers)``.
    """
    # scalefactor 0.00392 is approximately 1/255; swapRB flips the first and
    # last channels while the blob is built.
    input_blob = cv2.dnn.blobFromImage(
        img,
        scalefactor=0.00392,
        size=(320, 320),
        mean=(0, 0, 0),
        swapRB=True,
        crop=False,
    )
    net.setInput(input_blob)
    return input_blob, net.forward(outputLayers)

def get_box_dimensions(outputs, height, width, conf_threshold=0.3):
    """Convert raw detection rows into pixel boxes, confidences and class ids.

    Every detection row is read as ``[cx, cy, w, h, <skipped>, score_0, ...]``:
    the first four entries are multiplied by the frame size and the entries
    from index 5 onwards are treated as per-class scores.

    Args:
        outputs: Iterable of per-layer outputs, each an iterable of detection
            rows.
        height: Frame height used to scale ``cy`` and ``h``.
        width: Frame width used to scale ``cx`` and ``w``.
        conf_threshold: A detection is kept only when its best class score is
            strictly greater than this value. Defaults to ``0.3``, the value
            that was previously hard-coded.

    Returns:
        tuple: ``(boxes, confs, class_ids)`` -- ``boxes`` holds
        ``[x, y, w, h]`` integer lists with ``(x, y)`` the top-left corner,
        ``confs`` holds the best class score as ``float`` and ``class_ids``
        the index of that score as ``int``.
    """
    boxes = []
    confs = []
    class_ids = []
    for output in outputs:
        for detect in output:
            scores = detect[5:]
            class_id = int(np.argmax(scores))
            conf = scores[class_id]
            if conf > conf_threshold:
                center_x = int(detect[0] * width)
                center_y = int(detect[1] * height)
                w = int(detect[2] * width)
                h = int(detect[3] * height)
                # Convert the box centre to its top-left corner.
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confs.append(float(conf))
                class_ids.append(class_id)
    return boxes, confs, class_ids


def draw_labels(boxes, confs, colors, class_ids, classes, img, current_frame, net, model_vgg):
    """Draw boxes and labels for detected persons on ``img`` and display it.

    Non-maximum suppression (score threshold 0.5, NMS threshold 0.4) is
    applied first; only surviving boxes whose class label is ``'person'`` are
    drawn. The annotated image is shown in the ``"Image"`` window.

    Args:
        boxes: List of ``[x, y, w, h]`` boxes.
        confs: Confidence for each box.
        colors: Per-class colour table (one row per entry in ``classes``).
        class_ids: Class index for each box.
        classes: Class label strings.
        img: Image to draw on (modified in place).
        current_frame: Frame index; only needed by the disabled face stage.
        net: Face-detection network; only needed by the disabled face stage.
        model_vgg: SRGAN model; only needed by the disabled face stage.
    """
    indexes = cv2.dnn.NMSBoxes(boxes, confs, 0.5, 0.4)
    font = cv2.FONT_HERSHEY_PLAIN

    # NMSBoxes may return an empty tuple, a flat array or an Nx1 array
    # depending on the OpenCV version; normalise to a set of plain ints.
    kept = set(np.asarray(indexes, dtype=int).flatten().tolist())

    current_person = 0
    for i in sorted(kept):
        label = str(classes[class_ids[i]])
        if label != 'person':
            continue
        x, y, w, h = boxes[i]
        # ``colors`` has one row per class, so it must be indexed by class id;
        # indexing by box position overflows once there are more boxes than
        # classes.
        color = [float(c) for c in colors[class_ids[i]]]
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
        cv2.putText(img, label, (x, y - 5), font, 1, color, 1)
        # Face detection on the person crop is currently disabled; re-enable with:
        #   sub_person = img[y+1:y+h-1, x+1:x+w-1]
        #   face_detection(net, sub_person, current_frame, current_person, model_vgg)
        current_person += 1

    cv2.imshow("Image", img)


def webcam_detect():
    """Run person detection on the webcam stream until 'q' or Esc is pressed.

    Loads the object detector, the face detector and the SRGAN model, then
    reads frames from the webcam, detects objects in each frame and displays
    the annotated result. The capture device and all OpenCV windows are
    released when the loop ends, including when an error is raised.
    """
    model, classes, colors, output_layers = load_yolo()
    net = load_yolo_face()
    model_vgg = load_VGG_SRGAN()
    cap = start_webcam()
    current_frame = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret or frame is None:
                # The camera is unavailable or stopped delivering frames.
                print("[!] Could not read a frame from the webcam; stopping.")
                break
            height, width = frame.shape[:2]
            blob, outputs = detect_objects(frame, model, output_layers)
            boxes, confs, class_ids = get_box_dimensions(outputs, height, width)
            draw_labels(boxes, confs, colors, class_ids, classes, frame, current_frame, net, model_vgg)
            current_frame += 1
            # Mask to the low byte so the comparison also works on platforms
            # where waitKey sets higher bits.
            key = cv2.waitKey(1) & 0xFF
            if key == 27 or key == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()
    

def draw_labels_video(boxes, confs, colors, class_ids, classes, img, current_frame, net, model_vgg):
    """Draw boxes and labels for detected persons, show the frame and return it.

    Non-maximum suppression (score threshold 0.5, NMS threshold 0.4) is
    applied first; only surviving boxes whose class label is ``'person'`` are
    drawn. The annotated frame is shown in the ``"Video"`` window.

    Args:
        boxes: List of ``[x, y, w, h]`` boxes.
        confs: Confidence for each box.
        colors: Per-class colour table (one row per entry in ``classes``).
        class_ids: Class index for each box.
        classes: Class label strings.
        img: Frame to draw on (modified in place).
        current_frame: Frame index; only needed by the disabled face stage.
        net: Face-detection network; only needed by the disabled face stage.
        model_vgg: SRGAN model; only needed by the disabled face stage.

    Returns:
        The same ``img`` object, with the annotations drawn on it.
    """
    indexes = cv2.dnn.NMSBoxes(boxes, confs, 0.5, 0.4)
    font = cv2.FONT_HERSHEY_PLAIN

    # NMSBoxes may return an empty tuple, a flat array or an Nx1 array
    # depending on the OpenCV version; normalise to a set of plain ints.
    kept = set(np.asarray(indexes, dtype=int).flatten().tolist())

    current_person = 0
    for i in sorted(kept):
        label = str(classes[class_ids[i]])
        if label != 'person':
            continue
        x, y, w, h = boxes[i]
        # ``colors`` has one row per class, so it must be indexed by class id;
        # indexing by box position overflows once there are more boxes than
        # classes.
        color = [float(c) for c in colors[class_ids[i]]]
        # Face detection on the person crop is currently disabled; re-enable with:
        #   sub_person = img[y+1:y+h-1, x+1:x+w-1]
        #   face_detection(net, sub_person, current_frame, current_person, model_vgg)
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
        cv2.putText(img, label, (x, y - 5), font, 1, color, 1)
        current_person += 1

    cv2.imshow("Video", img)
    return img


def start_video(video_path):
    """Run person detection on a video file and write the annotated result.

    Loads the object detector, the face detector and the SRGAN model, then
    processes ``video_path`` frame by frame. Each annotated frame is shown in
    a window and appended to ``output.mp4`` (20 fps, same size as the input).
    Processing stops at the end of the file or when 'q' or Esc is pressed.

    Args:
        video_path: Path (or other source accepted by ``cv2.VideoCapture``)
            of the video to process.
    """
    model, classes, colors, output_layers = load_yolo()
    print("YOLO-v3 for Person Detection loaded")
    print("------------------------------------------------")
    net = load_yolo_face()
    print("YOLO-v3 for Face Detection loaded")
    print("------------------------------------------------")

    # Face recognition stage, currently disabled:
    #   K.set_image_data_format('channels_first')
    #   FRmodel = load_face_recog_model()
    #   print("Face Recognition Model loaded")
    #   print("------------------------------------------------")
    #   database = load_database(FRmodel)
    #   print("User Database loaded")
    #   print("------------------------------------------------")
    #   K.set_image_data_format('channels_last')

    model_vgg = load_VGG_SRGAN()
    print("SRGAN loaded")
    print("------------------------------------------------")

    cap = cv2.VideoCapture(video_path)
    current_frame = 0
    sz = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    out = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 20, sz)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("**------- END OF VIDEO FILE -------**")
                break

            height, width = frame.shape[:2]
            blob, outputs = detect_objects(frame, model, output_layers)
            boxes, confs, class_ids = get_box_dimensions(outputs, height, width)
            image = draw_labels_video(boxes, confs, colors, class_ids, classes, frame, current_frame, net, model_vgg)
            current_frame += 1

            out.write(image)

            # Mask to the low byte so the comparison also works on platforms
            # where waitKey sets higher bits.
            key = cv2.waitKey(9) & 0xFF
            if key == 27 or key == ord('q'):
                break
    finally:
        # Release the writer as well as the reader so the output file is
        # finalised even when processing stops early or raises.
        cap.release()
        out.release()
        cv2.destroyAllWindows()


## Execution

In [13]:
# Run the detection pipeline on 'test_video.mp4'. Annotated frames are shown
# in a window and written to 'output.mp4'; press 'q' or Esc to stop early.
start_video('test_video.mp4')

YOLO-v3 for Person Detection loaded
------------------------------------------------
YOLO-v3 for Face Detection loaded
------------------------------------------------
SRGAN loaded
------------------------------------------------
