In [1]:
%pylab inline

import numpy as np
from moviepy.editor import VideoFileClip

Populating the interactive namespace from numpy and matplotlib


In [2]:
from retinanet.retinanet.model import RetinaNet
from retinanet.utils.image import normalize_image, denormalize_image
from retinanet.utils.eval import Evaluator

Using TensorFlow backend.


# RetinaNet

In [3]:
# --- Inference configuration -------------------------------------------------
# Checkpoint to load and the backbone name handed to RetinaNet().
inference_model_path = 'inferences/resnet50_sinsinsa_270.h5'
backbone = 'resnet50'

# `use_p2` is forwarded to load_model() as its `p2` argument.
# `pyramids` is not referenced by the cells visible here; kept for reference.
use_p2 = False
pyramids = ['P3', 'P4', 'P5', 'P6', 'P7']

# Integer class id -> human-readable class name, used when labelling detections.
SINSINSA_CLASSES = dict(enumerate(['screw', 'hole']))

In [4]:
# Build the RetinaNet wrapper for the configured backbone, then load the
# inference checkpoint through it. `convert=False` is passed through to
# load_model() unchanged.
retinanet = RetinaNet(backbone)
model = retinanet.load_model(
    inference_model_path,
    p2=use_p2,
    convert=False,
)

loading model inferences/resnet50_sinsinsa_270.h5
finish loading model


In [5]:
def label_to_name(label):
    """
    Translate a class id into its display name.

    :param label: key into SINSINSA_CLASSES
    :return: the class name stored for that key
    :raises KeyError: if the label is not a key of SINSINSA_CLASSES
    """
    class_name = SINSINSA_CLASSES[label]
    return class_name


def predict_detections(
        model,
        image,
        score_threshold: float = 0.05,
        max_detections: int = 300,
) -> np.ndarray:
    """
    Run the inference model on one image and return its strongest detections.

    :param model: object whose ``predict_on_batch`` returns (boxes, scores, labels) for a batch
    :param image: (height, width, 3) a single image
    :param score_threshold: only boxes scoring strictly above this value are kept
    :param max_detections: upper bound on the number of detections returned
    :return: one row per detection, (x1, y1, x2, y2, label, score), ordered by descending score
    """
    # The model consumes batches, so wrap the image in a batch of one and
    # unwrap the single result again right away.
    batch_boxes, batch_scores, batch_labels = model.predict_on_batch(np.expand_dims(image, axis=0))
    boxes, scores, labels = batch_boxes[0], batch_scores[0], batch_labels[0]

    # Positions of the candidates that beat the score threshold.
    keep = np.nonzero(scores > score_threshold)[0]

    # Rank the survivors from highest to lowest score and cap their number.
    ranking = np.argsort(-scores[keep])[:max_detections]
    chosen = keep[ranking]

    # Glue boxes, labels and scores together column-wise.
    columns = [
        boxes[chosen],
        labels[chosen][:, np.newaxis],
        scores[chosen][:, np.newaxis],
    ]
    return np.concatenate(columns, axis=1)

def transform(image):
    """
    Annotate a single video frame with the detector's predictions.

    The frame is normalized, a copy with rows 540-899 zeroed is fed to the
    module-level ``model``, and the resulting detections are drawn onto the
    denormalized frame.

    :param image: a single frame, as handed over by ``clip.fl_image``
                  (presumably (height, width, 3) — confirm against the video)
    :return: the annotated frame cast to the builtin ``int`` dtype
    """
    image = normalize_image(image)

    # Predict on a copy with rows 540..899 blanked so the frame that gets drawn
    # on stays intact.
    # NOTE(review): presumably this masks a region that produces spurious
    # detections in 'cut.mov' — confirm, and consider naming the constants.
    image_for_prediction = image.copy()
    image_for_prediction[540:900] = 0
    detections = predict_detections(model, image_for_prediction)
    image = denormalize_image(image)

    # Columns 4 and 5 of each detection row hold the label and the score.
    # The return value is ignored, so the drawing is expected to happen in
    # place on `image`.
    Evaluator.draw_detections(image, detections, detections[:, 4], detections[:, 5],
                              thickness=6, label_to_name=label_to_name)

    # `np.int` was merely an alias of the builtin `int`; it was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError. Using the
    # builtin keeps the exact same resulting dtype on every NumPy version.
    return image.astype(int)
# Open the source video, route every frame through transform() and write the
# annotated result to disk.
clip = VideoFileClip('cut.mov').fl_image(transform)
clip.write_videofile('demo.mp4')

[MoviePy] >>>> Building video demo.mp4
[MoviePy] Writing video demo.mp4


100%|██████████| 244/244 [00:57<00:00,  4.24it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: demo.mp4 

