In [None]:
import numpy as np
from PIL import ImageFont, ImageDraw, Image
from yolo import YOLO
from yolo3.utils import letterbox_image
from keras import backend as K

from pedect.config.BasicConfig import BasicConfig
from pedect.dataset.CaltechDataset import CaltechDataset
from pedect.predictor.BasePredictor import BasePredictor
from pedect.predictor.VideoHolder import VideoHolder
from pedect.utils.constants import *

from pedect.predictor.PredictedBox import PredictedBox

class YoloPredictor(BasePredictor):
    def __init__(self, videoHolder, config):
        self.videoHolder = videoHolder
        self.yoloObject = YOLO(model_path = config.modelPath, classes_path = LABELS_FILE, anchors_path = config.anchorsPath)

    def predictForFrame(self, frameNr):
        image = Image.fromarray(videoHolder.getFrame(frameNr), 'RGB')
        if self.yoloObject.model_image_size != (None, None):
            assert self.yoloObject.model_image_size[0]%32 == 0, 'Multiples of 32 required'
            assert self.yoloObject.model_image_size[1]%32 == 0, 'Multiples of 32 required'
            boxed_image = letterbox_image(image, tuple(reversed(self.yoloObject.model_image_size)))
        else:
            new_image_size = (image.width - (image.width % 32),
                              image.height - (image.height % 32))
            boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')

        print(image_data.shape)
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

        out_boxes, out_scores, out_classes = self.yoloObject.sess.run(
            [self.yoloObject.boxes, self.yoloObject.scores, self.yoloObject.classes],
            feed_dict={
                self.yoloObject.yolo_model.input: image_data,
                self.yoloObject.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0
            })

        objects = []
        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = self.yoloObject.class_names[c]
            box = out_boxes[i]
            score = out_scores[i]
            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
            objects.append(PredictedBox(top, left, bottom, right, predicted_class, score))
        return objects
    
    
config = BasicConfig()
config.anchorsPath = os.path.join("keras-yolo3", "model_data", "yolo_anchors.txt")
dataset = CaltechDataset(CALTECH_DIR)
from pedect.predictor.GroundTruthPredictor import GroundTruthPredictor
gtPredictor = GroundTruthPredictor(dataset, "set01", "V000")
videoHolder = gtPredictor
pred = YoloPredictor(videoHolder, config)
    


Using TensorFlow backend.


In [5]:
n = 1700
rez = [len(pred.predictForFrame(i)) for i in range(n)]
rezGT = [len(gtPredictor.predictForFrame(i)) for i in range(n)]

(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 

(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 416, 3)
(416, 

In [10]:
a = [rez[i] - rezGT[i] for i in range(len(rez))]
b = [x for x in a if x > 0]
c = [x for x in a if x < 0]
print(sum(b))
print(sum(c))

321
-544


In [1]:
from pedect.converter.ConverterToImages import *
from pedect.config.BasicConfig import BasicConfig
from pedect.utils.detector.debug import debug
from pedect.utils.detector.train import train
from pedect.utils.detector.evaluate import evaluate
import os


Using TensorFlow backend.


In [2]:
convertVideosToImages([("caltech" , "set01", "V000")], YOLO)
# convertVideosToImages([("caltech" , "set01", "V000"), ("caltech", "set02", "V000")], YOLO)

Saving images from caltech set set01 video V000


100%|██████████████████████████████████████████████████████████████████████████████| 1711/1711 [01:03<00:00, 26.84it/s]


In [8]:
config = BasicConfig()
# debug(config)

In [9]:
# train(config)

In [11]:
# evaluate(config)

In [1]:
from tracker import re3_tracker 
tracker = re3_tracker.Re3Tracker()

Restoring:
re3/conv1/W_conv -> 	[11, 11, 3, 96] = 0MB
re3/conv1/b_conv -> 	[96] = 0MB
re3/conv1_skip/W_conv -> 	[1, 1, 96, 16] = 0MB
re3/conv1_skip/b_conv -> 	[16] = 0MB
re3/conv1_skip/prelu -> 	[16] = 0MB
re3/conv2/W_conv -> 	[5, 5, 48, 256] = 1MB
re3/conv2/b_conv -> 	[256] = 0MB
re3/conv2_skip/W_conv -> 	[1, 1, 256, 32] = 0MB
re3/conv2_skip/b_conv -> 	[32] = 0MB
re3/conv2_skip/prelu -> 	[32] = 0MB
re3/conv3/W_conv -> 	[3, 3, 256, 384] = 3MB
re3/conv3/b_conv -> 	[384] = 0MB
re3/conv4/W_conv -> 	[3, 3, 192, 384] = 2MB
re3/conv4/b_conv -> 	[384] = 0MB
re3/conv5/W_conv -> 	[3, 3, 192, 256] = 1MB
re3/conv5/b_conv -> 	[256] = 0MB
re3/conv5_skip/W_conv -> 	[1, 1, 256, 64] = 0MB
re3/conv5_skip/b_conv -> 	[64] = 0MB
re3/conv5_skip/prelu -> 	[64] = 0MB
re3/fc6/W_fc -> 	[74208, 1024] = 303MB
re3/fc6/b_fc -> 	[1024] = 0MB
re3/fc_output/W_fc -> 	[512, 4] = 0MB
re3/fc_output/b_fc -> 	[4] = 0MB
re3/lstm1/rnn/lstm_cell/bias -> 	[2048] = 0MB
re3/lstm1/rnn/lstm_cell/kernel -> 	[1536, 2048] = 12MB
re3/

In [2]:
from pedect.utils.constants import *
from pedect.dataset.CaltechDataset import CaltechDataset
from pedect.predictor.FakePredictor import FakePredictor
from pedect.predictor.GroundTruthPredictor import GroundTruthPredictor
from pedect.predictor.TrackerPredictor import TrackerPredictor
from pedect.config.BasicConfig import BasicConfig
chosenDataset = CaltechDataset(CALTECH_DIR)
gtPredictor = GroundTruthPredictor(chosenDataset, "set01", "V000")
fakePredictor = FakePredictor((0.0, 0.8), (0.3, 1.0), gtPredictor, gtPredictor)
predictor = TrackerPredictor(fakePredictor, gtPredictor, tracker, BasicConfig())

Format mjpeg detected only with low score of 25, misdetection possible!


In [3]:
from pedect.utils.evaluator.HyperParametersTuner import HyperParametersTuner


noIterations = 3
ctRange = (0.0, 1.0)
rtRange = (0.0, 1.0)
stRange = (0.0, 1.0)
smpRange = (0.0, 1.0)
bestConfig, result = HyperParametersTuner.findBestConfig(predictor, gtPredictor, 10, noIterations, ctRange, rtRange, stRange, 
                                                     smpRange)
print(bestConfig, result)
    
# results = playVideo(predictor, tracker, config)

  0%|                                                                                            | 0/3 [00:00<?, ?it/s]Format mjpeg detected only with low score of 25, misdetection possible!
deprecated pixel format used, make sure you did set range correctly


Current tracking speed:   0.185 FPS
Current image read speed: 100000.000 FPS
Mean tracking speed:      0.000 FPS



deprecated pixel format used, make sure you did set range correctly
 (repeated 2 more times)
Format mjpeg detected only with low score of 25, misdetection possible!
deprecated pixel format used, make sure you did set range correctly
deprecated pixel format used, make sure you did set range correctly
Format mjpeg detected only with low score of 25, misdetection possible!
deprecated pixel format used, make sure you did set range correctly
deprecated pixel format used, make sure you did set range correctly
Format mjpeg detected only with low score of 25, misdetection possible!
deprecated pixel format used, make sure you did set range correctly
deprecated pixel format used, make sure you did set range correctly
Format mjpeg detected only with low score of 25, misdetection possible!
deprecated pixel format used, make sure you did set range correctly
deprecated pixel format used, make sure you did set range correctly
Format mjpeg detected only with low score of 25, misdetection possible!
dep

69.75


 33%|████████████████████████████                                                        | 1/3 [00:10<00:21, 10.60s/it]deprecated pixel format used, make sure you did set range correctly
 (repeated 3 more times)
Format mjpeg detected only with low score of 25, misdetection possible!
deprecated pixel format used, make sure you did set range correctly
deprecated pixel format used, make sure you did set range correctly
Format mjpeg detected only with low score of 25, misdetection possible!
deprecated pixel format used, make sure you did set range correctly
deprecated pixel format used, make sure you did set range correctly
Format mjpeg detected only with low score of 25, misdetection possible!
deprecated pixel format used, make sure you did set range correctly
deprecated pixel format used, make sure you did set range correctly
Format mjpeg detected only with low score of 25, misdetection possible!
deprecated pixel format used, make sure you did set range correctly
deprecated pixel format 

47.22


 67%|████████████████████████████████████████████████████████                            | 2/3 [00:15<00:08,  8.87s/it]

Current tracking speed:   19.645 FPS
Current image read speed: 100000.000 FPS
Mean tracking speed:      20.887 FPS



deprecated pixel format used, make sure you did set range correctly
 (repeated 3 more times)
Format mjpeg detected only with low score of 25, misdetection possible!
deprecated pixel format used, make sure you did set range correctly
deprecated pixel format used, make sure you did set range correctly
Format mjpeg detected only with low score of 25, misdetection possible!
deprecated pixel format used, make sure you did set range correctly
deprecated pixel format used, make sure you did set range correctly
Format mjpeg detected only with low score of 25, misdetection possible!
deprecated pixel format used, make sure you did set range correctly
deprecated pixel format used, make sure you did set range correctly
Format mjpeg detected only with low score of 25, misdetection possible!
deprecated pixel format used, make sure you did set range correctly
deprecated pixel format used, make sure you did set range correctly
Format mjpeg detected only with low score of 25, misdetection possible!
dep

45.86


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:20<00:00,  7.62s/it]



        BasicConfig
        Possible labels = {'people': (255, 0, 0), 'person-fa': (0, 0, 255), 'person': (0, 255, 0)}
        Batch size = 1
        Steps per epoch = 100
        Number of epochs = 100
        Image max side = 600
        Backbone = mobilenet128_1.0
        Create Threshold = 0.347968
        Remove Threshold = 0.161871
        Survive Threshold = 0.914376
        Survive Move Percent = 0.840482
        Max Age = 100
         69.75


In [4]:
print(bestConfig, result)


        BasicConfig
        Possible labels = {'people': (255, 0, 0), 'person-fa': (0, 0, 255), 'person': (0, 255, 0)}
        Batch size = 1
        Steps per epoch = 100
        Number of epochs = 100
        Image max side = 600
        Backbone = mobilenet128_1.0
        Create Threshold = 0.347968
        Remove Threshold = 0.161871
        Survive Threshold = 0.914376
        Survive Move Percent = 0.840482
        Max Age = 100
         69.75
