In [1]:
import numpy as np
from numpy import expand_dims
import keras.backend as K
from keras.layers import Input, Lambda
from keras.models import Model
from keras.models import load_model
from keras.optimizers import Adam
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array

from yolo3.utils import get_random_data
from yolo3.model import preprocess_true_boxes, yolo_body, yolo_loss
from yolo3.yolo import YOLO, detect_video

from timeit import default_timer as timer


from PIL import Image, ImageFont, ImageDraw

print("DONE")

Using TensorFlow backend.


DONE


In [2]:
# set up logging tensorboard
log_dir = "Logs/"
K.tensorflow_backend._get_available_gpus()







[]

In [3]:
# load classes
def get_classes(classes_path):
    with open(classes_path) as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names

def get_anchors(anchors_path):
    with open(anchors_path) as f:
        anchors = f.readline()
    anchors = [float(x) for x in anchors.split(',')]
    return np.array(anchors).reshape(-1, 2)

def get_annotations(annotation_path):
    with open(annotation_path) as f:
        lines = f.readlines()
    return lines

annotation_path = 'annotations.txt'
classes_path = 'classes.txt'      
anchors_path = 'yolo/keras-yolo3/keras-yolo3/model_data/yolo_anchors.txt' # using default yolo anchors

classes = get_classes(classes_path)
num_classes = len(classes)
anchors = get_anchors(anchors_path)
annotations = get_annotations(annotation_path)

# separate a random part of dataset for validation
np.random.seed(10101)
np.random.shuffle(annotations)
np.random.seed(None)
val_split = 0.2
num_val = int(len(annotations)*val_split)
num_train = len(annotations) - num_val

print("-------------------CLASS NAMES-------------------")
print(classes)
print("-------------------CLASS NAMES-------------------")

print(annotations[3])

print(annotations[:num_train])



-------------------CLASS NAMES-------------------
['Airplane', 'Helicopter', 'Quadcopter', 'UAV']
-------------------CLASS NAMES-------------------
F:\Projects\AVObjectTracking\dataset\OIDv4_ToolKit-master\OID\Dataset\train\UAV\85.jpg 27,79,397,261,3

['F:\\Projects\\AVObjectTracking\\dataset\\OIDv4_ToolKit-master\\OID\\Dataset\\train\\Helicopter\\0be2d0e4f6bae641.jpg 142,50,924,636,1\n', 'F:\\Projects\\AVObjectTracking\\dataset\\OIDv4_ToolKit-master\\OID\\Dataset\\train\\UAV\\1078096850_0_691_2508_2048_1000x541_80_0_0_fc109e98d6ff18e4ebd80be6ac3562a9.jpg 26,85,1000,391,3\n', 'F:\\Projects\\AVObjectTracking\\dataset\\OIDv4_ToolKit-master\\OID\\Dataset\\train\\Helicopter\\26ecae23c364218f.jpg 0,74,812,767,1 332,41,1023,528,1 721,218,1023,477,1\n', 'F:\\Projects\\AVObjectTracking\\dataset\\OIDv4_ToolKit-master\\OID\\Dataset\\train\\UAV\\85.jpg 27,79,397,261,3\n', 'F:\\Projects\\AVObjectTracking\\dataset\\OIDv4_ToolKit-master\\OID\\Dataset\\train\\UAV\\images_(2).jpg 20,1,205,107,3\n', 'F

In [4]:
# Data augmentation
def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes):
    '''data generator for fit_generator'''
    n = len(annotation_lines)
    i = 0
    while True:
        image_data = []
        box_data = []
        for b in range(batch_size):
            if i==0:
                np.random.shuffle(annotation_lines)
            image, box = get_random_data(annotation_lines[i], input_shape, random=True)
            image_data.append(image)
            box_data.append(box)
            i = (i+1) % n
        image_data = np.array(image_data)
        box_data = np.array(box_data)
        y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
        yield [image_data, *y_true], np.zeros(batch_size)
        
def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes):
    n = len(annotation_lines)
    if n==0 or batch_size<=0: return None
    return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes)

In [5]:
# Create model
def create_model(input_shape, anchors, num_classes, freeze_body, weights_path, load_pretrained=True):
    K.clear_session() # get a new session
    image_input = Input(shape=(None, None, 3))
    h, w = input_shape
    num_anchors = len(anchors)
    
    print(input_shape)
    print(num_anchors)
    print(num_classes)

    size_dict = {0:32, 1:16, 2:8}
    h_scaled = h//size_dict[0]
    w_scaled = w//size_dict[0]
    anchors_divided = num_anchors//3
    output_classes = num_classes+5
    y_true = [Input(shape=(h//size_dict[l], w//size_dict[l], anchors_divided, output_classes)) for l in range(3)]

    model_body = yolo_body(image_input, num_anchors//3, num_classes)
    print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))

    if load_pretrained:
        model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
        print('Load weights {}.'.format(weights_path))
        if freeze_body in [1, 2]:
            # Freeze darknet53 body or freeze all but 3 output layers.
            num = (185, len(model_body.layers)-3)[freeze_body-1]
            for i in range(num): model_body.layers[i].trainable = False
            print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))

    model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
        arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})(
        [*model_body.output, *y_true])
    model = Model([model_body.input, *y_true], model_loss)

    return model

In [6]:
# Here we do the training
input_shape = (416,416)


# Stage 1: Create a stock yolo model to start training with frozen layers
model = create_model(input_shape, anchors, num_classes, freeze_body=2, weights_path='yolo/keras-yolo3/keras-yolo3/model_data/yolo.h5')

# Stage 2: Continue training with all layers
# model = create_model(input_shape, anchors, num_classes, freeze_body=2, weights_path='Logs/trained_weights_stage_1.h5')

model.summary()

model.compile(optimizer=Adam(lr=1e-3), loss={'yolo_loss': lambda y_true, y_pred: y_pred})

batch_size = 32
print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
model.fit_generator(data_generator_wrapper(annotations[:num_train], batch_size, input_shape, anchors, num_classes),
    steps_per_epoch=max(1, num_train//batch_size),
    validation_data=data_generator_wrapper(annotations[num_train:], batch_size, input_shape, anchors, num_classes),
    validation_steps=max(1, num_val//batch_size),
    epochs=500,
    initial_epoch=0
)
model.save_weights(log_dir + 'trained_weights_stage_1.h5')

print("Stage 1 complete")
    

for i in range(len(model.layers)):
    model.layers[i].trainable = True
model.compile(optimizer=Adam(lr=1e-4), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) # recompile to apply the change
print('Unfreeze all of the layers.')

batch_size = 4
print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
model.fit_generator(data_generator_wrapper(annotations[:num_train], batch_size, input_shape, anchors, num_classes),
    steps_per_epoch=max(1, num_train//batch_size),
    validation_data=data_generator_wrapper(annotations[num_train:], batch_size, input_shape, anchors, num_classes),
    validation_steps=max(1, num_val//batch_size),
    epochs=100,
    initial_epoch=50
)

model.save_weights(log_dir + 'trained_weights_final_mk2.h5')
model.save("trained_models/mk2.h5")

print("Stage 2 complete")





(416, 416)
9
4



Create YOLOv3 model with 9 anchors and 4 classes.


AttributeError: 'str' object has no attribute 'decode'

In [None]:
# Functions for predicting image based on yolo.py functions
def preprocess_image(image):
    boxed_image = letterbox_image(image, tuple(reversed((416,416))))
    image_data = np.array(boxed_image, dtype='float32')

    print(image_data.shape)
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

    return image_data

def letterbox_image(image, size):
    '''resize image with unchanged aspect ratio using padding'''
    iw, ih = image.size
    w, h = size
    scale = min(w/iw, h/ih)
    nw = int(iw*scale)
    nh = int(ih*scale)

    image = image.resize((nw,nh), Image.BICUBIC)
    new_image = Image.new('RGB', size, (128,128,128))
    new_image.paste(image, ((w-nw)//2, (h-nh)//2))
    return new_image

def predict_img(model2, image_data):
    # model2 = YOLO(**params)
    out_boxes, out_scores, out_classes = model2.sess.run(
                [model2.boxes, model2.scores, model2.classes],
                feed_dict={
                    model2.yolo_model.input: image_data,
                    model2.input_image_shape: [416,416],
                    K.learning_phase(): 0
                })
    return out_boxes, out_scores, out_classes

def draw_predictions(predictions, path):
    out_boxes = predictions[0]
    out_scores = predictions[1]
    out_classes = predictions[2]
    
    # We had some trouble with the fonts and colors, and this ain't a great solution but it works
    colors = [(255,0,0),(0,255,0),(0,0,255),(100,124,53),(255,255,0),(40,200,0),(166,166,166),(255,0,255),(123,212,2),(255,255,255),(4,4,4),(4,4,4),(4,4,4),(4,4,4),(4,4,4),(4,4,4),(4,4,4),(4,4,4),(4,4,4),(4,4,4),(4,4,4),(4,4,4),(4,4,4),(4,4,4),(4,4,4),(4,4,4),(4,4,4)]
    
    font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
                    size=np.floor(3e-2 * 416 + 0.5).astype('int32'))
    thickness = 3
    class_names = get_classes('classes.txt')
    image = Image.open(path)
    og_w, og_h = image.size
    ratio_w = 416/og_w
    ratio_h = 416/og_h
    
    for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = class_names[c]
            box = out_boxes[i]
            score = out_scores[i]
            
            if score <= 0.5:
                continue
            
            label = '{} {:.2f}'.format(predicted_class, score)
            
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)

            top, left, bottom, right = box
            
            # rescale the bounding boxes for original image size
            top = top/ratio_h
            left = left/ratio_w
            bottom = bottom/ratio_h
            right = right/ratio_w
            
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(og_h, np.floor(bottom + 0.5).astype('int32'))
            right = min(og_w, np.floor(right + 0.5).astype('int32'))
            print(label, (left, top), (right, bottom))

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle(
                    [left + i, top + i, right - i, bottom - i],
                    outline=colors[c])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw
    print("saving image")
    image.save('./Saved_images/test3.jpg')
    print("saved")



In [7]:
# Create model for predictions
params = {
        "model_path": './Logs/trained_weights_final_mk2.h5',
        "anchors_path": './yolo/keras-yolo3/keras-yolo3/model_data/yolo_anchors.txt',
        "classes_path": 'classes.txt'
}

print("Creating model...")
model2 = YOLO(**params)
print("Done")

Creating model...




AttributeError: 'str' object has no attribute 'decode'

In [8]:
# detect_video function based on yolo.py
def detect_video(model2, video_path, output_path="./Saved_videos/"):
    # model2 = YOLO(**params)
    import cv2
    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError("Couldn't open webcam or video")
    video_FourCC    = int(vid.get(cv2.CAP_PROP_FOURCC))
    video_fps       = vid.get(cv2.CAP_PROP_FPS)
    video_size      = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
                        int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    isOutput = True if output_path != "" else False
    if isOutput:
        print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))
        out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)
    accum_time = 0
    curr_fps = 0
    fps = "FPS: ??"
    prev_time = timer()
    while True:
        return_value, frame = vid.read()
        if frame is None:
            break
        image = Image.fromarray(frame)
        image = model2.detect_image(image)
        result = np.asarray(image)
        curr_time = timer()
        exec_time = curr_time - prev_time
        prev_time = curr_time
        accum_time = accum_time + exec_time
        curr_fps = curr_fps + 1
        if accum_time > 1:
            accum_time = accum_time - 1
            fps = "FPS: " + str(curr_fps)
            curr_fps = 0
        cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.50, color=(255, 0, 0), thickness=2)
        cv2.namedWindow("result", cv2.WINDOW_NORMAL)
        cv2.imshow("result", result)
        if isOutput:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()

In [8]:
# Predict Image
path = 'testing/testpic.jpg'
image = Image.open(path)
image = preprocess_image(image)

#model2 = YOLO(**params)


predictions = predict_img(model2, image)

print(predictions)

draw_predictions(predictions, path)


FileNotFoundError: [Errno 2] No such file or directory: 'testing/testpic.jpg'

In [None]:
# Predict video, displays the video during process, but currently crashes when the video ends
video_path = 'testing/testvideo2.mp4'
detect_video(model2, video_path, './Saved_videos/t2.mp4')

!!! TYPE: <class 'str'> <class 'int'> <class 'float'> <class 'tuple'>
(416, 416, 3)
Found 2 boxes for img
Helicopter 0.53 (63, 57) (640, 360)
Airplane 0.75 (5, 83) (632, 360)
3.102344400000007
(416, 416, 3)
Found 2 boxes for img
Helicopter 0.58 (75, 60) (639, 360)
Airplane 0.73 (7, 87) (627, 360)
1.7253341999999918
(416, 416, 3)
Found 3 boxes for img
UAV 0.33 (2, 55) (636, 360)
Helicopter 0.59 (74, 60) (640, 360)
Airplane 0.72 (6, 86) (628, 360)
1.423529000000002
(416, 416, 3)
Found 3 boxes for img
UAV 0.34 (0, 56) (637, 360)
Helicopter 0.56 (71, 60) (640, 360)
Airplane 0.74 (5, 87) (629, 360)
1.4509825000000092
(416, 416, 3)
