# Import the dataset

In [1]:
# Download the image archive and the two annotation CSVs. wget keeps the "?dl=0" query
# string in the saved file name, so every download is renamed right after it finishes.
!wget -q https://www.dropbox.com/s/8conv524x6xid27/dataset.tar.bz2?dl=0 && mv "dataset.tar.bz2?dl=0" "dataset.tar.bz2" && tar -jxf dataset.tar.bz2 && rm dataset.tar.bz2
!wget -q https://www.dropbox.com/s/5kcxh2dcjtn35k7/Object.csv?dl=0 && mv "Object.csv?dl=0" "Object.csv"
!wget -q https://www.dropbox.com/s/7bfw646mpadynt2/Semantic.csv?dl=0 && mv "Semantic.csv?dl=0" "Semantic.csv"
# Working folders: Ori holds the original images, Train/Annotations receive the augmented
# images and their XML files, Valid/Valid_Annotations are the validation folders named in `config`.
!mkdir dataset/Ori
!mkdir dataset/Train
!mkdir dataset/Annotations
!mkdir dataset/Valid
!mkdir dataset/Valid_Annotations
# Move the extracted JPEGs into dataset/Ori.
!mv dataset/*.jpg dataset/Ori
# NOTE(review): sample_data/ is presumably the default Colab folder; this line errors if it is absent.
!rm -r sample_data/

In [2]:
# Delete the images of every genus listed below from dataset/Ori.
# The Paramecium_b line is commented out on purpose, so those images are kept.
# NOTE(review): "Colsterium" may be a typo for "Closterium" - confirm against the actual file names.
!rm dataset/Ori/Colsterium*
!rm dataset/Ori/Cylindrocystis*
!rm dataset/Ori/Lepocinclis*
!rm dataset/Ori/Micrasterias*
#!rm dataset/Ori/Paramecium_b*
!rm dataset/Ori/Peridinium*
!rm dataset/Ori/Pinnularia*
!rm dataset/Ori/Pleurotaenium*
!rm dataset/Ori/Pyrocystis*
!rm dataset/Ori/Volvox*
!rm dataset/Ori/Ceratium*
!rm dataset/Ori/Coleps*
!rm dataset/Ori/Collodictyon*
!rm dataset/Ori/Didinium*
!rm dataset/Ori/Dinobryon*
!rm dataset/Ori/Frontonia*
!rm dataset/Ori/Paramecium\ s*
!rm dataset/Ori/Phacus*

In [3]:
# Keep only the rows of Object.csv that mention Paramecium_b; the result, object.csv
# (lower-case "o"), is the file augment_bbox() opens.
# NOTE(review): the two names differ only by case - on a case-insensitive filesystem this
# redirect would presumably overwrite its own input; confirm the target platform.
!grep Paramecium_b Object.csv > object.csv

# Augment object images

In [6]:
import os
import sys
import cv2
import random
import numpy as np
from PIL import Image
from collections import defaultdict
from keras.preprocessing.image import array_to_img

def draw_rect(im, cords, color = None):
	"""Return a copy of ``im`` with one rectangle outline per row of ``cords``.

	Only the first four columns of ``cords`` are read, as (x1, y1, x2, y2).
	``color`` falls back to white when it is not given; the line thickness is
	1/200 of the longer image side (truncated to an integer).
	"""
	canvas = im.copy()
	boxes = cords[:, :4].reshape(-1, 4)
	if not color:
		color = [255, 255, 255]
	for x1, y1, x2, y2 in boxes:
		top_left = (int(x1), int(y1))
		bottom_right = (int(x2), int(y2))
		thickness = int(max(canvas.shape[:2]) / 200)
		canvas = cv2.rectangle(canvas.copy(), top_left, bottom_right, color, thickness)
	return canvas

def bbox_area(bbox):
	"""Return the area of every box; rows of ``bbox`` start with (x1, y1, x2, y2)."""
	widths = bbox[:, 2] - bbox[:, 0]
	heights = bbox[:, 3] - bbox[:, 1]
	return widths * heights
		
def clip_box(bbox, clip_box, alpha):
	"""Clip boxes to a region and drop those that lose too much of their area.

	bbox     -- 2-D array; the first four columns are (x1, y1, x2, y2), any
	            further columns are carried over unchanged.
	clip_box -- (x_min, y_min, x_max, y_max) of the region to clip to.
	alpha    -- a box is kept only when the fraction of its area removed by
	            the clipping is below ``1 - alpha``.

	Returns the clipped boxes that pass the area test.
	"""
	area_before = bbox_area(bbox)
	left = np.maximum(bbox[:, 0], clip_box[0]).reshape(-1, 1)
	top = np.maximum(bbox[:, 1], clip_box[1]).reshape(-1, 1)
	right = np.minimum(bbox[:, 2], clip_box[2]).reshape(-1, 1)
	bottom = np.minimum(bbox[:, 3], clip_box[3]).reshape(-1, 1)
	clipped = np.hstack((left, top, right, bottom, bbox[:, 4:]))
	lost_fraction = (area_before - bbox_area(clipped)) / area_before
	keep = (lost_fraction < (1 - alpha)).astype(int)
	return clipped[keep == 1, :]

def rotate_im(image, angle):
	"""Rotate ``image`` by ``angle`` degrees about its centre.

	The output canvas is enlarged to the bounding size of the rotated image and
	the transform is shifted so the result is centred on that canvas.
	"""
	height, width = image.shape[:2]
	centre_x, centre_y = width // 2, height // 2
	rotation = cv2.getRotationMatrix2D((centre_x, centre_y), angle, 1.0)
	abs_cos = np.abs(rotation[0, 0])
	abs_sin = np.abs(rotation[0, 1])
	# Size of the canvas that bounds the rotated image.
	out_w = int((height * abs_sin) + (width * abs_cos))
	out_h = int((height * abs_cos) + (width * abs_sin))
	# Shift the transform so the rotated content is centred on the new canvas.
	rotation[0, 2] += (out_w / 2) - centre_x
	rotation[1, 2] += (out_h / 2) - centre_y
	return cv2.warpAffine(image, rotation, (out_w, out_h))

def get_corners(bboxes):
	"""Expand (x1, y1, x2, y2) boxes into their four corner points.

	Returns an array with eight columns per box, ordered
	top-left, top-right, bottom-left, bottom-right as (x, y) pairs.
	"""
	left = bboxes[:, 0].reshape(-1, 1)
	top = bboxes[:, 1].reshape(-1, 1)
	box_w = (bboxes[:, 2] - bboxes[:, 0]).reshape(-1, 1)
	box_h = (bboxes[:, 3] - bboxes[:, 1]).reshape(-1, 1)
	right = bboxes[:, 2].reshape(-1, 1)
	bottom = bboxes[:, 3].reshape(-1, 1)
	return np.hstack((
		left, top,
		left + box_w, top,
		left, top + box_h,
		right, bottom))

def rotate_box(corners, angle, cx, cy, h, w):
	"""Apply the same rotation as ``rotate_im`` to box corner points.

	corners -- array with eight columns per row (four (x, y) corner points).
	angle   -- rotation angle in degrees.
	cx, cy  -- rotation centre.
	h, w    -- height and width of the image before rotation.

	Returns the transformed corners, again with eight columns per row.
	"""
	points = corners.reshape(-1, 2)
	# Homogeneous coordinates so the 2x3 affine matrix can be applied with one dot product.
	ones = np.ones((points.shape[0], 1), dtype = type(points[0][0]))
	points = np.hstack((points, ones))
	rotation = cv2.getRotationMatrix2D((cx, cy), angle, 1.0)
	abs_cos = np.abs(rotation[0, 0])
	abs_sin = np.abs(rotation[0, 1])
	out_w = int((h * abs_sin) + (w * abs_cos))
	out_h = int((h * abs_cos) + (w * abs_sin))
	# Same translation correction as the image rotation: centre on the enlarged canvas.
	rotation[0, 2] += (out_w / 2) - cx
	rotation[1, 2] += (out_h / 2) - cy
	return np.dot(rotation, points.T).T.reshape(-1, 8)

def get_enclosing_box(corners):
	"""Return the axis-aligned box around each set of four corner points.

	Columns 0-7 of ``corners`` are four (x, y) pairs; columns from 8 onwards
	are appended to the result unchanged.
	"""
	xs = corners[:, 0:8:2]
	ys = corners[:, 1:8:2]
	x_low = xs.min(axis=1).reshape(-1, 1)
	y_low = ys.min(axis=1).reshape(-1, 1)
	x_high = xs.max(axis=1).reshape(-1, 1)
	y_high = ys.max(axis=1).reshape(-1, 1)
	return np.hstack((x_low, y_low, x_high, y_high, corners[:, 8:]))

def letterbox_image(img, inp_dim):
	"""Resize ``img`` to fit an ``inp_dim`` x ``inp_dim`` square, keeping its aspect ratio.

	The resized image is centred on a zero-filled three-channel canvas, which is returned.
	"""
	target_w = target_h = inp_dim
	src_w, src_h = img.shape[1], img.shape[0]
	ratio = min(target_w / src_w, target_h / src_h)
	new_w = int(src_w * ratio)
	new_h = int(src_h * ratio)
	resized = cv2.resize(img, (new_w, new_h))
	canvas = np.full((target_h, target_w, 3), 0)
	top = (target_h - new_h) // 2
	left = (target_w - new_w) // 2
	canvas[top:top + new_h, left:left + new_w, :] = resized
	return canvas

class RandomHorizontalFlip(object):
	"""Mirror an image and its boxes left-to-right with probability ``p``."""
	def __init__(self, p=0.5):
		self.p = p
	def __call__(self, img, bboxes):
		"""Return (img, bboxes), flipped when the random draw falls below ``p``.

		``bboxes`` is modified in place; its first four columns are (x1, y1, x2, y2).
		"""
		centre = np.array(img.shape[:2])[::-1] / 2
		centre = np.hstack((centre, centre))
		if random.random() >= self.p:
			return img, bboxes
		x_cols = [0, 2]
		mirrored = img[:, ::-1, :]
		# Reflect both x coordinates about the vertical centre line ...
		bboxes[:, x_cols] += 2 * (centre[x_cols] - bboxes[:, x_cols])
		# ... then swap them back so that x1 <= x2 again.
		widths = abs(bboxes[:, 0] - bboxes[:, 2])
		bboxes[:, 0] -= widths
		bboxes[:, 2] += widths
		return mirrored, bboxes

class HorizontalFlip(object):
	"""Mirror an image and its boxes left-to-right (always applied)."""
	def __init__(self):
		pass
	def __call__(self, img, bboxes):
		"""Return the mirrored image and ``bboxes`` (updated in place)."""
		centre = np.array(img.shape[:2])[::-1] / 2
		centre = np.hstack((centre, centre))
		x_cols = [0, 2]
		mirrored = img[:, ::-1, :]
		# Reflect the x coordinates about the vertical centre line, then restore x1 <= x2.
		bboxes[:, x_cols] += 2 * (centre[x_cols] - bboxes[:, x_cols])
		widths = abs(bboxes[:, 0] - bboxes[:, 2])
		bboxes[:, 0] -= widths
		bboxes[:, 2] += widths
		return mirrored, bboxes

class RandomVerticalFlip(object):
	"""Mirror an image and its boxes top-to-bottom with probability ``p``."""
	def __init__(self, p=0.5):
		self.p = p
	def __call__(self, img, bboxes):
		"""Return (img, bboxes), flipped when the random draw falls below ``p``.

		``bboxes`` is modified in place; its first four columns are (x1, y1, x2, y2).
		"""
		centre = np.array(img.shape[:2])[::-1] / 2
		centre = np.hstack((centre, centre))
		if random.random() >= self.p:
			return img, bboxes
		y_cols = [1, 3]
		mirrored = img[::-1, :, :]
		# Reflect both y coordinates about the horizontal centre line ...
		bboxes[:, y_cols] += 2 * (centre[y_cols] - bboxes[:, y_cols])
		# ... then swap them back so that y1 <= y2 again.
		heights = abs(bboxes[:, 1] - bboxes[:, 3])
		bboxes[:, 1] -= heights
		bboxes[:, 3] += heights
		return mirrored, bboxes

class VerticalFlip(object):
	"""Mirror an image and its boxes top-to-bottom (always applied)."""
	def __init__(self):
		pass
	def __call__(self, img, bboxes):
		"""Return the mirrored image and ``bboxes`` (updated in place)."""
		centre = np.array(img.shape[:2])[::-1] / 2
		centre = np.hstack((centre, centre))
		y_cols = [1, 3]
		mirrored = img[::-1, :, :]
		# Reflect the y coordinates about the horizontal centre line, then restore y1 <= y2.
		bboxes[:, y_cols] += 2 * (centre[y_cols] - bboxes[:, y_cols])
		heights = abs(bboxes[:, 1] - bboxes[:, 3])
		bboxes[:, 1] -= heights
		bboxes[:, 3] += heights
		return mirrored, bboxes

class RandomScale(object):
	"""Scale an image and its boxes by a random factor, keeping the canvas size.

	scale -- a positive float ``s`` (factors are drawn from ``[-s, s]``, bounded
	         below by -1) or a ``(low, high)`` tuple; the resize ratio is 1 + factor.
	diff  -- when True the x and y factors are drawn independently.
	"""
	def __init__(self, scale = 0.2, diff = False):
		if type(scale) == tuple:
			assert len(scale) == 2, "Invalid range"
			assert scale[0] > -1, "Scale factor can't be less than -1"
			assert scale[1] > -1, "Scale factor can't be less than -1"
			self.scale = scale
		else:
			assert scale > 0, "Please input a positive float"
			self.scale = (max(-1, -scale), scale)
		self.diff = diff
	def __call__(self, img, bboxes):
		"""Return the rescaled image on a canvas of the original shape, plus the clipped boxes."""
		original_shape = img.shape
		factor_x = random.uniform(*self.scale)
		factor_y = random.uniform(*self.scale) if self.diff else factor_x
		ratio_x = 1 + factor_x
		ratio_y = 1 + factor_y
		resized = cv2.resize(img, None, fx = ratio_x, fy = ratio_y)
		bboxes[:, :4] *= [ratio_x, ratio_y, ratio_x, ratio_y]
		# Paste the resized image at the top-left of a black canvas of the original size;
		# anything that grew beyond the canvas is cut off.
		canvas = np.zeros(original_shape, dtype = np.uint8)
		rows = int(min(ratio_y, 1) * original_shape[0])
		cols = int(min(ratio_x, 1) * original_shape[1])
		canvas[:rows, :cols, :] = resized[:rows, :cols, :]
		# NOTE(review): the x limit is width + 1 here while the other transforms clip to width - confirm intent.
		bboxes = clip_box(bboxes, [0, 0, 1 + original_shape[1], original_shape[0]], 0.25)
		return canvas, bboxes

class Scale(object):
	"""Scale an image and its boxes by fixed factors, keeping the canvas size.

	The resize ratios are ``1 + scale_x`` and ``1 + scale_y``.
	"""
	def __init__(self, scale_x = 0.2, scale_y = 0.2):
		self.scale_x = scale_x
		self.scale_y = scale_y
	def __call__(self, img, bboxes):
		"""Return the rescaled image on a canvas of the original shape, plus the clipped boxes."""
		original_shape = img.shape
		ratio_x = 1 + self.scale_x
		ratio_y = 1 + self.scale_y
		resized = cv2.resize(img, None, fx = ratio_x, fy = ratio_y)
		bboxes[:, :4] *= [ratio_x, ratio_y, ratio_x, ratio_y]
		# Paste the resized image at the top-left of a black canvas of the original size.
		canvas = np.zeros(original_shape, dtype = np.uint8)
		rows = int(min(ratio_y, 1) * original_shape[0])
		cols = int(min(ratio_x, 1) * original_shape[1])
		canvas[:rows, :cols, :] = resized[:rows, :cols, :]
		# NOTE(review): the x limit is width + 1 here while the other transforms clip to width - confirm intent.
		bboxes = clip_box(bboxes, [0, 0, 1 + original_shape[1], original_shape[0]], 0.25)
		return canvas, bboxes

class RandomTranslate(object):
	"""Shift an image and its boxes by a random fraction of the image size.

	translate -- a float ``t`` with 0 < t < 1 (factors are drawn from ``[-t, t]``)
	             or a ``(low, high)`` tuple whose two values each lie in (0, 1).
	diff      -- when True the x and y factors are drawn independently;
	             otherwise the x factor is reused for y.
	"""
	def __init__(self, translate = 0.2, diff = False):
		self.translate = translate
		if type(self.translate) == tuple:
			assert len(self.translate) == 2, "Invalid range"
			# Chained comparisons: the previous `x > 0 & x < 1` applied the bitwise `&`
			# first (it binds tighter than `>`/`<`) and raised TypeError for floats.
			assert 0 < self.translate[0] < 1, "Translate factors must lie in (0, 1)"
			assert 0 < self.translate[1] < 1, "Translate factors must lie in (0, 1)"
		else:
			assert 0 < self.translate < 1, "Translate factor must lie in (0, 1)"
			self.translate = (-self.translate, self.translate)
		self.diff = diff
	def __call__(self, img, bboxes):
		"""Return the shifted image (same shape, black fill) and the shifted, clipped boxes.

		``bboxes`` is shifted in place before clipping.
		"""
		img_shape = img.shape
		height, width = img_shape[0], img_shape[1]
		translate_factor_x = random.uniform(*self.translate)
		translate_factor_y = random.uniform(*self.translate)
		if not self.diff:
			translate_factor_y = translate_factor_x
		canvas = np.zeros(img_shape).astype(np.uint8)
		# Pixel offset of the image's top-left corner on the canvas (may be negative).
		corner_x = int(translate_factor_x * width)
		corner_y = int(translate_factor_y * height)
		# Destination window on the canvas and the matching source window of the image.
		dst_top, dst_left = max(0, corner_y), max(corner_x, 0)
		dst_bottom = min(height, corner_y + height)
		dst_right = min(width, corner_x + width)
		visible = img[max(-corner_y, 0):min(height, -corner_y + height),
					max(-corner_x, 0):min(width, -corner_x + width), :]
		canvas[dst_top:dst_bottom, dst_left:dst_right, :] = visible
		img = canvas
		bboxes[:, :4] += [corner_x, corner_y, corner_x, corner_y]
		bboxes = clip_box(bboxes, [0, 0, width, height], 0.25)
		return img, bboxes

class Translate(object):
	"""Shift an image and its boxes by fixed fractions of the image size.

	translate_x / translate_y must each lie in (0, 1). ``diff`` is accepted
	but not used.
	"""
	def __init__(self, translate_x = 0.2, translate_y = 0.2, diff = False):
		self.translate_x = translate_x
		self.translate_y = translate_y
		assert 0 < self.translate_x < 1
		assert 0 < self.translate_y < 1
	def __call__(self, img, bboxes):
		"""Return the shifted image (same shape, black fill) and the shifted, clipped boxes."""
		shape = img.shape
		height, width = shape[0], shape[1]
		shifted = np.zeros(shape).astype(np.uint8)
		# Pixel offset of the image's top-left corner on the new canvas.
		offset_x = int(self.translate_x * width)
		offset_y = int(self.translate_y * height)
		# Destination window on the canvas ...
		dst_rows = slice(max(0, offset_y), min(height, offset_y + height))
		dst_cols = slice(max(offset_x, 0), min(width, offset_x + width))
		# ... and the part of the source image that remains visible.
		src_rows = slice(max(-offset_y, 0), min(height, -offset_y + height))
		src_cols = slice(max(-offset_x, 0), min(width, -offset_x + width))
		shifted[dst_rows, dst_cols, :] = img[src_rows, src_cols, :]
		bboxes[:, :4] += [offset_x, offset_y, offset_x, offset_y]
		bboxes = clip_box(bboxes, [0, 0, width, height], 0.25)
		return shifted, bboxes
	
class RandomRotate(object):
	"""Rotate an image and its boxes by a random angle (degrees).

	angle -- a number ``a`` (angles are drawn from ``[-a, a]``) or a ``(low, high)`` tuple.
	"""
	def __init__(self, angle = 10):
		if type(angle) == tuple:
			assert len(angle) == 2, "Invalid range"
			self.angle = angle
		else:
			self.angle = (-angle, angle)
	def __call__(self, img, bboxes):
		"""Return the rotated image, resized back to its original size, and the clipped boxes."""
		theta = random.uniform(*self.angle)
		width, height = img.shape[1], img.shape[0]
		centre_x, centre_y = width // 2, height // 2
		rotated = rotate_im(img, theta)
		# Rotate the four corners of every box, keeping any extra columns (e.g. labels).
		corners = np.hstack((get_corners(bboxes), bboxes[:, 4:]))
		corners[:, :8] = rotate_box(corners[:, :8], theta, centre_x, centre_y, height, width)
		enclosing = get_enclosing_box(corners)
		# The rotated canvas is larger; shrink image and boxes back to the original size.
		shrink_x = rotated.shape[1] / width
		shrink_y = rotated.shape[0] / height
		rotated = cv2.resize(rotated, (width, height))
		enclosing[:, :4] /= [shrink_x, shrink_y, shrink_x, shrink_y]
		return rotated, clip_box(enclosing, [0, 0, width, height], 0.25)

class Rotate(object):
	"""Rotate an image and its boxes by a fixed angle (degrees)."""
	def __init__(self, angle):
		self.angle = angle
	def __call__(self, img, bboxes):
		"""Return the rotated image, resized back to its original size, and the clipped boxes.

		The leftover debug ``print`` of the angle was removed so that applying the
		transform no longer writes to stdout on every call.
		"""
		angle = self.angle
		w, h = img.shape[1], img.shape[0]
		cx, cy = w // 2, h // 2
		# Corner points of every box, with any extra columns (e.g. labels) appended.
		corners = get_corners(bboxes)
		corners = np.hstack((corners, bboxes[:, 4:]))
		img = rotate_im(img, angle)
		corners[:, :8] = rotate_box(corners[:, :8], angle, cx, cy, h, w)
		new_bbox = get_enclosing_box(corners)
		# The rotated canvas is larger; shrink image and boxes back to the original size.
		scale_factor_x = img.shape[1] / w
		scale_factor_y = img.shape[0] / h
		img = cv2.resize(img, (w, h))
		new_bbox[:, :4] /= [scale_factor_x, scale_factor_y, scale_factor_x, scale_factor_y]
		bboxes = clip_box(new_bbox, [0, 0, w, h], 0.25)
		return img, bboxes

class RandomShear(object):
	"""Shear an image and its boxes horizontally by a random factor.

	shear_factor -- a number ``s`` (factors are drawn from ``[-s, s]``) or a
	                ``(low, high)`` tuple.
	"""
	def __init__(self, shear_factor = 0.2):
		self.shear_factor = shear_factor
		if type(self.shear_factor) == tuple:
			assert len(self.shear_factor) == 2, "Invalid range for scaling factor"
		else:
			self.shear_factor = (-self.shear_factor, self.shear_factor)
		# The factor is drawn per call in __call__; the constructor no longer draws
		# (and discards) one, which needlessly advanced the global random state.
	def __call__(self, img, bboxes):
		"""Return the sheared image, resized back to its original size, and the adjusted boxes.

		``bboxes`` is modified in place.
		"""
		shear_factor = random.uniform(*self.shear_factor)
		w, h = img.shape[1], img.shape[0]
		# A negative shear is done as flip -> positive shear -> flip back.
		if shear_factor < 0:
			img, bboxes = HorizontalFlip()(img, bboxes)
		M = np.array([[1, abs(shear_factor), 0], [0, 1, 0]])
		# Width needed to hold the sheared image.
		nW = img.shape[1] + abs(shear_factor * img.shape[0])
		# x shifts in proportion to y under the shear.
		bboxes[:, [0, 2]] += ((bboxes[:, [1, 3]]) * abs(shear_factor)).astype(int)
		img = cv2.warpAffine(img, M, (int(nW), img.shape[0]))
		if shear_factor < 0:
			img, bboxes = HorizontalFlip()(img, bboxes)
		# Squeeze back to the original size and rescale the x coordinates to match.
		img = cv2.resize(img, (w, h))
		scale_factor_x = nW / w
		bboxes[:, :4] /= [scale_factor_x, 1, scale_factor_x, 1]
		return img, bboxes
		
class Shear(object):
	"""Shear an image and its boxes horizontally by a fixed factor.

	Unlike ``RandomShear`` the widened image is returned as is (no resize back).
	"""
	def __init__(self, shear_factor = 0.2):
		self.shear_factor = shear_factor
	def __call__(self, img, bboxes):
		"""Return the sheared image and ``bboxes`` (modified in place)."""
		factor = self.shear_factor
		negative = factor < 0
		magnitude = abs(factor)
		# A negative shear is done as flip -> positive shear -> flip back.
		if negative:
			img, bboxes = HorizontalFlip()(img, bboxes)
		shear_matrix = np.array([[1, magnitude, 0], [0, 1, 0]])
		widened = img.shape[1] + abs(factor * img.shape[0])
		# x shifts in proportion to y under the shear.
		bboxes[:, [0, 2]] += (bboxes[:, [1, 3]] * magnitude).astype(int)
		img = cv2.warpAffine(img, shear_matrix, (int(widened), img.shape[0]))
		if negative:
			img, bboxes = HorizontalFlip()(img, bboxes)
		return img, bboxes
	
class Resize(object):
	"""Letterbox an image to ``inp_dim`` x ``inp_dim`` and move its boxes accordingly."""
	def __init__(self, inp_dim):
		self.inp_dim = inp_dim
	def __call__(self, img, bboxes):
		"""Return the letterboxed uint8 image and ``bboxes`` (scaled and shifted in place)."""
		side = self.inp_dim
		src_w, src_h = img.shape[1], img.shape[0]
		boxed = letterbox_image(img, side)
		ratio = min(side / src_h, side / src_w)
		bboxes[:, :4] *= (ratio)
		# Padding added by the letterbox on each side (truncated to whole pixels).
		pad_h = (side - ratio * src_h) / 2
		pad_w = (side - ratio * src_w) / 2
		offsets = np.array([[pad_w, pad_h, pad_w, pad_h]]).astype(int)
		bboxes[:, :4] += offsets
		return boxed.astype(np.uint8), bboxes

class RandomHSV(object):
	"""Add a random integer offset to each of the three image channels.

	hue, saturation, brightness -- each is either a number ``v`` (offsets are
	drawn from ``[-v, v]``), a ``(low, high)`` tuple, or None/0 for no change.
	The bounds are passed to ``random.randint`` and so must be integers.

	NOTE(review): the offsets are added to the channels as given; no colour-space
	conversion happens in this class, so the caller presumably has to supply an
	HSV image for the names to be accurate - confirm against the caller.
	"""
	def __init__(self, hue = None, saturation = None, brightness = None):
		# All three channels go through the same normalisation. Previously the
		# brightness branch tested the raw argument instead of self.brightness,
		# so an empty tuple left a bare 0 behind and __call__ crashed on it.
		self.hue = self._as_range(hue)
		self.saturation = self._as_range(saturation)
		self.brightness = self._as_range(brightness)
	@staticmethod
	def _as_range(value):
		"""Turn a scalar (or a falsy value) into a symmetric (low, high) tuple; keep tuples."""
		if not value:
			value = 0
		if type(value) != tuple:
			value = (-value, value)
		return value
	def __call__(self, img, bboxes):
		"""Return the offset image as uint8 and ``bboxes`` unchanged."""
		hue = random.randint(*self.hue)
		saturation = random.randint(*self.saturation)
		brightness = random.randint(*self.brightness)
		# Work in a wide integer type so the additions cannot wrap around.
		img = img.astype(int)
		a = np.array([hue, saturation, brightness]).astype(int)
		img += np.reshape(a, (1, 1, 3))
		img = np.clip(img, 0, 255)
		# The first channel is additionally limited to 179.
		img[:, :, 0] = np.clip(img[:, :, 0], 0, 179)
		img = img.astype(np.uint8)
		return img, bboxes
	
class Sequence(object):
	"""Apply a list of augmentations one after another.

	augmentations -- callables taking and returning ``(images, bboxes)``.
	probs         -- either one probability used for every step, or a list
	                 with one probability per augmentation.
	"""
	def __init__(self, augmentations, probs = 1):
		self.augmentations = augmentations
		self.probs = probs
	def __call__(self, images, bboxes):
		"""Run each augmentation whose random draw falls below its probability."""
		per_step = type(self.probs) == list
		for index, augment in enumerate(self.augmentations):
			threshold = self.probs[index] if per_step else self.probs
			if random.random() < threshold:
				images, bboxes = augment(images, bboxes)
		return images, bboxes

def _read_box_annotations(csv_path):
	"""Parse the annotation CSV into ``{image file name: [[x, y, w, h, label], ...]}``."""
	boxes = defaultdict(list)
	with open(csv_path, 'r') as F:
		for line in F:
			if not line.strip():
				continue
			# Each row is split on ':'; the numeric fields are the leading
			# comma-separated token of pieces 2-5, the label sits in piece 5.
			line = line.strip().split(':')
			filename = line[0].split(',')[0]
			label = line[5].split('"')[4]
			x = int(line[2].split(',')[0])
			y = int(line[3].split(',')[0])
			w = int(line[4].split(',')[0])
			h = int(line[5].split(',')[0].split('}')[0])
			boxes[filename].append([x, y, w, h, label])
	return boxes

def _write_voc_annotation(xml_path, filename, width, height, boxes):
	"""Write one annotation XML file listing every row of ``boxes``."""
	source = 'https://github.com/sarisabban/SinfNet'
	with open(xml_path, 'w') as f:
		f.write('<annotation>\n')
		f.write('\t<filename>{}.jpg</filename>\n'.format(filename))
		f.write('\t<source>{}</source>\n'.format(source))
		f.write('\t<path>../dataset/Train/{}.jpg</path>\n'.format(filename))
		f.write('\t<size>\n')
		f.write('\t\t<width>{}</width>\n'.format(width))
		f.write('\t\t<height>{}</height>\n'.format(height))
		f.write('\t\t<depth>3</depth>\n')
		f.write('\t</size>\n')
		f.write('\t<segments>{}</segments>\n'.format(boxes.shape[0]))
		for row in boxes:
			x, y, w, h, label = [str(value) for value in np.ndarray.tolist(row)][:5]
			f.write('\t<object>\n')
			f.write('\t\t<name>{}</name>\n'.format(label))
			f.write('\t\t<bndbox>\n')
			f.write('\t\t\t<xmin>{}</xmin>\n'.format(x))
			f.write('\t\t\t<ymin>{}</ymin>\n'.format(y))
			f.write('\t\t\t<xmax>{}</xmax>\n'.format(w))
			f.write('\t\t\t<ymax>{}</ymax>\n'.format(h))
			f.write('\t\t</bndbox>\n')
			f.write('\t</object>\n')
		f.write('</annotation>')

def augment_bbox(image_input='./dataset/Ori',
				image_output='./dataset/Train',
				bbox_output='./dataset/Annotations',
				count=3):
	"""Write ``count`` augmented copies of every annotated image plus matching XML files.

	image_input  -- folder with the original images.
	image_output -- folder that receives ``Aug_<name>-<k>.jpg``.
	bbox_output  -- folder that receives ``Aug_<name>-<k>.xml``.
	count        -- number of augmented copies per image.

	Annotations are read from ``object.csv`` in the current directory. Files
	that have no annotation rows, or that OpenCV cannot read, are skipped with
	a message instead of aborting the whole run.

	NOTE(review): the third and fourth CSV values are named w/h but are used as
	x2/y2 by the transforms and written as <xmax>/<ymax>. If the CSV stores a
	width and height, they need converting to x + w and y + h - confirm against
	the CSV format.
	"""
	BBOX = _read_box_annotations('object.csv')
	for Images in os.listdir(image_input):
		Iname = Images.split('.')[0]
		image_path = '{}/{}'.format(image_input, Images)
		annotations = BBOX.get(Images)
		if not annotations:
			print('Skipping {}: no annotations in object.csv'.format(Images))
			continue
		loaded = cv2.imread(image_path)
		if loaded is None:
			print('Skipping {}: not a readable image'.format(Images))
			continue
		# OpenCV loads BGR; reverse the channel axis to get RGB.
		img = loaded[:,:,::-1]
		# dtype=object because every row mixes integers with the label string.
		bboxes = np.array(annotations, dtype=object)
		# Size of the original file; the context manager closes the handle again.
		with Image.open(image_path) as original:
			W, H = original.size
		seq = Sequence([
			RandomHorizontalFlip(0.5),
			RandomVerticalFlip(0.5),
			RandomRotate(15),
			RandomScale(0.01),
			RandomTranslate(0.1),
			RandomShear(0.1),
			RandomHSV(20, 20, 20)])
		for i in range(count):
			# The transforms modify their inputs in place, so hand them copies.
			img_, bboxes_ = seq(img.copy(), bboxes.copy())
			filename = 'Aug_{}-{}'.format(Iname, i+1)
			new_image = array_to_img(img_, scale=True)
			new_image.save('{}/{}.jpg'.format(image_output, filename))
			_write_voc_annotation('{}/{}.xml'.format(bbox_output, filename), filename, W, H, bboxes_)

# Generate three augmented copies (image + XML annotation) per source image, using the default folders.
augment_bbox(count=3)

# Object detection training (YOLOv3)

In [4]:
# Restart the runtime first (Runtime > Restart runtime), then run this cell.
# The code below relies on TensorFlow 1.x APIs such as tf.to_float, tf.Print and tf.assign_add.
%tensorflow_version 1.x 

import os
import sys
import cv2
import copy
import json
import scipy
import keras
import pickle
import argparse
import numpy as np
import tensorflow as tf
import xml.etree.ElementTree as ET
from keras.utils import Sequence
from keras.optimizers import Adam
from keras.engine.topology import Layer
from keras.models import Model, load_model
from keras.layers.merge import add, concatenate
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.layers import Lambda, concatenate, ZeroPadding2D, UpSampling2D, Lambda, Conv2D, Input, BatchNormalization, LeakyReLU

# Settings for the YOLOv3 model, training and validation.
# "anchors" holds 18 numbers, i.e. nine pairs (presumably width, height in pixels - confirm);
# YoloLayer reshapes the anchors it receives to three pairs each.
# The train/valid folders match the folders created and filled by the cells above.
# Only config['train']['debug'] is read inside the code visible in this part of the notebook
# (it switches on the tf.Print diagnostics in YoloLayer.call).
# NOTE(review): "gpus" names two devices ("0,1") - confirm this matches the runtime in use.
config = {"model":{
			"min_input_size":       288,
			"max_input_size":       448,
			"anchors":              [55,69,75,234,133,240,136,129,142,363,203,290,228,184,285,359,341,260],
			"labels":               ["Paramecium"]},
		"train":{
			"train_image_folder":   "./dataset/Train/",
			"train_annot_folder":   "./dataset/Annotations/",
			"tensorboard_dir":      "./logs",
			"saved_weights_name":   "./weights.h5",
			"cache_name":           "./training.pkl",
			"pretrained_weights":   "",
			"train_times":          16,
			"batch_size":           8,
			"learning_rate":        1e-4,
			"nb_epochs":            100,
			"warmup_epochs":        0,
			"ignore_thresh":        0.5,
			"gpus":                 "0,1",
			"grid_scales":          [1,1,1],
			"obj_scale":            5,
			"noobj_scale":          1,
			"xywh_scale":           1,
			"class_scale":          1,
			"debug":                False},
		"valid":{
			"valid_image_folder":   "./dataset/Valid/",
			"valid_annot_folder":   "./dataset/Valid_Annotations/",
			"cache_name":           "",
			"valid_times":          1}}

class YoloLayer(Layer):
	"""Keras layer that computes the YOLOv3 loss for one output scale.

	The layer receives ``[input_image, y_pred, y_true, true_boxes]`` and returns
	one loss value per sample, multiplied by ``grid_scale``. It is written
	against the TensorFlow 1.x API (tf.to_float, tf.assign_add, tf.Print).
	"""
	def __init__(self, anchors, max_grid, batch_size, warmup_batches, ignore_thresh, grid_scale, obj_scale, noobj_scale, xywh_scale, class_scale, **kwargs):
		"""Store the loss weights and pre-build the grid of cell offsets.

		anchors        -- six numbers, reshaped to three (w, h) pairs.
		max_grid       -- (max_grid_h, max_grid_w) of the largest grid to support.
		batch_size     -- the cell grid is tiled this many times along axis 0.
		warmup_batches -- number of batches during which the warm-up targets are used.
		ignore_thresh  -- predictions whose best IoU with any true box reaches this
		                  value get no no-object confidence penalty.
		grid_scale, obj_scale, noobj_scale, xywh_scale, class_scale -- loss weights.
		"""
		self.ignore_thresh  = ignore_thresh
		self.warmup_batches = warmup_batches
		self.anchors        = tf.constant(anchors, dtype='float', shape=[1,1,1,3,2])
		self.grid_scale     = grid_scale
		self.obj_scale      = obj_scale
		self.noobj_scale    = noobj_scale
		self.xywh_scale     = xywh_scale
		self.class_scale    = class_scale
		max_grid_h, max_grid_w = max_grid
		# cell_x holds the column index of every cell; cell_y is obtained by swapping
		# the two grid axes of cell_x.
		# NOTE(review): the transpose only yields matching shapes when max_grid_h == max_grid_w - confirm callers.
		cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(max_grid_w), [max_grid_h]), (1, max_grid_h, max_grid_w, 1, 1)))
		cell_y = tf.transpose(cell_x, (0,2,1,3,4))
		# (x, y) offsets, repeated for every sample in the batch and for the three anchors.
		self.cell_grid = tf.tile(tf.concat([cell_x,cell_y],-1), [batch_size, 1, 1, 3, 1])
		super(YoloLayer, self).__init__(**kwargs)
	def build(self, input_shape):
		"""No weights of its own; defers to the base class."""
		super(YoloLayer, self).build(input_shape)
	def call(self, x):
		"""Return the per-sample loss for ``x = [input_image, y_pred, y_true, true_boxes]``."""
		input_image, y_pred, y_true, true_boxes = x
		# Split the last axis of the prediction into (3 anchors, values per anchor).
		y_pred = tf.reshape(y_pred, tf.concat([tf.shape(y_pred)[:3], tf.constant([3, -1])], axis=0))
		# 1 where a ground-truth object is assigned to the cell/anchor, else 0.
		object_mask     = tf.expand_dims(y_true[..., 4], 4)
		# Counter of evaluated batches, used for the warm-up switch below.
		batch_seen = tf.Variable(0.)
		# Grid size and network input size, used to normalise coordinates.
		grid_h          = tf.shape(y_true)[1]
		grid_w          = tf.shape(y_true)[2]
		grid_factor     = tf.reshape(tf.cast([grid_w, grid_h], tf.float32), [1,1,1,1,2])
		net_h           = tf.shape(input_image)[1]
		net_w           = tf.shape(input_image)[2]
		net_factor      = tf.reshape(tf.cast([net_w, net_h], tf.float32), [1,1,1,1,2])
		# Split the prediction: box centre (cell offset + sigmoid), raw width/height,
		# objectness (sigmoid) and class logits.
		pred_box_xy     = (self.cell_grid[:,:grid_h,:grid_w,:,:] + tf.sigmoid(y_pred[..., :2]))
		pred_box_wh     = y_pred[..., 2:4]
		pred_box_conf   = tf.expand_dims(tf.sigmoid(y_pred[..., 4]), 4)
		pred_box_class  = y_pred[..., 5:]
		# Matching pieces of the ground truth.
		true_box_xy     = y_true[..., 0:2]
		true_box_wh     = y_true[..., 2:4]
		true_box_conf   = tf.expand_dims(y_true[..., 4], 4)
		true_box_class  = tf.argmax(y_true[..., 5:], -1)
		# Confidence error against a target of 0 (used for the no-object part of the loss).
		conf_delta      = pred_box_conf - 0
		# --- IoU of every prediction with every box in true_boxes ---
		true_xy         = true_boxes[..., 0:2] / grid_factor
		true_wh         = true_boxes[..., 2:4] / net_factor
		true_wh_half    = true_wh / 2.
		true_mins       = true_xy - true_wh_half
		true_maxes      = true_xy + true_wh_half
		pred_xy         = tf.expand_dims(pred_box_xy / grid_factor, 4)
		pred_wh         = tf.expand_dims(tf.exp(pred_box_wh) * self.anchors / net_factor, 4)
		pred_wh_half    = pred_wh / 2.
		pred_mins       = pred_xy - pred_wh_half
		pred_maxes      = pred_xy + pred_wh_half
		intersect_mins  = tf.maximum(pred_mins,  true_mins)
		intersect_maxes = tf.minimum(pred_maxes, true_maxes)
		intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
		intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
		true_areas      = true_wh[..., 0] * true_wh[..., 1]
		pred_areas      = pred_wh[..., 0] * pred_wh[..., 1]
		union_areas     = pred_areas + true_areas - intersect_areas
		iou_scores      = tf.truediv(intersect_areas, union_areas)
		best_ious       = tf.reduce_max(iou_scores, axis=4)
		# Keep the no-object penalty only where the best IoU stays below ignore_thresh.
		conf_delta     *= tf.expand_dims(tf.to_float(best_ious < self.ignore_thresh), 4)
		# --- IoU of each prediction with the ground truth of its own cell/anchor (for the metrics) ---
		true_xy         = true_box_xy / grid_factor
		true_wh         = tf.exp(true_box_wh) * self.anchors / net_factor
		true_wh_half    = true_wh / 2.
		true_mins       = true_xy - true_wh_half
		true_maxes      = true_xy + true_wh_half
		pred_xy         = pred_box_xy / grid_factor
		pred_wh         = tf.exp(pred_box_wh) * self.anchors / net_factor
		pred_wh_half    = pred_wh / 2.
		pred_mins       = pred_xy - pred_wh_half
		pred_maxes      = pred_xy + pred_wh_half
		intersect_mins  = tf.maximum(pred_mins,  true_mins)
		intersect_maxes = tf.minimum(pred_maxes, true_maxes)
		intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
		intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
		true_areas      = true_wh[..., 0] * true_wh[..., 1]
		pred_areas      = pred_wh[..., 0] * pred_wh[..., 1]
		union_areas     = pred_areas + true_areas - intersect_areas
		iou_scores      = tf.truediv(intersect_areas, union_areas)
		iou_scores      = object_mask * tf.expand_dims(iou_scores, 4)
		# Diagnostics; they are only printed when config['train']['debug'] is set.
		# The 1e-3 terms guard against division by zero.
		count           = tf.reduce_sum(object_mask)
		count_noobj     = tf.reduce_sum(1 - object_mask)
		detect_mask     = tf.to_float((pred_box_conf*object_mask) >= 0.5)
		class_mask      = tf.expand_dims(tf.to_float(tf.equal(tf.argmax(pred_box_class, -1), true_box_class)), 4)
		recall50        = tf.reduce_sum(tf.to_float(iou_scores >= 0.5 ) * detect_mask  * class_mask) / (count + 1e-3)
		recall75        = tf.reduce_sum(tf.to_float(iou_scores >= 0.75) * detect_mask  * class_mask) / (count + 1e-3)
		avg_iou         = tf.reduce_sum(iou_scores) / (count + 1e-3)
		avg_obj         = tf.reduce_sum(pred_box_conf  * object_mask)  / (count + 1e-3)
		avg_noobj       = tf.reduce_sum(pred_box_conf  * (1-object_mask))  / (count_noobj + 1e-3)
		avg_cat         = tf.reduce_sum(object_mask * class_mask) / (count + 1e-3)
		# Warm-up: while batch_seen < warmup_batches + 1, cells without an object get the
		# cell centre (0.5 + cell offset) as xy target and every cell contributes to the
		# xy/wh loss; afterwards only cells with an object do.
		batch_seen      = tf.assign_add(batch_seen, 1.)
		true_box_xy, true_box_wh, xywh_mask = tf.cond(tf.less(batch_seen, self.warmup_batches+1), lambda: [true_box_xy + (0.5 + self.cell_grid[:,:grid_h,:grid_w,:,:]) * (1-object_mask), true_box_wh + tf.zeros_like(true_box_wh) * (1-object_mask), tf.ones_like(object_mask)], lambda: [true_box_xy, true_box_wh, object_mask])
		# Weight of the box terms: 2 minus the true box area as a fraction of the network input.
		wh_scale        = tf.exp(true_box_wh) * self.anchors / net_factor
		wh_scale        = tf.expand_dims(2 - wh_scale[..., 0] * wh_scale[..., 1], axis=4)
		# Error terms of the four loss components.
		xy_delta        = xywh_mask     * (pred_box_xy-true_box_xy) * wh_scale * self.xywh_scale
		wh_delta        = xywh_mask     * (pred_box_wh-true_box_wh) * wh_scale * self.xywh_scale
		conf_delta      = object_mask   * (pred_box_conf-true_box_conf) * self.obj_scale + (1-object_mask) * conf_delta * self.noobj_scale
		class_delta     = object_mask   * tf.expand_dims(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class), 4) * self.class_scale
		# Sum over every axis except the batch axis: one value per sample.
		loss_xy         = tf.reduce_sum(tf.square(xy_delta),        list(range(1,5)))
		loss_wh         = tf.reduce_sum(tf.square(wh_delta),        list(range(1,5)))
		loss_conf       = tf.reduce_sum(tf.square(conf_delta),      list(range(1,5)))
		loss_class      = tf.reduce_sum(class_delta,                list(range(1,5)))
		loss            = loss_xy + loss_wh + loss_conf + loss_class
		if config['train']['debug']:
			# tf.Print passes `loss` through unchanged and prints the listed tensors when it is evaluated.
			loss        = tf.Print(loss, [grid_h, avg_obj], message='avg_obj \t\t', summarize=1000)
			loss        = tf.Print(loss, [grid_h, avg_noobj], message='avg_noobj \t\t', summarize=1000)
			loss        = tf.Print(loss, [grid_h, avg_iou], message='avg_iou \t\t', summarize=1000)
			loss        = tf.Print(loss, [grid_h, avg_cat], message='avg_cat \t\t', summarize=1000)
			loss        = tf.Print(loss, [grid_h, recall50], message='recall50 \t', summarize=1000)
			loss        = tf.Print(loss, [grid_h, recall75], message='recall75 \t', summarize=1000)
			loss        = tf.Print(loss, [grid_h, count], message='count \t', summarize=1000)
			loss        = tf.Print(loss, [grid_h, tf.reduce_sum(loss_xy), tf.reduce_sum(loss_wh), tf.reduce_sum(loss_conf), tf.reduce_sum(loss_class)], message='loss xy, wh, conf, class: \t', summarize=1000)
		return loss*self.grid_scale
	def compute_output_shape(self, input_shape):
		"""Report the output shape as ``[(None, 1)]`` regardless of the input shape."""
		return [(None, 1)]

def _rand_scale(scale):
	scale = np.random.uniform(1, scale)
	return scale if (np.random.randint(2) == 0) else 1./scale;

def _constrain(min_v, max_v, value):
	if value < min_v: return min_v
	if value > max_v: return max_v
	return value

def random_flip(image, flip):
	"""Return ``image`` mirrored around the vertical axis when ``flip`` is 1, otherwise unchanged."""
	return cv2.flip(image, 1) if flip == 1 else image

def correct_bounding_boxes(boxes, new_w, new_h, net_w, net_h, dx, dy, flip, image_w, image_h):
	"""Map box coordinates from the original image to the augmented network input.

	boxes            -- list of dicts with 'xmin', 'xmax', 'ymin', 'ymax'; the input is
	                    not modified (a shuffled deep copy is used).
	new_w, new_h     -- size the image was resized to.
	net_w, net_h     -- size of the network input; coordinates are clamped to it.
	dx, dy           -- offset of the resized image inside the network input.
	flip             -- 1 when the image was mirrored horizontally.
	image_w, image_h -- size of the original image.

	Returns the transformed boxes; boxes that collapse to zero width or height are dropped.
	"""
	boxes = copy.deepcopy(boxes)
	np.random.shuffle(boxes)
	scale_x = float(new_w)/image_w
	scale_y = float(new_h)/image_h
	kept = []
	for box in boxes:
		box['xmin'] = int(_constrain(0, net_w, box['xmin']*scale_x + dx))
		box['xmax'] = int(_constrain(0, net_w, box['xmax']*scale_x + dx))
		box['ymin'] = int(_constrain(0, net_h, box['ymin']*scale_y + dy))
		box['ymax'] = int(_constrain(0, net_h, box['ymax']*scale_y + dy))
		# Degenerate after clamping: leave it out of the result.
		if box['xmax'] <= box['xmin'] or box['ymax'] <= box['ymin']:
			continue
		if flip == 1:
			# Mirror about the vertical centre line; the two edges swap roles.
			box['xmin'], box['xmax'] = net_w - box['xmax'], net_w - box['xmin']
		kept.append(box)
	return kept

def random_distort_image(image, hue=18, saturation=1.5, exposure=1.5):
	"""Randomly shift hue and rescale saturation and value of an RGB image.

	hue        -- the hue offset is drawn uniformly from [-hue, hue).
	saturation -- upper bound handed to ``_rand_scale`` for the saturation factor.
	exposure   -- upper bound handed to ``_rand_scale`` for the value factor.

	Returns the distorted image converted back to RGB.
	"""
	hue_shift = np.random.uniform(-hue, hue)
	sat_gain = _rand_scale(saturation)
	exp_gain = _rand_scale(exposure)
	hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV).astype('float')
	hsv[:, :, 1] *= sat_gain
	hsv[:, :, 2] *= exp_gain
	# hue_plane is a view, so the in-place updates below write straight into hsv.
	hue_plane = hsv[:, :, 0]
	hue_plane += hue_shift
	# Wrap the shifted hue back by 180 where it left the range.
	hue_plane -= (hue_plane > 180)*180
	hue_plane += (hue_plane < 0)  *180
	return cv2.cvtColor(hsv.astype('uint8'), cv2.COLOR_HSV2RGB)

def apply_random_scale_and_crop(image, new_w, new_h, net_w, net_h, dx, dy):
	"""Resize ``image`` to (new_w, new_h), place it at offset (dx, dy) and cut to the network size.

	Positive offsets pad with grey (127) on the left/top, non-positive offsets crop
	from the left/top; any gap remaining on the right/bottom is padded with grey.
	Returns an array of ``net_h`` rows and ``net_w`` columns.
	"""
	grey = 127
	placed = cv2.resize(image, (new_w, new_h))
	# Horizontal placement.
	if dx > 0:
		placed = np.pad(placed, ((0,0), (dx,0), (0,0)), mode='constant', constant_values=grey)
	else:
		placed = placed[:,-dx:,:]
	gap_right = net_w - (new_w+dx)
	if gap_right > 0:
		placed = np.pad(placed, ((0,0), (0, gap_right), (0,0)), mode='constant', constant_values=grey)
	# Vertical placement.
	if dy > 0:
		placed = np.pad(placed, ((dy,0), (0,0), (0,0)), mode='constant', constant_values=grey)
	else:
		placed = placed[-dy:,:,:]
	gap_bottom = net_h - (new_h+dy)
	if gap_bottom > 0:
		placed = np.pad(placed, ((0, gap_bottom), (0,0), (0,0)), mode='constant', constant_values=grey)
	return placed[:net_h, :net_w,:]

def _conv_block(inp, convs, do_skip=True):
	x = inp
	count = 0
	for conv in convs:
		if count == (len(convs) - 2) and do_skip:
			skip_connection = x
		count += 1
		if conv['stride'] > 1: x = ZeroPadding2D(((1,0),(1,0)))(x)
		x = Conv2D(conv['filter'],
					conv['kernel'],
					strides=conv['stride'],
					padding='valid' if conv['stride'] > 1 else 'same',
					name='conv_' + str(conv['layer_idx']),
					use_bias=False if conv['bnorm'] else True)(x)
		if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x)
		if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x)
	return add([skip_connection, x]) if do_skip else x

def create_yolov3_model(nb_class, anchors, max_box_per_image, max_grid, batch_size, warmup_batches, ignore_thresh, grid_scales, obj_scale, noobj_scale, xywh_scale, class_scale):
	'''
	Build the three-scale YOLOv3 network and return [train_model, infer_model].

	train_model takes [image, true_boxes, true_yolo_1, true_yolo_2, true_yolo_3]
	and outputs the three YoloLayer results (one per detection scale);
	infer_model takes only the image and outputs the three raw prediction
	tensors. Both models share the same convolutional layers.

	anchors is sliced as anchors[12:], anchors[6:12] and anchors[:6] for scales
	1, 2 and 3 (presumably a flat list of 9 width/height pairs - confirm with
	the caller). max_grid is multiplied by 1, 2 and 4 for the three scales and
	grid_scales is indexed per scale. The remaining arguments are forwarded
	unchanged to every YoloLayer.

	The 'layer_idx' values only determine the layer names ('conv_N',
	'bnorm_N', 'leaky_N') assigned inside _conv_block.
	'''
	# Network input plus the ground-truth inputs consumed by the YoloLayers.
	input_image     = Input(shape=(None, None, 3))
	true_boxes      = Input(shape=(1, 1, 1, max_box_per_image, 4))
	true_yolo_1     = Input(shape=(None, None, len(anchors)//6, 4+1+nb_class))
	true_yolo_2     = Input(shape=(None, None, len(anchors)//6, 4+1+nb_class))
	true_yolo_3     = Input(shape=(None, None, len(anchors)//6, 4+1+nb_class))
	# Backbone: residual blocks, with stride-2 convolutions halving the resolution.
	x = _conv_block(input_image, [	{'filter': 32, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 0},
									{'filter': 64, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 1},
									{'filter': 32, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 2},
									{'filter': 64, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 3}])
	x = _conv_block(x, [			{'filter': 128, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 5},
									{'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 6},
									{'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 7}])
	x = _conv_block(x, [			{'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 9},
									{'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 10}])
	x = _conv_block(x, [			{'filter': 256, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 12},
									{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 13},
									{'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 14}])
	for i in range(7):
		x = _conv_block(x, [		{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3},
									{'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}])
	# Kept for the third (finest) detection head, see the concatenate below.
	skip_36 = x
	x = _conv_block(x, [			{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37},
									{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38},
									{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 39}])
	for i in range(7):
		x = _conv_block(x, [		{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3},
									{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}])
	# Kept for the second detection head, see the concatenate below.
	skip_61 = x
	x = _conv_block(x, [			{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62},
									{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63},
									{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 64}])
	for i in range(3):
		x = _conv_block(x, [		{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3},
									{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}])
	# Detection head 1 (coarsest grid): no residual additions from here on.
	x = _conv_block(x, [			{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75},
									{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76},
									{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 77},
									{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 78},
									{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 79}], do_skip=False)
	# The last convolution of each head emits 3*(5+nb_class) channels and has
	# neither batch normalization nor activation.
	pred_yolo_1 = _conv_block(x, [	{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 80},
									{'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], do_skip=False)
	loss_yolo_1 = YoloLayer(anchors[12:], [1*num for num in max_grid], batch_size, warmup_batches, ignore_thresh, grid_scales[0], obj_scale, noobj_scale, xywh_scale, class_scale)([input_image, pred_yolo_1, true_yolo_1, true_boxes])
	# Detection head 2: upsample by 2 and merge with the skip_61 features.
	x = _conv_block(x, [			{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 84}], do_skip=False)
	x = UpSampling2D(2)(x)
	x = concatenate([x, skip_61])
	x = _conv_block(x, [			{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 87},
									{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 88},
									{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 89},
									{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 90},
									{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 91}], do_skip=False)
	pred_yolo_2 = _conv_block(x, [	{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 92},
									{'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], do_skip=False)
	loss_yolo_2 = YoloLayer(anchors[6:12], [2*num for num in max_grid], batch_size, warmup_batches, ignore_thresh, grid_scales[1], obj_scale, noobj_scale, xywh_scale, class_scale)([input_image, pred_yolo_2, true_yolo_2, true_boxes])
	# Detection head 3 (finest grid): upsample again and merge with skip_36.
	x = _conv_block(x, [			{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True,   'layer_idx': 96}], do_skip=False)
	x = UpSampling2D(2)(x)
	x = concatenate([x, skip_36])
	pred_yolo_3 = _conv_block(x, [	{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 99},
									{'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 100},
									{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 101},
									{'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 102},
									{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 103},
									{'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 104},
									{'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], do_skip=False)
	loss_yolo_3 = YoloLayer(anchors[:6], [4*num for num in max_grid], batch_size, warmup_batches, ignore_thresh, grid_scales[2], obj_scale, noobj_scale, xywh_scale, class_scale)([input_image, pred_yolo_3, true_yolo_3, true_boxes])
	# Training model outputs the per-scale YoloLayer results; inference model
	# outputs the raw predictions.
	train_model = Model([input_image, true_boxes, true_yolo_1, true_yolo_2, true_yolo_3], [loss_yolo_1, loss_yolo_2, loss_yolo_3])
	infer_model = Model(input_image, [pred_yolo_1, pred_yolo_2, pred_yolo_3])
	return [train_model, infer_model]

def dummy_loss(y_true, y_pred):
	'''
	Loss passed to compile(): the square root of the sum of y_pred.

	y_true is ignored.
	'''
	summed = tf.reduce_sum(y_pred)
	return tf.sqrt(summed)

def multi_gpu_model(model, gpus):
	'''
	Replicate model on several GPUs and return a model that splits each batch.

	gpus is either a list/tuple of GPU ids or a number of GPUs (in which case
	ids 0..gpus-1 are used). Every input of the returned model is sliced along
	the batch axis into one part per GPU, model is applied to each slice under
	the matching '/gpu:N' device, and the per-replica outputs are concatenated
	along axis 0 on the CPU. The returned Model has the same inputs as model.
	'''
	if isinstance(gpus, (list, tuple)):
		num_gpus = len(gpus)
		target_gpu_ids = gpus
	else:
		num_gpus = gpus
		target_gpu_ids = range(num_gpus)
	def get_slice(data, i, parts):
		# Return the i-th of `parts` slices of data along the batch axis.
		shape = tf.shape(data)
		batch_size = shape[:1]
		input_shape = shape[1:]
		step = batch_size // parts
		# The last replica also takes the remainder of the integer division.
		if i == num_gpus - 1:
			size = batch_size - step * i
		else:
			size = step
		size = tf.concat([size, input_shape], axis=0)
		# Only the batch axis gets a non-zero start offset.
		stride = tf.concat([step, input_shape * 0], axis=0)
		start = stride * i
		return tf.slice(data, start, size)
	# One list of per-replica tensors for every output of the model.
	all_outputs = []
	for i in range(len(model.outputs)):
		all_outputs.append([])
	for i, gpu_id in enumerate(target_gpu_ids):
		with tf.device('/gpu:%d' % gpu_id):
			with tf.name_scope('replica_%d' % gpu_id):
				inputs = []
				for x in model.inputs:
					# Static shape without the batch dimension.
					input_shape = tuple(x.get_shape().as_list())[1:]
					slice_i = Lambda(get_slice, output_shape=input_shape, arguments={'i': i, 'parts': num_gpus})(x)
					inputs.append(slice_i)
				outputs = model(inputs)
				if not isinstance(outputs, list):
					outputs = [outputs]
				for o in range(len(outputs)):
					all_outputs[o].append(outputs[o])
	# Merge the replica outputs on the CPU, reusing the original output names.
	with tf.device('/cpu:0'):
		merged = []
		for name, outputs in zip(model.output_names, all_outputs):
			merged.append(concatenate(outputs, axis=0, name=name))
		return Model(model.inputs, merged)

def get_color(label):
	'''
	Return the entry of the module-level colors table for label.

	When label is not below len(colors), a notice is printed and the default
	(0, 255, 0) is returned instead.
	'''
	if not label < len(colors):
		print('Label {} has no color, returning default.'.format(label))
		return (0, 255, 0)
	return colors[label]

# Lookup table of 80 three-component color values; get_color() indexes it by
# class label and falls back to a default when the label is out of range.
colors = 	[[31 , 0   , 255] ,
			[0   , 159 , 255] ,
			[255 , 95  , 0]   ,
			[255 , 19  , 0]   ,
			[255 , 0   , 0]   ,
			[255 , 38  , 0]   ,
			[0   , 255 , 25]  ,
			[255 , 0   , 133] ,
			[255 , 172 , 0]   ,
			[108 , 0   , 255] ,
			[0   , 82  , 255] ,
			[0   , 255 , 6]   ,
			[255 , 0   , 152] ,
			[223 , 0   , 255] ,
			[12  , 0   , 255] ,
			[0   , 255 , 178] ,
			[108 , 255 , 0]   ,
			[184 , 0   , 255] ,
			[255 , 0   , 76]  ,
			[146 , 255 , 0]   ,
			[51  , 0   , 255] ,
			[0   , 197 , 255] ,
			[255 , 248 , 0]   ,
			[255 , 0   , 19]  ,
			[255 , 0   , 38]  ,
			[89  , 255 , 0]   ,
			[127 , 255 , 0]   ,
			[255 , 153 , 0]   ,
			[0   , 255 , 255] ,
			[0   , 255 , 216] ,
			[0   , 255 , 121] ,
			[255 , 0   , 248] ,
			[70  , 0   , 255] ,
			[0   , 255 , 159] ,
			[0   , 216 , 255] ,
			[0   , 6   , 255] ,
			[0   , 63  , 255] ,
			[31  , 255 , 0]   ,
			[255 , 57  , 0]   ,
			[255 , 0   , 210] ,
			[0   , 255 , 102] ,
			[242 , 255 , 0]   ,
			[255 , 191 , 0]   ,
			[0   , 255 , 63]  ,
			[255 , 0   , 95]  ,
			[146 , 0   , 255] ,
			[184 , 255 , 0]   ,
			[255 , 114 , 0]   ,
			[0   , 255 , 235] ,
			[255 , 229 , 0]   ,
			[0   , 178 , 255] ,
			[255 , 0   , 114] ,
			[255 , 0   , 57]  ,
			[0   , 140 , 255] ,
			[0   , 121 , 255] ,
			[12  , 255 , 0]   ,
			[255 , 210 , 0]   ,
			[0   , 255 , 44]  ,
			[165 , 255 , 0]   ,
			[0   , 25  , 255] ,
			[0   , 255 , 140] ,
			[0   , 101 , 255] ,
			[0   , 255 , 82]  ,
			[223 , 255 , 0]   ,
			[242 , 0   , 255] ,
			[89  , 0   , 255] ,
			[165 , 0   , 255] ,
			[70  , 255 , 0]   ,
			[255 , 0   , 172] ,
			[255 , 76  , 0]   ,
			[203 , 255 , 0]   ,
			[204 , 0   , 255] ,
			[255 , 0   , 229] ,
			[255 , 133 , 0]   ,
			[127 , 0   , 255] ,
			[0   , 235 , 255] ,
			[0   , 255 , 197] ,
			[255 , 0   , 191] ,
			[0   , 44  , 255] ,
			[50  , 255 , 0]]

class BoundBox:
	'''
	Axis-aligned box with an optional confidence and per-class score vector.

	label and score start at -1, meaning "not computed yet"; they are filled
	lazily by get_label() and get_score().
	'''
	def __init__(self, xmin, ymin, xmax, ymax, c = None, classes = None):
		self.xmin, self.ymin = xmin, ymin
		self.xmax, self.ymax = xmax, ymax
		self.c = c
		self.classes = classes
		# -1 marks both cached values as not yet resolved.
		self.label = -1
		self.score = -1
	def get_label(self):
		'''
		Return the index of the highest entry of classes (cached in label).

		Every call also prints the box corners and the label name looked up in
		the module-level config['model']['labels'].
		'''
		unresolved = (self.label == -1)
		if unresolved:
			self.label = np.argmax(self.classes)
		print(self.xmin, self.ymin, self.xmax, self.ymax, config['model']['labels'][self.label])
		return self.label
	def get_score(self):
		'''Return classes[get_label()], computing and caching it on first use.'''
		if self.score == -1:
			best = self.get_label()
			self.score = self.classes[best]
		return self.score

def _interval_overlap(interval_a, interval_b):
	x1, x2 = interval_a
	x3, x4 = interval_b
	if x3 < x1:
		if x4 < x1: return 0
		else: return min(x2,x4) - x1
	else:
		if x2 < x3: return 0
		else: return min(x2,x4) - x3

def bbox_iou(box1, box2):
	'''
	Intersection over union of two boxes exposing xmin / ymin / xmax / ymax.

	Returns a float. When the union has zero area (both boxes are degenerate)
	0.0 is returned instead of dividing by zero.
	'''
	intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
	intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
	intersect = intersect_w * intersect_h
	w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin
	w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin
	union = w1*h1 + w2*h2 - intersect
	# Two zero-area boxes give an empty union; treat them as non-overlapping.
	if union == 0:
		return 0.0
	return float(intersect) / union

def draw_boxes(image, boxes, labels, obj_thresh, quiet=True):
	'''
	Draw every box that has at least one class score above obj_thresh.

	The caption lists each such class name followed by box.get_score() as a
	percentage; the color is taken from get_color() for the last matching
	class index. Drawing happens in place on image, which is also returned.
	With quiet=False the caption built so far is printed after every class
	check.
	'''
	font = cv2.FONT_HERSHEY_SIMPLEX
	for box in boxes:
		captions = []
		label = -1
		for idx, class_name in enumerate(labels):
			if box.classes[idx] > obj_thresh:
				captions.append(class_name + ' ' + str(round(box.get_score()*100, 2)) + '%')
				label = idx
			if not quiet:
				print(', '.join(captions))
		if label < 0:
			# No class passed the threshold: nothing to draw for this box.
			continue
		label_str = ', '.join(captions)
		text_size = cv2.getTextSize(label_str, font, 1.1e-3 * image.shape[0], 5)
		width, height = text_size[0][0], text_size[0][1]
		# Filled banner above the top-left corner that holds the caption.
		left, right = box.xmin-3, box.xmin+width+13
		top, bottom = box.ymin-height-26, box.ymin
		region = np.array([[left, bottom], [left, top], [right, top], [right, bottom]], dtype='int32')
		cv2.rectangle(img=image, pt1=(box.xmin,box.ymin), pt2=(box.xmax,box.ymax), color=get_color(label), thickness=3)
		cv2.fillPoly(img=image, pts=[region], color=get_color(label))
		cv2.putText(img=image, text=label_str, org=(box.xmin+13, box.ymin - 13), fontFace=font, fontScale=1e-3 * image.shape[0], color=(0,0,0), thickness=2)
	return image

def _sigmoid(x):
	return scipy.special.expit(x)
 
def makedirs(path):
	'''
	Create path and any missing parents; an existing directory is not an error.

	Any OSError is re-raised unless path is a directory afterwards.
	'''
	try:
		os.makedirs(path)
	except OSError:
		directory_present = os.path.isdir(path)
		if directory_present:
			return
		raise

def evaluate(model, generator, iou_threshold=0.5, obj_thresh=0.5, nms_thresh=0.45, net_h=416, net_w=416, save_path=None):
	'''
	Compute the average precision of model for every class of generator.

	For each image of the generator the boxes predicted by get_yolo_boxes()
	are compared with the annotations from generator.load_annotation(). A
	detection is a true positive when its best-overlapping annotation reaches
	iou_threshold and has not already been matched by an earlier detection of
	the same image and class.

	Returns a dict mapping class index to average precision; classes without
	any annotation get 0. save_path is accepted but not used here.
	'''
	# all_detections[i][label]: array of [xmin, ymin, xmax, ymax, score] rows.
	# all_annotations[i][label]: array of [xmin, ymin, xmax, ymax] rows.
	all_detections      = [[None for i in range(generator.num_classes())] for j in range(generator.size())]
	all_annotations     = [[None for i in range(generator.num_classes())] for j in range(generator.size())]
	for i in range(generator.size()):
		raw_image = [generator.load_image(i)]
		pred_boxes = get_yolo_boxes(model, raw_image, net_h, net_w, generator.get_anchors(), obj_thresh, nms_thresh)[0]
		# get_score() must run before box.label is read: it resolves the label.
		score = np.array([box.get_score() for box in pred_boxes])
		pred_labels = np.array([box.label for box in pred_boxes])
		if len(pred_boxes) > 0:
			pred_boxes = np.array([[box.xmin, box.ymin, box.xmax, box.ymax, box.get_score()] for box in pred_boxes]) 
		else:
			pred_boxes = np.array([[]])
		# Order detections by decreasing score.
		score_sort  = np.argsort(-score)
		pred_labels = pred_labels[score_sort]
		pred_boxes  = pred_boxes[score_sort]
		for label in range(generator.num_classes()):
			all_detections[i][label] = pred_boxes[pred_labels == label, :]
		annotations = generator.load_annotation(i)
		# Column 4 of an annotation row is the class index.
		for label in range(generator.num_classes()):
			all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy()
	average_precisions = {}
	for label in range(generator.num_classes()):
		# Parallel arrays with one entry per detection of this class.
		false_positives = np.zeros((0,))
		true_positives  = np.zeros((0,))
		scores          = np.zeros((0,))
		num_annotations = 0.0
		for i in range(generator.size()):
			detections              = all_detections[i][label]
			annotations             = all_annotations[i][label]
			num_annotations        += annotations.shape[0]
			detected_annotations    = []
			for d in detections:
				scores = np.append(scores, d[4])
				# Nothing to match against: every detection is a false positive.
				if annotations.shape[0] == 0:
					false_positives = np.append(false_positives, 1)
					true_positives  = np.append(true_positives, 0)
					continue
				overlaps            = compute_overlap(np.expand_dims(d, axis=0), annotations)
				assigned_annotation = np.argmax(overlaps, axis=1)
				max_overlap         = overlaps[0, assigned_annotation]
				# An annotation can be claimed by one detection only.
				if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
					false_positives = np.append(false_positives, 0)
					true_positives  = np.append(true_positives, 1)
					detected_annotations.append(assigned_annotation)
				else:
					false_positives = np.append(false_positives, 1)
					true_positives  = np.append(true_positives, 0)
		if num_annotations == 0:
			average_precisions[label] = 0
			continue
		# Rank all detections of the class by score across images, then
		# accumulate to get the precision / recall curve.
		indices         = np.argsort(-scores)
		false_positives = false_positives[indices]
		true_positives  = true_positives[indices]
		false_positives = np.cumsum(false_positives)
		true_positives  = np.cumsum(true_positives)
		recall          = true_positives / num_annotations
		# The eps floor avoids a division by zero.
		precision       = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)
		average_precision           = compute_ap(recall, precision)
		average_precisions[label]   = average_precision
	return average_precisions

def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
	'''
	Map boxes from letterboxed network coordinates back to image pixels.

	boxes hold coordinates expressed as fractions of the network input, which
	contains the image resized with its aspect ratio preserved and centered.
	The padding offset is removed, the result is rescaled to the original
	image size, and xmin / ymin / xmax / ymax are overwritten in place with
	integers. Returns None.
	'''
	# Size of the resized image inside the network input: the axis with the
	# smaller scale factor fills the input completely.
	if (float(net_w)/image_w) < (float(net_h)/image_h):
		new_w = net_w
		new_h = (image_h*net_w)/image_w
	else:
		# The height fills the input here, so it must be net_h (not net_w),
		# otherwise non-square network inputs get wrong vertical coordinates.
		new_h = net_h
		new_w = (image_w*net_h)/image_h
	# Offsets and scales do not depend on the box, so compute them once.
	x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w
	y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h
	for box in boxes:
		box.xmin = int((box.xmin - x_offset) / x_scale * image_w)
		box.xmax = int((box.xmax - x_offset) / x_scale * image_w)
		box.ymin = int((box.ymin - y_offset) / y_scale * image_h)
		box.ymax = int((box.ymax - y_offset) / y_scale * image_h)

def do_nms(boxes, nms_thresh):
	'''
	Per-class non-maximum suppression, applied in place.

	For every class, boxes are visited by decreasing class score; each box
	with a non-zero score zeroes that class score on every lower-ranked box
	whose bbox_iou with it is at least nms_thresh. Returns None.
	'''
	if len(boxes) == 0:
		return
	nb_class = len(boxes[0].classes)
	for c in range(nb_class):
		ranking = np.argsort([-box.classes[c] for box in boxes])
		for rank, keeper_idx in enumerate(ranking):
			keeper = boxes[keeper_idx]
			if keeper.classes[c] == 0:
				# Already suppressed (or never scored) for this class.
				continue
			for other_idx in ranking[rank+1:]:
				other = boxes[other_idx]
				if bbox_iou(keeper, other) >= nms_thresh:
					other.classes[c] = 0

def decode_netout(netout, anchors, obj_thresh, net_h, net_w):
	'''
	Turn one raw output grid of the network into a list of BoundBox objects.

	netout is reshaped to (grid_h, grid_w, 3, -1) and decoded in place:
	sigmoid on the x/y offsets and on the objectness, softmax times objectness
	on the class scores, with class scores not above obj_thresh set to 0. A
	box is emitted for every cell/anchor whose objectness exceeds obj_thresh;
	its corners are fractions of the network input size. anchors is the flat
	[w0, h0, w1, h1, w2, h2] list for this grid.
	'''
	grid_h, grid_w = netout.shape[:2]
	nb_box = 3
	netout = netout.reshape((grid_h, grid_w, nb_box, -1))
	netout[..., :2] = _sigmoid(netout[..., :2])
	netout[..., 4] = _sigmoid(netout[..., 4])
	netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])
	netout[..., 5:] *= netout[..., 5:] > obj_thresh
	boxes = []
	# Row-major walk over the grid, then over the anchors of each cell.
	for row in range(grid_h):
		for col in range(grid_w):
			for b in range(nb_box):
				cell = netout[row, col, b]
				objectness = cell[4]
				if objectness <= obj_thresh:
					continue
				x, y, w, h = cell[:4]
				# Center relative to the whole grid, size relative to the input.
				x = (col + x) / grid_w
				y = (row + y) / grid_h
				w = anchors[2 * b + 0] * np.exp(w) / net_w
				h = anchors[2 * b + 1] * np.exp(h) / net_h
				classes = cell[5:]
				boxes.append(BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes))
	return boxes

def preprocess_input(image, net_h, net_w):
	'''
	Letterbox image into a (1, net_h, net_w, 3) float array.

	The channel order is reversed, values are divided by 255, and the image is
	resized with its aspect ratio preserved so that it fits the network input.
	It is then centered on a canvas filled with 0.5 and a leading batch axis
	is added.
	'''
	src_h, src_w, _ = image.shape
	if (float(net_w)/src_w) < (float(net_h)/src_h):
		# Width is the limiting axis.
		fit_w, fit_h = net_w, (src_h * net_w)//src_w
	else:
		# Height is the limiting axis.
		fit_w, fit_h = (src_w * net_h)//src_h, net_h
	resized = cv2.resize(image[:,:,::-1]/255., (fit_w, fit_h))
	top, bottom = (net_h-fit_h)//2, (net_h+fit_h)//2
	left, right = (net_w-fit_w)//2, (net_w+fit_w)//2
	canvas = np.ones((net_h, net_w, 3)) * 0.5
	canvas[top:bottom, left:right, :] = resized
	return np.expand_dims(canvas, 0)

def normalize(image):
	'''Return image divided by 255.'''
	scaled = image / 255.
	return scaled

def get_yolo_boxes(model, images, net_h, net_w, anchors, obj_thresh, nms_thresh):
	'''
	Run model on images and return one list of BoundBox per image.

	Every image is letterboxed with preprocess_input(), the batch is predicted
	in one call, and each of the three output grids is decoded with its own
	slice of anchors (output 0 uses the last six values, output 2 the first
	six). Boxes are then mapped back to image pixels and filtered with
	do_nms(). The height and width of images[0] are used for all images.
	'''
	image_h, image_w, _ = images[0].shape
	nb_images = len(images)
	batch_input = np.zeros((nb_images, net_h, net_w, 3))
	for idx in range(nb_images):
		batch_input[idx] = preprocess_input(images[idx], net_h, net_w)
	batch_output = model.predict_on_batch(batch_input)
	batch_boxes = []
	for idx in range(nb_images):
		heads = [batch_output[scale][idx] for scale in range(3)]
		boxes = []
		for scale, head in enumerate(heads):
			first = (2-scale)*6
			yolo_anchors = anchors[first:first+6]
			boxes += decode_netout(head, yolo_anchors, obj_thresh, net_h, net_w)
		correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w)
		do_nms(boxes, nms_thresh)
		batch_boxes.append(boxes)
	return batch_boxes

def compute_overlap(a, b):
	'''
	Pairwise intersection over union between two sets of boxes.

	a is an (N, >=4) array and b a (K, >=4) array whose first four columns are
	xmin, ymin, xmax, ymax. Returns an (N, K) array of IoU values; the union
	is floored at machine epsilon to avoid a division by zero.
	'''
	b_area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
	a_area = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
	# Column vectors for a so that every a-box broadcasts against every b-box.
	a_xmin = np.expand_dims(a[:, 0], 1)
	a_ymin = np.expand_dims(a[:, 1], 1)
	a_xmax = np.expand_dims(a[:, 2], axis=1)
	a_ymax = np.expand_dims(a[:, 3], axis=1)
	inter_w = np.maximum(np.minimum(a_xmax, b[:, 2]) - np.maximum(a_xmin, b[:, 0]), 0)
	inter_h = np.maximum(np.minimum(a_ymax, b[:, 3]) - np.maximum(a_ymin, b[:, 1]), 0)
	union = np.expand_dims(a_area, axis=1) + b_area - inter_w * inter_h
	union = np.maximum(union, np.finfo(float).eps)
	return (inter_w * inter_h) / union

def compute_ap(recall, precision):
	'''
	Area under the precision / recall curve.

	The curve is extended with the sentinel points (0, 0) and (1, 0),
	precision is replaced by its running maximum taken from the right, and the
	area is summed over the positions where recall changes.
	'''
	mrec = np.concatenate(([0.], recall, [1.]))
	mpre = np.concatenate(([0.], precision, [0.]))
	# Running maximum from the right makes precision non-increasing.
	mpre = np.maximum.accumulate(mpre[::-1])[::-1]
	steps = np.where(mrec[1:] != mrec[:-1])[0]
	widths = mrec[steps + 1] - mrec[steps]
	return np.sum(widths * mpre[steps + 1])

def _softmax(x, axis=-1):
	x = x - np.amax(x, axis, keepdims=True)
	e_x = np.exp(x)
	return e_x / e_x.sum(axis, keepdims=True)

class BatchGenerator(Sequence):
	'''
	Keras Sequence that produces augmented training batches for the three
	YOLO output scales.

	instances is a list of dicts with a 'filename' and an 'object' list (each
	object holding 'name', 'xmin', 'ymin', 'xmax', 'ymax'), anchors a flat list
	of anchor widths / heights, and labels the list of class names. The
	network input size is re-drawn every 10 batches between min_net_size and
	max_net_size (both rounded down to a multiple of downsample).
	'''
	def __init__(self,
		instances,
		anchors,
		labels,
		downsample=32,
		max_box_per_image=30,
		batch_size=1,
		min_net_size=320,
		max_net_size=608,
		shuffle=True,
		jitter=True,
		norm=None):
		'''
		Store the configuration and optionally shuffle instances in place.

		norm, when given, is a callable applied to every augmented image; when
		it is None the ground-truth boxes are drawn on the image instead.
		'''
		self.instances          = instances
		self.batch_size         = batch_size
		self.labels             = labels
		self.downsample         = downsample
		self.max_box_per_image  = max_box_per_image
		# Round both limits down to a multiple of the downsample factor.
		self.min_net_size       = (min_net_size // self.downsample) * self.downsample
		self.max_net_size       = (max_net_size // self.downsample) * self.downsample
		self.shuffle            = shuffle
		self.jitter             = jitter
		self.norm               = norm
		# Anchors are stored as boxes anchored at the origin.
		self.anchors            = [BoundBox(0, 0, anchors[2*i], anchors[2*i+1]) for i in range(len(anchors)//2)]
		self.net_h              = 416
		self.net_w              = 416
		if shuffle: np.random.shuffle(self.instances)
	def __len__(self):
		'''Number of batches per epoch (the last one may reuse earlier instances).'''
		return int(np.ceil(float(len(self.instances))/self.batch_size))
	def __getitem__(self, idx):
		'''
		Build batch idx.

		Returns ([x_batch, t_batch, yolo_1, yolo_2, yolo_3],
		[dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]): the images, the list of
		true boxes, the target grids at 1x, 2x and 4x the base grid size, and
		zero-filled dummy targets.
		'''
		net_h, net_w = self._get_net_size(idx)
		base_grid_h, base_grid_w = net_h//self.downsample, net_w//self.downsample
		l_bound = idx*self.batch_size
		r_bound = (idx+1)*self.batch_size
		# Keep the batch size constant: the last batch is shifted back so that
		# it ends on the last instance.
		if r_bound > len(self.instances):
			r_bound = len(self.instances)
			l_bound = r_bound - self.batch_size
		nb_rows = r_bound - l_bound
		nb_anchors = len(self.anchors)//3
		nb_values = 4+1+len(self.labels)
		x_batch = np.zeros((nb_rows, net_h, net_w, 3))
		t_batch = np.zeros((nb_rows, 1, 1, 1, self.max_box_per_image, 4))
		yolo_1 = np.zeros((nb_rows, 1*base_grid_h, 1*base_grid_w, nb_anchors, nb_values))
		yolo_2 = np.zeros((nb_rows, 2*base_grid_h, 2*base_grid_w, nb_anchors, nb_values))
		yolo_3 = np.zeros((nb_rows, 4*base_grid_h, 4*base_grid_w, nb_anchors, nb_values))
		# Indexed by best_anchor_index // 3: the first anchors go to the
		# finest grid (yolo_3), the last ones to the coarsest (yolo_1).
		yolos = [yolo_3, yolo_2, yolo_1]
		dummy_yolo_1 = np.zeros((nb_rows, 1))
		dummy_yolo_2 = np.zeros((nb_rows, 1))
		dummy_yolo_3 = np.zeros((nb_rows, 1))
		instance_count = 0
		true_box_index = 0
		for train_instance in self.instances[l_bound:r_bound]:
			img, all_objs = self._aug_image(train_instance, net_h, net_w)
			for obj in all_objs:
				# Pick the anchor whose shape overlaps the object best.
				max_anchor  = None
				max_index   = -1
				max_iou     = -1
				shifted_box = BoundBox(0, 0, obj['xmax']-obj['xmin'], obj['ymax']-obj['ymin'])
				for i in range(len(self.anchors)):
					anchor  = self.anchors[i]
					iou     = bbox_iou(shifted_box, anchor)
					if max_iou < iou:
						max_anchor  = anchor
						max_index   = i
						max_iou     = iou
				yolo = yolos[max_index//3]
				grid_h, grid_w = yolo.shape[1:3]
				# Object center in grid units.
				center_x = .5*(obj['xmin'] + obj['xmax'])
				center_x = center_x / float(net_w) * grid_w
				center_y = .5*(obj['ymin'] + obj['ymax'])
				center_y = center_y / float(net_h) * grid_h
				# Log-ratio of the object size to the chosen anchor size.
				w = np.log((obj['xmax'] - obj['xmin']) / float(max_anchor.xmax))
				h = np.log((obj['ymax'] - obj['ymin']) / float(max_anchor.ymax))
				box = [center_x, center_y, w, h]
				obj_indx = self.labels.index(obj['name'])
				grid_x = int(np.floor(center_x))
				grid_y = int(np.floor(center_y))
				# Reset the cell, then write box, objectness and one-hot class.
				yolo[instance_count, grid_y, grid_x, max_index%3]      = 0
				yolo[instance_count, grid_y, grid_x, max_index%3, 0:4] = box
				yolo[instance_count, grid_y, grid_x, max_index%3, 4  ] = 1.
				yolo[instance_count, grid_y, grid_x, max_index%3, 5+obj_indx] = 1
				true_box = [center_x, center_y, obj['xmax'] - obj['xmin'], obj['ymax'] - obj['ymin']]
				t_batch[instance_count, 0, 0, 0, true_box_index] = true_box
				# The slot index wraps around and is shared by the whole batch.
				true_box_index += 1
				true_box_index = true_box_index % self.max_box_per_image
			if self.norm is not None:
				x_batch[instance_count] = self.norm(img)
			else:
				# Without a normalizer the boxes are drawn on the image.
				for obj in all_objs:
					cv2.rectangle(img, (obj['xmin'],obj['ymin']), (obj['xmax'],obj['ymax']), (255,0,0), 3)
					cv2.putText(img, obj['name'], (obj['xmin']+2, obj['ymin']+12), 0, 1.2e-3 * img.shape[0], (0,255,0), 2)
				x_batch[instance_count] = img
			instance_count += 1
		return [x_batch, t_batch, yolo_1, yolo_2, yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]
	def _get_net_size(self, idx):
		'''Return (net_h, net_w); a new square size is drawn when idx % 10 == 0.'''
		if idx%10 == 0:
			# Both limits are multiples of downsample, so integer division is
			# exact and randint receives integer bounds.
			low = self.min_net_size//self.downsample
			high = self.max_net_size//self.downsample+1
			net_size = self.downsample*np.random.randint(low, high)
			print('resizing: ', net_size, net_size)
			self.net_h, self.net_w = net_size, net_size
		return self.net_h, self.net_w
	def _aug_image(self, instance, net_h, net_w):
		'''
		Load the image of instance and apply random scale, crop, color
		distortion and flip. Returns (augmented image, adjusted objects).

		Raises IOError when the image file cannot be read.
		'''
		image_name = instance['filename']
		image = cv2.imread(image_name)
		# cv2.imread returns None instead of raising when the read fails.
		if image is None:
			raise IOError('Cannot find ' + str(image_name))
		image = image[:,:,::-1]
		image_h, image_w, _ = image.shape
		# Random aspect ratio and scale.
		dw = self.jitter * image_w
		dh = self.jitter * image_h
		new_ar = (image_w + np.random.uniform(-dw, dw)) / (image_h + np.random.uniform(-dh, dh))
		scale = np.random.uniform(0.25, 2)
		if (new_ar < 1):
			new_h = int(scale * net_h)
			new_w = int(net_h * new_ar)
		else:
			new_w = int(scale * net_w)
			new_h = int(net_w / new_ar)
		# Random placement of the resized image inside the network input.
		dx = int(np.random.uniform(0, net_w - new_w))
		dy = int(np.random.uniform(0, net_h - new_h))
		im_sized = apply_random_scale_and_crop(image, new_w, new_h, net_w, net_h, dx, dy)
		im_sized = random_distort_image(im_sized)
		flip = np.random.randint(2)
		im_sized = random_flip(im_sized, flip)
		all_objs = correct_bounding_boxes(instance['object'], new_w, new_h, net_w, net_h, dx, dy, flip, image_w, image_h)
		return im_sized, all_objs
	def on_epoch_end(self):
		'''Reshuffle the instances in place when shuffling is enabled.'''
		if self.shuffle: np.random.shuffle(self.instances)
	def num_classes(self):
		'''Number of class labels.'''
		return len(self.labels)
	def size(self):
		'''Number of instances.'''
		return len(self.instances)
	def get_anchors(self):
		'''Return the anchors as a flat [w0, h0, w1, h1, ...] list.'''
		anchors = []
		for anchor in self.anchors:
			anchors += [anchor.xmax, anchor.ymax]
		return anchors
	def load_annotation(self, i):
		'''
		Return the objects of instance i as an array of
		[xmin, ymin, xmax, ymax, class_index] rows ([[]] when there are none).
		'''
		annots = []
		for obj in self.instances[i]['object']:
			annot = [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax'], self.labels.index(obj['name'])]
			annots += [annot]
		if len(annots) == 0: annots = [[]]
		return np.array(annots)
	def load_image(self, i):
		'''Read the image of instance i with cv2.imread.'''
		return cv2.imread(self.instances[i]['filename'])
def parse_voc_annotation(ann_dir, img_dir, cache_name, labels=[]):
	'''
	Parse the XML annotation files of ann_dir, with a pickle cache.

	Returns (all_insts, seen_labels). all_insts is a list of dicts with the
	keys 'filename' (img_dir + file name from the XML), 'width', 'height' and
	'object' (list of dicts with 'name', 'xmin', 'ymin', 'xmax', 'ymax');
	images without any kept object are dropped. seen_labels counts every
	object name encountered, kept or not. When labels is non-empty only
	objects whose name is in labels are kept.

	If cache_name exists the result is loaded from it instead of parsing;
	otherwise the parsed result is written to it. ann_dir and img_dir are
	joined by plain concatenation, so they need their trailing separator.
	'''
	if os.path.exists(cache_name):
		# NOTE: pickle.load executes arbitrary code; the cache must be trusted.
		with open(cache_name, 'rb') as handle:
			cached = pickle.load(handle)
		return cached['all_insts'], cached['seen_labels']

	all_insts = []
	seen_labels = {}
	corner_keys = ('xmin', 'ymin', 'xmax', 'ymax')
	for ann in sorted(os.listdir(ann_dir)):
		img = {'object':[]}
		try:
			tree = ET.parse(ann_dir + ann)
		except Exception as e:
			print(e)
			print('Ignore this bad annotation: ' + ann_dir + ann)
			continue
		for elem in tree.iter():
			tag = elem.tag
			if 'filename' in tag:
				img['filename'] = img_dir + elem.text
			if 'width' in tag:
				img['width'] = int(elem.text)
			if 'height' in tag:
				img['height'] = int(elem.text)
			if not ('object' in tag or 'part' in tag):
				continue
			obj = {}
			for attr in list(elem):
				if 'name' in attr.tag:
					name = attr.text
					obj['name'] = name
					seen_labels[name] = seen_labels.get(name, 0) + 1
					if len(labels) > 0 and name not in labels:
						# Unwanted class: skip the rest of this object.
						break
					# obj is appended now and completed by the bndbox below.
					img['object'] += [obj]
				if 'bndbox' in attr.tag:
					for dim in list(attr):
						for key in corner_keys:
							if key in dim.tag:
								obj[key] = int(round(float(dim.text)))
		if len(img['object']) > 0:
			all_insts += [img]

	cache = {'all_insts': all_insts, 'seen_labels': seen_labels}
	with open(cache_name, 'wb') as handle:
		pickle.dump(cache, handle, protocol=pickle.HIGHEST_PROTOCOL)
	return all_insts, seen_labels

class CustomTensorBoard(TensorBoard):
	'''
	TensorBoard callback that also writes the logged values after batches.

	Every log_every batches each entry of logs except 'batch' and 'size' is
	written as a scalar summary, using the running batch counter as the step.
	'''
	def __init__(self, log_every=1, **kwargs):
		super(CustomTensorBoard, self).__init__(**kwargs)
		self.counter = 0
		self.log_every = log_every
	def on_batch_end(self, batch, logs=None):
		self.counter += 1
		due = (self.counter % self.log_every == 0)
		if due:
			for tag, value in logs.items():
				if tag not in ['batch', 'size']:
					summary = tf.Summary()
					entry = summary.value.add()
					entry.simple_value = value.item()
					entry.tag = tag
					self.writer.add_summary(summary, self.counter)
			self.writer.flush()
		super(CustomTensorBoard, self).on_batch_end(batch, logs)

class CustomModelCheckpoint(ModelCheckpoint):
	'''
	ModelCheckpoint variant that saves model_to_save (the template model)
	instead of the model the callback is attached to.
	'''
	def __init__(self, model_to_save, **kwargs):
		super(CustomModelCheckpoint, self).__init__(**kwargs)
		self.model_to_save = model_to_save
	def on_epoch_end(self, epoch, logs=None):
		'''
		Save model_to_save every `period` epochs.

		With save_best_only the model is written only when the monitored value
		improves on the best one seen so far; a missing monitored value
		triggers a RuntimeWarning and no save.
		'''
		logs = logs or {}
		self.epochs_since_last_save += 1
		if self.epochs_since_last_save >= self.period:
			self.epochs_since_last_save = 0
			filepath = self.filepath.format(epoch=epoch + 1, **logs)

			def write_checkpoint():
				# Weights only or the full model, depending on the settings.
				if self.save_weights_only:
					self.model_to_save.save_weights(filepath, overwrite=True)
				else:
					self.model_to_save.save(filepath, overwrite=True)

			if not self.save_best_only:
				if self.verbose > 0:
					print('\nEpoch %05d: saving model to %s' % (epoch + 1, filepath))
				write_checkpoint()
			else:
				current = logs.get(self.monitor)
				if current is None:
					warnings.warn('Can save best model only with %s available, '
									'skipping.' % (self.monitor), RuntimeWarning)
				elif self.monitor_op(current, self.best):
					if self.verbose > 0:
						print('\nEpoch %05d: %s improved from %0.5f to %0.5f,'
								' saving model to %s' % (epoch + 1, self.monitor, self.best, current, filepath))
					self.best = current
					write_checkpoint()
				elif self.verbose > 0:
					print('\nEpoch %05d: %s did not improve from %0.5f' % (epoch + 1, self.monitor, self.best))
		# The parent's batch-end hook (not its epoch-end hook) is invoked here.
		super(CustomModelCheckpoint, self).on_batch_end(epoch, logs)

def create_training_instances(train_annot_folder, train_image_folder, train_cache, valid_annot_folder, valid_image_folder, valid_cache, labels,):
	'''
	Parse the training annotations and split them 80/20 into train and
	validation instances.

	Returns (train_ints, valid_ints, sorted labels, max_box_per_image). When
	labels is non-empty and some label has no annotation, a message is printed
	and (None, None, None, None) is returned, so that callers can always
	unpack four values. When labels is empty all seen labels are used.

	valid_annot_folder, valid_image_folder and valid_cache are accepted but
	not used: the validation set is always split off the training set. The
	split shuffles with a fixed seed (0) and then re-seeds numpy's global
	generator without a fixed seed.
	'''
	train_ints, train_labels = parse_voc_annotation(train_annot_folder, train_image_folder, train_cache, labels)
	print('valid_annot_folder not exists. Spliting the trainining set.')
	train_valid_split = int(0.8*len(train_ints))
	# Fixed seed for a reproducible split, then back to a random seed.
	np.random.seed(0)
	np.random.shuffle(train_ints)
	np.random.seed()
	valid_ints = train_ints[train_valid_split:]
	train_ints = train_ints[:train_valid_split]
	if len(labels) > 0:
		overlap_labels = set(labels).intersection(set(train_labels.keys()))
		print('Seen labels: \t'  + str(train_labels) + '\n')
		print('Given labels: \t' + str(labels))
		if len(overlap_labels) < len(labels):
			print('Some labels have no annotations! Please revise the list of labels in the config.json.')
			# Same arity as the normal return value.
			return None, None, None, None
	else:
		print('No labels are provided. Train on all seen labels.')
		print(train_labels)
		labels = train_labels.keys()
	max_box_per_image = max([len(inst['object']) for inst in (train_ints + valid_ints)])
	return train_ints, valid_ints, sorted(labels), max_box_per_image

def create_callbacks(saved_weights_name, tensorboard_logs, model_to_save):
	'''
	Create the training callbacks, all monitoring 'loss' where applicable.

	Returns [early stopping, checkpoint of model_to_save written to
	saved_weights_name, learning-rate reduction on plateau, TensorBoard
	logging into tensorboard_logs]. The log directory is created if needed.
	'''
	makedirs(tensorboard_logs)
	stopper = EarlyStopping(monitor='loss', min_delta=0.01, patience=5, mode='min', verbose=1)
	saver = CustomModelCheckpoint(
		model_to_save=model_to_save,
		filepath=saved_weights_name,
		monitor='loss',
		verbose=1,
		save_best_only=True,
		mode='min',
		period=1)
	lr_reducer = ReduceLROnPlateau(
		monitor='loss',
		factor=.1,
		patience=2,
		verbose=1,
		mode='min',
		epsilon=0.01,
		cooldown=0,
		min_lr=0)
	board = CustomTensorBoard(log_dir=tensorboard_logs, write_graph=True, write_images=True)
	return [stopper, saver, lr_reducer, board]

def create_model(
	nb_class,
	anchors,
	max_box_per_image,
	max_grid, batch_size,
	warmup_batches,
	ignore_thresh,
	multi_gpu,
	saved_weights_name,
	lr,
	grid_scales,
	obj_scale,
	noobj_scale,
	xywh_scale,
	class_scale):
	"""Build the YOLOv3 model pair and compile the one used for training.

	All arguments except multi_gpu, saved_weights_name and lr are forwarded to
	create_yolov3_model. With multi_gpu > 1 the model is created under
	tf.device('/cpu:0') with batch_size divided (floor) by multi_gpu, and the
	training model is the multi_gpu_model wrapper around it; otherwise the
	template model itself is trained. If saved_weights_name exists on disk its
	weights are loaded into the template model before compilation.

	Returns:
		(train_model, infer_model) -- train_model is compiled with dummy_loss
		and an Adam optimizer (learning rate `lr`, clipnorm 0.001).
	"""
	parallel = multi_gpu > 1
	yolo_kwargs = dict(
		nb_class          = nb_class,
		anchors           = anchors,
		max_box_per_image = max_box_per_image,
		max_grid          = max_grid,
		batch_size        = batch_size//multi_gpu if parallel else batch_size,
		warmup_batches    = warmup_batches,
		ignore_thresh     = ignore_thresh,
		grid_scales       = grid_scales,
		obj_scale         = obj_scale,
		noobj_scale       = noobj_scale,
		xywh_scale        = xywh_scale,
		class_scale       = class_scale)
	if parallel:
		with tf.device('/cpu:0'):
			template_model, infer_model = create_yolov3_model(**yolo_kwargs)
	else:
		template_model, infer_model = create_yolov3_model(**yolo_kwargs)
	if os.path.exists(saved_weights_name):
		print('\nLoading pretrained weights.\n')
		template_model.load_weights(saved_weights_name)
	train_model = multi_gpu_model(template_model, gpus=multi_gpu) if parallel else template_model
	optimizer = Adam(lr=lr, clipnorm=0.001)
	train_model.compile(loss=dummy_loss, optimizer=optimizer)
	return train_model, infer_model

def main_train():
	"""Train the detector described by the module-level `config` and print its AP/mAP.

	Steps: build train/validation instance lists, wrap them in BatchGenerators,
	create and compile the model, fit it with the callbacks from
	create_callbacks, then run `evaluate` on the validation generator and print
	the average precision per label followed by their mean.

	Raises:
		ValueError: if create_training_instances reports (via an all-None result)
			that some configured labels have no annotations.
	"""
	instances = create_training_instances(
		config['train']['train_annot_folder'],
		config['train']['train_image_folder'],
		config['train']['cache_name'],
		config['valid']['valid_annot_folder'],
		config['valid']['valid_image_folder'],
		config['valid']['cache_name'],
		config['model']['labels'])
	# A label mismatch is signalled with a tuple of Nones. Check before unpacking so
	# the failure is reported clearly instead of surfacing as an unpack error or as a
	# crash deep inside the generators.
	if instances[0] is None:
		raise ValueError('Training aborted: some labels in the config have no annotations (see message above).')
	train_ints, valid_ints, labels, max_box_per_image = instances
	print('\nTraining on: \t' + str(labels) + '\n')
	train_generator = BatchGenerator(
		instances           = train_ints,
		anchors             = config['model']['anchors'],
		labels              = labels,
		downsample          = 32,
		max_box_per_image   = max_box_per_image,
		batch_size          = config['train']['batch_size'],
		min_net_size        = config['model']['min_input_size'],
		max_net_size        = config['model']['max_input_size'],
		shuffle             = True,
		jitter              = 0.3,
		norm                = normalize)
	valid_generator = BatchGenerator(
		instances           = valid_ints,
		anchors             = config['model']['anchors'],
		labels              = labels,
		downsample          = 32,
		max_box_per_image   = max_box_per_image,
		batch_size          = config['train']['batch_size'],
		min_net_size        = config['model']['min_input_size'],
		max_net_size        = config['model']['max_input_size'],
		shuffle             = True,
		jitter              = 0.0,
		norm                = normalize)
	# Resuming from existing weights: skip the warm-up phase entirely.
	if os.path.exists(config['train']['saved_weights_name']):
		config['train']['warmup_epochs'] = 0
	warmup_batches = config['train']['warmup_epochs'] * (config['train']['train_times']*len(train_generator))
	os.environ['CUDA_VISIBLE_DEVICES'] = config['train']['gpus']
	# `gpus` is a comma-separated string of device ids; its length is the GPU count.
	multi_gpu = len(config['train']['gpus'].split(','))
	train_model, infer_model = create_model(
		nb_class             = len(labels),
		anchors              = config['model']['anchors'],
		max_box_per_image    = max_box_per_image,
		max_grid             = [config['model']['max_input_size'], config['model']['max_input_size']],
		batch_size           = config['train']['batch_size'],
		warmup_batches       = warmup_batches,
		ignore_thresh        = config['train']['ignore_thresh'],
		multi_gpu            = multi_gpu,
		saved_weights_name   = config['train']['saved_weights_name'],
		lr                   = config['train']['learning_rate'],
		grid_scales          = config['train']['grid_scales'],
		obj_scale            = config['train']['obj_scale'],
		noobj_scale          = config['train']['noobj_scale'],
		xywh_scale           = config['train']['xywh_scale'],
		class_scale          = config['train']['class_scale'],)
	callbacks = create_callbacks(config['train']['saved_weights_name'], config['train']['tensorboard_dir'], infer_model)
	train_model.fit_generator(
		generator           = train_generator,
		steps_per_epoch     = len(train_generator) * config['train']['train_times'],
		epochs              = config['train']['nb_epochs'] + config['train']['warmup_epochs'],
		verbose             = 1,
		callbacks           = callbacks,
		workers             = 4,
		max_queue_size      = 8)
	# In the multi-GPU case the inference model is reloaded from the saved weights file.
	if multi_gpu > 1:
		infer_model = load_model(config['train']['saved_weights_name'])
	average_precisions = evaluate(infer_model, valid_generator)
	for label, average_precision in average_precisions.items():
		print(labels[label] + ': {:.4f}'.format(average_precision))
	print('mAP: {:.4f}'.format(sum(average_precisions.values()) / len(average_precisions)))

def main_predict(WEIGHTS, FILENAME, output_path):
	"""Run detection with a saved model on a webcam stream, an .mp4 file, or image file(s).

	Args:
		WEIGHTS: path passed to load_model.
		FILENAME: input selector. If it contains 'webcam', frames are read from
			capture device 0 and shown in a window until ESC is pressed. If it ends
			in '.mp4', the video is annotated and written to `output_path` + the
			file name. Otherwise it is treated as an image file or a directory of
			images; only names whose last four characters are '.jpg', '.png' or
			'JPEG' are processed.
		output_path: prefix prepended (by string concatenation) to each output
			file name, so a directory must include its trailing separator.

	Detection uses a fixed 416x416 network size, object threshold 0.5 and NMS
	threshold 0.45; anchors and labels come from the module-level `config`.
	"""
	input_path = FILENAME
	net_h, net_w = 416, 416
	obj_thresh, nms_thresh = 0.5, 0.45
	anchors = config['model']['anchors']
	labels = config['model']['labels']
	os.environ['CUDA_VISIBLE_DEVICES'] = config['train']['gpus']
	infer_model = load_model(WEIGHTS)

	def _detect(frames):
		# One detection call over a batch of frames with the fixed settings above.
		return get_yolo_boxes(infer_model, frames, net_h, net_w, anchors, obj_thresh, nms_thresh)

	if 'webcam' in input_path:
		video_reader = cv2.VideoCapture(0)
		batch_size   = 1
		images       = []
		try:
			while True:
				ret_val, image = video_reader.read()
				if ret_val: images += [image]
				if (len(images)==batch_size) or (not ret_val and len(images)>0):
					batch_boxes = _detect(images)
					for frame, boxes in zip(images, batch_boxes):
						draw_boxes(frame, boxes, labels, obj_thresh)
						cv2.imshow('video with bboxes', frame)
					images = []
				# 27 is the ESC key code.
				if cv2.waitKey(1) == 27:
					break
		finally:
			# Free the camera even if detection raises; it was previously never released.
			video_reader.release()
			cv2.destroyAllWindows()
	elif input_path[-4:] == '.mp4':
		video_out = output_path + os.path.basename(input_path)
		video_reader = cv2.VideoCapture(input_path)
		nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
		frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
		frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
		video_writer = cv2.VideoWriter(video_out, cv2.VideoWriter_fourcc(*'MPEG'), 50.0, (frame_w, frame_h))
		batch_size  = 1
		images      = []
		start_point = 0
		show_window = False

		def _write_annotated(frames):
			# Detect, draw and append each frame of the batch to the output video.
			batch_boxes = _detect(frames)
			for frame, boxes in zip(frames, batch_boxes):
				draw_boxes(frame, boxes, labels, obj_thresh)
				if show_window: cv2.imshow('video with bboxes', frame)
				video_writer.write(frame)

		try:
			for frame_idx in tqdm(range(nb_frames)):
				ret_val, image = video_reader.read()
				if not ret_val:
					# The stream ended before the reported frame count: flush what is
					# pending and stop rather than feeding a missing frame to the model.
					if images:
						_write_annotated(images)
						images = []
					break
				# start_point is a percentage of the video to skip from the beginning.
				if (float(frame_idx+1)/nb_frames) > start_point/100.:
					images += [image]
					if (frame_idx%batch_size == 0) or (frame_idx == (nb_frames-1) and len(images) > 0):
						_write_annotated(images)
						images = []
					if show_window and cv2.waitKey(1) == 27: break
		finally:
			# Always release the reader/writer so the output file is finalised.
			if show_window: cv2.destroyAllWindows()
			video_reader.release()
			video_writer.release()
	else:
		if os.path.isdir(input_path):
			# os.path.join works whether or not the directory has a trailing separator.
			image_paths = [os.path.join(input_path, inp_file) for inp_file in os.listdir(input_path)]
		else:
			image_paths = [input_path]
		image_paths = [inp_file for inp_file in image_paths if (inp_file[-4:] in ['.jpg', '.png', 'JPEG'])]
		for image_path in image_paths:
			image = cv2.imread(image_path)
			print(image_path)
			if image is None:
				# cv2.imread signals an unreadable/missing file by returning None.
				print('Could not read image, skipping: ' + image_path)
				continue
			boxes = _detect([image])[0]
			draw_boxes(image, boxes, labels, obj_thresh)
			cv2.imwrite(output_path + os.path.basename(image_path), np.uint8(image))

# Start training; main_train takes no arguments and reads everything from the global `config`.
main_train()

valid_annot_folder not exists. Spliting the trainining set.
Seen labels: 	{'Paramecium': 63}

Given labels: 	['Paramecium']

Training on: 	['Paramecium']

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use `tf.cast` instead.









resizing:  320 320
Epoch 1/1000
 6/96 [>.............................] - ETA: 15:56 - loss: 215.0587 - yolo_layer_1_loss: 25.6083 - yolo_layer_2_loss: 61.8071 - yolo_layer_3_loss: 127.6433resizing:  416 416
11/96 [==>...........................] - ETA: 9:10 - loss: 199.9084 - yolo_layer_1_loss: 23.4305 - yolo_layer_2_loss: 57.5256 - yolo_layer_3_loss: 118.9524 resizing:  352 352
17/96 [====>.........................] - ETA: 6:35 - loss: 205.4871 - yolo_layer_1_loss: 23.8153 - yolo_layer_2_loss: 58.8670 - yolo_layer_3_loss: 122.8048resizing:  448 448

Epoch 00001: loss improved from inf to 154.24032, saving model to ./weights.h5
Epoch 2/1000
 5/96 [>.............................] - ETA: 1:36 - loss: 83.4476 - yolo_layer_1_loss: 7.3494 - yolo_layer_2_loss: 22.9131 - yolo_layer_3_loss: 53.1851resizing:  288 288
11/96 [==>...........................] - ETA: 1:55 - loss: 77.0975 - yolo_layer_1_loss: 6.5236 - yolo_layer_2_loss: 21.4661 - yolo_layer_3_loss: 49.1079resizing:  448 448
17/9

KeyboardInterrupt: ignored