In [1]:
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import BatchNormalization, Conv2D, Input, ZeroPadding2D, LeakyReLU, UpSampling2D

In [2]:
def parse_cfg(cfgfile):
    """Parse a Darknet-style ``.cfg`` file into a list of section dictionaries.

    Every ``[section]`` header starts a new dictionary whose ``'type'`` entry
    holds the section name. Each following ``key=value`` line is stored in that
    dictionary; keys and values are kept as strings with surrounding whitespace
    removed. Blank lines, whitespace-only lines and lines whose first
    non-blank character is ``#`` are ignored.

    Args:
        cfgfile: Path of the configuration file to read.

    Returns:
        A list of dicts, one per section, in file order. The list is empty
        when the file contains no sections or options.

    Raises:
        ValueError: If a section header is not closed by ``]`` or an option
            line has no ``=`` separator.
    """
    blocks = []
    holder = {}
    with open(cfgfile, 'r') as file:
        for lineno, raw_line in enumerate(file, start=1):
            line = raw_line.strip()

            # Stripping first means indented comments and whitespace-only
            # lines are skipped instead of reaching the key/value split.
            if not line or line.startswith('#'):
                continue

            if line.startswith('['):
                if not line.endswith(']'):
                    raise ValueError('{}:{}: malformed section header {!r}'.format(
                        cfgfile, lineno, line))
                # A new header closes the section collected so far.
                if holder:
                    blocks.append(holder)
                holder = {'type': line[1:-1].strip()}
                continue

            # Split on the first '=' only so values may themselves contain '='.
            key, separator, value = line.partition('=')
            if not separator:
                raise ValueError('{}:{}: expected key=value, got {!r}'.format(
                    cfgfile, lineno, line))
            holder[key.strip()] = value.strip()

    # Flush the last section; nothing is appended for an empty file.
    if holder:
        blocks.append(holder)
    return blocks

In [3]:
# Sanity check: show the parsed cfg blocks as this cell's output.
parse_cfg('cfg/yolov3.cfg')

[{'type': 'net',
  'batch': '64',
  'subdivisions': '16',
  'width': '608',
  'height': '608',
  'channels': '3',
  'momentum': '0.9',
  'decay': '0.0005',
  'angle': '0',
  'saturation': '1.5',
  'exposure': '1.5',
  'hue': '.1',
  'learning_rate': '0.001',
  'burn_in': '1000',
  'max_batches': '500200',
  'policy': 'steps',
  'steps': '400000,450000',
  'scales': '.1,.1'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '32',
  'size': '3',
  'stride': '1',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '64',
  'size': '3',
  'stride': '2',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '32',
  'size': '1',
  'stride': '1',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '64',
  'size': '3',
  'stride': '1',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'shortcut', 'from': '-3', 'activatio

In [4]:
def YOLOv3Net(cfgfile, model_size, num_classes, inputs):
    """Stack the layers described by a Darknet YOLOv3 cfg file on top of `inputs`.

    The cfg is read with `parse_cfg`. Its first block is skipped (presumably
    the `[net]` hyper-parameters - confirm against the cfg) and every following
    block is turned into the matching Keras / TensorFlow operation. The output
    tensor and channel count of each layer are recorded so that later `route`
    and `shortcut` blocks can refer back to earlier layers.

    Args:
        cfgfile: Path to the Darknet cfg file.
        model_size: Not used by this function yet.
        num_classes: Not used by this function yet.
        inputs: Tensor the first layer is applied to.

    Returns:
        None.
        NOTE(review): the `yolo` branch only parses its anchors; predictions
        are not decoded and no model is built or returned yet.
    """
    blocks = parse_cfg(cfgfile)

    # outputs[k] is the output tensor of layer k and output_filters[k] its
    # channel count; both are filled at the end of every loop iteration.
    outputs = {}
    output_filters = []
    # Placeholder until the first convolutional block sets a real channel count.
    filters = []

    # The YOLOv3 cfg uses 5 layer types in general:
    # 1. Convolutional layer
    # 2. Upsample layer
    # 3. Route layer
    # 4. Shortcut layer
    # 5. Yolo layer
    for i, block in enumerate(blocks[1:]):
        block_type = block['type']

        # Convolutional layers come with or without batch normalization.
        if block_type == 'convolutional':
            activation = block['activation']
            filters = int(block['filters'])
            kernel_size = int(block['size'])
            strides = int(block['stride'])
            # Read the flag's value rather than its mere presence, so that an
            # explicit `batch_normalize=0` disables batch normalization.
            has_batch_norm = int(block.get('batch_normalize', 0)) != 0

            # A stride above 1 downsamples: pad top/left explicitly and run
            # the convolution with 'valid' padding.
            if strides > 1:
                inputs = ZeroPadding2D(((1, 0), (1, 0)))(inputs)

            inputs = Conv2D(filters,
                            kernel_size,
                            strides=strides,
                            padding='valid' if strides > 1 else 'same',
                            name='conv_' + str(i),
                            # No bias when batch normalization follows the convolution.
                            use_bias=not has_batch_norm)(inputs)

            if has_batch_norm:
                inputs = BatchNormalization(name='bnorm_' + str(i))(inputs)
            # Any activation other than 'leaky' is left linear.
            if activation == 'leaky':
                inputs = LeakyReLU(alpha=0.1, name='leaky_' + str(i))(inputs)

        # Upsample layer: enlarges the previous feature map by `stride` using
        # UpSampling2D's default interpolation (nearest neighbour).
        elif block_type == 'upsample':
            stride = int(block['stride'])
            inputs = UpSampling2D(stride)(inputs)

        # Route layer: forwards one earlier feature map, or concatenates
        # several along the channel axis. A negative index is relative to the
        # current layer (e.g. -4 means four layers back); a non-negative index
        # is an absolute layer number (e.g. 61).
        elif block_type == 'route':
            layer_ids = [int(value) for value in block['layers'].split(',')]
            sources = [idx if idx >= 0 else i + idx for idx in layer_ids]
            filters = sum(output_filters[src] for src in sources)
            if len(sources) > 1:
                inputs = tf.concat([outputs[src] for src in sources], axis=-1)
            else:
                inputs = outputs[sources[0]]

        # Shortcut layer: skip connection. With `from = -3` the feature map
        # from three layers back is added to the previous layer's feature map.
        elif block_type == 'shortcut':
            from_ = int(block['from'])
            inputs = outputs[i - 1] + outputs[i + from_]

        # Yolo layer: select the (width, height) anchor pairs named by `mask`.
        elif block_type == 'yolo':
            mask = [int(x) for x in block['mask'].split(',')]
            flat_anchors = [int(a) for a in block['anchors'].split(',')]
            anchors = [(flat_anchors[j], flat_anchors[j + 1])
                       for j in range(0, len(flat_anchors), 2)]
            anchors = [anchors[j] for j in mask]
            # TODO: decode box predictions from `inputs` using `anchors`;
            # nothing is done with the selected anchors yet.

        # Record this layer's result so later route/shortcut blocks can look
        # it up by layer index.
        outputs[i] = inputs
        output_filters.append(filters)

In [None]:
# Input size of the network; Conv2D's default channels-last layout makes this
# (height, width, channels).
model_size = (416, 416, 3)
# Use the Keras symbolic `Input` (already imported) rather than
# `tf.placeholder`, which is not available in the TensorFlow 2.x namespace.
# The batch dimension is left unspecified, giving shape (None, 416, 416, 3).
X = Input(shape=model_size)
# NOTE(review): num_classes=9 looks suspicious (the stock yolov3.cfg is
# configured for 80 classes) - confirm the intended value.
YOLOv3Net('cfg/yolov3.cfg', model_size, 9, X)