<a href="https://colab.research.google.com/github/9aj/Computer-Vision-Notes/blob/main/yolov1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
# imports
import tensorflow as tf
import tensorflow.keras as K
from tensorflow.keras.layers import InputLayer, Conv2D, Activation, MaxPooling2D, Dense, Flatten, Reshape, Dropout, BatchNormalization
import cv2

# Report the library versions in use: TensorFlow first, then OpenCV, one per line
print(tf.__version__, cv2.__version__, sep="\n")

2.7.0
4.1.2


In [None]:
# Dataset download: fetch the PASCAL VOC 2007 archives (the trainval tarball
# and the test tarball) into the current working directory
!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar

In [None]:
# Extract dataset: unpack both archives, then delete the tarballs so only
# the extracted files remain
!tar xvf VOCtrainval_06-Nov-2007.tar
!tar xvf VOCtest_06-Nov-2007.tar
!rm VOCtrainval_06-Nov-2007.tar
!rm VOCtest_06-Nov-2007.tar

Darknetv1 Model

In [20]:
class FCLayer(tf.keras.layers.Layer):
  """Reshape a flat feature vector into the YOLO grid output tensor.

  The flat input is read as three consecutive segments -- class
  probabilities, box confidences and box coordinates -- each of which is
  reshaped onto the (rows, cols) grid; the three grids are then concatenated
  along the last axis.  With the default 7x7 grid the result has shape
  (batch, 7, 7, 11) and its channels are ordered
  [p, c, c, b, b, b, b, b, b, b, b]: NUM_CLASSES class probabilities,
  NUM_BOXES confidences, then 4 coordinates for each of the NUM_BOXES boxes.

  The input must carry exactly rows * cols * (NUM_CLASSES + 5 * NUM_BOXES)
  features per sample, otherwise the final reshape fails at run time.

  Args:
    shape: target grid shape.  Only the first two entries (rows, cols) are
      read; the channel count is fixed by NUM_CLASSES and NUM_BOXES.
  """

  NUM_CLASSES = 1   # C: class probabilities per grid cell
  NUM_BOXES = 2     # B: boxes predicted per grid cell

  def __init__(self, shape=(7,7,11)):
    super(FCLayer, self).__init__()
    self.target_shape = shape

  # get_config is deliberately not overridden: serialisation is not required.

  def call(self, inputs):
    """Split `inputs` (batch, features) into its segments and grid them."""
    rows, cols = self.target_shape[0], self.target_shape[1]
    # Number of grid cells.  This used to be rows * rows, which is only
    # correct for square grids.
    cells = rows * cols
    batch = K.backend.shape(inputs)[0]

    # Column offsets at which each flat segment ends.
    class_end = cells * self.NUM_CLASSES
    conf_end = class_end + cells * self.NUM_BOXES

    def to_grid(flat, channels):
      # (batch, cells * channels) -> (batch, rows, cols, channels)
      return K.backend.reshape(flat, (batch, rows, cols, channels))

    class_probability = to_grid(inputs[:, :class_end], self.NUM_CLASSES)
    confidence = to_grid(inputs[:, class_end:conf_end], self.NUM_BOXES)
    # Everything after the confidences is box coordinates, 4 per box.
    bb = to_grid(inputs[:, conf_end:], self.NUM_BOXES * 4)

    return K.backend.concatenate([class_probability, confidence, bb])

class YOLOv1(tf.keras.models.Sequential):
  """YOLOv1 detector (Darknet backbone + dense head) as a Sequential model.

  A new instance holds no layers; call build_darknet() once to add them.
  """

  def __init__(self):
    super(YOLOv1, self).__init__()
    self.inp_xy = 448               # input images are inp_xy x inp_xy pixels
    self.inp_colour_channels = 3    # colour channels of the input image

  @staticmethod
  def leaky_relu_activation(alpha):
    """Return a callable computing max(alpha * x, x) element-wise.

    For 0 <= alpha <= 1 this is the leaky ReLU with negative slope `alpha`.
    Declared static because it reads no instance state; it can be called as
    YOLOv1.leaky_relu_activation(a) or through an instance.
    """
    return lambda x: tf.maximum(alpha * x, x)

  def _conv_block(self, filters, kernel_size, strides=(1, 1)):
    """Append a 'same'-padded Conv2D followed by LeakyReLU(0.1)."""
    self.add(Conv2D(filters=filters, kernel_size=kernel_size, strides=strides, padding='same'))
    self.add(Activation(activation=K.layers.LeakyReLU(0.1)))

  def _max_pool(self):
    """Append a 2x2, stride-2 max pooling layer (halves height and width)."""
    self.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'))

  def build_darknet(self):
    """Add the input, convolutional and fully connected layers to the model.

    Spatial size through the network for the 448x448 input:
    448 -> 224 -> 112 -> 56 -> 28 -> 14 -> 7, so the last convolution emits
    a 7x7x1024 feature map which the dense head maps to a 7x7x11 grid.
    """
    # Input Tensor (448,448,3)
    self.add(InputLayer(input_shape = (self.inp_xy,self.inp_xy,self.inp_colour_channels)))

    # Conv Layers
    self._conv_block(64, (7, 7), strides=(2,2))
    self._max_pool()

    self._conv_block(192, (3, 3))
    self._max_pool()

    self._conv_block(128, (1, 1))
    self._conv_block(256, (3, 3))
    self._conv_block(256, (1, 1))
    self._conv_block(512, (3, 3))
    self._max_pool()

    # Alternating (1x1) & (3x3) Kernel Conv
    for _ in range(4):
      self._conv_block(256, (1, 1))
      self._conv_block(512, (3, 3))
    self._conv_block(512, (1, 1))
    self._conv_block(1024, (3, 3))
    self._max_pool()

    for _ in range(2):
      self._conv_block(512, (1, 1))
      self._conv_block(1024, (3, 3))

    self._conv_block(1024, (3, 3))
    self._conv_block(1024, (3, 3), strides=(2,2))

    # Fully Connected Layers
    grid_shape = (7, 7, 11)
    self.add(Flatten())
    self.add(Dense(512))
    self.add(Dense(1024))
    self.add(Dropout(0.5))
    self.add(Activation(activation=K.layers.LeakyReLU(0.1)))
    # Linear output layer sized to the detection grid.  FCLayer reshapes its
    # input into a 7x7x11 tensor and therefore needs exactly 7*7*11 = 539
    # features; feeding it the 1024-wide activation directly makes its final
    # reshape fail at run time.
    self.add(Dense(grid_shape[0] * grid_shape[1] * grid_shape[2]))
    self.add(FCLayer(shape=grid_shape))
            


    

In [22]:
# Instantiate the model; YOLOv1 subclasses the Keras Sequential model and
# holds no layers until build_darknet() is called
model = YOLOv1()

# Add the Darknet architecture's layers to the model
model.build_darknet()

# Display the layer-by-layer summary (layer types, output shapes, parameter counts)
model.summary()

Model: "yol_ov1_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_90 (Conv2D)          (None, 224, 224, 64)      9472      
                                                                 
 activation_91 (Activation)  (None, 224, 224, 64)      0         
                                                                 
 max_pooling2d_16 (MaxPoolin  (None, 112, 112, 64)     0         
 g2D)                                                            
                                                                 
 conv2d_91 (Conv2D)          (None, 112, 112, 192)     110784    
                                                                 
 activation_92 (Activation)  (None, 112, 112, 192)     0         
                                                                 
 max_pooling2d_17 (MaxPoolin  (None, 56, 56, 192)      0         
 g2D)                                                    