# Faster R-CNN for Object Detection


In [None]:
import cv2 as cv
from matplotlib import pyplot as plt
import numpy as np
import os
import pandas as pd
import random
from skimage import io
from shutil import copyfile
import sys
import time

import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img,img_to_array

## Importing the necessary Datasets


In [None]:
# Paths of the CSV files that describe the satellite-imagery dataset
data_Set = 'data_Set_satellite_imagery.csv'  # image index
annotated_dataset = 'annotations_satellite_imagery.csv'  # box annotations
class_description_dataset = 'class_description_data.csv'  # label id -> name

In [None]:
# Load the image index; its 'file_name' column lists the images that are
# plotted at the end of the notebook.
images_boxable = pd.read_csv(data_Set)
print(images_boxable.head())

In [None]:
# Load the box annotations. Columns read later in this notebook:
# image_url, XMin, XMax, YMin, YMax and Label_id.
annotations_box = pd.read_csv(annotated_dataset)
print(annotations_box.head())

In [None]:
# Load the lookup table that maps each Label_id to its Label_Name.
class_descriptions = pd.read_csv(class_description_dataset)
print(class_descriptions.head())

## plot_bbox - Function that draws the dataset's annotations on an image


In [None]:
def plot_bbox(img_id, draw_boxes=False):
    """Show one image with the class label of every annotated box.

    The image is read from the path ``img_id``. Its annotations are the rows
    of ``annotations_box`` whose ``image_url`` equals ``img_id``; each row's
    ``Label_id`` is translated to a name through ``class_descriptions``.
    Rows whose class is not one of the four known classes are printed but
    not drawn.

    Args:
        img_id: Image path, as stored in the ``file_name`` column of the
            image index and in the ``image_url`` column of the annotations.
        draw_boxes: When True, also draw each box outline in the class
            colour. Defaults to False, which renders the labels only.

    Notes:
        Box coordinates are used as pixel positions and rounded to ``int``
        because OpenCV drawing functions require integer points.
        ``Label_id`` values in ``class_descriptions`` are assumed unique.
    """
    # Per class: (RGB colour, how far above YMin the label is written).
    class_styles = {
        'Building': ((255, 0, 0), 10),
        'Water': ((0, 0, 255), 10),
        'Trees': ((34, 139, 34), 10),
        'Empty Land': ((255, 255, 0), 0),
    }
    font = cv.FONT_HERSHEY_COMPLEX

    # img_id already is the file name; reading it directly avoids building
    # an invalid path when the image index contains the name more than once.
    img = io.imread(img_id)
    print(f"Image:{img.shape}")

    # Build the id -> name lookup once instead of filtering per annotation.
    label_names = dict(zip(class_descriptions['Label_id'],
                           class_descriptions['Label_Name']))

    bbox = annotations_box[annotations_box['image_url'] == img_id]
    for _, row in bbox.iterrows():
        xmin, ymin, xmax, ymax = (
            int(round(row[col])) for col in ('XMin', 'YMin', 'XMax', 'YMax')
        )

        class_name = str(label_names.get(row['Label_id'], ""))
        print(class_name)

        style = class_styles.get(class_name)
        if style is None:
            continue  # unknown class: nothing to draw
        colour, label_offset = style

        print(f"Coordinates:{xmin,ymin},{xmax,ymax}")
        if draw_boxes:
            cv.rectangle(img, (xmin, ymin), (xmax, ymax), colour, 1)
        cv.putText(img, class_name, (xmin, ymin - label_offset), font, 0.5,
                   colour, 2)

    plt.figure(figsize=(15, 10))
    plt.title("Image with Bounding Box")
    plt.imshow(img)
    plt.axis("off")
    plt.show()
        
        

In [None]:
# Number of non-null entries in the Label_Name column
class_descriptions['Label_Name'].count()

## Partial VGG Convolutional Neural Networks

In [None]:
## Importing the necessary modules for VGG Convolution Neural Network

from sklearn.metrics import average_precision_score

from keras import backend as K
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from keras.layers import Flatten, Dense, Input, Conv2D, MaxPooling2D, Dropout
from keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D, TimeDistributed
from tensorflow.keras.layers import Layer, InputSpec
from keras.utils import layer_utils
from keras.utils.data_utils import get_file
# from keras.objectives import categorical_crossentropy

from keras.models import Model
from keras.utils import generic_utils
from keras import initializers, regularizers

In [None]:
# Doing Partial VGG since we do want only the feature map

def partial_vgg(input_tensor=None, input_shape=(544, 509, 3)):
    """Build the convolutional part of a VGG network as a feature extractor.

    Five blocks of 3x3 ReLU convolutions are stacked; blocks 1-4 each end in
    a 2x2 max-pooling layer, block 5 does not. The fully connected layers
    are left out because only the feature map is needed.

    Args:
        input_tensor: Optional Keras tensor to build the network on. Pass
            your own ``Input`` here when you need a handle on the model
            input, since the function returns only the output tensor. When
            ``None`` a new ``Input`` of ``input_shape`` is created.
        input_shape: ``(rows, cols, channels)`` of the ``Input`` created
            when ``input_tensor`` is ``None``.

    Returns:
        The output tensor of ``block5_conv3`` (512 channels, spatial size
        reduced by the four pooling layers).
    """
    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        img_input = input_tensor

    # (filters, number of conv layers) for blocks 1-5
    vgg_blocks = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    last_block = len(vgg_blocks)

    x = img_input
    for block_no, (filters, n_convs) in enumerate(vgg_blocks, start=1):
        for conv_no in range(1, n_convs + 1):
            x = Conv2D(filters, (3, 3), activation='relu', padding='same',
                       name=f'block{block_no}_conv{conv_no}')(x)

        # The last block is not pooled, so the feature map stays at 1/16 of
        # the input resolution.
        if block_no < last_block:
            x = MaxPooling2D((2, 2), strides=(2, 2),
                             name=f'block{block_no}_pool')(x)
            print(x)

    return x

In [None]:
# Build the VGG feature extractor; `x` is its output feature-map tensor.
x = partial_vgg()

## Region Proposal Network

In [None]:
# Regional Proposal Network

def rpn_layer(base_layers, num_anchors):
    """Attach the Region Proposal Network heads to a feature map.

    Args:
        base_layers: Feature-map tensor produced by the backbone (VGG here).
        num_anchors: Number of anchors per feature-map location (9 here).

    Returns:
        ``[x_class, x_regr, base_layers]``: the sigmoid objectness output
        with ``num_anchors`` channels, the linear box-regression output with
        ``num_anchors * 4`` channels, and the unchanged input feature map.
    """
    # Shared 3x3 convolution feeding both heads
    shared = Conv2D(512, (3, 3), padding='same', activation='relu')(base_layers)

    # Objectness head: one sigmoid score (object / not object) per anchor
    objectness_head = Conv2D(num_anchors, (1, 1), activation='sigmoid')
    objectness = objectness_head(shared)

    # Regression head: four box values per anchor
    box_head = Conv2D(num_anchors * 4, (1, 1), activation='linear')
    box_deltas = box_head(shared)

    return [objectness, box_deltas, base_layers]

## Taking the classifier output from the RPN layer (not the regressor)

In [None]:
# rpn_layer returns [x_class, x_regr, base_layers]; `classifier` holds that
# whole list. The classification output is element 0 of it.

classifier = rpn_layer(x,9)
print(classifier)

In [None]:
# Inspect the RPN classification output (element 0 of the rpn_layer result)

class_layer = classifier[0]
# Note: `.name` prints the name of the rank tensor, not a numeric rank.
print(tf.rank(class_layer).name)
print(class_layer.shape)
# print(type(class_layer))

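(None, 34, 31, 9)
`None` is the batch dimension. 34 x 31 is the spatial size of the feature map: the 544 x 509 input is halved by each of the four 2x2 max-pooling layers (a factor of 16 overall).
9 is the number of anchor boxes per feature-map location, as used in the Faster R-CNN paper.
Each of the 34 x 31 x 9 values is the sigmoid score that the corresponding anchor box belongs to the foreground (object) class.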

## ROI Pooling
ROI Pooling stands for Region of Interest Pooling. It converts regions of the feature map that have different sizes into feature maps of one fixed size.

In [None]:
class RoiPoolingConv(Layer):
    '''ROI pooling layer for 2D inputs.
    See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition,
    K. He, X. Zhang, S. Ren, J. Sun
    # Arguments
        pool_size: int
            Size of pooling region to use. pool_size = 7 will result in a 7x7 region.
        num_rois: number of regions of interest to be used
    # Input shape
        list of two 4D tensors [X_img,X_roi] with shape:
        X_img:
        `(1, rows, cols, channels)`
        X_roi:
        `(1,num_rois,4)` list of rois, with ordering (x,y,w,h)
    # Output shape
        3D tensor with shape:
        `(1, num_rois, channels, pool_size, pool_size)`
    '''
    def __init__(self, pool_size, num_rois, **kwargs):

        self.dim_ordering = K.image_data_format()
        self.pool_size = pool_size
        self.num_rois = num_rois

        super(RoiPoolingConv, self).__init__(**kwargs)

    def build(self, input_shape):
        self.nb_channels = input_shape[0][3]   

    def compute_output_shape(self, input_shape):
        return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels

    def call(self, x, mask=None):

        assert(len(x) == 2)

        # x[0] is image with shape (rows, cols, channels)
        img = x[0]

        # x[1] is roi with shape (num_rois,4) with ordering (x,y,w,h)
        rois = x[1]

        input_shape = K.shape(img)

        outputs = []

        for roi_idx in range(self.num_rois):

            x = np.array(rois)[roi_idx]
            y = np.array(rois)[roi_idx]
            w = np.array(rois)[roi_idx]
            h = np.array(rois)[roi_idx]
            
            x = K.cast(x, 'int32')
            y = K.cast(y, 'int32')
            w = K.cast(w, 'int32')
            h = K.cast(h, 'int32')

            # Resized roi of the image to pooling size (7x7)
            rs = tf.image.resize(img[:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size))
            outputs.append(rs)
                

        final_output = K.concatenate(outputs, axis=0)

        # Reshape to (1, num_rois, pool_size, pool_size, nb_channels)
        # Might be (1, 4, 7, 7, 3)
        final_output = K.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))

        # permute_dimensions is similar to transpose
        final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4))

        return final_output
    
    
    def get_config(self):
        config = {'pool_size': self.pool_size,
                  'num_rois': self.num_rois}
        base_config = super(RoiPoolingConv, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

## Classifier Layer


In [None]:
def classifier_layer(base_layers,num_rois = 3, nb_classes = 4):
    """Create a classifier layer
    
    Args:
        base_layers: vgg
        input_rois: `(1,num_rois,4)` list of rois, with ordering (x,y,w,h)
        num_rois: number of rois to be processed in one time (4 in here)

    Returns:
        list(out_class, out_regr)
        out_class: classifier layer output
        out_regr: regression layer output
    """

    input_shape = (num_rois,7,7,512)
    input_rois = [1,num_rois,4]

    pooling_regions = 7

    # out_roi_pool.shape = (1, num_rois, channels, pool_size, pool_size)
    # num_rois (4) 7x7 roi pooling
    out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois])

    # Flatten the convlutional layer and connected to 2 FC and 2 dropout
    out = TimeDistributed(Flatten(name='flatten'))(out_roi_pool)
    out = TimeDistributed(Dense(4096, activation='relu', name='fc1'))(out)
    out = TimeDistributed(Dropout(0.5))(out)
    out = TimeDistributed(Dense(4096, activation='relu', name='fc2'))(out)
    out = TimeDistributed(Dropout(0.5))(out)

    # There are two output layer
    # out_class: softmax acivation function for classify the class name of the object
    # out_regr: linear activation function for bboxes coordinates regression
    out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'), 
                                name='dense_class_{}'.format(nb_classes))(out)
    # note: no regression target for bg class
    out_regr = TimeDistributed(Dense(4 * (nb_classes-1), activation='linear', kernel_initializer='zero'), 
                               name='dense_regress_{}'.format(nb_classes))(out)

    return [out_class, out_regr]

In [None]:
# Build the detection head on the VGG feature map. classifier_layer returns
# [out_class, out_regr]; element 0 is the class output inspected here.
value_returned = classifier_layer(x)
classifier_out = value_returned[0]
print(classifier_out)
print(type(classifier_out))
        


# Final Outcome


In [None]:
# Plot every image listed in the 'file_name' column of the image index.
# To plot a single image instead, use:
# img_id = "image_part_008.jpg"
# plot_bbox(img_id)
images = list(images_boxable.loc[:,"file_name"])
for i in images:
    plot_bbox(i)