# RCNN - Regions with CNN
In R-CNN, instead of running classification on a huge number of regions, we pass the image through selective search, take the first 2000 region proposals from the result, and run classification on those. This way only the first 2000 regions need to be classified, which makes the algorithm fast compared to earlier object-detection techniques.

Steps :
1. Pass the image through selective search and generate region proposals.
2. Calculate the IoU (intersection over union) of each proposed region with the ground-truth data and assign a label to the proposed region.
3. Do transfer learning using the proposed regions with their labels.
4. Pass the test image through selective search, then pass the first 2000 proposed regions through the trained model and predict the class of each region.

In [None]:
import tensorflow as tf
# Report how many GPUs TensorFlow can see.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

# Log the device each op is placed on, so the placement below can be checked.
tf.debugging.set_log_device_placement(True)

# Create the input tensors explicitly on the CPU.
with tf.device('/CPU:0'):
    a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
    b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])

# The matmul is outside any tf.device scope, so TensorFlow picks the device
# itself; the placement log shows which one was used.
c = tf.matmul(a, b)
print(c)

# Use the most verbose logging level. (A preceding set_verbosity(INFO) call was
# dropped: it was overridden immediately by this one.)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.DEBUG)

In [None]:
# Mount Google Drive at /content/drive (the commented-out unzip step that
# follows reads the dataset archive from a "drive/My Drive" path).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
#!unzip "drive/My Drive/airplanes.zip"

In [None]:
import os,cv2,keras
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
from keras.layers import Dense
from keras import Model
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from keras.applications.vgg16 import VGG16

# Load VGG16 with ImageNet weights. include_top=True keeps the top layers;
# the output of the second-to-last layer is reused later as the input to the
# new 2-class head.
vggmodel = VGG16(weights='imagenet', include_top=True)

In [None]:
# Dataset locations (relative paths) and training settings.
image_path = "airplanes/images/"       # directory listed for the input images
annot_path = "airplanes/annotations/"  # directory holding the per-image annotation CSVs
epochs = 10                            # number of epochs passed to model_final.fit
batch_size = 32                        # used to derive the steps per epoch

In [None]:
# Create the selective search segmentation object; the same instance is
# reused for every image via ss.setBaseImage().
# NOTE(review): cv2.ximgproc is presumably provided by the opencv-contrib build — confirm it is installed.
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()

Intersection over Union is an evaluation metric used to measure the accuracy of an object detector on a particular dataset.

In [None]:
def get_iou(bb1, bb2):
    """Return the intersection over union (IoU) of two axis-aligned boxes.

    Each box is a dict with the keys 'x1', 'y1', 'x2' and 'y2', where
    (x1, y1) is the corner with the smaller coordinates. The result is a
    float between 0.0 and 1.0; boxes that do not overlap give 0.0.

    Raises AssertionError when a box does not satisfy x1 < x2 and y1 < y2.
    """
    # Both boxes must have strictly positive width and height.
    for box in (bb1, bb2):
        assert box['x1'] < box['x2']
        assert box['y1'] < box['y2']

    # Corners of the overlapping rectangle (if there is one).
    left = max(bb1['x1'], bb2['x1'])
    top = max(bb1['y1'], bb2['y1'])
    right = min(bb1['x2'], bb2['x2'])
    bottom = min(bb1['y2'], bb2['y2'])

    # No overlap at all: the "rectangle" would have a negative side.
    if right < left or bottom < top:
        return 0.0

    overlap = (right - left) * (bottom - top)
    area1 = (bb1['x2'] - bb1['x1']) * (bb1['y2'] - bb1['y1'])
    area2 = (bb2['x2'] - bb2['x1']) * (bb2['y2'] - bb2['y1'])

    # Union = sum of both areas minus the part counted twice.
    ratio = overlap / float(area1 + area2 - overlap)
    assert ratio >= 0.0
    assert ratio <= 1.0
    return ratio

1. Set each image, one by one, as the base for selective search using ss.setBaseImage(image).
2. Initialise fast selective search and get the proposed regions using ss.switchToSelectiveSearchFast() and ssresults = ss.process().
3. Iterate over the first 2000 results returned by selective search and calculate the IoU of each proposed region with the annotated region using the user-defined get_iou() function.
4. To keep a good balance between positives (airplane) = 1 and negatives (background) = 0, we collect at most 30 samples of each class per image.

In [None]:
# Build the training set. For every annotated airplane image, selective search
# proposes regions; a region becomes a positive sample (label 1) when its best
# IoU against the ground-truth boxes is above 0.70 and a negative sample
# (label 0) when that best IoU is below 0.3. Using the best IoU over all boxes
# keeps a region that tightly covers one airplane from being stored as
# background just because it does not overlap another airplane in the image.
# At most 30 samples of each class are kept per image, and only the first 2000
# proposals are examined.
train_images=[]
train_labels=[]
for image_name in os.listdir(image_path):
    if not image_name.startswith("airplane"):
        continue
    stem = image_name.split(".")[0]
    image_file = stem + ".jpg"
    annot_file = stem + ".csv"
    try:
        image = cv2.imread(os.path.join(image_path, image_file))
        if image is None:
            raise ValueError("image could not be read")
        df = pd.read_csv(os.path.join(annot_path, annot_file))

        gtvalues = []
        for _, row in df.iterrows():
            # The first column of each row holds one box as a space-separated
            # "x1 y1 x2 y2" string; .iloc avoids deprecated positional [] access.
            x1, y1, x2, y2 = (int(v) for v in row.iloc[0].split(" ")[:4])
            gtvalues.append({"x1": x1, "x2": x2, "y1": y1, "y2": y2})
        if not gtvalues:
            # Nothing to compare proposals against, so no sample can be labelled.
            continue

        ss.setBaseImage(image)
        ss.switchToSelectiveSearchFast()
        ssresults = ss.process()

        counter = 0        # positives collected for this image
        falsecounter = 0   # negatives collected for this image
        for x, y, w, h in ssresults[:2000]:
            if counter >= 30 and falsecounter >= 30:
                break
            candidate = {"x1": x, "x2": x + w, "y1": y, "y2": y + h}
            best_iou = max(get_iou(gtval, candidate) for gtval in gtvalues)
            if best_iou > 0.70 and counter < 30:
                label = 1
            elif best_iou < 0.3 and falsecounter < 30:
                label = 0
            else:
                continue
            crop = image[y:y + h, x:x + w]
            resized = cv2.resize(crop, (224, 224), interpolation=cv2.INTER_AREA)
            train_images.append(resized)
            train_labels.append(label)
            if label == 1:
                counter += 1
            else:
                falsecounter += 1
    except Exception as err:
        # Best effort: one broken image/annotation pair must not abort the
        # whole run, but it is reported instead of being dropped silently.
        print("Skipping %s: %s" % (image_file, err))
        continue


In [None]:
# Convert the collected crops and their labels into NumPy arrays for training.
X_new = np.array(train_images)
y_new = np.array(train_labels)

In [None]:
# Freeze the first 15 layers of VGG16 so that only the later layers are
# updated during fine-tuning.
for layer in vggmodel.layers[:15]:
    layer.trainable = False

# Take the output of the second-to-last VGG16 layer and attach a new 2-way
# softmax head in place of the original final layer.
X = vggmodel.layers[-2].output
predictions = Dense(2, activation="softmax")(X)
model_final = Model(inputs=vggmodel.input, outputs=predictions)

# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by newer Keras releases.
opt = Adam(learning_rate=0.0001)
model_final.compile(loss=keras.losses.categorical_crossentropy, optimizer=opt, metrics=["accuracy"])
model_final.summary()

One-hot encode the labels using a user-defined subclass of LabelBinarizer.

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
class MyLabelBinarizer(LabelBinarizer):
    """LabelBinarizer variant that emits two columns for binary targets.

    For binary targets the encoded column is followed by its complement, so
    the output has one column per class; other target types pass through
    unchanged.
    """

    def transform(self, y):
        """Encode ``y``; binary targets come back as the columns ``[Y, 1 - Y]``."""
        encoded = super().transform(y)
        if self.y_type_ != 'binary':
            return encoded
        return np.hstack((encoded, 1 - encoded))

    def inverse_transform(self, Y, threshold=None):
        """Decode ``Y`` back to labels, reading only column 0 for binary targets."""
        source = Y[:, 0] if self.y_type_ == 'binary' else Y
        return super().inverse_transform(source, threshold)
# One-hot encode the labels, then hold out 10% of the samples for testing.
# NOTE(review): no random_state is passed to train_test_split, so the split
# is presumably different on every run — confirm whether that is intended.
lenc = MyLabelBinarizer()
Y =  lenc.fit_transform(y_new)
X_train, X_test , y_train, y_test = train_test_split(X_new,Y,test_size=0.10)

Defining the data generators and doing data augmentation.

In [None]:
# Augment the crops with random horizontal/vertical flips and rotations
# (rotation_range=90). batch_size is passed explicitly so the generators stay
# in step with the steps-per-epoch values derived from the same variable at
# training time, instead of relying on flow()'s own default.
traingen = ImageDataGenerator(horizontal_flip=True, vertical_flip=True, rotation_range=90)
traindata = traingen.flow(x=X_train, y=y_train, batch_size=batch_size)
# NOTE(review): the held-out data is augmented as well, so validation metrics
# are computed on randomly transformed images — confirm this is intended.
testgen = ImageDataGenerator(horizontal_flip=True, vertical_flip=True, rotation_range=90)
testdata = testgen.flow(x=X_test, y=y_test, batch_size=batch_size)

Fitting the model

In [None]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
# Save the full model whenever the validation loss improves. The deprecated
# `period=1` argument was dropped: checking after every epoch is the default.
checkpoint = ModelCheckpoint("rcnn_vgg16.h5", monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto')
# NOTE(review): patience=100 is larger than the configured number of epochs
# (10), so early stopping cannot trigger with the current settings.
early = EarlyStopping(monitor='val_loss', min_delta=0, patience=100, verbose=1, mode='auto')

# Step counts must be whole numbers; round up so that the final partial batch
# of each split is still seen once per epoch.
train_steps = (len(X_train) + batch_size - 1) // batch_size
val_steps = (len(X_test) + batch_size - 1) // batch_size
hist = model_final.fit(traindata, steps_per_epoch=train_steps, epochs=epochs, validation_data=testdata, validation_steps=val_steps, callbacks=[checkpoint,early])

Evaluating the model by seeing how it does on the test data

In [None]:
# Evaluate on the raw (un-augmented) training and held-out arrays. evaluate()
# returns two values here; the first is discarded and the second, the accuracy
# metric the model was compiled with, is printed.
_, train_acc = model_final.evaluate(X_train, y_train, verbose=0)
_, test_acc = model_final.evaluate(X_test, y_test, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

In [None]:
# Run the detector on the first image whose file name starts with "4":
# classify the first 2000 selective-search proposals and draw a green box
# around every region whose airplane score exceeds 0.70.
z=0
for i in os.listdir(image_path):
    if not i.startswith("4"):
        continue
    z += 1
    img = cv2.imread(os.path.join(image_path, i))
    ss.setBaseImage(img)
    ss.switchToSelectiveSearchFast()
    ssresults = ss.process()
    imout = img.copy()  # boxes are drawn on the copy only

    boxes = ssresults[:2000]
    # Crop from the untouched image, never from `imout`: otherwise rectangles
    # drawn for earlier detections would leak into later crops.
    crops = [
        cv2.resize(img[y:y + h, x:x + w], (224, 224), interpolation=cv2.INTER_AREA)
        for x, y, w, h in boxes
    ]
    if crops:
        # One batched predict call instead of one call per region.
        scores = model_final.predict(np.array(crops))
        for box, score in zip(boxes, scores):
            # Column 0 is the airplane class: MyLabelBinarizer puts the
            # encoded label first and its complement second.
            if score[0] > 0.70:
                x, y, w, h = (int(v) for v in box)
                cv2.rectangle(imout, (x, y), (x + w, y + h), (0, 255, 0), 1, cv2.LINE_AA)

    plt.figure(figsize=(10, 10))
    # cv2.imread yields BGR channel order while matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(imout, cv2.COLOR_BGR2RGB))
    break