## Step-1 Region Proposal

In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

from matplotlib import patches
from PIL import Image
from tqdm.notebook import tqdm

In [2]:
# NWPU-RESISC45 Air Plane Dataset
# All locations are absolute paths resolved from the current working directory.
ROOT_DIR = os.path.abspath('./')
DATA_ROOT = os.path.abspath('./data/air_planes')
# Sub-folders of DATA_ROOT: annotation CSV files and the source images.
ANN_DIR, IMG_DIR = (
    os.path.join(DATA_ROOT, sub_dir) for sub_dir in ('annotations/', 'images')
)

In [3]:
def ApplySelectiveSearch(img):
    """Run OpenCV selective search (fast mode) on `img` and return the proposals.

    The searcher reports each rectangle as (x, y, w, h). The rest of this
    notebook works with (xmin, ymin, xmax, ymax), so the last two columns are
    converted in place before the array is returned.

    @params
      - img : image handed to `setBaseImage`
    @returns
      - array of proposals, one row per region, as (xmin, ymin, xmax, ymax)
    """
    searcher = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
    searcher.setBaseImage(img)
    searcher.switchToSelectiveSearchFast()
    rects = searcher.process()

    # (w, h) -> (xmax, ymax): add the top-left corner to the size columns.
    # Done this way so IOU computation and DrawBox share one box format.
    rects[:, 2:] += rects[:, :2]

    return rects

In [4]:
def DrawBox(img, bboxes, title='Empty', color='magenta', ax=None):
    """Show `img` with one rectangle outline per entry of `bboxes`.

    @params
      - img : BGR image (it is converted to RGB for display)
      - bboxes : iterable of (xmin, ymin, xmax, ymax) boxes
      - title : axes title
      - color : edge colour of the rectangles
      - ax : matplotlib axes to draw on; a new 10x10 figure is created when None
    """
    if ax is None:
        _, ax = plt.subplots(1, figsize=(10, 10))

    # Box layout is (xmin, ymin, xmax, ymax); Rectangle wants corner + size.
    for x_min, y_min, x_max, y_max in bboxes:
        box_w = x_max - x_min
        box_h = y_max - y_min
        outline = patches.Rectangle(
            (x_min, y_min),
            box_w,
            box_h,
            linewidth=2,
            alpha=1.0,
            linestyle="solid",
            edgecolor=color,
            facecolor='none',
        )
        ax.add_patch(outline)

    rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    ax.imshow(rgb_img)
    ax.axis('off')
    ax.set_title(title)

In [5]:
def GetGroundTruthBBox(ann):
    """Parse the ground-truth boxes of one annotation DataFrame.

    Every row of `ann` is expected to hold, in its FIRST column, a string of
    whitespace-separated integers "x1 y1 x2 y2" (extra tokens are ignored).

    @params
      - ann : pandas DataFrame read from an annotation CSV file
    @returns
      - int32 array of shape (n, 4) with rows (x1, y1, x2, y2);
        shape (0, 4) when `ann` has no rows
    @raises
      - ValueError when a row has fewer than four tokens or a non-integer token
    """
    if ann.empty:
        return np.empty((0, 4), dtype=np.int32)

    boxes = []
    # .iloc selects the first column by POSITION. Integer keys on a
    # label-indexed row (`row[0]`) only worked through a deprecated fallback.
    for entry in ann.iloc[:, 0]:
        # split() without an argument tolerates repeated spaces and tabs.
        x1, y1, x2, y2 = (int(token) for token in entry.split()[:4])
        boxes.append([x1, y1, x2, y2])

    # Build the array once (no per-row vstack) with a fixed dtype, so empty
    # and non-empty results agree.
    return np.array(boxes, dtype=np.int32).reshape(-1, 4)

In [6]:
# Compute IOU between Ground Truth & Proposed Region
def ComputeIOU(gt, p):
    """Intersection-over-union of one ground-truth box with n proposals.

    Boxes use the (xmin, ymin, xmax, ymax) layout.

    @params
      - gt : ground truth bounding box (4,)
      - p : proposed region bounding box (n, 4)
    @returns
      - array of n IoU values, one per row of `p`

    The union is not guarded against zero, so a zero-area `gt` paired with a
    zero-area proposal yields NaN.
    """
    # Corners of the overlap rectangle between gt and every proposal.
    left = np.maximum(gt[0], p[:, 0])
    top = np.maximum(gt[1], p[:, 1])
    right = np.minimum(gt[2], p[:, 2])
    bottom = np.minimum(gt[3], p[:, 3])

    # Clamp at zero so non-overlapping boxes contribute no intersection.
    overlap_w = np.maximum(right - left, 0)
    overlap_h = np.maximum(bottom - top, 0)
    intersection = overlap_w * overlap_h

    gt_area = (gt[2] - gt[0]) * (gt[3] - gt[1])
    proposal_area = (p[:, 2] - p[:, 0]) * (p[:, 3] - p[:, 1])
    union = gt_area + proposal_area - intersection

    return intersection / union

In [7]:
# Compute Delta value between Ground Truth & Proposed Region
def ComputeTargetDelta(gt, p):
    """Bounding-box regression targets from proposals `p` to ground truth `gt`.

    All boxes use the (xmin, ymin, xmax, ymax) layout used throughout this
    notebook. They are converted to centre/size form first, then the R-CNN
    parameterisation is applied:

        dx = (gt_cx - p_cx) / p_w        dy = (gt_cy - p_cy) / p_h
        dw = log(gt_w / p_w)             dh = log(gt_h / p_h)

    @params
      - gt : ground truth bounding box (4,)
      - p : proposed region bounding box (n, 4)
    @returns
      - float32 array (n, 4) with columns (dx, dy, dw, dh)

    NOTE(review): the previous version divided by p[:, 2] / p[:, 3] (xmax /
    ymax, not width / height) and used gt[2] for the height term. Any code
    that decodes these deltas back into boxes must use the same
    parameterisation as above -- confirm against the inference step.
    Proposals with zero width or height produce inf/NaN (not guarded here).
    """
    gt = np.asarray(gt, dtype=np.float32)
    p = np.asarray(p, dtype=np.float32)

    # Proposal size and centre.
    p_w = p[:, 2] - p[:, 0]
    p_h = p[:, 3] - p[:, 1]
    p_cx = p[:, 0] + 0.5 * p_w
    p_cy = p[:, 1] + 0.5 * p_h

    # Ground-truth size and centre.
    gt_w = gt[2] - gt[0]
    gt_h = gt[3] - gt[1]
    gt_cx = gt[0] + 0.5 * gt_w
    gt_cy = gt[1] + 0.5 * gt_h

    d = np.zeros_like(p, dtype=np.float32)
    d[:, 0] = (gt_cx - p_cx) / p_w
    d[:, 1] = (gt_cy - p_cy) / p_h
    d[:, 2] = np.log(gt_w / p_w)
    d[:, 3] = np.log(gt_h / p_h)

    return d

In [8]:
def _StackOrEmpty(items, item_shape, dtype):
    """Stack `items` into one array of shape (len(items),) + item_shape."""
    if not items:
        return np.empty((0,) + tuple(item_shape), dtype=dtype)
    return np.asarray(items, dtype=dtype).reshape((len(items),) + tuple(item_shape))


def WarppingImage(img, regions, delta, size=(224, 224)):
    """Crop every labelled region out of `img` and warp it to a fixed size.

    @params
      - img : source image, indexed as img[y, x]; assumed to be a 3-channel
              uint8 image as returned by cv2.imread
      - regions : (n, 5) rows of (label, xmin, ymin, xmax, ymax)
      - delta : (m, 4) regression targets, one row per POSITIVE region, in the
                same order in which positive rows appear in `regions`
      - size : (width, height) of the warped crops, default (224, 224)
    @returns
      - cls_trn_img : uint8 (n, height, width, 3), every warped crop
      - cls_trn_lb : int32 (n, 1), label of every crop
      - reg_trn_img : uint8 (m, height, width, 3), warped positive crops only
      - reg_trn_delta : float32 (m, 4), delta row of each positive crop
    """
    out_w, out_h = size
    img_shape = (out_h, out_w, 3)

    # Collect into lists and build each array once at the end. Re-stacking
    # with np.vstack per region would copy the whole image stack every time.
    cls_imgs, cls_labels = [], []
    reg_imgs, reg_deltas = [], []
    pos_idx = 0  # index of the next unused row of `delta`

    for region in regions:
        label, x1, y1, x2, y2 = region

        # Crop the region from the original image, then warp it to `size`.
        crop = img[y1:y2, x1:x2]
        warped = cv2.resize(crop, (out_w, out_h), interpolation=cv2.INTER_AREA)

        cls_imgs.append(warped)
        cls_labels.append(label)

        if label == 1:
            # Positive regions also feed the bounding-box regressor.
            reg_imgs.append(warped)
            reg_deltas.append(delta[pos_idx])
            pos_idx += 1

    cls_trn_img = _StackOrEmpty(cls_imgs, img_shape, np.uint8)
    cls_trn_lb = _StackOrEmpty(cls_labels, (1,), np.int32)
    reg_trn_img = _StackOrEmpty(reg_imgs, img_shape, np.uint8)
    reg_trn_delta = _StackOrEmpty(reg_deltas, (4,), np.float32)
    return cls_trn_img, cls_trn_lb, reg_trn_img, reg_trn_delta
    

In [9]:
def AdjustRegion(gt_bboxes, roi):
    """Pick positive / negative training regions from the proposals `roi`.

    @params
      - gt_bboxes : (g, 4) ground-truth boxes (xmin, ymin, xmax, ymax)
      - roi : (n, 4) proposals in the same layout; only the first 2000 are used
    @returns
      - regions : int32 (k, 5) rows of (label, xmin, ymin, xmax, ymax),
                  label 1 = positive, 0 = negative. All positive rows come
                  first (grouped per GT box, in GT order), then the negatives.
      - delta : float32 (m, 4) regression targets, one row per positive row of
                `regions`, in the same order

    Selection rules:
      - positive : IoU >= 0.5 with the current GT box, at most 30 per GT box.
                   A GT box is skipped once 30 positives were already collected
                   from earlier boxes, so the total can exceed 30 (kept as-is).
      - negative : IoU <= 0.3 with EVERY GT box, at most 30 per image.
                   Checking only the current GT box would label a proposal that
                   tightly covers a different object as background.
    """
    POS_LB = 1
    NEG_LB = 0
    MAX_COUNT = 30
    MAX_ROI = 2000
    POS_IOU = 0.5
    NEG_IOU = 0.3

    regions = np.array([], dtype=np.int32).reshape(0, 5)
    delta = np.array([], dtype=np.float32).reshape(0, 4)
    if len(gt_bboxes) == 0:
        return regions, delta

    # Use at most 2000 of the regions produced by selective search.
    roi = roi[:MAX_ROI]

    # IoU of every proposal against every GT box; best_iou is the per-proposal max.
    iou_per_gt = [ComputeIOU(bbox, roi) for bbox in gt_bboxes]
    best_iou = np.max(np.stack(iou_per_gt), axis=0)

    for bbox, iou_results in zip(gt_bboxes, iou_per_gt):
        # Positive when IoU with the i'th GT box is >= 0.5 (max 30 regions).
        if np.count_nonzero(regions[:, 0] == POS_LB) < MAX_COUNT:
            temp_pos = roi[iou_results >= POS_IOU][:MAX_COUNT]

            # Delta between the positive regions and their GT box.
            delta = np.vstack([delta, ComputeTargetDelta(bbox, temp_pos)])

            # Insert the positive label at column 0.
            pos = np.insert(temp_pos, 0, POS_LB, axis=1)
            regions = np.vstack([regions, pos])

    # Negative when IoU with ALL GT boxes is <= 0.3 (max 30 regions).
    temp_neg = roi[best_iou <= NEG_IOU][:MAX_COUNT]
    # Insert the negative label at column 0.
    temp_neg = np.insert(temp_neg, 0, NEG_LB, axis=1)
    regions = np.vstack([regions, temp_neg])

    return regions, delta
            

In [None]:
# Build the classifier / bbox-regressor training set from every image in IMG_DIR.
# IMG_DIR and ANN_DIR are already absolute paths, so they are used directly.
img_path = IMG_DIR
ann_path = ANN_DIR
img_files = sorted(os.listdir(img_path))

# Per-image results are collected in lists and concatenated ONCE after the
# loop. Calling np.vstack on the full dataset for every image re-copies all
# previously warped crops each iteration. The leading empty arrays fix the
# shape/dtype of the result even when no image yields any region.
cls_img_parts = [np.array([], dtype=np.uint8).reshape(0, 224, 224, 3)]
cls_lb_parts = [np.array([], dtype=np.int32).reshape(0, 1)]
reg_img_parts = [np.array([], dtype=np.uint8).reshape(0, 224, 224, 3)]
reg_delta_parts = [np.array([], dtype=np.float32).reshape(0, 4)]

is_visible_sample = False

# Annotation File Read
for img_file in tqdm(img_files):
    print(img_file)
    # 1. image file load
    ann_file = '{}.csv'.format(os.path.splitext(img_file)[0])
    img = cv2.imread(os.path.join(img_path, img_file))
    if img is None:
        # cv2.imread returns None (it does not raise) for files it cannot
        # decode, e.g. stray non-image files in the folder.
        print('skip unreadable image :: {}'.format(img_file))
        continue

    #2. To Obtain positive and negative region
    ## 2-1. Apply Selective Search and obtain ROI
    roi = ApplySelectiveSearch(img)

    ## 2-2. Get Ground Truth Bounding Box Info
    ann = pd.read_csv(os.path.join(ann_path, ann_file))
    gt_bboxes = GetGroundTruthBBox(ann)

    ## 2-3. Generate Train Region to Compute IOU Between GT BBox and ROI
    regions, delta = AdjustRegion(gt_bboxes, roi)

    ## 2-4. Warpping Image
    _cls_trn_img, _cls_trn_lb, _reg_trn_img, _reg_trn_delta = WarppingImage(img, regions, delta)
    cls_img_parts.append(_cls_trn_img)
    cls_lb_parts.append(_cls_trn_lb)
    reg_img_parts.append(_reg_trn_img)
    reg_delta_parts.append(_reg_trn_delta)

    # NOTE(review): disabled sample visualisation. It references `i`, `pos`
    # and `neg`, none of which are defined in this cell, so it cannot simply
    # be uncommented.
#     if i == 0 and is_visible_sample:
#         _, ax = plt.subplots(2, 2, figsize=(20, 20))
#         DrawBox(img, gt_bboxes, title='GT', ax=ax[0][0])
#         DrawBox(img, roi, title='ROI', color='red', ax=ax[0][1])
#         DrawBox(img, pos, title='pos', color='blue', ax=ax[1][0])
#         DrawBox(img, neg, title='neg', color='cyan', ax=ax[1][1])

#         plt.tight_layout()

cls_trn_img = np.vstack(cls_img_parts)
cls_trn_lb = np.vstack(cls_lb_parts)
reg_trn_img = np.vstack(reg_img_parts)
reg_trn_delta = np.vstack(reg_delta_parts)

# Save Train DATA
np.savez_compressed('train_data.npz', cls_trn_img=cls_trn_img, cls_trn_lb=cls_trn_lb, reg_trn_img=reg_trn_img, reg_trn_delta=reg_trn_delta)


HBox(children=(IntProgress(value=0, max=732), HTML(value='')))

42845.jpg
428451.jpg
428452.jpg
428461.jpg
428462.jpg
42847.jpg
428472.jpg
42848.jpg
428481.jpg
428482.jpg
428483.jpg
42849.jpg
428491.jpg
428492.jpg
42850.jpg
428501.jpg
428503.jpg
Planes1.jpg
Planes10.jpg
Planes11.jpg
Planes12.jpg
Planes3.jpg
Planes4.jpg
Planes6.jpg
Planes7.jpg
Planes8.jpg
Planes9.jpg
airplane_001.jpg
airplane_002.jpg
airplane_003.jpg
airplane_004.jpg
airplane_005.jpg
airplane_006.jpg
airplane_007.jpg
airplane_008.jpg
airplane_009.jpg
airplane_010.jpg
airplane_011.jpg
airplane_012.jpg
airplane_013.jpg
airplane_014.jpg
airplane_015.jpg
airplane_016.jpg
airplane_017.jpg
airplane_018.jpg
airplane_019.jpg
airplane_020.jpg
airplane_021.jpg
airplane_022.jpg
airplane_023.jpg
airplane_024.jpg
airplane_025.jpg
airplane_026.jpg
airplane_027.jpg
airplane_028.jpg
airplane_029.jpg
airplane_030.jpg
airplane_031.jpg
airplane_032.jpg
airplane_033.jpg
airplane_034.jpg
airplane_035.jpg
airplane_036.jpg
airplane_037.jpg
airplane_038.jpg
airplane_039.jpg
airplane_040.jpg
airplane_041.jp

airplane_465.jpg
airplane_466.jpg
airplane_467.jpg
airplane_468.jpg
airplane_469.jpg
airplane_470.jpg
airplane_471.jpg
airplane_472.jpg
airplane_473.jpg
airplane_474.jpg
airplane_475.jpg
airplane_476.jpg
airplane_477.jpg
airplane_478.jpg
airplane_479.jpg
airplane_480.jpg
airplane_481.jpg
airplane_482.jpg
airplane_483.jpg
airplane_484.jpg
airplane_485.jpg
airplane_486.jpg
airplane_487.jpg
airplane_488.jpg
airplane_489.jpg
airplane_490.jpg
airplane_491.jpg
airplane_492.jpg
airplane_493.jpg
airplane_494.jpg
airplane_495.jpg
airplane_496.jpg
airplane_497.jpg
airplane_498.jpg
airplane_499.jpg
airplane_500.jpg
airplane_501.jpg
airplane_502.jpg
airplane_503.jpg
airplane_504.jpg
airplane_505.jpg
airplane_506.jpg
airplane_507.jpg
airplane_508.jpg
airplane_509.jpg
airplane_510.jpg
airplane_511.jpg
airplane_512.jpg
airplane_513.jpg
airplane_514.jpg
airplane_515.jpg
airplane_516.jpg
airplane_517.jpg
airplane_518.jpg
airplane_519.jpg
airplane_520.jpg
airplane_521.jpg
airplane_522.jpg
airplane_523.j

# Step-2 Training

In [32]:
%reset
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


In [40]:
# Load Train Data(with regions)
# The archive is the one written by np.savez_compressed in Step-1. It must be
# loaded here because the namespace was cleared by %reset above.
train_data = np.load('train_data.npz')

val_split = 0.2
SPLIT_SEED = 42  # fixed seed so the train/validation split is reproducible

cls_trn_img = train_data['cls_trn_img']
cls_trn_lb = to_categorical(train_data['cls_trn_lb'], 2)

total = cls_trn_img.shape[0]
num_val = int(total * val_split)
num_train = total - num_val

print('num of validation ::',num_val)
print('num of train ::', num_train)

# shuffle all data
# The permutation is actually applied to both arrays. Previously `indexes`
# was shuffled but never used, so the split was an unshuffled slice.
indexes = np.random.RandomState(SPLIT_SEED).permutation(total)
val_idx = indexes[:num_val]
train_idx = indexes[num_val:]
# Index-array selection copies the selected rows (plain slices would be views).
train_x = cls_trn_img[train_idx]
train_y = cls_trn_lb[train_idx]
val_x = cls_trn_img[val_idx]
val_y = cls_trn_lb[val_idx]

print('train shape::', train_x.shape, train_y.shape)
print('val shape::', val_x.shape, val_y.shape)

# Data Augmentation
# train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
# train_datagen.fit(cls_trn_img)

# # train-data
# train_generator = train_datagen.flow(cls_trn_img, cls_trn_lb, batch_size=32, shuffle=True, subset='training')
# print(train_generator)
# # val-data
# validation_generator = train_datagen.flow(cls_trn_img, cls_trn_lb, batch_size=32, shuffle=True, subset='validation')


num of validation :: 8193
num of train :: 32772
train shape:: (32772, 224, 224, 3) (32772, 2)
val shape:: (8193, 224, 224, 3) (8193, 2)


In [None]:
import keras
from keras.layers import Dense
from keras import Model
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam

from keras.applications.vgg16 import VGG16
# VGG16 with ImageNet weights, including its fully-connected top.
vggmodel = VGG16(weights='imagenet', include_top=True)

# Freeze the first 15 layers; only the remaining ones are fine-tuned.
for layer in vggmodel.layers[:15]:
    layer.trainable = False

# Replace the original classifier: take the output of the second-to-last
# layer and attach a 2-way softmax head.
X = vggmodel.layers[-2].output
predictions = Dense(2, activation="softmax")(X)

# `inputs` / `outputs` are the Keras 2+ keyword names. The singular
# `input` / `output` spellings are legacy and rejected by current Keras.
model_final = Model(inputs=vggmodel.input, outputs=predictions)
# The learning rate is passed positionally because its keyword was renamed
# from `lr` to `learning_rate` across Keras releases.
opt = Adam(0.0001)
model_final.compile(loss = keras.losses.categorical_crossentropy, optimizer = opt, metrics=["accuracy"])