In [1]:
from __future__ import division
import os
import sys
import cv2
import time
import json
import pydot
import random
import pickle
import numpy as np
from tqdm import tqdm
from PIL import Image
import tensorflow as tf
from pprint import pprint
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.utils import Progbar
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import Adam, SGD, RMSprop

from utils import vgg as nn
from utils import roi_helpers
from utils import losses as losses_fn
from utils.simple_parser import get_data
from utils import config, data_generators

In [2]:
# Seed the TensorFlow, NumPy and stdlib random generators with one shared value.
seed = 64

for seed_rng in (tf.random.set_seed, np.random.seed, random.seed):
    seed_rng(seed)

In [3]:
# Create a TF1-compat session whose GPU options have `allow_growth` switched on.
# NOTE(review): `ConfigProto` is bound to an *instance* here, so the name reads like
# the class but is not; `session` is not referenced again in the visible cells.
ConfigProto = tf.compat.v1.ConfigProto()
ConfigProto.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=ConfigProto)
# NOTE(review): the disabled call below names `sess`, but the variable above is `session`.
# K.set_session(sess)

# Config Setting

In [4]:
cfg = config.Config()

# --- training schedule ---
cfg.epoch_length = 5  # number of iterations (train: 1409, val: 456)
cfg.num_epochs = 2  # total number of epochs

# --- augmentation switches (currently left untouched) ---
# cfg.rot_90 = True # default
# cfg.use_random_brightness = True # add
# cfg.use_vertical_flips = True # default
# cfg.use_horizontal_flips = True # default

# --- detector settings ---
cfg.num_rois = 128 # default
cfg.std_scaling = 4 # default
# cfg.base_net_weights = os.path.join('./model/', nn.get_weight_path())

# TODO: these paths and the class list are the only things that should need
# changing to train on another dataset.
cfg.model_path = './model/fusion_improved_last.hdf5'
cfg.visual_model_path = './model/fusion_improved.png'
cfg.train_label_file = 'data/train_labels.txt'
cfg.val_label_file = 'data/val_labels.txt'

# Class name -> integer id, numbered in list order; 'bg' (background) comes last.
cfg.class_mapping = {
    label: index
    for index, label in enumerate([
        'small-vehicle',
        'large-vehicle',
        'harbor',
        'ship',
        'ground-track-field',
        'soccer-ball-field',
        'baseball-diamond',
        'swimming-pool',
        'roundabout',
        'tennis-court',
        'basketball-court',
        'plane',
        'helicopter',
        'bridge',
        'storage-tank',
        'bg',
    ])
}
cfg.len_class = len(cfg.class_mapping)

# Persist the configuration object so it can be reloaded at test time.
with open(cfg.config_save_file, 'wb') as config_f:
    pickle.dump(cfg, config_f)

print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(cfg.config_save_file))

Config has been written to config.pickle, and can be loaded when testing to ensure correct results


# Data Load
- train: 98990it (03:09)
- valid: 28853it (01:02)

In [5]:
# train_images, train_classes_count = get_data(cfg.train_label_file, cfg.class_mapping)
# validation_images, validation_classes_count = get_data(cfg.val_label_file, cfg.class_mapping)

# with open('data/train_images.json', 'w', encoding='utf-8') as file:
#     json.dump(train_images, file, indent='\t')
    
# with open('data/train_classes_count.json', 'w', encoding='utf-8') as file:
#     json.dump(train_classes_count, file, indent='\t')
    
# with open('data/validation_images.json', 'w', encoding='utf-8') as file:
#     json.dump(validation_images, file, indent='\t')
    
# with open('data/validation_classes_count.json', 'w', encoding='utf-8') as file:
#     json.dump(validation_classes_count, file, indent='\t')

In [6]:
def _read_json(path):
    """Return the object decoded from the UTF-8 JSON file at `path`."""
    with open(path, 'r', encoding='utf-8') as handle:
        return json.load(handle)


# Load the parsed annotations and per-class counts from their JSON dumps.
train_images = _read_json('data/train_images.json')
train_classes_count = _read_json('data/train_classes_count.json')
validation_images = _read_json('data/validation_images.json')
validation_classes_count = _read_json('data/validation_classes_count.json')

In [7]:
# Report the size of the label space, then a header and per-class tally per split.
print('Num classes (including bg) = {}'.format(cfg.len_class))

train_header = '==Training Num samples {} , images per class:'.format(len(train_images))
print(train_header)
pprint(train_classes_count)

validation_header = '==Validation Num samples {} , images per class:'.format(len(validation_images))
print(validation_header)
pprint(validation_classes_count)

Num classes (including bg) = 16
==Training Num samples 1409 , images per class:
{'baseball-diamond': 415,
 'basketball-court': 515,
 'bg': 0,
 'bridge': 2047,
 'ground-track-field': 325,
 'harbor': 5983,
 'helicopter': 630,
 'large-vehicle': 16969,
 'plane': 8055,
 'roundabout': 399,
 'ship': 28068,
 'small-vehicle': 26126,
 'soccer-ball-field': 326,
 'storage-tank': 5029,
 'swimming-pool': 1736,
 'tennis-court': 2367}
==Validation Num samples 456 , images per class:
{'baseball-diamond': 214,
 'basketball-court': 132,
 'bg': 0,
 'bridge': 464,
 'ground-track-field': 144,
 'harbor': 2090,
 'helicopter': 73,
 'large-vehicle': 4387,
 'plane': 2531,
 'roundabout': 179,
 'ship': 8960,
 'small-vehicle': 5438,
 'soccer-ball-field': 153,
 'storage-tank': 2888,
 'swimming-pool': 440,
 'tennis-court': 760}


# Data Generator

In [8]:
# def trainGenerator():
#     for x, y in zip(range(10), range(10,20)):
#         yield (x, y)

# train_dataset  = tf.data.Dataset.from_generator(
#     trainGenerator, \
#     (tf.int32, tf.float32), )

In [9]:
# Build the training and validation generators with ops pinned to the CPU.
with tf.device('/cpu:0'):
    data_gen_train = data_generators.get_anchor_gt(
        train_images,
        train_classes_count,
        cfg,
        nn.get_img_output_length,
        mode='train',
    )
    data_gen_val = data_generators.get_anchor_gt(
        validation_images,
        validation_classes_count,
        cfg,
        nn.get_img_output_length,
        mode='val',
    )

In [10]:
# X, (Ycls, Yregr), aug = next(data_gen_train)
# print(X.shape, Ycls.shape, Yregr.shape)
# print(Ycls.sum(), Yregr.sum())

# Model

In [11]:
# Build the three Keras models that share one backbone:
#   model_rpn        - image -> first two RPN outputs
#   model_classifier - (image, ROIs) -> detector outputs
#   model_all        - both heads together; used for saving/loading weights
with tf.device('/cpu:0'):
    input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img, name='img_input')
    roi_input = Input(shape=(None, 4), name='roi_input')  # 4 == number of points * 2

    # Shared base network; `nn` is `utils.vgg` in this notebook.
    shared_layers = nn.nn_base(img_input, trainable=True)

    # RPN built on the base layers, one anchor per (scale, ratio) combination.
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)  # 3 * 3 == 9 per the original note
    # Original author's note: rpn_out_class should be 2k wide but k is returned.
    rpn = nn.rpn(shared_layers, num_anchors)

    # The detector head is built for exactly `cfg.num_rois` ROIs per batch
    # (set to 128 in the config cell).
    classifier = nn.classifier(shared_layers, roi_input, cfg.num_rois, nb_classes=cfg.len_class, trainable=True)

    model_rpn = Model(img_input, rpn[:2], name='model_rpn')
    model_classifier = Model([img_input, roi_input], classifier, name='model_classifier')

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier, name='model_all')

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    model_rpn.compile(optimizer=Adam(learning_rate=1e-5),
                      loss=[losses_fn.rpn_loss_cls(num_anchors), losses_fn.rpn_loss_regr(num_anchors)])

    # NOTE(review): the metrics key presumably has to equal the name of the detector's
    # classification output layer defined in utils.vgg — confirm against that module.
    model_classifier.compile(optimizer=Adam(learning_rate=1e-5),
                             loss=[losses_fn.class_loss_cls, losses_fn.class_loss_regr(cfg.len_class - 1)],
                             metrics={'dense_class_{}'.format(cfg.len_class): 'accuracy'})

    # In the visible cells model_all is only used for save_weights, so this
    # optimizer/loss pair appears to be a placeholder.
    model_all.compile(optimizer='sgd', loss='mae')

In [12]:
# plot_model(model_all, to_file=cfg.visual_model_path, show_shapes=True, \
#            show_layer_names=True, expand_nested=False, dpi=200)

# display(Image.open(cfg.visual_model_path))
# model_all.summary(line_length=120)

# Train

In [13]:
# Bookkeeping shared with the training loop.
# Read the schedule from the config instead of repeating the literal here;
# use len(train_images) for a full pass over the training set.
epoch_length = int(cfg.epoch_length)
num_epochs = int(cfg.num_epochs)
iter_num = 0

# One row per iteration: [rpn_cls, rpn_regr, detector_cls, detector_regr, detector_acc]
losses = np.zeros((epoch_length, 5))
best_loss = np.inf  # lowercase spelling: the `np.Inf` alias no longer exists in NumPy 2.x

# Per-iteration count of foreground ROIs, reset after each epoch.
rpn_accuracy_for_epoch = []

# Integer id -> class name (inverse of cfg.class_mapping).
class_mapping_inv = {v: k for k, v in cfg.class_mapping.items()}

In [14]:
def _pick_roi_indices(roi_labels, num_rois):
    """Choose ROI indices for one detector update.

    Args:
        roi_labels: array indexed as [0, roi, class]; the last class column is
            read as the background flag (1 == background, 0 == foreground).
        num_rois: number of indices to return.

    Returns:
        (selected, num_pos). `selected` is a list of `num_rois` indices: at most
        `num_rois // 2` foreground ROIs followed by filler ROIs. `num_pos` is
        the number of foreground ROIs found. `selected` is empty when there is
        nothing to sample from.
    """
    # np.where returns a tuple; take element 0 to get the index array itself.
    neg_samples = np.where(roi_labels[0, :, -1] == 1)[0]
    pos_samples = np.where(roi_labels[0, :, -1] == 0)[0]

    if len(pos_samples) < num_rois // 2:
        selected_pos = pos_samples.tolist()
    else:
        selected_pos = np.random.choice(pos_samples, num_rois // 2, replace=False).tolist()

    # Fill the rest with background ROIs; when there are none, reuse foreground
    # ROIs so the batch still has `num_rois` entries.
    fill_pool = neg_samples if len(neg_samples) > 0 else pos_samples
    if len(fill_pool) == 0:
        return [], 0

    num_fill = num_rois - len(selected_pos)
    # Sample with replacement only when the pool is too small to do otherwise.
    selected_fill = np.random.choice(fill_pool, num_fill, replace=len(fill_pool) < num_fill).tolist()

    return selected_pos + selected_fill, len(pos_samples)


# NOTE(review): the whole loop is pinned to the CPU; the run log recorded in this
# notebook shows several hundred seconds per step — confirm this is intentional.
with tf.device('/cpu:0'):
    start_time = time.time()

    for epoch_num in range(num_epochs):
        progbar = Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        # Start each epoch with a clean table so skipped iterations cannot leak
        # values from the previous epoch into this epoch's means.
        losses[:] = 0
        completed = 0  # iterations of this epoch that reached the detector update

        for iter_num in range(epoch_length):

            X, Y, img_data, cache = next(data_gen_train)
            print("success data_gen_train", Y[0].shape, Y[1].shape)

            loss_rpn = model_rpn.train_on_batch(X, Y)
            print("loss")
            print(loss_rpn[0], loss_rpn[1])

            P_rpn = model_rpn.predict_on_batch(X)  # (rpn cls, rpn regr)

            result = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], cfg, 'tf', use_regr=True,
                                            overlap_thresh=0.7,
                                            max_boxes=256)  # origin code is 300 but paper is set by 256 (128 - positive, 128 - negative)

            # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
            X2, Y1, Y2, IouS = roi_helpers.calc_iou(result, img_data, cfg, cfg.class_mapping)

            if X2 is None:
                print("X2 is None", X2)
                rpn_accuracy_for_epoch.append(0)
                progbar.update(iter_num + 1)
                continue
            print("X2 is not None: ", X2.shape)

            selected_samples, num_pos = _pick_roi_indices(Y1, cfg.num_rois)
            rpn_accuracy_for_epoch.append(num_pos)

            if not selected_samples:
                progbar.update(iter_num + 1)
                continue

            # NOTE(review): the traceback stored in this notebook shows utils/losses.py
            # asserting y_true.shape == y_pred.shape inside class_loss_regr. If it fires
            # here, compare the last dimension of Y2 with the detector's regression output.
            loss_class = model_classifier.train_on_batch(
                [X, X2[:, selected_samples, :]],
                [Y1[:, selected_samples, :], Y2[:, selected_samples, :]])

            print("img input:", X.shape)
            print("roi input:", X2[:, selected_samples, :].shape, len(selected_samples))
            print(X.shape, X2.shape, Y1.shape, Y2.shape, X.dtype, X2.dtype, Y1.dtype, Y2.dtype)

            # Index 0 of each result is read as the combined loss and skipped.
            losses[completed] = (loss_rpn[1], loss_rpn[2],
                                 loss_class[1], loss_class[2], loss_class[3])
            completed += 1

            # Report 1-based progress and average only over rows filled so far.
            progbar.update(iter_num + 1,
                           [('rpn_cls', np.mean(losses[:completed, 0])),
                            ('rpn_regr', np.mean(losses[:completed, 1])),
                            ('detector_cls', np.mean(losses[:completed, 2])),
                            ('detector_regr', np.mean(losses[:completed, 3]))])

        mean_overlapping_bboxes = (
            float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
            if rpn_accuracy_for_epoch else 0.0
        )
        rpn_accuracy_for_epoch = []

        if cfg.verbose:
            print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(mean_overlapping_bboxes))
            if mean_overlapping_bboxes == 0:
                print('RPN is not producing bounding boxes that overlap the ground truth boxes.')
                print('Check RPN settings or keep training.')

        if completed == 0:
            # Nothing to average and nothing worth checkpointing.
            print('No iteration in this epoch reached the detector update; skipping the epoch summary and checkpoint.')
            start_time = time.time()
            continue

        (loss_rpn_cls, loss_rpn_regr,
         loss_class_cls, loss_class_regr, class_acc) = np.mean(losses[:completed], axis=0)

        if cfg.verbose:
            print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
            print('Loss RPN classifier: {}'.format(loss_rpn_cls))
            print('Loss RPN regression: {}'.format(loss_rpn_regr))
            print('Loss Detector classifier: {}'.format(loss_class_cls))
            print('Loss Detector regression: {}'.format(loss_class_regr))
            print('Elapsed time: {}'.format(time.time() - start_time))

        curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr

        # Keep only the weights of the best epoch seen so far.
        if curr_loss < best_loss:
            if cfg.verbose:
                print('Total loss decreased from {} to {}, saving weights'.format(best_loss, curr_loss))
            best_loss = curr_loss
            model_all.save_weights(cfg.model_path)

        start_time = time.time()

Epoch 1/2
P2433.png (1956, 1930, 3)
P2433.png (1968, 1936, 3)
success data_gen_train (1, 246, 242, 18) (1, 246, 242, 72)
y_rpn_overlap: Tensor("rpn_loss_regr_fixed_num/strided_slice_2:0", shape=(535788,), dtype=float32)
L_IIOU: Tensor("rpn_loss_regr_fixed_num/sub_12:0", shape=(535788,), dtype=float32)
mean: Tensor("rpn_loss_regr_fixed_num/Mean:0", shape=(), dtype=float32)
y_rpn_overlap: Tensor("rpn_loss_regr_fixed_num/strided_slice_2:0", shape=(535788,), dtype=float32)
L_IIOU: Tensor("rpn_loss_regr_fixed_num/sub_12:0", shape=(535788,), dtype=float32)
mean: Tensor("rpn_loss_regr_fixed_num/Mean:0", shape=(), dtype=float32)
loss
-136739952.0 7.6683526039123535
X2 is None None
P2379.png (2251, 2085, 3)
P2379.png (2256, 2096, 3)
success data_gen_train (1, 282, 262, 18) (1, 282, 262, 72)
y_rpn_overlap: Tensor("rpn_loss_regr_fixed_num/strided_slice_2:0", shape=(664956,), dtype=float32)
L_IIOU: Tensor("rpn_loss_regr_fixed_num/sub_12:0", shape=(664956,), dtype=float32)
mean: Tensor("rpn_loss_re

AssertionError: in user code:

    c:\users\ipsl\.conda\envs\dota\lib\site-packages\tensorflow\python\keras\engine\training.py:805 train_function  *
        return step_function(self, iterator)
    C:\Users\ipsl\Desktop\Faster-RCNN-for-Dota\utils\losses.py:79 class_loss_regr_fixed_num  *
        assert(y_true.shape == y_pred.shape)

    AssertionError: 


In [None]:
# Debug aid: shapes of the Y1 / Y2 slices picked by `selected_samples`.
Y1[:, selected_samples, :].shape, Y2[:, selected_samples, :].shape

In [None]:
Epoch 1/2  
P2379.png (2251, 2085, 3)
P2379.png (2272, 2112, 3)
success data_gen_train (1, 284, 264, 18) (1, 284, 264, 72)
loss_cls (1, 284, 264, 18)
loss_regr (1, 284, 264, 72)
loss_cls (1, 284, 264, 18)
loss_regr (1, 284, 264, 72)
rpn train and predict (1, 284, 264, 9) (1, 284, 264, 36)
img input: (1, 2272, 2112, 3)

In [None]:
P1930.png (2462, 1300, 3)
rpn ground truth 419.92185068130493
calc_rpn Error
#calc_rpn Exception : can't multiply sequence by non-int of type 'float'

# Epoch 1/2
5/5 [==============================] - 3270s 654s/step - rpn_cls: 6.2361 - rpn_regr: 0.7347 - detector_cls: 2.7627 - detector_regr: 0.2693
Mean number of bounding boxes from RPN overlapping ground truth boxes: 1.9166666666666667
- Classifier accuracy for bounding boxes from RPN: 0.7
- Loss RPN classifier: 5.417903423309326
- Loss RPN regression: 0.764133733510971
- Loss Detector classifier: 2.743869400024414
- Loss Detector regression: 0.1439718186855316
- Elapsed time: 3269.942060947418
- Total loss decreased from inf to 9.069878375530244, saving weights


# Epoch 2/2
5/5 [==============================] - 1960s 392s/step - rpn_cls: 5.6016 - rpn_regr: 0.7821 - detector_cls: 2.6116 - detector_regr: 0.3472
Mean number of bounding boxes from RPN overlapping ground truth boxes: 1.1666666666666667

- Classifier accuracy for bounding boxes from RPN: 0.9125
- Loss RPN classifier: 4.953123617172241
- Loss RPN regression: 0.8024014845490456
- Loss Detector classifier: 2.5067588329315185
- Loss Detector regression: 0.22378002405166625
- Elapsed time: 1962.5114130973816
- Total loss decreased from 9.069878375530244 to 8.486063958704472, saving weights

In [None]:
# Pull one item from the training generator and unpack its parts for inspection.
X, Y, aug, cache = next(data_gen_train)
y_rpn_cls, y_rpn_regr = Y
# NOTE(review): this rebinds `y_rpn_regr`, discarding the value unpacked from `Y`
# on the line above — confirm the second element of `cache` is the one wanted.
y_rpn_overlap, y_rpn_regr = cache

In [None]:
# Scan the training image folder and tally images by channel count:
# shapes ending in 3 go to `colors`, shapes ending in 1 go to `grays`,
# anything else is reported.
base = os.path.join('..' , 'data', 'train', 'images')
colors = []
grays = []

for file in tqdm(os.listdir(base)):
    img = tf.io.read_file(os.path.join(base, file))
    img = tf.image.decode_image(img)

    if img.shape[-1] == 3:
        colors.append(img.shape)
    elif img.shape[-1] == 1:
        grays.append(img.shape)
    else:
        # Report the decoded tensor's shape (the loop variable is `img`).
        print("ERROR : {}".format(img.shape))

#     print("{} \t {}".format(file, img.shape))

print("colors : {} , grays: {}".format(len(colors), len(grays)))

In [None]:
# Decode one training image and make sure it ends up with three channels.
base = os.path.join('..' , 'data', 'train', 'images')
img = tf.io.read_file(os.path.join(base, 'P1200.png'))
img = tf.image.decode_image(img)
print(img.shape)

# grayscale_to_rgb only accepts a trailing dimension of size 1, so convert
# only single-channel images and leave the rest untouched.
if img.shape[-1] == 1:
    img = tf.image.grayscale_to_rgb(img)
print(img.shape)

# cv2.imwrite('img.jpg', img.numpy())

In [None]:
# Append 16 zero columns along axis 1, add a leading batch axis, cast to float32.
# NOTE(review): this overwrites `img`, so the cell cannot be re-run on its own output.
padded = tf.pad(img, ((0, 0), (0, 16), (0, 0)), "CONSTANT", constant_values=0)
img = tf.cast(tf.expand_dims(padded, axis=0), dtype=tf.float32)

print(img.shape)

In [None]:
# Run the RPN model on `img` and show the shapes of its two outputs.
rpn_cls, rpn_regr = model_rpn.predict(img)
print(rpn_cls.shape, rpn_regr.shape)

In [None]:
# Scratch cell: displays the repr of the detector model object.
model_classifier

In [None]:
# Index tuples of every RPN classification score that is at least 0.6.
rpn_cls_args = np.argwhere(rpn_cls >= 0.6)
print(rpn_cls_args)

In [None]:
# Scratch cell: displays the full RPN regression output array.
rpn_regr

In [None]:
# Print the four regression values belonging to the first thresholded hit, if any.
for hit in rpn_cls_args[:1]:
    row, col, anchor = hit[1], hit[2], hit[3]
    first = anchor * 4
    print(rpn_regr[0, row, col, first: first + 4])

In [None]:
# Scratch cell: lists the attribute names of the numpy module (leftover exploration).
dir(np)

In [None]:
# rpn_regr, rpn_cls, class_cls = model_all.predict(img)

In [None]:
# Decode one image, force three channels, then zero-pad the bottom and right
# edges so that height and width are both multiples of 16.
img = tf.io.read_file(os.path.join('..' , 'data', 'train', 'images', 'P1311.png'))
img = tf.image.decode_image(img)

# grayscale_to_rgb needs a trailing channel axis of size 1; add one if missing.
if len(img.shape) == 2:
    img = tf.expand_dims(img, axis=-1)
if img.shape[-1] == 1:
    img = tf.image.grayscale_to_rgb(img)

multiple = 16
# (-n) % m is the distance from n up to the next multiple of m (0 if already aligned).
pad_h = (-img.shape[0]) % multiple  # axis 0: height
pad_w = (-img.shape[1]) % multiple  # axis 1: width

if pad_h or pad_w:
    # tf.pad needs one [before, after] pair per axis; the channel axis is not padded.
    paddings = tf.constant([[0, pad_h], [0, pad_w], [0, 0]])

    img = tf.pad(img, paddings, "CONSTANT", constant_values=0)

---