In [1]:
from __future__ import division
import os
import sys
import cv2
import time
import json
import pydot
import random
import pickle
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm
from PIL import Image
import tensorflow as tf
from pprint import pprint
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.models import Model
from tensorflow.keras.utils import Progbar
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import Adam, SGD, RMSprop

from utils import vgg as nn
from utils import roi_helpers
from utils import losses as losses_fn
from utils.simple_parser import get_data
from utils import config, data_generators

In [2]:
seed = 64

tf.random.set_seed(seed)
np.random.seed(seed)
random.seed(seed)

In [3]:
ConfigProto = tf.compat.v1.ConfigProto()
ConfigProto.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=ConfigProto)
# K.set_session(sess)

# Config Setting

In [4]:
cfg = config.Config()

# iteration 개수 (train: 1409, val: 456)
cfg.num_epochs = 4 # 총 Epochs
# cfg.rot_90 = True # default
# cfg.use_random_brightness = True # add
# cfg.use_vertical_flips = True # default
# cfg.use_horizontal_flips = True # default
cfg.num_rois = 128 # default
cfg.std_scaling = 4 # default

# TODO: the only file should to be change for other data to train
cfg.model_path = './models/fusion_improved_last.hdf5'
cfg.visual_model_path = './models/fusion_improved.png'
cfg.train_label_file = './datav2/train_labels.txt' # txt 파일 경로 설정
cfg.val_label_file = './datav2/val_labels.txt' # txt 파일 경로 설정
cfg.class_mapping = {
    'plane': 0, 
    'ship': 1, 
    'storage-tank': 2, 
    'baseball-diamond': 3, 
    'tennis-court': 4, 
    'basketball-court': 5, 
    'ground-track-field': 6, 
    'harbor': 7, 
    'bridge': 8, 
    'large-vehicle': 9, 
    'small-vehicle': 10, 
    'helicopter': 11, 
    'roundabout': 12, 
    'soccer-ball-field': 13, 
    'swimming-pool': 14, 
    'container-crane': 15, 
    'airport': 16, 
    'helipad': 17,
    'bg': 18,
}
cfg.len_class = len(cfg.class_mapping)

with open(cfg.config_save_file, 'wb') as config_f:
    pickle.dump(cfg, config_f)
    print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(cfg.config_save_file))

Config has been written to config.pickle, and can be loaded when testing to ensure correct results


# Data Load
- train: 98990it (03:09)
- valid: 28853it (01:02)

In [5]:
train_images, train_classes_count = get_data(cfg.train_label_file, cfg.class_mapping, sep=',')
validation_images, validation_classes_count = get_data(cfg.val_label_file, cfg.class_mapping, sep=',')

with open('datav2/train_images.json', 'w', encoding='utf-8') as file:
    json.dump(train_images, file, indent='\t')
    
with open('datav2/train_classes_count.json', 'w', encoding='utf-8') as file:
    json.dump(train_classes_count, file, indent='\t')
    
with open('datav2/validation_images.json', 'w', encoding='utf-8') as file:
    json.dump(validation_images, file, indent='\t')
    
with open('datav2/validation_classes_count.json', 'w', encoding='utf-8') as file:
    json.dump(validation_classes_count, file, indent='\t')

264038it [03:56, 1115.42it/s]
80120it [01:14, 1079.29it/s]


In [None]:
# with open('data/dotav2_train_images_not_large.json', 'r', encoding='utf-8') as file:
#     train_images = json.load(file)
    
# with open('data/dotav2_train_classes_count.json', 'r', encoding='utf-8') as file:
#     train_classes_count = json.load(file)
    
# with open('data/dotav2_validation_images.json', 'r', encoding='utf-8') as file:
#     validation_images = json.load(file)
    
# with open('data/dotav2_validation_classes_count.json', 'r', encoding='utf-8') as file:
#     validation_classes_count = json.load(file)

In [6]:
print('Num classes (including bg) = {}'.format(cfg.len_class))

print('==Training Num samples {} , images per class:'.format(len(train_images)))
pprint(train_classes_count)

print('==Validation Num samples {} , images per class:'.format(len(validation_images)))
pprint(validation_classes_count)

Num classes (including bg) = 19
==Training Num samples 1824 , images per class:
{'airport': 302,
 'baseball-diamond': 428,
 'basketball-court': 546,
 'bg': 0,
 'bridge': 2260,
 'container-crane': 178,
 'ground-track-field': 406,
 'harbor': 6380,
 'helicopter': 652,
 'helipad': 8,
 'large-vehicle': 24501,
 'plane': 8171,
 'roundabout': 589,
 'ship': 38394,
 'small-vehicle': 169268,
 'soccer-ball-field': 361,
 'storage-tank': 6802,
 'swimming-pool': 2343,
 'tennis-court': 2449}
==Validation Num samples 592 , images per class:
{'airport': 101,
 'baseball-diamond': 227,
 'basketball-court': 147,
 'bg': 0,
 'bridge': 494,
 'container-crane': 14,
 'ground-track-field': 166,
 'harbor': 2249,
 'helicopter': 78,
 'helipad': 2,
 'large-vehicle': 5371,
 'plane': 2600,
 'roundabout': 228,
 'ship': 13466,
 'small-vehicle': 50062,
 'soccer-ball-field': 151,
 'storage-tank': 3136,
 'swimming-pool': 851,
 'tennis-court': 777}


# Data Generator

In [7]:
with tf.device('/cpu:0'):
    data_gen_train = data_generators.get_anchor_gt(train_images, train_classes_count, cfg, nn.get_img_output_length, mode='train')
    data_gen_val = data_generators.get_anchor_gt(validation_images, validation_classes_count, cfg, nn.get_img_output_length, mode='val')

# Model

In [8]:
with tf.device('/cpu:0'):
    img_input = Input(shape=(None, None, 3), name='img_input')
    roi_input = Input(shape=(None, 4), name='roi_input') # 4 == 점의 개수 * 2

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios) # 3 * 3 == 9
    rpn = nn.rpn(shared_layers, num_anchors) # rpn_out_class 2k이어야 하는데 k 반환
    
    # cfg.num_rois : 32
    classifier = nn.classifier(shared_layers, roi_input, cfg.num_rois, nb_classes=cfg.len_class, trainable=True)

    model_rpn = Model(img_input, rpn[:2], name='model_rpn')
    model_classifier = Model([img_input, roi_input], classifier, name='model_classifier')

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier, name='model_all')

    model_rpn.compile(optimizer=Adam(lr=1e-5), 
                      loss=[losses_fn.rpn_loss_cls(num_anchors), losses_fn.rpn_loss_regr(num_anchors)])
    
    model_classifier.compile(optimizer=Adam(lr=1e-5),
                             loss=[losses_fn.class_loss_cls, losses_fn.class_loss_regr(cfg.len_class - 1)],
                             metrics={'dense_class_{}'.format(cfg.len_class): 'accuracy'})
    
    model_all.compile(optimizer='sgd', loss='mae')

In [9]:
# plot_model(model_all, to_file=cfg.visual_model_path, show_shapes=True, \
#            show_layer_names=True, expand_nested=False, dpi=200)

# display(Image.open(cfg.visual_model_path))
# model_all.summary(line_length=120)

# Train

In [10]:
train_length = 5# len(train_images)
val_length = 5# len(validation_images)
num_epochs = int(cfg.num_epochs)

losses = np.zeros((train_length, 5))
val_losses = np.zeros((val_length, 5))
best_loss = np.Inf

rpn_accuracy_for_epoch = []

class_mapping_inv = {v: k for k, v in cfg.class_mapping.items()}

In [11]:
with tf.device('/cpu:0'):
    for epoch_num in range(num_epochs):
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        start_time = time.time()
        rpn_accuracy_for_epoch = []
        progbar = Progbar(train_length)
        
        for iter_num in range(train_length):
            X, Y, img_data, cache = next(data_gen_train)
            
            loss_rpn = model_rpn.train_on_batch(X, Y)
            print(f"\t rpn loss -> cls: {loss_rpn[1]}, regr: {loss_rpn[2]}")
            P_rpn = model_rpn.predict_on_batch(X) # (rpn cls, rpn regr)
            result = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], cfg, 'tf', use_regr=True,
                                            overlap_thresh=0.7,
                                            max_boxes=256) # origin code is 300 but paper is set by 256 (128 - positive, 128 - negative)

            # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
            X2, Y1, Y2, IouS = roi_helpers.calc_iou(result, img_data, cfg, cfg.class_mapping)
            
            if X2 is None:
                rpn_accuracy_for_epoch.append(0)
                continue

            pos_samples = np.where(Y1[0, :, -1] == 0)
            neg_samples = np.where(Y1[0, :, -1] == 1)
            rpn_accuracy_for_epoch.append((len(pos_samples)))
            
            if len(neg_samples) > 0:
                neg_samples = neg_samples[0]
            else:
                neg_samples = []

            if len(pos_samples) > 0:
                pos_samples = pos_samples[0]
            else:
                pos_samples = []
            
            if len(pos_samples) < cfg.num_rois // 2:
                selected_pos_samples = pos_samples.tolist()
            else:
                selected_pos_samples = np.random.choice(pos_samples, cfg.num_rois // 2, replace=False).tolist()

            if len(neg_samples) + len(selected_pos_samples) > cfg.num_rois:
                selected_neg_samples = np.random.choice(neg_samples, cfg.num_rois - len(selected_pos_samples), replace=False).tolist()
            else:
                selected_neg_samples = np.random.choice(neg_samples, cfg.num_rois - len(selected_pos_samples), replace=True).tolist()
            selected_samples = selected_pos_samples + selected_neg_samples
            
            loss_class = model_classifier.train_on_batch([X, X2[:, selected_samples, :]], [Y1[:, selected_samples, :], Y2[:, selected_samples, :]])
            print(f"\t cls loss -> cls: {loss_class[1]}, regr: {loss_class[2]}, cls acc: {loss_class[3]}")
            
            #### loss 계산 갱신
            
            losses[iter_num, 0] = loss_rpn[1]
            losses[iter_num, 1] = loss_rpn[2]

            losses[iter_num, 2] = loss_class[1]
            losses[iter_num, 3] = loss_class[2]
            losses[iter_num, 4] = loss_class[3]
            
            rpn_cls = np.mean(losses[:iter_num, 0])
            rpn_regr = np.mean(losses[:iter_num, 1])
            detector_cls = np.mean(losses[:iter_num, 2])
            detector_regr = np.mean(losses[:iter_num, 3])

            progbar.update(iter_num,
                           [('rpn_cls', rpn_cls), \
                            ('rpn_regr', rpn_regr), \
                            ('detector_cls', detector_cls), \
                            ('detector_regr', detector_regr)])
            if iter_num % 300 == 299:
                model_all.save_weights(os.path.join('models', f'Epoch_{epoch_num}_Iter_{iter_num}_rpnCls_{rpn_cls:.4f}_rpnRegr_{rpn_regr:.4f}_clsCls_{detector_cls:.4f}_clsRegr_{detector_regr:.4f}.hdf5'))

        loss_rpn_cls = np.mean(losses[:, 0])
        loss_rpn_regr = np.mean(losses[:, 1])
        loss_class_cls = np.mean(losses[:, 2])
        loss_class_regr = np.mean(losses[:, 3])
        class_acc = np.mean(losses[:, 4])

        mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)

        if cfg.verbose:
            print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(mean_overlapping_bboxes))
            if mean_overlapping_bboxes == 0:
                print('RPN is not producing bounding boxes that overlap the ground truth boxes.')
                print('Check RPN settings or keep training.')
                
            print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
            print('Loss RPN classifier: {}'.format(loss_rpn_cls))
            print('Loss RPN regression: {}'.format(loss_rpn_regr))
            print('Loss Detector classifier: {}'.format(loss_class_cls))
            print('Loss Detector regression: {}'.format(loss_class_regr))
            print('Elapsed time: {}'.format(time.time() - start_time))

        curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr

        if curr_loss < best_loss:
            print(f'\t Total loss decreasetd from {best_loss:.5f} to {curr_loss:.5f}, saving weights')
            best_loss = curr_loss
        
        
        
        # Validation Check
        
        start_time = time.time()
        rpn_accuracy_for_epoch = []
        progbar = Progbar(val_length)
                    
        for iter_num in range(val_length):
            X, Y, img_data, cache = next(data_gen_train)
            
            loss_rpn = model_rpn.evaluate(X, Y)
            P_rpn = model_rpn.predict_on_batch(X)
            result = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], cfg, 'tf', use_regr=True,
                                            overlap_thresh=0.7,
                                            max_boxes=256) # origin code is 300 but paper is set by 256 (128 - positive, 128 - negative)

            # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
            X2, Y1, Y2, IouS = roi_helpers.calc_iou(result, img_data, cfg, cfg.class_mapping)
            
            if X2 is None:
                rpn_accuracy_for_epoch.append(0)
                continue

            selected_samples = np.where(Y1[0, :, -1] == 0)
            rpn_accuracy_for_epoch.append((len(selected_samples)))
            loss_class = model_classifier.evaluate([X, X2[:, selected_samples, :]], [Y1[:, selected_samples, :], Y2[:, selected_samples, :]])
            
            #### loss 계산 갱신
            
            val_losses[iter_num, 0] = loss_rpn[1]
            val_losses[iter_num, 1] = loss_rpn[2]

            val_losses[iter_num, 2] = loss_class[1]
            val_losses[iter_num, 3] = loss_class[2]
            val_losses[iter_num, 4] = loss_class[3]
            
            rpn_cls = np.nanmean(val_losses[:iter_num, 0])
            rpn_regr = np.nanmean(val_losses[:iter_num, 1])
            detector_cls = np.nanmean(val_losses[:iter_num, 2])
            detector_regr = np.nanmean(val_losses[:iter_num, 3])

            progbar.update(iter_num,
                           [('rpn_cls', rpn_cls), \
                            ('rpn_regr', rpn_regr), \
                            ('detector_cls', detector_cls), \
                            ('detector_regr', detector_regr)])        
        
        
        loss_rpn_cls = np.mean(val_losses[:, 0])
        loss_rpn_regr = np.mean(val_losses[:, 1])
        loss_class_cls = np.mean(val_losses[:, 2])
        loss_class_regr = np.mean(val_losses[:, 3])
        class_acc = np.mean(val_losses[:, 4])

        mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)

        print('Val Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(mean_overlapping_bboxes))
        if mean_overlapping_bboxes == 0:
            print('Val RPN is not producing bounding boxes that overlap the ground truth boxes.')
            print('Val Check RPN settings or keep training.')

        print('Val Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
        print('Val Loss RPN classifier: {}'.format(loss_rpn_cls))
        print('Val Loss RPN regression: {}'.format(loss_rpn_regr))
        print('Val Loss Detector classifier: {}'.format(loss_class_cls))
        print('Val Loss Detector regression: {}'.format(loss_class_regr))
        print('Val Elapsed time: {}'.format(time.time() - start_time))
        
        model_all.save_weights(os.path.join('models', f'Epoch_{epoch_num}_rpnCls_{loss_rpn_cls:.4f}_rpnRegr_{loss_rpn_regr:.4f}_clsCls_{loss_class_cls:.4f}_clsRegr_{loss_class_regr:.4f}.hdf5'))


Epoch 1/4

 P0215.png (1376, 3216, 3)
#calc_rpn Exception : can't multiply sequence by non-int of type 'float'

 P1972.png (2592, 6640, 3)
#calc_rpn Exception : can't multiply sequence by non-int of type 'float'

 P1827.png (1424, 1792, 3)
	 rpn loss -> cls: 7.7625908851623535, regr: 1.8256162405014038
	 cls loss -> cls: 2.9444386959075928, regr: 0.0, cls acc: 0.0
0/5 [..............................] - ETA: 0s - rpn_cls: nan - rpn_regr: nan - detector_cls: nan - detector_regr: nan
 P0383.png (912, 1136, 3)


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


	 rpn loss -> cls: 7.493694305419922, regr: 1.7276943922042847
	 cls loss -> cls: 2.9397969245910645, regr: 1.314517617225647, cls acc: 0.8984375
1/5 [=====>........................] - ETA: 2:04:03 - rpn_cls: nan - rpn_regr: nan - detector_cls: nan - detector_regr: nan
 P3899.png (1024, 1024, 3)
	 rpn loss -> cls: 7.97972297668457, regr: 1.9732075929641724

 P0892.png (2224, 2336, 3)
#calc_rpn Exception : can't multiply sequence by non-int of type 'float'

 P0775.png (1888, 1792, 3)
#calc_rpn Exception : can't multiply sequence by non-int of type 'float'

 P0109.png (1232, 1184, 3)
	 rpn loss -> cls: 7.927600860595703, regr: 1.92723548412323
	 cls loss -> cls: 2.9249818325042725, regr: 0.0, cls acc: 1.0
 P2538.png (2224, 1888, 3)
	 rpn loss -> cls: 7.803128242492676, regr: 1.7395098209381104
Mean number of bounding boxes from RPN overlapping ground truth boxes: 0.6
Classifier accuracy for bounding boxes from RPN: 0.3796875
Loss RPN classifier: 4.6367772102355955
Loss RPN regression: 1.

ValueError: in user code:

    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:1224 test_function  *
        return step_function(self, iterator)
    C:\Users\ipsl\Desktop\Faster-RCNN-for-Dota\utils\roi_pooling_conv.py:51 call  *
        x = rois[0, roi_idx, 0]
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\util\dispatch.py:201 wrapper
        return target(*args, **kwargs)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\array_ops.py:1013 _slice_helper
        return strided_slice(
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\util\dispatch.py:201 wrapper
        return target(*args, **kwargs)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\array_ops.py:1186 strided_slice
        op = gen_array_ops.strided_slice(
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_array_ops.py:10347 strided_slice
        _, _, _op, _outputs = _op_def_library._apply_op_helper(
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py:742 _apply_op_helper
        op = g._create_op_internal(op_type_name, inputs, dtypes=None,
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\func_graph.py:591 _create_op_internal
        return super(FuncGraph, self)._create_op_internal(  # pylint: disable=protected-access
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py:3477 _create_op_internal
        ret = Operation(
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py:1974 __init__
        self._c_op = _create_c_op(self._graph, node_def, inputs,
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py:1815 _create_c_op
        raise ValueError(str(e))

    ValueError: slice index 0 of dimension 2 out of bounds. for '{{node model_classifier/roi_pooling_conv/strided_slice}} = StridedSlice[Index=DT_INT32, T=DT_FLOAT, begin_mask=0, ellipsis_mask=0, end_mask=0, new_axis_mask=0, shrink_axis_mask=7](model_classifier/Cast, model_classifier/roi_pooling_conv/strided_slice/stack, model_classifier/roi_pooling_conv/strided_slice/stack_1, model_classifier/roi_pooling_conv/strided_slice/stack_2)' with input shapes: [?,1,0,4], [3], [3], [3] and with computed input tensors: input[1] = <0 0 0>, input[2] = <1 1 1>, input[3] = <1 1 1>.


In [15]:
([12,2])

7.0

---

---