In [1]:
from __future__ import division
import os
import sys
import cv2
import time
import json
import pydot
import random
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tqdm import tqdm
from PIL import Image
import tensorflow as tf
from pprint import pprint
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.models import Model
from tensorflow.keras.utils import Progbar
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import Adam, SGD, RMSprop

from utils import vgg as nn
from utils import roi_helpers
from utils import losses as losses_fn
from utils.simple_parser import get_data
from utils import config, data_generators

In [2]:
seed = 64

tf.random.set_seed(seed)
np.random.seed(seed)
random.seed(seed)

In [3]:
ConfigProto = tf.compat.v1.ConfigProto()
ConfigProto.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=ConfigProto)
# K.set_session(sess)

# Config Setting

In [4]:
cfg = config.Config()

# iteration 개수 (train: 1409, val: 456)
cfg.num_epochs = 4 # 총 Epochs
# cfg.rot_90 = True # default
# cfg.use_random_brightness = True # add
# cfg.use_vertical_flips = True # default
# cfg.use_horizontal_flips = True # default
cfg.num_rois = 128 # default
cfg.std_scaling = 4 # default

# TODO: the only file should to be change for other data to train
cfg.model_path = './models/fusion_improved_last.hdf5'
cfg.visual_model_path = './models/fusion_improved.png'
cfg.train_label_file = 'dotav12_train_label_change_new.txt'
cfg.val_label_file = 'val_labels.txt' # txt 파일 경로 설정
cfg.class_mapping = {
    'plane': 0, 
    'ship': 1, 
    'storage-tank': 2, 
    'baseball-diamond': 3, 
    'tennis-court': 4, 
    'basketball-court': 5, 
    'ground-track-field': 6, 
    'harbor': 7, 
    'bridge': 8, 
    'large-vehicle': 9, 
    'small-vehicle': 10, 
    'helicopter': 11, 
    'roundabout': 12, 
    'soccer-ball-field': 13, 
    'swimming-pool': 14, 
    'container-crane': 15, 
    'airport': 16, 
    'helipad': 17,
    'bg': 18,
}
cfg.len_class = len(cfg.class_mapping)

with open(cfg.config_save_file, 'wb') as config_f:
    pickle.dump(cfg, config_f)
    print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(cfg.config_save_file))

Config has been written to config.pickle, and can be loaded when testing to ensure correct results


# Data Load
- train: 98990it (03:09)
- valid: 28853it (01:02)

In [5]:
train_name = 'dotav12_train_label_change_new'
train_classes_count_name = f'{train_name}_classes_count'

validation_name = 'dotav2_validation'
validation_classes_count_name = f'{validation_name}_classes_count'

In [6]:
# train_images, train_classes_count = get_data(cfg.train_label_file, cfg.class_mapping, sep=' ')
# validation_images, validation_classes_count = get_data(cfg.val_label_file, cfg.class_mapping, sep=',')

# with open(f'{train_name}.json', 'w', encoding='utf-8') as file:
#     json.dump(train_images, file, indent='\t')

# with open(f'{train_classes_count_name}.json', 'w', encoding='utf-8') as file:
#     json.dump(train_classes_count, file, indent='\t')

# with open(f'{validation_name}.json', 'w', encoding='utf-8') as file:
#     json.dump(validation_images, file, indent='\t')

# with open(f'{validation_classes_count_name}.json', 'w', encoding='utf-8') as file:
#     json.dump(validation_classes_count, file, indent='\t')

In [7]:
with open(f'{train_name}.json', 'r', encoding='utf-8') as file:
    train_images = json.load(file)

with open(f'{train_classes_count_name}.json', 'r', encoding='utf-8') as file:
    train_classes_count = json.load(file)

with open(f'{validation_name}.json', 'r', encoding='utf-8') as file:
    validation_images = json.load(file)

with open(f'{validation_classes_count_name}.json', 'r', encoding='utf-8') as file:
    validation_classes_count = json.load(file)

In [8]:
print('Num classes (including bg) = {}'.format(cfg.len_class))

print('==Training Num samples {} , images per class:'.format(len(train_images)))
pprint(train_classes_count)

print('==Validation Num samples {} , images per class:'.format(len(validation_images)))
pprint(validation_classes_count)

Num classes (including bg) = 19
==Training Num samples 1409 , images per class:
{'airport': 0,
 'baseball-diamond': 419,
 'basketball-court': 525,
 'bg': 0,
 'bridge': 3825,
 'container-crane': 0,
 'ground-track-field': 339,
 'harbor': 6529,
 'helicopter': 648,
 'helipad': 0,
 'large-vehicle': 18733,
 'plane': 8733,
 'roundabout': 521,
 'ship': 38832,
 'small-vehicle': 39340,
 'soccer-ball-field': 410,
 'storage-tank': 11453,
 'swimming-pool': 1954,
 'tennis-court': 2399}
==Validation Num samples 456 , images per class:
{'airport': 0,
 'baseball-diamond': 214,
 'basketball-court': 132,
 'bg': 0,
 'bridge': 464,
 'container-crane': 0,
 'ground-track-field': 144,
 'harbor': 2090,
 'helicopter': 73,
 'helipad': 0,
 'large-vehicle': 4387,
 'plane': 2531,
 'roundabout': 179,
 'ship': 8960,
 'small-vehicle': 5438,
 'soccer-ball-field': 153,
 'storage-tank': 2888,
 'swimming-pool': 440,
 'tennis-court': 760}


# Data Generator

In [9]:
with tf.device('/cpu:0'):
    data_gen_train = data_generators.get_anchor_gt(train_images, train_classes_count, cfg, nn.get_img_output_length, mode='train')
    data_gen_val = data_generators.get_anchor_gt(validation_images, validation_classes_count, cfg, nn.get_img_output_length, mode='val')

# Model

In [10]:
with tf.device('/cpu:0'):
    img_input = Input(shape=(None, None, 3), name='img_input')
    roi_input = Input(shape=(None, 4), name='roi_input') # 4 == 점의 개수 * 2

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios) # 3 * 3 == 9
    rpn = nn.rpn(shared_layers, num_anchors) # rpn_out_class 2k이어야 하는데 k 반환
    
    # cfg.num_rois : 32
    classifier = nn.classifier(shared_layers, roi_input, cfg.num_rois, nb_classes=cfg.len_class, trainable=True)

    model_rpn = Model(img_input, rpn[:2], name='model_rpn')
    model_classifier = Model([img_input, roi_input], classifier, name='model_classifier')

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier, name='model_all')

    model_rpn.compile(optimizer=Adam(lr=1e-5), 
                      loss=[losses_fn.rpn_loss_cls(num_anchors), losses_fn.rpn_loss_regr(num_anchors)])
    
    model_classifier.compile(optimizer=Adam(lr=1e-5),
                             loss=[losses_fn.class_loss_cls, losses_fn.class_loss_regr(cfg.len_class - 1)],
                             metrics={'dense_class_{}'.format(cfg.len_class): 'accuracy'})
    
    model_all.compile(optimizer='sgd', loss='mae')

# Train

In [11]:
historys = pd.DataFrame(columns=['rpn_cls', 'rpn_regr', 'detector_cls', 'detector_regr'])
train_length = len(train_images)
val_length = len(validation_images)
num_epochs = int(cfg.num_epochs)
print(train_length, val_length, num_epochs)

losses = np.zeros((train_length, 5))
val_losses = np.zeros((val_length, 5))
best_loss = np.Inf

rpn_accuracy_for_epoch = []

class_mapping_inv = {v: k for k, v in cfg.class_mapping.items()}

1409 456 4


In [None]:
with tf.device('/cpu:0'):
    for epoch_num in range(num_epochs):
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        start_time = time.time()
        rpn_accuracy_for_epoch = []
        progbar = Progbar(train_length)
        
        for iter_num in range(train_length):
            X, Y, img_data, cache = next(data_gen_train)
            
            loss_rpn = model_rpn.train_on_batch(X, Y)
            print(f"\t rpn loss -> cls: {loss_rpn[1]:.4f}, regr: {loss_rpn[2]:.4f}")
            P_rpn = model_rpn.predict_on_batch(X) # (rpn cls, rpn regr)
            result = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], cfg, 'tf', use_regr=True,
                                            overlap_thresh=0.7,
                                            max_boxes=256) # origin code is 300 but paper is set by 256 (128 - positive, 128 - negative)

            # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
            X2, Y1, Y2, IouS = roi_helpers.calc_iou(result, img_data, cfg, cfg.class_mapping)
            
            if X2 is None:
                rpn_accuracy_for_epoch.append(0)
                continue

            pos_samples = np.where(Y1[0, :, -1] == 0)
            neg_samples = np.where(Y1[0, :, -1] == 1)
            rpn_accuracy_for_epoch.append((len(pos_samples)))
            
            if len(neg_samples) > 0:
                neg_samples = neg_samples[0]
            else:
                neg_samples = []

            if len(pos_samples) > 0:
                pos_samples = pos_samples[0]
            else:
                pos_samples = []
            
            if len(pos_samples) < cfg.num_rois // 2:
                selected_pos_samples = pos_samples.tolist()
            else:
                selected_pos_samples = np.random.choice(pos_samples, cfg.num_rois // 2, replace=False).tolist()

            if len(neg_samples) + len(selected_pos_samples) > cfg.num_rois:
                selected_neg_samples = np.random.choice(neg_samples, cfg.num_rois - len(selected_pos_samples), replace=False).tolist()
            else:
                selected_neg_samples = np.random.choice(neg_samples, cfg.num_rois - len(selected_pos_samples), replace=True).tolist()
            selected_samples = selected_pos_samples + selected_neg_samples
            
            loss_class = model_classifier.train_on_batch([X, X2[:, selected_samples, :]], [Y1[:, selected_samples, :], Y2[:, selected_samples, :]])
            print(f"\t cls loss -> cls: {loss_class[1]:.4f}, regr: {loss_class[2]:.4f}, cls acc: {loss_class[3]:.4f}")
            
            #### loss 계산 갱신
            
            losses[iter_num, 0] = loss_rpn[1]
            losses[iter_num, 1] = loss_rpn[2]
            losses[iter_num, 2] = loss_class[1]
            losses[iter_num, 3] = loss_class[2]
            losses[iter_num, 4] = loss_class[3]
            
            rpn_cls = np.nanmean(losses[:iter_num+1, 0])
            rpn_regr = np.nanmean(losses[:iter_num+1, 1])
            detector_cls = np.nanmean(losses[:iter_num+1, 2])
            detector_regr = np.nanmean(losses[:iter_num+1, 3])

            progbar.update(iter_num,
                           [('rpn_cls', rpn_cls), \
                            ('rpn_regr', rpn_regr), \
                            ('detector_cls', detector_cls), \
                            ('detector_regr', detector_regr)])
            
            historys = historys.append(dict([
                            ('rpn_cls', rpn_cls), \
                            ('rpn_regr', rpn_regr), \
                            ('detector_cls', detector_cls), \
                            ('detector_regr', detector_regr)]), ignore_index=True)
            historys.to_csv(f"{train_name}-history.csv")
            
            if iter_num % 100 == 0:
                model_all.save_weights(os.path.join('models', f'Epoch_{epoch_num}_Iter_{iter_num:04}_rpnCls_{rpn_cls:.4f}_rpnRegr_{rpn_regr:.4f}_clsCls_{detector_cls:.4f}_clsRegr_{detector_regr:.4f}.hdf5'))

        loss_rpn_cls = np.nanmean(losses[:, 0])
        loss_rpn_regr = np.nanmean(losses[:, 1])
        loss_class_cls = np.nanmean(losses[:, 2])
        loss_class_regr = np.nanmean(losses[:, 3])
        class_acc = np.nanmean(losses[:, 4])

        mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)

        if cfg.verbose:
            print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(mean_overlapping_bboxes))
            if mean_overlapping_bboxes == 0:
                print('RPN is not producing bounding boxes that overlap the ground truth boxes.')
                print('Check RPN settings or keep training.')
                
            print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
            print('Loss RPN classifier: {}'.format(loss_rpn_cls))
            print('Loss RPN regression: {}'.format(loss_rpn_regr))
            print('Loss Detector classifier: {}'.format(loss_class_cls))
            print('Loss Detector regression: {}'.format(loss_class_regr))
            print('Elapsed time: {}'.format(time.time() - start_time))

        curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr

        if curr_loss < best_loss:
            print(f'\t Total loss decreased from {best_loss:.5f} to {curr_loss:.5f}')
            best_loss = curr_loss
        
        model_all.save_weights(os.path.join('models', f'Epoch_{epoch_num}_rpnCls_{loss_rpn_cls:.4f}_rpnRegr_{loss_rpn_regr:.4f}_clsCls_{loss_class_cls:.4f}_clsRegr_{loss_class_regr:.4f}_acc{class_acc:.4f}.hdf5'))

Epoch 1/4

 ./save_dota_aug_train_change/P0000_augment.png (5504, 3888, 3)
	 rpn loss -> cls: 7.864053249359131, regr: 1.9811793565750122

 ./save_dota_aug_train_change/P0001_augment.png (5776, 1264, 3)
	 rpn loss -> cls: 7.609644889831543, regr: 1.5934219360351562

 ./save_dota_aug_train_change/P0002_augment.png (2096, 2560, 3)
	 rpn loss -> cls: 7.691080093383789, regr: 1.78555428981781
	 cls loss -> cls: 2.9444386959075928, regr: 0.0, cls acc: 0.0
   2/1409 [..............................] - ETA: 239:52:48 - rpn_cls: 2.5637 - rpn_regr: 0.5952 - detector_cls: 0.9815 - detector_regr: 0.0000e+00
 ./save_dota_aug_train_change/P0005_augment.png (992, 896, 3)
	 rpn loss -> cls: 7.677186489105225, regr: 1.6817430257797241
	 cls loss -> cls: 2.934804916381836, regr: 0.0, cls acc: 1.0
   3/1409 [..............................] - ETA: 166:24:00 - rpn_cls: 2.9898 - rpn_regr: 0.6857 - detector_cls: 1.1443 - detector_regr: 0.0000e+00
 ./save_dota_aug_train_change/P0008_augment.png (1248, 1152, 3


 ./save_dota_aug_train_change/P0038_augment.png (4320, 2560, 3)
	 rpn loss -> cls: 7.672197341918945, regr: 1.5559089183807373
	 cls loss -> cls: 0.04507136344909668, regr: 0.0, cls acc: 1.0
  19/1409 [..............................] - ETA: 124:13:32 - rpn_cls: 4.6320 - rpn_regr: 1.1307 - detector_cls: 1.7992 - detector_regr: 0.3172
 ./save_dota_aug_train_change/P0039_augment.png (3664, 3600, 3)
	 rpn loss -> cls: 4.730466365814209, regr: 1.9264658689498901
	 cls loss -> cls: 0.10104075074195862, regr: 0.0, cls acc: 1.0
  20/1409 [..............................] - ETA: 140:39:25 - rpn_cls: 4.6269 - rpn_regr: 1.1333 - detector_cls: 1.7817 - detector_regr: 0.3194
 ./save_dota_aug_train_change/P0041_augment.png (2336, 3200, 3)
	 rpn loss -> cls: 4.540389060974121, regr: 1.8668243885040283

 ./save_dota_aug_train_change/P0042_augment.png (2240, 1904, 3)
	 rpn loss -> cls: 4.337307453155518, regr: 1.838014841079712

 ./save_dota_aug_train_change/P0044_augment.png (1744, 1648, 3)
	 rpn loss

---

### Start
- 2021-04-15 14:52

### Problem
- img 전처리가 너무 오래 걸린다. (Y 값 계산)


---

 ./save_dota_aug_train/P1848_augment.png (2224, 1584, 3)
#calc_rpn Exception : can't multiply sequence by non-int of type 'float'

 ./save_dota_aug_train/P0282_augment.png (944, 1200, 3)
	 rpn loss -> cls: 7.8367462158203125, regr: 1.8953641653060913
	 cls loss -> cls: 2.9444386959075928, regr: 0.0, cls acc: 0.0
0/5 [..............................] - ETA: 0s - rpn_cls: nan - rpn_regr: nan - detector_cls: nan - detector_regr: nan

---