Copyright (C) 2018 Pierluigi Ferrari

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

In [1]:
import h5py
import numpy as np
import shutil

from misc_utils.tensor_sampling_utils import sample_tensors

In [2]:
# 1. Enter the path of the weight file (h5) that is to be trained further to improve its performance.
# NOTE(review): absolute, machine-specific Windows path — adjust it for your own environment.
weights_source_path = 'C:\\Users\\user\\Desktop\\weight\\VGG_VOC0712Plus_SSD_300x300_iter_240000.h5'

# Enter the path under which the resulting weight file (h5) will be stored.
weights_destination_path = 'C:\\Users\\user\\Desktop\\weight\\VGG_VOC0712Plus_SSD_300x300_iter_240000_sampling_2_.h5'

# Duplicate the source file at the destination path; the datasets of the copy are the ones
# rewritten further down, while the source file is only ever opened read-only.
shutil.copy(weights_source_path, weights_destination_path)

'C:\\Users\\user\\Desktop\\weight\\VGG_VOC0712Plus_SSD_300x300_iter_240000_sampling_2_.h5'

In [3]:
# Open the original weights read-only and the copy explicitly in read/write ('a') mode.
# The mode has to be spelled out for the copy: when it is omitted, h5py 3.x opens the
# file read-only (older releases defaulted to 'a'), and the dataset deletions and
# re-creations performed on the copy afterwards would fail.
weights_source_file = h5py.File(weights_source_path, 'r')
weights_destination_file = h5py.File(weights_destination_path, 'a')

In [4]:
# Names of the layers whose 'kernel:0' / 'bias:0' datasets get re-sampled in the weight file.
# Every name is a feature-map prefix followed by the shared '_mbox_conf' suffix.
classifier_names = [prefix + '_mbox_conf'
                    for prefix in ('conv4_3_norm',
                                   'fc7',
                                   'conv6_2',
                                   'conv7_2',
                                   'conv8_2',
                                   'conv9_2')]

In [6]:
# 2. The basic numbers follow the methodology of the SSD model.
# Class count of the source weights; used below as the size of one block of channels
# along the kernel's output-channel axis.
n_classes_source = 21
# The data to be trained on only has the classes 'crack' and 'non-crack', so 2 is entered.
classes_of_interest = 2


for name in classifier_names:
    # Read the complete datasets into NumPy arrays. `Dataset.value` was deprecated in
    # h5py 2.x and removed in h5py 3.0; `dataset[()]` works on every version.
    kernel = weights_source_file[name][name]['kernel:0'][()]
    bias = weights_source_file[name][name]['bias:0'][()]

    height, width, in_channels, out_channels = kernel.shape

    # The output channels must split into whole blocks of `n_classes_source` channels;
    # fail loudly instead of silently truncating a float quotient.
    n_blocks, remainder = divmod(out_channels, n_classes_source)
    if remainder:
        raise ValueError("Layer '{}' has {} output channels, which is not a multiple of "
                         "`n_classes_source` ({}).".format(name, out_channels, n_classes_source))

    if isinstance(classes_of_interest, (list, tuple)):
        # Explicit class indices: repeat them once per block, shifted by the block offset.
        subsampling_indices = list(np.concatenate(
            [np.array(classes_of_interest) + block * n_classes_source
             for block in range(n_blocks)]))
    elif isinstance(classes_of_interest, int):
        # Only a count was given: pass on the total number of output channels to keep.
        # NOTE(review): presumably `sample_tensors` then chooses which channels to keep
        # by itself — confirm against misc_utils.tensor_sampling_utils.
        subsampling_indices = classes_of_interest * n_blocks
    else:
        raise ValueError("`classes_of_interest` must be either an integer or a list/tuple.")

    new_kernel, new_bias = sample_tensors(weights_list=[kernel, bias],
                                          sampling_instructions=[height, width, in_channels, subsampling_indices],
                                          axes=[[3]], # The one bias dimension corresponds to the last kernel dimension.
                                          init=['gaussian', 'zeros'],
                                          mean=0.0,
                                          stddev=0.005)

    # HDF5 datasets cannot simply be overwritten with data of a different shape,
    # so the old ones are deleted and re-created from the sampled tensors.
    del weights_destination_file[name][name]['kernel:0']
    del weights_destination_file[name][name]['bias:0']

    weights_destination_file[name][name].create_dataset(name='kernel:0', data=new_kernel)
    weights_destination_file[name][name].create_dataset(name='bias:0', data=new_bias)

# Write everything to disk and release both HDF5 handles, so that the modified copy is
# not still held open for writing when `model.load_weights()` opens it again later.
weights_destination_file.flush()
weights_destination_file.close()
weights_source_file.close()

In [7]:
from keras.optimizers import Adam, SGD
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TerminateOnNaN, CSVLogger
from keras import backend as K
from keras.models import load_model
from math import ceil
import numpy as np
from matplotlib import pyplot as plt

from models.keras_ssd300 import ssd_300
from keras_loss_function.keras_ssd_loss import SSDLoss
from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from keras_layers.keras_layer_DecodeDetections import DecodeDetections
from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast
from keras_layers.keras_layer_L2Normalization import L2Normalization

from ssd_encoder_decoder.ssd_input_encoder import SSDInputEncoder
from ssd_encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast

from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.object_detection_2d_geometric_ops import Resize
from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels
from data_generator.data_augmentation_chain_original_ssd import SSDDataAugmentation
from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms

%matplotlib inline

Using TensorFlow backend.


In [8]:
# Input image geometry and preprocessing values passed to `ssd_300()` below.
img_height, img_width, img_channels = 300, 300, 3
mean_color = [123, 117, 104]
swap_channels = [2, 1, 0]
n_classes = 1

# Two alternative sets of scales are defined; the Pascal set is the one selected.
scales_pascal = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]
scales_coco = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
scales = scales_pascal

# One list of aspect ratios per entry of `steps` / `offsets` (six entries each).
aspect_ratios = [
    [1.0, 2.0, 0.5],
    [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
    [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
    [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
    [1.0, 2.0, 0.5],
    [1.0, 2.0, 0.5],
]
two_boxes_for_ar1 = True
steps = [8, 16, 32, 64, 100, 300]
offsets = [0.5] * 6
clip_boxes = False
variances = [0.1, 0.1, 0.2, 0.2]
normalize_coords = True

In [9]:
# Reset the Keras backend session before a new model is built.
K.clear_session()

# Build the SSD300 network in training mode from the configuration values defined above.
model = ssd_300(
    image_size=(img_height, img_width, img_channels),
    n_classes=n_classes,
    mode='training',
    l2_regularization=0.0005,
    scales=scales,
    aspect_ratios_per_layer=aspect_ratios,
    two_boxes_for_ar1=two_boxes_for_ar1,
    steps=steps,
    offsets=offsets,
    clip_boxes=clip_boxes,
    variances=variances,
    normalize_coords=normalize_coords,
    subtract_mean=mean_color,
    swap_channels=swap_channels,
)

# Initialise the network from the sub-sampled copy of the weight file, matching layers by name.
weights_path = weights_destination_path
model.load_weights(weights_path, by_name=True)

# Optimizer and loss object used to compile the model.
adam = Adam(
    lr=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
    decay=0.0,
)
ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

In [10]:
# 3. Adjust the paths below to match your own environment.

# Training uses 2 classes in total.
classes = ['background','crack']

# Directory in which the crack image set is stored.
Crack_images_dir      = '../../Desktop/SPARK/Concrete Crack Images for Classification/Positive 300 X 300/Positive/'
#Noncrack_images_dir   = '../../Desktop/SPARK/Concrete Crack Images for Classification/Negative 300 X 300/Negative/'

# Directory in which the annotation files are stored.
Crack_images_dir_annotations      = '../../Desktop/SPARK/Concrete Crack Images for Classification/Positive 300 X 300/annotation/'
#Noncrack_images_dir_annotations   = '../../Desktop/SPARK/Concrete Crack Images for Classification/Negative 300 X 300/annotation/'

# Text file listing the names of the image files used for training and validation.
trainval_image_set_filename_crack = '../../Desktop/SPARK/Concrete Crack Images for Classification/Positive 300 X 300/trainval.txt'
#trainval_image_set_filename_noncrack = '../../Desktop/SPARK/Concrete Crack Images for Classification/Negative 300 X 300/trainval.txt'

# Text file listing the names of the image files used for testing.
test_image_set_filename_crack     = '../../Desktop/SPARK/Concrete Crack Images for Classification/Positive 300 X 300/test.txt'
#test_image_set_filename_noncrack     = '../../Desktop/SPARK/Concrete Crack Images for Classification/Negative 300 X 300/test.txt'

# Training data: built from the 'trainval' image set.
train_dataset = DataGenerator(load_images_into_memory=True, hdf5_dataset_path=None)
train_dataset.parse_xml(
    images_dirs=[Crack_images_dir],
    image_set_filenames=[trainval_image_set_filename_crack],
    annotations_dirs=[Crack_images_dir_annotations],
    classes=classes,
    include_classes='all',
    exclude_truncated=False,
    exclude_difficult=False,
    ret=False,
)

# Validation data: built from the 'test' image set; unlike the training set,
# `exclude_difficult` is switched on here.
val_dataset = DataGenerator(load_images_into_memory=True, hdf5_dataset_path=None)
val_dataset.parse_xml(
    images_dirs=[Crack_images_dir],
    image_set_filenames=[test_image_set_filename_crack],
    annotations_dirs=[Crack_images_dir_annotations],
    classes=classes,
    include_classes='all',
    exclude_truncated=False,
    exclude_difficult=True,
    ret=False,
)

Processing image set 'trainval.txt': 100%|████████████████████████████████████████| 8000/8000 [00:13<00:00, 574.77it/s]
Loading images into memory: 100%|█████████████████████████████████████████████████| 8000/8000 [00:20<00:00, 385.31it/s]
Processing image set 'test.txt': 100%|████████████████████████████████████████████| 2000/2000 [00:03<00:00, 551.86it/s]
Loading images into memory: 100%|█████████████████████████████████████████████████| 2000/2000 [00:05<00:00, 379.25it/s]


In [11]:
# 3: Set the batch size.

batch_size = 2

# 4: Set the image transformations for pre-processing and data augmentation options.

# Used by the training generator:
ssd_data_augmentation = SSDDataAugmentation(
    img_height=img_height,
    img_width=img_width,
    background=mean_color,
)

# Used by the validation generator:
resize = Resize(height=img_height, width=img_width)
convert_to_3_channels = ConvertTo3Channels()

# 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.

# The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
# This is the place where the layers have to be adapted to the task.
# Later on, modify the layers based on what each of us has studied and find out
# which attempt reduces the loss value the most.
predictor_sizes = [
    model.get_layer(layer_name).output_shape[1:3]
    for layer_name in ('conv4_3_norm_mbox_conf',
                       'fc7_mbox_conf',
                       'conv6_2_mbox_conf',
                       'conv7_2_mbox_conf',
                       'conv8_2_mbox_conf',
                       'conv9_2_mbox_conf')
]

ssd_input_encoder = SSDInputEncoder(
    img_height=img_height,
    img_width=img_width,
    n_classes=n_classes,
    predictor_sizes=predictor_sizes,
    scales=scales,
    aspect_ratios_per_layer=aspect_ratios,
    two_boxes_for_ar1=two_boxes_for_ar1,
    steps=steps,
    offsets=offsets,
    clip_boxes=clip_boxes,
    variances=variances,
    matching_type='multi',
    pos_iou_threshold=0.5,
    neg_iou_limit=0.5,
    normalize_coords=normalize_coords,
)

# 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.

# TODO: verify that `shuffle=True` really randomizes the order in which the images are trained on.
train_generator = train_dataset.generate(
    batch_size=batch_size,
    shuffle=True,
    transformations=[ssd_data_augmentation],
    label_encoder=ssd_input_encoder,
    returns={'processed_images', 'encoded_labels'},
    keep_images_without_gt=False,
)

val_generator = val_dataset.generate(
    batch_size=batch_size,
    shuffle=False,
    transformations=[convert_to_3_channels, resize],
    label_encoder=ssd_input_encoder,
    returns={'processed_images', 'encoded_labels'},
    keep_images_without_gt=False,
)

# Report how many samples the training and validation datasets contain.
train_dataset_size = train_dataset.get_dataset_size()
val_dataset_size   = val_dataset.get_dataset_size()

print("Number of images in the training dataset:\t{:>6}".format(train_dataset_size))
print("Number of images in the validation dataset:\t{:>6}".format(val_dataset_size))

Number of images in the training dataset:	  8000
Number of images in the validation dataset:	  2000


In [30]:
# Define a learning rate schedule.
# epoch = 1,000 training steps
# TODO: find a suitable learning rate.
def lr_schedule(epoch):
    """Return the learning rate for `epoch`.

    Epochs below 10 use 0.0005; every later epoch uses 0.000005.
    """
    return 0.0005 if epoch < 10 else 0.000005

In [31]:
# Define model callbacks.

# Stops training as soon as a NaN loss shows up.
terminate_on_nan = TerminateOnNaN()

# Applies `lr_schedule` during training.
learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule, verbose=1)

# Appends the training log to a CSV file.
csv_logger = CSVLogger(
    filename='ssd300_pascal_07+12_training_log.csv',
    separator=',',
    append=True,
)

# TODO: Set the filepath under which you want to save the model.
# Adjust the file path appropriately later on.
# Originally the file written here was the whole model, not just the weights!
### For convenience at inference time this was changed to store ONLY the weights.
### Storing the complete model here causes considerable trouble during inference.
model_checkpoint = ModelCheckpoint(
    filepath='ssd300_pascal_07+12_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,  ### changed so that only the weights are saved
    mode='auto',
    period=1,
)
#model_checkpoint.best =

# The order of the list is kept: checkpoint, CSV log, LR schedule, NaN guard.
callbacks = [
    model_checkpoint,
    csv_logger,
    learning_rate_scheduler,
    terminate_on_nan,
]

In [32]:
# If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly.
# This part has to be revisited later for optimization.
# The epoch variables also have to be adjusted to the amount of data.
initial_epoch, final_epoch, steps_per_epoch = 0, 16, 1000

# Train on the training generator; validation runs over the whole validation set
# (dataset size divided by batch size, rounded up).
history = model.fit_generator(
    generator=train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=final_epoch,
    initial_epoch=initial_epoch,
    callbacks=callbacks,
    validation_data=val_generator,
    validation_steps=ceil(val_dataset_size/batch_size),
)

Epoch 1/16

Epoch 00001: LearningRateScheduler reducing learning rate to 0.0005.

Epoch 00001: val_loss improved from inf to 3.87801, saving model to ssd300_pascal_07+12_epoch-01_loss-3.8650_val_loss-3.8780.h5
Epoch 2/16

Epoch 00002: LearningRateScheduler reducing learning rate to 0.0005.





Epoch 00002: val_loss improved from 3.87801 to 3.40380, saving model to ssd300_pascal_07+12_epoch-02_loss-3.4589_val_loss-3.4038.h5
Epoch 3/16

Epoch 00003: LearningRateScheduler reducing learning rate to 0.0005.

Epoch 00003: val_loss did not improve from 3.40380
Epoch 4/16

Epoch 00004: LearningRateScheduler reducing learning rate to 0.0005.



Epoch 00004: val_loss did not improve from 3.40380
Epoch 5/16

Epoch 00005: LearningRateScheduler reducing learning rate to 0.0005.
 167/1000 [====>.........................] - ETA: 4:28 - loss: 4.790 - ETA: 4:27 - loss: 3.394 - ETA: 4:29 - loss: 3.044 - ETA: 4:34 - loss: 2.936 - ETA: 4:32 - loss: 2.827 - ETA: 4:32 - loss: 2.618 - ETA: 4:31 - loss: 2.977 - ETA: 4:32 - loss: 3.098 - ETA: 4:31 - loss: 2.980 - ETA: 4:32 - loss: 3.005 - ETA: 4:32 - loss: 3.076 - ETA: 4:31 - loss: 3.026 - ETA: 4:32 - loss: 2.901 - ETA: 4:31 - loss: 2.969 - ETA: 4:31 - loss: 2.878 - ETA: 4:30 - loss: 2.900 - ETA: 4:30 - loss: 2.993 - ETA: 4:31 - loss: 2.962 - ETA: 4:30 - loss: 2.973 - ETA: 4:30 - loss: 3.162 - ETA: 4:29 - loss: 3.177 - ETA: 4:29 - loss: 3.250 - ETA: 4:29 - loss: 3.220 - ETA: 4:28 - loss: 3.238 - ETA: 4:29 - loss: 3.207 - ETA: 4:28 - loss: 3.156 - ETA: 4:28 - loss: 3.188 - ETA: 4:28 - loss: 3.164 - ETA: 4:28 - loss: 3.123 - ETA: 4:28 - loss: 3.096 - ETA: 4:28 - loss: 3.102 - ETA: 4:27 - loss

KeyboardInterrupt: 