In [1]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import cv2
import os
import sys
sys.path.append('../../src/spine/model')
sys.path.append('../../src/spine/dataset')
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.metrics import AUC, Recall, Precision
from tensorflow.keras.utils import plot_model, Sequence
import albumentations as A

from retinanet import *
from loss import *
from label_encoder import *
from vrt2coco import *
from process_dataset import process_json_to_img

data_dir = '../../data/spine'

num_classes = 1
batch_size = 8
img_size = 480
max_channel = 20

# Load data

### Data generator

In [2]:
# label_encoder = LabelEncoder()

In [3]:
class DataGen(Sequence):
    def __init__(self, data_dir, path, max_channel, batch_size=8, img_size=640, shuffle=True, mode=None):
        self.data_dir = data_dir
        self.path = path
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.img_size = img_size
        self.mode = mode
        self.max_channel = max_channel
        self.class_names = {'book':1}
        self.ratios = [0.5, 1.0, 2.0]
        self.scales = [4 * 2**(i/3) for i in range(3)]
        self.angles = [-np.pi/6, 0, np.pi/6] 

        self.transform_train = A.Compose([A.HorizontalFlip(),
                                          A.Rotate(limit=1),
                                          A.RandomBrightnessContrast(),
                                          A.LongestMaxSize (max_size=self.img_size),
#                                           A.Resize (height=self.img_size, width=self.img_size), 
                                          A.PadIfNeeded(min_height=self.img_size, min_width=self.img_size, border_mode=0),
                                        
                                          ],
                                         keypoint_params=A.KeypointParams(format='xy'))
        self.on_epoch_end()
    
    def on_epoch_end(self):
        if self.mode == 'train':
            self.indexes = np.arange(len(self.path))
            if self.shuffle:
                np.random.shuffle(self.indexes)
   
    def __len__(self):
        return int(np.floor(len(self.path) / self.batch_size))
    
    def __getitem__(self, index):
        X, y = self.data_generation(index)
        return X, y
    
    def process_keypoints(self, vrt, shape):
        target = np.ones((self.max_channel, 5)) * -1
        pts, k = [], 0
        for i in shape:
            pts.append([k, k+i])
            k+=i
        for idx, i in enumerate(pts):
            if idx ==self.max_channel:
                break
            cnt = np.array(vrt[i[0]:i[1]]).reshape(-1,2).astype(np.int0)
            x, y, w, h = cv2.boundingRect(cnt)
            theta = cv2.minAreaRect(cnt)[2]
            target[idx,:]= np.array([x, y, w, h, self.class_names['book']]).astype(float)        
        return target
    
    def data_generation(self, index):
        
        X = np.zeros((self.batch_size, self.img_size, self.img_size, 3), dtype=int)
        y = np.zeros((self.batch_size, self.max_channel, 5), dtype=float)
        batch_path = self.path[index * self.batch_size : (index + 1) * self.batch_size]

        for idx, img_path in enumerate(batch_path):
            _, img, vrt, vrt_shape = process_json_to_img(self.data_dir, path = img_path)
            
            if self.mode == 'train' or self.mode == 'val':
                tr = self.transform_train(image=img,
                                          keypoints=vrt)
                X[idx, ] = tr['image']
                y[idx, ] = self.process_keypoints(tr['keypoints'], vrt_shape)
            else:
                X[idx, ] = img
#             y[i, ] = self.encode_sample(images_shape, gt_boxes[i], cls_ids[i])
        return X, y
    
    def extract_target(self, ):
        return

In [4]:
json_path = [os.path.join(data_dir,i) for i in os.listdir(os.path.join(data_dir)) if os.path.splitext(i)[1]=='.json']

train, val = train_test_split(json_path, test_size=0.3, random_state=0)
val, test = train_test_split(val, test_size=0.5, random_state=0)

In [5]:
train_datagen = DataGen(data_dir, train, max_channel, batch_size, img_size, True,'train')
val_datagen = DataGen(data_dir, val, max_channel, batch_size, img_size, True, 'val')
# test_datagen = DataGen(data_dir, test, batch_size, img_size)

### Train/val/test split & data generator

# Train data visualization

In [6]:
decode = DecodePredictions()
X, y = train_datagen.__getitem__(0)
print('Image: ', X.shape, 'Label: ', y.shape)

plt.figure(figsize=(20,35))
for idx, (img, pred) in enumerate(zip(X, y)):
    
    img_res = decode(img, pred)
    plt.subplot(batch_size/2, 2, idx+1)
    plt.imshow(img_res[:, :, [2, 1, 0]])
    plt.axis('off')
plt.tight_layout()

Image:  (8, 480, 480, 3) Label:  (8, 20, 5)


InvalidArgumentError: Exception encountered when calling layer "decode_predictions" (type DecodePredictions).

Value for attr 'T' of int32 is not in the list of allowed values: bfloat16, half, float, double, complex64, complex128
	; NodeDef: {{node Sigmoid}}; Op<name=Sigmoid; signature=x:T -> y:T; attr=T:type,allowed=[DT_BFLOAT16, DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128]> [Op:Sigmoid]

Call arguments received:
  • images=tf.Tensor(shape=(480, 480, 3), dtype=int32)
  • predictions=array([[457,  84,  17, 335,   1],
       [441,  82,  18, 337,   1],
       [422,  61,  19, 358,   1],
       [407,  61,  18, 358,   1],
       [392,  61,  16, 358,   1],
       [380,  61,  16, 358,   1],
       [362,  60,  23, 359,   1],
       [344,  61,  22, 358,   1],
       [327,  61,  21, 359,   1],
       [308,  61,  21, 358,   1],
       [297,  61,  12, 358,   1],
       [286,  61,  12, 358,   1],
       [268,  61,  19, 358,   1],
       [249,  61,  19, 358,   1],
       [229,  66,  20, 353,   1],
       [209,  67,  23, 351,   1],
       [197,  66,  14, 353,   1],
       [187,  66,  12, 353,   1],
       [169,  68,  19, 351,   1],
       [160,  68,  10, 351,   1]])

<Figure size 1440x2520 with 0 Axes>

# Model

In [None]:
learning_rates = [2.5e-06, 0.000625, 0.00125, 0.0025, 0.00025, 2.5e-05]
learning_rate_boundaries = [125, 250, 500, 240000, 360000]
learning_rate_fn = tf.optimizers.schedules.PiecewiseConstantDecay(boundaries=learning_rate_boundaries, 
                                                                  values=learning_rates)

In [None]:
resnet50_backbone = get_backbone()
loss_fn = RetinaNetLoss(num_classes)
model = RetinaNet(num_classes, resnet50_backbone)

optimizer = tf.optimizers.SGD(learning_rate=learning_rate_fn, momentum=0.9)
model.compile(loss=loss_fn, optimizer=optimizer)

In [None]:
# callbacks_list = [ModelCheckpoint(filepath=os.path.join(model_dir, "weights" + "_epoch_{epoch}"),
#                                   monitor="loss",
#                                   save_best_only=True,
#                                   save_weights_only=True,
#                                   verbose=1,)]

filepath = '../model/spine.epoch{epoch:02d}-loss{val_loss:.2f}.hdf5'
csv_logger = CSVLogger("../model/training_smile_binary.csv", append=True)
tb = TensorBoard(log_dir= "../model/logs", histogram_freq=0, write_graph=True, write_images=True)
checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_acc', verbose=1, save_best_only=True, mode=’max’)
callbacks_list = [csv_logger, tb, checkpoint]

# Training

In [None]:
# history = model.fit(train_dataset.take(100),
#                     validation_data=val_dataset.take(50),
#                     epochs=10,
#                     callbacks=callbacks_list,
#                     verbose=1,)

history =  model.fit_generator(generator=train_datagen,
                               steps_per_epoch = train_datagen.__len__() // batch_size,
                               epochs=10,
                               validation_data = val_datagen,
                               validation_steps = val_datagen.__len__() // batch_size,
                               callbacks=callbacks_list,
                               verbose=1)

# Training metrics

In [None]:
plt.figure(figsize=(20,7))
plt.rcParams.update({'font.size': 15})

plt.subplot(1,2,1)
plt.plot(history.history['loss'], label='train_loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.legend()

plt.subplot(1,2,2)
plt.plot(history.history['acc'], label='train_iou')
plt.plot(history.history['val_acc'], label='val_iou')
plt.legend()

plt.tight_layout()

# Evaluation

# Test data visualization

In [None]:
image = tf.keras.Input(shape=[None, None, 3], name="image")
predictions = model(image, training=False)
detections = DecodePredictions(confidence_threshold=0.5)(image, predictions)
inference_model = tf.keras.Model(inputs=image, outputs=detections)

In [None]:
def prepare_image(image):
    image, _, ratio = resize_and_pad_image(image, jitter=None)
    image = tf.keras.applications.resnet.preprocess_input(image)
    return tf.expand_dims(image, axis=0), ratio

val_dataset = tfds.load("coco/2017", split="validation", data_dir="data")
int2str = dataset_info.features["objects"]["label"].int2str

for sample in val_dataset.take(2):
    image = tf.cast(sample["image"], dtype=tf.float32)
    input_image, ratio = prepare_image(image)
    detections = inference_model.predict(input_image)
    num_detections = detections.valid_detections[0]
    class_names = [int2str(int(x)) for x in detections.nmsed_classes[0][:num_detections]]
    visualize_detections(image,
                         detections.nmsed_boxes[0][:num_detections] / ratio,
                         class_names,
                         detections.nmsed_scores[0][:num_detections],)