## imports

In [5]:
import numpy as np
import pandas as pd
import os
from itertools import cycle

from keras import Input
from keras.applications.inception_v3 import InceptionV3
from keras.layers import Dense, Flatten, AveragePooling2D, concatenate
from keras.optimizer_v2.adam import Adam
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from keras.models import Model
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from flow_dataframe import flow_from_dataframe

# hyperparameters
NUM_EPOCHS = 150
LEARNING_RATE = 0.001
BATCH_SIZE_TRAIN = 16
BATCH_SIZE_VAL = 16


# default size of InceptionResNetV2
IMG_SIZE = (299, 299)


## Preprocessing Image Data

In [2]:
# Generate batches of tensor image data with real-time data augmentation. The data will be looped over (in batches).

core_idg = ImageDataGenerator(zoom_range=0.2,
                              fill_mode='nearest',
                              featurewise_center=False,  # set input mean to 0 over the dataset
                              samplewise_center=False,  # set each sample mean to 0
                              featurewise_std_normalization=False,  # divide inputs by std of the dataset
                              samplewise_std_normalization=False,  # divide each input by its std
                              zca_whitening=False,  # apply ZCA whitening
                              rotation_range=25,  # randomly rotate images in the range (degrees, 0 to 180)
                              width_shift_range=0.2,  # randomly shift images horizontally (fraction of total width)
                              height_shift_range=0.2,  # randomly shift images vertically (fraction of total height)
                              horizontal_flip=True,  # randomly flip images
                              vertical_flip=False)

val_idg = ImageDataGenerator(width_shift_range=0.25, height_shift_range=0.25, horizontal_flip=True)


## Creating Data Generators
### Reading RSNA Boneage Dataset

In [3]:
class_str_col = 'boneage'
gender_str_col = 'male'

base_bone_dir = os.path.join('rsna-bone-age')
boneage_df = pd.read_csv(os.path.join(base_bone_dir, 'boneage-training-dataset.csv'))
boneage_df['path'] = boneage_df['id'].map(lambda x: os.path.join(base_bone_dir,
                                                         'boneage-training-dataset', 
                                                         'boneage-training-dataset', 
                                                         '{}.png'.format(x)))

boneage_df['exists'] = boneage_df['path'].map(os.path.exists)
print(boneage_df['exists'].sum(), 'images found of', boneage_df.shape[0], 'total')

boneage_df[gender_str_col] = boneage_df[gender_str_col].map(lambda x: np.array([1]) if x else np.array([0])) # map boolean values to 1 and 0

train_df_boneage, valid_df_boneage = train_test_split(boneage_df, test_size=0.2,
                                                      random_state=2018)  # ,stratify=boneage_df['boneage_category'])
print('train', train_df_boneage.shape[0], 'validation', valid_df_boneage.shape[0])

train_gen_boneage = flow_from_dataframe(core_idg, train_df_boneage, path_col='path', y_col=class_str_col,
                                        target_size=IMG_SIZE,
                                        color_mode='rgb', batch_size=BATCH_SIZE_TRAIN)

# used a fixed dataset for evaluating the algorithm
valid_gen_boneage = flow_from_dataframe(core_idg, valid_df_boneage, path_col='path', y_col=class_str_col,
                                        target_size=IMG_SIZE,
                                        color_mode='rgb',
                                        batch_size=BATCH_SIZE_VAL)  # we can use much larger batches for evaluation

12611 images found of 12611 total
train 10088 validation 2523
flow_from_dataframe() -->
## Ignore next message from keras, values are replaced anyways
Found 0 images belonging to 0 classes.
Reinserting dataframe: 10088 images
flow_from_dataframe() <--
flow_from_dataframe() -->
## Ignore next message from keras, values are replaced anyways
Found 0 images belonging to 0 classes.
Reinserting dataframe: 2523 images
flow_from_dataframe() <--


## Defining callback functions and generators

In [6]:
weight_path = "{}_weights.best.hdf5".format('bone_age')

checkpoint = ModelCheckpoint(weight_path, monitor='val_loss', verbose=1,
                            save_best_only=True, mode='min', save_weights_only=True)

early = EarlyStopping(monitor="val_loss", mode="min",
                      patience=15)

reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.8, patience=15, verbose=1,
                                   save_best_only=True, mode='auto', min_delta=0.0001, cooldown=5)

optimizer = Adam(learning_rate = 0.007)


def combined_generators(image_generator, gender_data, batch_size):
    gender_generator = cycle(batch(gender_data, batch_size))
    while True:
        nextImage = next(image_generator)
        nextGender = next(gender_generator)
        nextGender=np.asarray(nextGender).astype(int)
        assert len(nextImage[0]) == len(nextGender)
        yield [nextImage[0], nextGender], nextImage[1]


def batch(iterable, n=1):
    l = len(iterable)
    for ndx in range(0, l, n):
        yield iterable[ndx:min(ndx + n, l)]

train_gen_wrapper = combined_generators(train_gen_boneage, train_df_boneage[gender_str_col], BATCH_SIZE_TRAIN)
val_gen_wrapper = combined_generators(valid_gen_boneage, valid_df_boneage[gender_str_col], BATCH_SIZE_VAL)

## Building Model

In [7]:
i1 = Input(shape=(299, 299, 3), name='input_img')
i2 = Input(shape=(1), name='input_gender')
base = InceptionV3(input_tensor=i1, input_shape=(299, 299, 3), include_top=False, weights=None)

feature_img = base.get_layer(name='mixed10').output
feature_img = AveragePooling2D((2, 2))(feature_img)
feature_img = Flatten()(feature_img)
feature_gender = Dense(32, activation='relu')(i2)
feature = concatenate([feature_img, feature_gender], axis=1)

o = Dense(1000, activation='relu')(feature)
o = Dense(1000, activation='relu')(o)
o = Dense(1)(o)
model = Model(inputs=[i1, i2], outputs=o)
model.compile(loss='mean_absolute_error', optimizer=optimizer, metrics=['mae'])


In [None]:
model.summary()

In [9]:
model.load_weights(weight_path)

In [10]:
history = model.fit(train_gen_wrapper, validation_data=val_gen_wrapper,
                              epochs=NUM_EPOCHS, steps_per_epoch=len(train_gen_boneage),
                              validation_steps=len(valid_gen_boneage),
                              callbacks=[early, reduceLROnPlat, checkpoint])

model.save('./model_save_here/saved_model')

Epoch 1/150
Epoch 00001: val_loss improved from inf to 26.82191, saving model to bone_age_weights.best.hdf5
Epoch 2/150
Epoch 00002: val_loss improved from 26.82191 to 21.15364, saving model to bone_age_weights.best.hdf5
Epoch 3/150
Epoch 00003: val_loss improved from 21.15364 to 16.78942, saving model to bone_age_weights.best.hdf5
Epoch 4/150
Epoch 00004: val_loss did not improve from 16.78942
Epoch 5/150
Epoch 00005: val_loss improved from 16.78942 to 16.49123, saving model to bone_age_weights.best.hdf5
Epoch 6/150
Epoch 00006: val_loss did not improve from 16.49123
Epoch 7/150
Epoch 00007: val_loss did not improve from 16.49123
Epoch 8/150
Epoch 00008: val_loss did not improve from 16.49123
Epoch 9/150