In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split

#loading training and testing datasets
df_train = pd.read_csv('../input/rsna-bone-age/boneage-training-dataset.csv')
df_test = pd.read_csv('../input/rsna-bone-age/boneage-test-dataset.csv')

#appending png file extension to id column for both training and testing datasets
df_train['id'] = df_train['id'].apply(lambda x: str(x)+'.png')
df_test['Case ID'] = df_test['Case ID'].apply(lambda x: str(x)+'.png')

#Feature Engineering
df_train['Sex'] = df_train['male'].apply(lambda x: '1' if x else '0')
del(df_train['male'])
df_test['id'] = df_test['Case ID']
del(df_test['Case ID'])

#splitting train datasets into traininng and validation datasets
test_train_df, valid_df = train_test_split(df_train, test_size = 0.27, random_state = 0)
train_df, test_df = train_test_split(test_train_df, test_size = 0.2, random_state = 0)
test_df, orginal_test_df = train_test_split(test_df, test_size = 0.07, random_state = 0)

orginal_test_df.to_csv('original_test_data.csv')

In [2]:
# print(train_df.shape)
# train_df.head()

In [3]:
from tensorflow import keras 
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense,GlobalAveragePooling2D

#packages required for image preprocessing
from tensorflow.keras.preprocessing import image
from tensorflow.keras.metrics import mean_absolute_error
import tensorflow as tf
from keras.applications import ResNet50V2 as pre_model

#image_size = 256
image_size = 224
#pretrain_model = ResNet50(input_shape=(image_size, image_size, 3), include_top=False, weights='imagenet')
pretrain_model = pre_model(input_shape=(image_size, image_size, 3), include_top=False, weights='imagenet')
x = pretrain_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024,activation='relu')(x)
x = Dense(1024,activation='relu')(x)
x = Dense(512,activation='relu')(x)
output_image = Dense(1,activation='linear')(x)

#image_output = keras.Model(input=pretrain_model, output=output_image)
gender_input = keras.Input(shape=(2,),name = 'gender')
gender_concat =layers.concatenate([output_image,gender_input])
output_gen = Dense(1,activation='linear')(gender_concat)
#gender_model = keras.Model(inputs = gender_input,outputs = output_gen)


model = keras.Model(inputs =[pretrain_model.input, gender_input],outputs=[output_gen])


#keras.utils.plot_model(model, "multi_input_and_output_model.png", show_shapes=True)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels_notop.h5


In [4]:
model.compile(optimizer = 'adam', loss = 'mse',
                           metrics = ['mae'])

#model.summary()

In [5]:
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
weight_path="{}_mnet_weights.h5".format('ResNet50V2_bone_age')
checkpoint = ModelCheckpoint(weight_path, monitor='val_loss', verbose=1, 
                             save_best_only=True, mode='min', save_weights_only = True)


reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.8, patience=2, verbose=1, mode='auto', min_delta=0.01, cooldown=3, min_lr=0.01)
early = EarlyStopping(monitor="val_loss", 
                      mode="min", 
                      patience=10) # probably needs to be more patient
callbacks_list = [checkpoint, early, reduceLROnPlat]

In [6]:
class CustomDataGen(tf.keras.utils.Sequence):
    
    def __init__(self, df, directory, X_col, y_col,
                 batch_size,
                 input_size=(224, 224, 3),
                 shuffle=True):
        
        self.df = df.copy()
        self.X_col = X_col
        self.y_col = y_col
        self.batch_size = batch_size
        self.input_size = input_size
        self.shuffle = shuffle
        
        self.n = len(self.df)
        self.n_sex = df[X_col['Sex']].nunique()
        #self.n_path = df[X_col['id']]
        self.directory = directory
    
    def __get_input(self, path, target_size):
        img_path = self.directory+path
        #print(img_path)
        image = tf.keras.preprocessing.image.load_img(img_path)
        image_arr = tf.keras.preprocessing.image.img_to_array(image)

        image_arr = tf.image.resize(image_arr,(target_size[0], target_size[1])).numpy()

        return image_arr/255.
    
    def __get_gender(self, label, num_classes):
        return tf.keras.utils.to_categorical(label, num_classes=num_classes)
        
        
    def __get_data(self, batches):
        # Generates data containing batch_size samples

        path_batch = batches[self.X_col['id']]
        sex_batch = batches[self.X_col['Sex']]

        bone_batch = batches[self.y_col['boneage']]

        X0_batch = np.asarray([self.__get_input(x, self.input_size) for x in path_batch])
        X1_batch = np.asarray([self.__get_gender(y, self.n_sex) for y in sex_batch])
        y_batch = np.asarray(bone_batch)

        return tuple([X0_batch,X1_batch]), y_batch
    
    def on_epoch_end(self):
        if self.shuffle:
            self.df = self.df.sample(frac=1).reset_index(drop=True)
    
    def __getitem__(self, index):

        batches = self.df[index * self.batch_size:(index + 1) * self.batch_size]
        X, y = self.__get_data(batches)        
        return X, y
    
    def __len__(self):
        return self.n // self.batch_size

In [7]:
target_size = (image_size, image_size, 3)
batch_size = 32
traingen = CustomDataGen(train_df,
                         directory="../input/rsna-bone-age/boneage-training-dataset/boneage-training-dataset/",
                         X_col={'id':'id','Sex': 'Sex'},
                         y_col={'boneage': 'boneage'},
                         batch_size=batch_size, input_size=target_size)
validgen = CustomDataGen(valid_df,
                         directory="../input/rsna-bone-age/boneage-training-dataset/boneage-training-dataset/",
                         X_col={'id':'id','Sex': 'Sex'},
                         y_col={'boneage': 'boneage'},
                         batch_size=batch_size, input_size=target_size)

In [8]:
testgen = CustomDataGen(test_df,
                         directory="../input/rsna-bone-age/boneage-training-dataset/boneage-training-dataset/",
                         X_col={'id':'id','Sex': 'Sex'},
                         y_col={'boneage': 'boneage'},
                         batch_size=batch_size, input_size=target_size)

In [9]:
# print(len(test_df))
# X,y = traingen[0]
# print(len(y))

In [10]:
model.load_weights('../input/bone-age-detection-resnet50v2/ResNet50V2_bone_age_mnet_weights.h5')

In [11]:
# history_mobilenet = model.fit_generator(traingen,
#           validation_data=validgen,
#           epochs=100,callbacks = callbacks_list)

In [12]:
#[205.38279724121094, 10.904102325439453][199.9623260498047, 10.696362495422363]

In [13]:
model.evaluate(testgen)



[257.12933349609375, 12.594344139099121]