In [0]:
def preprocess_data():
  """Build the training generator and one validation batch for the X-ray classifier.

  Reads './Data_Entry_2017.csv', attaches every row to its PNG found under
  './input_images/images*/', keeps only rows whose 'Finding Labels' value is
  exactly 'Atelectasis', 'Infiltration', 'Effusion' or 'No Finding', and
  splits the result 80/20 into a training and a test set.

  Returns:
    tuple: (train_gen, test_X, test_Y). train_gen is the augmenting Keras
    dataframe iterator over the training set; test_X / test_Y are the images
    and one-hot labels of a single batch (batch_size=128) drawn from the test
    set.
  """
  import os
  from glob import glob

  import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
  from keras.preprocessing.image import ImageDataGenerator
  from sklearn.model_selection import train_test_split
  from sklearn.utils import shuffle

  xray_data = pd.read_csv('./Data_Entry_2017.csv')

  # see how many observations there are
  num_obs = len(xray_data)
  print('Number of observations:',num_obs)

  my_glob = glob('./input_images/images*/*.png')
  print('Number of Observations: ', len(my_glob)) # check to make sure I've captured every pathway, should equal 112,120

  # map each image file name to its full path on disk
  full_img_paths = {os.path.basename(x): x for x in my_glob}
  xray_data['full_path'] = xray_data['Image Index'].map(full_img_paths.get)

  #---for removing null path (rows whose image file was not found)
  new_xray_data = xray_data.dropna(subset=['full_path']).reset_index(drop=True)

  #----for making separate dataframes of specific diseases
  findings = new_xray_data['Finding Labels']
  df1 = new_xray_data[findings == 'Atelectasis']
  df2 = new_xray_data[findings == 'Infiltration']
  df3 = new_xray_data[findings == 'Effusion']

  # take only a small sample of 'No Finding' rows (one fifth of the
  # 'Infiltration' count) to avoid biased data
  n = len(df2) // 5
  df4 = new_xray_data[findings == 'No Finding'].iloc[:n, :]

  cleaned_df = pd.concat([df1,df2,df3,df4], ignore_index=True)

  #--for shuffling data (seeded so the split below is reproducible)
  cleaned_df = shuffle(cleaned_df, random_state=1993)

  dummy_labels = ['No Finding', 'Atelectasis', 'Infiltration','Effusion'] # taken from paper

  # One Hot Encoding of Finding Labels to dummy_labels
  for label in dummy_labels:
      cleaned_df[label] = cleaned_df['Finding Labels'].map(lambda result: 1.0 if label in result else 0)

  cleaned_df['target_vector'] = cleaned_df.apply(lambda target: [target[dummy_labels].values], 1).map(lambda target: target[0])

  # split the data into a training and testing set
  train_set, test_set = train_test_split(cleaned_df, test_size = 0.2, random_state = 1993)

  # quick check to see that the training and test set were split properly
  print('training set - # of observations: ', len(train_set))
  print('test set - # of observations): ', len(test_set))
  print('prior, full data set - # of observations): ', len(cleaned_df))

  #--Preparing data generators for feeding data into model--
  # augmentation is applied to the training images only
  train_data_gen = ImageDataGenerator(
          rescale=1./255,
          shear_range=0.2,
          zoom_range=0.2,
          rotation_range=20,
          width_shift_range=0.2,
          height_shift_range=0.2,
          horizontal_flip=True)
  # validation images are only rescaled so the metric reflects unaltered inputs
  test_data_gen = ImageDataGenerator(rescale=1./255)

  # Passing `classes` explicitly pins the label -> index order to dummy_labels
  # for both iterators instead of letting each one infer its own ordering.
  flow_kwargs = dict(
      directory=None, x_col='full_path', y_col='Finding Labels',
      target_size=(224, 224), color_mode='rgb', classes=dummy_labels,
      class_mode='categorical', batch_size=128, shuffle=True, seed=None,
      save_to_dir=None, save_prefix='', save_format='png', subset=None,
      interpolation='nearest', drop_duplicates=True)

  train_gen = train_data_gen.flow_from_dataframe(train_set, **flow_kwargs)
  test_X, test_Y = next(test_data_gen.flow_from_dataframe(test_set, **flow_kwargs))

  return(train_gen, test_X, test_Y)

In [0]:
def model():
  """Build the 4-class classifier: a VGG16 convolutional base plus a dense head.

  The head is Flatten -> Dense(1024, relu) -> BatchNorm -> Dense(1024, relu)
  -> BatchNorm -> Dropout(0.2) -> Dense(4, softmax), attached to the output of
  VGG16 created without its top classifier for 224x224x3 inputs.

  Returns:
    keras.models.Model: the uncompiled model mapping the VGG16 input to the
    4-way softmax output.
  """
  from keras.applications.vgg16 import VGG16
  from keras.layers import Dense, BatchNormalization, Flatten, Dropout
  from keras.models import Model

  base_model = VGG16(include_top=False, input_shape=(224,224,3))

  x = Flatten()(base_model.output)

  x = Dense(units=1024, activation='relu', kernel_initializer='he_normal')(x)
  x = BatchNormalization()(x)

  x = Dense(units=1024, activation='relu', kernel_initializer='he_normal')(x)
  x = BatchNormalization()(x)
  x = Dropout(0.2)(x)

  x = Dense(units=4,activation='softmax')(x)

  # Keras 2 functional API keywords are `inputs` / `outputs` (plural)
  custom_model = Model(inputs=base_model.input, outputs=x)

  return(custom_model)
  
  

In [0]:
def training():
  """Prepare the data, build the model, and fit it.

  Calls preprocess_data() for the training generator and validation batch,
  builds the network with model(), freezes its first 11 layers, compiles it
  with categorical cross-entropy / Adam, and fits for 10 epochs of 20 steps.
  A checkpoint file is written whenever the monitored validation metric
  improves.

  Returns:
    The fitted Keras model.
  """
  from keras.callbacks import ModelCheckpoint

  train_gen, test_X, test_Y = preprocess_data()

  custom_model = model()

  # keep the first 11 layers fixed during fitting
  for layer in custom_model.layers[:11]:
    layer.trainable = False

  custom_model.compile(loss='categorical_crossentropy',
                     optimizer='adam',
                     metrics=['accuracy'])

  #---making checkpoints for storing best weights during training
  checkpointer = ModelCheckpoint(filepath='weights.best.{epoch:02d}-{val_loss:.2f}.hdf5', verbose=1, save_best_only = True)
  callbacks_list = [checkpointer]

  # the callbacks must be handed to fit_generator, otherwise nothing is saved
  custom_model.fit_generator(generator = train_gen, steps_per_epoch = 20, epochs = 10, validation_data = (test_X, test_Y), callbacks = callbacks_list)

  return(custom_model)


In [0]:
# Run the whole pipeline (preprocessing, model construction, fitting) and keep the fitted model.
trained_model = training()

**Saving Model Weights and Architecture**

In [0]:
# `custom_model` only exists inside training(); the fitted model returned by
# training() is bound to `trained_model` at notebook scope.
trained_model.save_weights('xray.h5')

# the architecture is stored separately as JSON
model_json = trained_model.to_json()

with open('xray_network_arch.json', 'w') as file:
  file.write(model_json)

**Loading Model**

In [0]:
from keras.models import model_from_json

# Read back the architecture JSON written by the saving cell.
with open('xray_network_arch.json', 'r') as json_file:
  json_model = json_file.read()
  

# Rebuild the network from its JSON description, then load the weights into it.
# NOTE(review): the weights come from a Drive path ('xray_2.h5'), not the
# 'xray.h5' file saved above — confirm this is the intended file.
trained_model = model_from_json(json_model)
trained_model.load_weights('./drive/My Drive/model/xray_2.h5')

# **Prediction**

In [0]:
def prediction(img_path, trained_model):
  """Run the model on a single image file and return its output vector.

  Args:
    img_path: path of the image to classify.
    trained_model: object exposing `predict(batch)`; it receives a
      (1, 224, 224, 3) array.

  Returns:
    The first (and only) row of `trained_model.predict(...)`.
  """
  import numpy as np
  from keras.preprocessing import image

  # target_size is (height, width); the channel count is not part of it
  img = image.load_img(path=img_path, target_size=(224, 224))

  # scale pixels by 1/255, matching the `rescale=1./255` used by the training generator
  img_arr = image.img_to_array(img) / 255.0

  # add the leading batch axis expected by predict()
  batch = np.expand_dims(img_arr, axis=0)

  return(trained_model.predict(batch)[0])

In [0]:
# Classify one sample image with the loaded model.
img_path = './input_images/images_3/00005274_007.png'
prediction_value = prediction(img_path, trained_model)



In [0]:
import numpy as np  # not otherwise imported at notebook scope

# Label for each output index.
# NOTE(review): assumes the model's output order is exactly this list; when
# `classes` is not passed to flow_from_dataframe Keras orders labels
# alphanumerically — confirm against train_gen.class_indices.
class_names = ['No Finding', 'Atelectasis', 'Infiltration', 'Effusion']

output = int(np.argmax(prediction_value))

if output < len(class_names):
  print(class_names[output])