<a href="https://colab.research.google.com/github/AlexandraLakka/Deep-Learning-MURA-Dataset/blob/main/DeepLearning_MURA_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Mount Drive

In [None]:
# Mount Google Drive at /content/drive/ so that the paths used below resolve.
from google.colab import drive
drive.mount('/content/drive/')

Import libraries

In [None]:
import gc
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
%matplotlib inline
import cv2
import os
import pandas as pd
from glob import glob
import pickle
from sklearn.utils import shuffle
from skimage.transform import resize
from tensorflow.keras.callbacks import TensorBoard
import time
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras import backend as K # Importing Keras backend (by default it is Tensorflow)
from tensorflow.keras.callbacks import EarlyStopping # Callback for early stopping
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Activation, Input, Conv2D, Dense, Dropout, Flatten, MaxPooling2D, MaxPool2D, BatchNormalization, GaussianNoise # Layers to be used for building our model
from tensorflow.keras.models import Sequential, Model # The class used to create a model
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.utils import to_categorical
from tensorflow.random import set_seed
from tensorflow.keras.constraints import MinMaxNorm
from tensorflow.keras.applications import DenseNet169, Xception, VGG16

print("Tensorflow version " + tf.__version__)

Read and save train data in dataframes from *train_image_paths.csv*

In [None]:
# Dataset root on the mounted Drive and the CSV that lists the training image paths.
data_path = '/content/drive/MyDrive/MURA-v1.1'
train_images_csv = 'train_image_paths.csv'

# The CSV has no header row; its only column holds one image path per line.
train_images_data = pd.read_csv(os.path.join(data_path, train_images_csv), header=None)
train_images_data.columns = ['image_path']

# The indices below assume '/'-separated paths in which component 2 is the body-part
# folder, component 3 is 'patient<id>' and component 4 is '<study>_<negative|positive>'.
train_path_parts = train_images_data['image_path'].str.split('/')
train_images_data['patient_id'] = train_path_parts.str[3].str.replace('patient', '', regex=False)
train_images_data['case'] = train_path_parts.str[2]

# Map the study suffix to an integer label explicitly. Chained
# .replace('negative', 0).replace('positive', 1) relies on pandas' deprecated
# object->int downcasting and lets unexpected suffixes through unnoticed.
train_labels = train_path_parts.str[4].str.split('_').str[1].map({'negative': 0, 'positive': 1})
if train_labels.isna().any():
  raise ValueError('Found image paths whose study folder is neither *_negative nor *_positive')
train_images_data['label'] = train_labels.astype(int)

Prepend */content/drive/MyDrive/* to every entry of the *image_path* column, so that the images can be read correctly from Drive

In [None]:
# Prefix every image path with the Drive mount point.
# This is a single vectorised column assignment: writing through chained indexing
# (frame['image_path'][i] = ...) triggers SettingWithCopyWarning and has no effect
# when pandas Copy-on-Write is enabled.
drive_prefix = '/content/drive/MyDrive/'
train_paths = train_images_data['image_path']
# Paths that already carry the prefix are left alone, so re-running the cell is harmless.
train_images_data['image_path'] = train_paths.where(
    train_paths.str.startswith(drive_prefix), drive_prefix + train_paths)

Split original dataframe, based on the musculoskeletal category

In [None]:
# Run once to build the per-category frames that are later written out as CSVs.
# Each frame keeps the rows whose image path contains the category folder name.
# A boolean mask selects the same rows (with the same index labels) as copying the
# whole frame and dropping non-matching rows one at a time, without the per-row drop.
train_paths = train_images_data['image_path']

train_images_elbow = train_images_data[train_paths.str.contains('XR_ELBOW', regex=False)].copy()
train_images_finger = train_images_data[train_paths.str.contains('XR_FINGER', regex=False)].copy()
train_images_forearm = train_images_data[train_paths.str.contains('XR_FOREARM', regex=False)].copy()
train_images_hand = train_images_data[train_paths.str.contains('XR_HAND', regex=False)].copy()
train_images_humerus = train_images_data[train_paths.str.contains('XR_HUMERUS', regex=False)].copy()
train_images_shoulder = train_images_data[train_paths.str.contains('XR_SHOULDER', regex=False)].copy()
train_images_wrist = train_images_data[train_paths.str.contains('XR_WRIST', regex=False)].copy()

Save training datasets to Drive

In [None]:
# Write one CSV per body part (without the index column) into the training CSV folder.
train_csv_dir = '/content/drive/MyDrive/Colab Notebooks/Test_Project/CSVs_binary/test_csvs'
train_frames_by_category = {
    'XR_ELBOW': train_images_elbow,
    'XR_FINGER': train_images_finger,
    'XR_FOREARM': train_images_forearm,
    'XR_HAND': train_images_hand,
    'XR_HUMERUS': train_images_humerus,
    'XR_SHOULDER': train_images_shoulder,
    'XR_WRIST': train_images_wrist,
}
for category_name, category_frame in train_frames_by_category.items():
  category_frame.to_csv(f'{train_csv_dir}/{category_name}_csv.csv', index=False)

Read and save validation data in dataframes from *valid_image_paths.csv*

In [None]:
# CSV that lists the validation image paths (no header row, one path per line).
val_images_csv = 'valid_image_paths.csv'
val_images_data = pd.read_csv(os.path.join(data_path, val_images_csv), header=None)
val_images_data.columns = ['image_path']

# The indices below assume '/'-separated paths in which component 2 is the body-part
# folder, component 3 is 'patient<id>' and component 4 is '<study>_<negative|positive>'.
val_path_parts = val_images_data['image_path'].str.split('/')
val_images_data['patient_id'] = val_path_parts.str[3].str.replace('patient', '', regex=False)
val_images_data['case'] = val_path_parts.str[2]

# Map the study suffix to an integer label explicitly. Chained
# .replace('negative', 0).replace('positive', 1) relies on pandas' deprecated
# object->int downcasting and lets unexpected suffixes through unnoticed.
val_labels = val_path_parts.str[4].str.split('_').str[1].map({'negative': 0, 'positive': 1})
if val_labels.isna().any():
  raise ValueError('Found image paths whose study folder is neither *_negative nor *_positive')
val_images_data['label'] = val_labels.astype(int)

Prepend */content/drive/MyDrive/* to every entry of the *image_path* column, so that the images can be read correctly from Drive

In [None]:
# Prefix every image path with the Drive mount point.
# This is a single vectorised column assignment: writing through chained indexing
# (frame['image_path'][i] = ...) triggers SettingWithCopyWarning and has no effect
# when pandas Copy-on-Write is enabled.
drive_prefix = '/content/drive/MyDrive/'
val_paths = val_images_data['image_path']
# Paths that already carry the prefix are left alone, so re-running the cell is harmless.
val_images_data['image_path'] = val_paths.where(
    val_paths.str.startswith(drive_prefix), drive_prefix + val_paths)

Split original dataframe, based on the musculoskeletal category

In [None]:
# Run once to build the per-category frames that are later written out as CSVs.
# Each frame keeps the rows whose image path contains the category folder name.
# A boolean mask selects the same rows (with the same index labels) as copying the
# whole frame and dropping non-matching rows one at a time, without the per-row drop.
val_paths = val_images_data['image_path']

val_images_elbow = val_images_data[val_paths.str.contains('XR_ELBOW', regex=False)].copy()
val_images_finger = val_images_data[val_paths.str.contains('XR_FINGER', regex=False)].copy()
val_images_forearm = val_images_data[val_paths.str.contains('XR_FOREARM', regex=False)].copy()
val_images_hand = val_images_data[val_paths.str.contains('XR_HAND', regex=False)].copy()
val_images_humerus = val_images_data[val_paths.str.contains('XR_HUMERUS', regex=False)].copy()
val_images_shoulder = val_images_data[val_paths.str.contains('XR_SHOULDER', regex=False)].copy()
val_images_wrist = val_images_data[val_paths.str.contains('XR_WRIST', regex=False)].copy()

Save validation datasets to Drive

In [None]:
# Write one CSV per body part (without the index column) into the validation CSV folder.
val_csv_dir = '/content/drive/MyDrive/Colab Notebooks/Test_Project/CSVs_binary/validation_csvs'
val_frames_by_category = {
    'XR_ELBOW': val_images_elbow,
    'XR_FINGER': val_images_finger,
    'XR_FOREARM': val_images_forearm,
    'XR_HAND': val_images_hand,
    'XR_HUMERUS': val_images_humerus,
    'XR_SHOULDER': val_images_shoulder,
    'XR_WRIST': val_images_wrist,
}
for category_name, category_frame in val_frames_by_category.items():
  category_frame.to_csv(f'{val_csv_dir}/{category_name}_csv.csv', index=False)

Load train and validation dataframes from Drive

In [None]:
# Read the per-category CSVs back from the folders the save cells write to.
# Every category is read from CSVs_binary/...: some frames were previously read from
# a sibling 'CSVs/...' folder that this notebook never writes, so a fresh run could not
# find them and train/validation frames of one category came from different sources.
# NOTE(review): the labels stored here are the integers 0/1; cast them to str before
# passing a frame to flow_from_dataframe(class_mode='binary').
train_csv_dir = '/content/drive/MyDrive/Colab Notebooks/Test_Project/CSVs_binary/test_csvs'
val_csv_dir = '/content/drive/MyDrive/Colab Notebooks/Test_Project/CSVs_binary/validation_csvs'

train_images_elbow = pd.read_csv(os.path.join(train_csv_dir, 'XR_ELBOW_csv.csv'))
train_images_finger = pd.read_csv(os.path.join(train_csv_dir, 'XR_FINGER_csv.csv'))
train_images_forearm = pd.read_csv(os.path.join(train_csv_dir, 'XR_FOREARM_csv.csv'))
train_images_hand = pd.read_csv(os.path.join(train_csv_dir, 'XR_HAND_csv.csv'))
train_images_humerus = pd.read_csv(os.path.join(train_csv_dir, 'XR_HUMERUS_csv.csv'))
train_images_shoulder = pd.read_csv(os.path.join(train_csv_dir, 'XR_SHOULDER_csv.csv'))
train_images_wrist = pd.read_csv(os.path.join(train_csv_dir, 'XR_WRIST_csv.csv'))

val_images_elbow = pd.read_csv(os.path.join(val_csv_dir, 'XR_ELBOW_csv.csv'))
val_images_finger = pd.read_csv(os.path.join(val_csv_dir, 'XR_FINGER_csv.csv'))
val_images_forearm = pd.read_csv(os.path.join(val_csv_dir, 'XR_FOREARM_csv.csv'))
val_images_hand = pd.read_csv(os.path.join(val_csv_dir, 'XR_HAND_csv.csv'))
val_images_humerus = pd.read_csv(os.path.join(val_csv_dir, 'XR_HUMERUS_csv.csv'))
val_images_shoulder = pd.read_csv(os.path.join(val_csv_dir, 'XR_SHOULDER_csv.csv'))
val_images_wrist = pd.read_csv(os.path.join(val_csv_dir, 'XR_WRIST_csv.csv'))

Shuffle the dataframes so that the labels are in random order and the model cannot learn from their ordering (this code was executed for each of the dataframes created)

In [None]:
# Shuffle the rows and rebuild a 0..n-1 index so later positional loops line up.
# A fixed seed makes the row order (and therefore the pickled arrays derived from it)
# reproducible when the frames are freshly loaded and this cell is run once.
SHUFFLE_SEED = 42

train_images_humerus = shuffle(train_images_humerus, random_state=SHUFFLE_SEED).reset_index(drop=True)
val_images_humerus = shuffle(val_images_humerus, random_state=SHUFFLE_SEED).reset_index(drop=True)

Create X_train from *image_path* in the train datasets. All images are converted to grayscale and resized to 320x320 (an attempt was made to store the images with 3 channels, but it failed due to RAM constraints).
[The process was repeated for every dataset, changing the name of the dataframe variable each time.]

In [None]:
# Build the training image tensor: one grayscale IMG_SIZE x IMG_SIZE x 1 float32 image per row.
IMG_SIZE = 320
X_train = np.empty((len(train_images_humerus), IMG_SIZE, IMG_SIZE, 1), dtype=np.float32)
# Iterate positionally so the loop does not depend on the frame having a 0..n-1 index.
for row, image_path in enumerate(train_images_humerus['image_path']):
  image = cv2.imread(image_path)
  if image is None:
    # cv2.imread signals a missing or undecodable file by returning None; fail with the path
    # instead of letting cvtColor raise an unrelated assertion error.
    raise FileNotFoundError('Could not read image: ' + str(image_path))
  image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  # preserve_range=True keeps the original 0-255 intensity scale; scaling to [0, 1] happens later.
  X_train[row] = resize(image, output_shape=(IMG_SIZE, IMG_SIZE, 1), preserve_range=True)

Create Y_train from *label* in train datasets

In [None]:
# Training labels as a float32 vector, in the same row order as the training frame.
# Taken positionally in one step rather than copied element by element through
# label-based indexing.
Y_train = train_images_humerus['label'].to_numpy(dtype=np.float32)

Store X_train, Y_train as pickles in Drive, in order to avoid re-transforming the images

In [None]:
# Persist the training arrays so the images do not have to be re-read and resized.
# The file names match the ones the loading cell reads ('*_shuffled.pickle'); the arrays
# built above are single-channel, so the former '_rgb' suffix did not describe them.
# 'with' guarantees the file is closed even if pickling fails part-way.
with open("/content/drive/MyDrive/Colab Notebooks/Test_Project/pickle/XR_HUMERUS/X_train_shuffled.pickle", "wb") as pickle_out:
  pickle.dump(X_train, pickle_out)

with open("/content/drive/MyDrive/Colab Notebooks/Test_Project/pickle/XR_HUMERUS/Y_train_shuffled.pickle", "wb") as pickle_out:
  pickle.dump(Y_train, pickle_out)

Create X_val from *image_path* in the validation datasets. All images are converted to grayscale and resized to 320x320 (an attempt was made to store the images with 3 channels, but it failed due to RAM constraints). [The process was repeated for every dataset, changing the name of the dataframe variable each time.]

In [None]:
# Build the validation image tensor: one grayscale IMG_SIZE x IMG_SIZE x 1 float32 image per row.
X_val = np.empty((len(val_images_humerus), IMG_SIZE, IMG_SIZE, 1), dtype=np.float32)
# Iterate positionally so the loop does not depend on the frame having a 0..n-1 index.
for row, image_path in enumerate(val_images_humerus['image_path']):
  image = cv2.imread(image_path)
  if image is None:
    # cv2.imread signals a missing or undecodable file by returning None; fail with the path
    # instead of letting cvtColor raise an unrelated assertion error.
    raise FileNotFoundError('Could not read image: ' + str(image_path))
  image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  # preserve_range=True keeps the original 0-255 intensity scale; scaling to [0, 1] happens later.
  X_val[row] = resize(image, output_shape=(IMG_SIZE, IMG_SIZE, 1), preserve_range=True)

Create Y_val from *label* in validation datasets

In [None]:
# Validation labels as a float32 vector, in the same row order as the validation frame.
# Taken positionally in one step rather than copied element by element through
# label-based indexing.
Y_val = val_images_humerus['label'].to_numpy(dtype=np.float32)

Store X_val, Y_val as pickles in Drive, in order to avoid re-transforming the images

In [None]:
# Persist the validation arrays so the images do not have to be re-read and resized.
# The file names match the ones the loading cell reads ('*_shuffled.pickle'); the arrays
# built above are single-channel, so the former '_rgb' suffix did not describe them.
# 'with' guarantees the file is closed even if pickling fails part-way.
with open("/content/drive/MyDrive/Colab Notebooks/Test_Project/pickle/XR_HUMERUS/X_val_shuffled.pickle", "wb") as pickle_out:
  pickle.dump(X_val, pickle_out)

with open("/content/drive/MyDrive/Colab Notebooks/Test_Project/pickle/XR_HUMERUS/Y_val_shuffled.pickle", "wb") as pickle_out:
  pickle.dump(Y_val, pickle_out)

Load X_train, Y_train, X_val, Y_val from Drive

In [None]:
# Load the cached arrays. Each file is opened in a 'with' block so its handle is closed
# again; pickle.load(open(...)) leaves the handle open.
# NOTE: unpickling executes arbitrary code, so only load pickle files you created yourself.
pickle_dir = '/content/drive/MyDrive/Colab Notebooks/Test_Project/pickle/XR_HUMERUS'

with open(pickle_dir + '/X_train_shuffled.pickle', 'rb') as pickle_in:
  X_train = pickle.load(pickle_in)
with open(pickle_dir + '/Y_train_shuffled.pickle', 'rb') as pickle_in:
  Y_train = pickle.load(pickle_in)

with open(pickle_dir + '/X_val_shuffled.pickle', 'rb') as pickle_in:
  X_val = pickle.load(pickle_in)
with open(pickle_dir + '/Y_val_shuffled.pickle', 'rb') as pickle_in:
  Y_val = pickle.load(pickle_in)

# Turn the labels into int, as the values are stored as float
Y_train = Y_train.astype(int)
Y_val = Y_val.astype(int)

Normalize images

In [None]:
# Divide both image arrays by 255, writing the result back into the same buffers.
# The division is in place, so running this cell a second time divides the data again.
for pixel_array in (X_train, X_val):
  np.divide(pixel_array, 255, out=pixel_array)

Set batch size and epochs

In [None]:
# Training hyper-parameters: samples per gradient step and number of passes over the data.
batch_size, epochs = 32, 30

VGG16(For the pretrained models, 3-channel images had to be used, however the model could only be tested for HUMERUS, since the process of resizing the images could not be performed on the majority of the categories - rejected)

In [None]:
# Frozen VGG16 feature extractor (ImageNet weights, no classifier head) on 320x320 RGB input.
base_model = VGG16(include_top=False, weights='imagenet', input_shape=(320, 320, 3))
base_model.trainable = False  # only the new head below is trained

# Head: flatten the convolutional features, apply dropout, emit one sigmoid probability.
model = Sequential([
    base_model,
    Flatten(input_shape=base_model.output_shape[1:]),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

DenseNet169(For the pretrained models, 3-channel images had to be used, however the model could only be tested for HUMERUS, since the process of resizing the images could not be performed on the majority of the categories - rejected)

In [None]:
# Frozen DenseNet169 feature extractor (ImageNet weights, no classifier head) on 320x320 RGB input.
base_model = DenseNet169(include_top=False, weights='imagenet', input_shape=(320, 320, 3))
base_model.trainable = False  # only the new head below is trained

# Head: flatten the convolutional features, apply dropout, emit one sigmoid probability.
model = Sequential([
    base_model,
    Flatten(input_shape=base_model.output_shape[1:]),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

Model made from scratch - Final version(accepted solution)

In [None]:
# Rejected experiments: initialising the weights with
# tf.keras.initializers.GlorotUniform() or tf.keras.initializers.GlorotNormal().

# One row per convolutional stage: (filters, kernel size, constrain kernel?, dropout after?).
# Every stage is Conv2D -> ReLU -> BatchNormalization -> 2x2 max-pooling; all stages but the
# first put a MinMaxNorm constraint on the kernel, and every second stage ends with Dropout(0.2).
conv_stages = [
    (16, (7, 7), False, False),
    (32, (5, 5), True, True),
    (64, (5, 5), True, False),
    (64, (3, 3), True, True),
    (128, (3, 3), True, False),
    (128, (3, 3), True, True),
]

model = Sequential()
model.add(Input(shape=X_train.shape[1:]))  # input shape is taken from the training array

for n_filters, kernel_size, constrain_kernel, dropout_after in conv_stages:
  conv_options = {}
  if constrain_kernel:
    conv_options['kernel_constraint'] = MinMaxNorm(min_value=0.0, max_value=1.0, rate=1.0, axis=0)
  model.add(Conv2D(n_filters, kernel_size, **conv_options))
  model.add(Activation('relu'))
  model.add(BatchNormalization(momentum=0.99))
  model.add(MaxPooling2D(pool_size=(2, 2)))
  if dropout_after:
    model.add(Dropout(0.2))

# Classifier head: flatten, add Gaussian noise (a training-time regulariser), one sigmoid unit.
model.add(Flatten())
model.add(GaussianNoise(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the model built in the cell above.
model.summary()

Create log of training results to check anytime on TensorBoard 

In [None]:
# Run label for TensorBoard; the trailing Unix timestamp keeps each run in its own log folder.
run_timestamp = int(time.time())
NAME = f"MURA-XR-SHOULDER-attempt-1-Adam-0.0001-Noise-0.2-1x16-1x32-2x64-2x128-EarlyStopping(val_loss)-{run_timestamp}"
tensorboard = TensorBoard(log_dir=f'/content/drive/MyDrive/Colab Notebooks/logs/{NAME}')

Use Early Stopping for validation loss with patience=10

In [None]:
# Early-stopping callback watching the validation loss with a patience of 10 epochs.
callback = EarlyStopping(
    monitor='val_loss',
    patience=10,
)

In [None]:
# Train on the in-memory arrays, logging to TensorBoard and stopping early on val_loss.
# The callbacks list uses `callback` (the EarlyStopping instance defined above); the
# previously referenced `cp_callback` is not defined anywhere in this notebook.
# The result is assigned so the cell does not echo an object as its output.
history = model.fit(
    x=X_train,
    y=Y_train,
    validation_data=(X_val, Y_val),
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[tensorboard, callback],
)

# In the case of using generators to read and transform data:
# model1.fit(train_generator,
#                 workers=8,
#                 validation_data=val_generator,
#                 epochs=10, callbacks=[tensorboard]
#         )

Load TensorBoard to see results

In [None]:
# Load the TensorBoard notebook extension (provides the %tensorboard magic used below).
%load_ext tensorboard

In [None]:
# Open TensorBoard on the log directory that the TensorBoard callback writes to.
%tensorboard --logdir='/content/drive/MyDrive/Colab Notebooks/logs'

Make prediction

In [None]:
# Classify every image of one study and take a majority vote over the per-image results.
case_study = '/content/drive/MyDrive/MURA-v1.1/valid/XR_HUMERUS/patient11186/study1_positive/'
normal = 0
abnormal = 0
IMG_SIZE = 320

for file_name in sorted(os.listdir(case_study)):
  image_path = os.path.join(case_study, file_name)
  image = cv2.imread(image_path)
  if image is None:
    # cv2.imread returns None for files it cannot decode; skip them instead of crashing.
    print('Skipping unreadable file: ' + image_path)
    continue
  image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  # Apply the same preprocessing as the training arrays: skimage resize to
  # IMG_SIZE x IMG_SIZE x 1 on the 0-255 scale, then division by 255.
  image = resize(image, output_shape=(IMG_SIZE, IMG_SIZE, 1), preserve_range=True)
  image = (image.astype(np.float32) / 255).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
  prediction = model.predict(image)
  print (prediction)
  # Compare the sigmoid output itself with the threshold. Wrapping it in int() truncated
  # every probability below 1.0 to 0, so no image could ever be counted as abnormal.
  if prediction[0][0] > 0.5:
    abnormal += 1
    print('Abnormal:' + str(abnormal))
  else:
    normal += 1
    print('Normal:' + str(normal))

if normal > abnormal:
  print('This case study is normal!')
elif abnormal > normal:
  print('This case study is abnormal!')
else: # if counters are equal
  print('Reexamination is required!')

Attempts with ImageDataGenerator

*flow_from_dataframe()* was initially used, to avoid creating and storing pickle files. Different dataframes were created, since *y_col* requires strings (*normal* or *abnormal* were used instead of *1* or *0*). [Rejected, because it was slow in *fit()* for the first epoch]

In [None]:
# Generators that read the images from disk, driven by the dataframes.
# class_mode='binary' requires the y_col values to be strings, so the label column is cast
# to str on a copy (a no-op when the labels are already strings).
train_datagen = ImageDataGenerator(rescale=1./255, horizontal_flip=True, brightness_range=[0.2,1.0])
train_generator = train_datagen.flow_from_dataframe(train_images_hand.astype({'label': str}),
                                       x_col='image_path',
                                       y_col='label',target_size = (320, 320),
                                       color_mode='rgb',
                                       batch_size = batch_size,
                                       class_mode = 'binary',
                                       shuffle = True, validate_filenames=False)

# Validation data gets rescaling only (no augmentation) and is not shuffled, so batches
# come in dataframe order, as in the flow()-based validation generator.
val_datagen = ImageDataGenerator(rescale=1./255)
val_generator = val_datagen.flow_from_dataframe(val_images_hand.astype({'label': str}),
                                       x_col='image_path',
                                       y_col='label',target_size = (320, 320),
                                       color_mode='rgb',
                                       batch_size = batch_size,
                                       class_mode = 'binary',
                                       shuffle = False, validate_filenames=False)

Use instead *flow()*, with X_train, Y_train, X_val, Y_val

In [None]:
# Generators over the in-memory arrays.
# NOTE(review): both generators rescale by 1/255; if X_train / X_val were already divided
# by 255 in the normalisation cell, the data is scaled a second time here - confirm intended.
# Earlier variant without rescaling:
#   ImageDataGenerator(horizontal_flip=True, brightness_range=[0.2,1.0])
train_augmentation = dict(rescale=1./255, horizontal_flip=True, brightness_range=[0.2,1.0])
train_datagen = ImageDataGenerator(**train_augmentation)
train_generator = train_datagen.flow(X_train, Y_train, batch_size=batch_size, shuffle=True)

# Validation: rescaling only, batches in array order.
val_datagen = ImageDataGenerator(rescale=1./255)
val_generator = val_datagen.flow(X_val, Y_val, batch_size=batch_size, shuffle=False)

Try to use DataGenerator and normalize the data according to mean and standard deviation

In [None]:
# Standardise the images with dataset-level mean / standard deviation.
# A generator using featurewise_center / featurewise_std_normalization must be fit() on
# data before use. The training generator was previously created from a generator that was
# never fit, so only the validation data was standardised. One fitted generator now serves both.

# Take the first `sample_size` untransformed training images (shuffle=False) as the sample.
sample_size=100
raw_train_generator = ImageDataGenerator().flow(
        X_train,Y_train,
        batch_size=sample_size,
        shuffle=False)

# get data sample (built-in next() works on every Keras iterator version)
batch = next(raw_train_generator)
data_sample = batch[0]

datagen = ImageDataGenerator(
        featurewise_center=True,
        featurewise_std_normalization= True)

# fit generator to sample from training data
datagen.fit(data_sample)

# Same fitted object under the name used for the validation generator.
image_generator = datagen

train_generator = datagen.flow(X_train, y=Y_train,
                                       batch_size = batch_size,
                                       shuffle = True)

val_generator = image_generator.flow(
            X_val,
            y=Y_val,
            batch_size=batch_size,
            shuffle=False)