# For days 2-3: transfer learning using VGG16, model training

In [1]:
import os;
# Set KMP_DUPLICATE_LIB_OK for this process. This runs in the first cell, i.e.
# before TensorFlow / NumPy are imported further down.
# NOTE(review): presumably a workaround for the "OpenMP runtime already
# initialized" abort that some macOS TensorFlow installs hit — confirm it is
# still required, since it can hide a genuinely duplicated runtime.
os.environ['KMP_DUPLICATE_LIB_OK']='True';

In [2]:
import tensorflow as tf;
import numpy as np;
import pandas as pd;
import matplotlib.pyplot as plt;

In [4]:
# Load data
#
# Root folder that contains the train/, val/ and test/ sub-directories (one
# sub-folder per class inside each). The default is the original location;
# set the CHEST_XRAY_DATA_DIR environment variable to run the notebook on
# another machine without editing this cell.
DATA_DIR = os.environ.get(
    'CHEST_XRAY_DATA_DIR',
    '/Users/chensuyun/Dropbox/chest-x-ray/data',
)
if not os.path.isdir(DATA_DIR):
    # Fail early with an actionable message instead of deep inside Keras.
    raise FileNotFoundError(
        'Data directory not found: {!r}. '
        'Set CHEST_XRAY_DATA_DIR to the folder holding train/, val/ and test/.'
        .format(DATA_DIR)
    )

IMAGE_SIZE = (224, 224)   # spatial size the VGG16 base is built with: (224, 224, 3)
TRAIN_BATCH_SIZE = 100
EVAL_BATCH_SIZE = 1       # validation/test images are fed one at a time

# Only the training images are augmented (small shifts, rotation, zoom);
# validation and test images are only rescaled.
# NOTE(review): pixel values are scaled to [0, 1] here, whereas Keras documents
# tf.keras.applications.vgg16.preprocess_input as the preprocessing matching
# the ImageNet VGG16 weights. Left unchanged so that weights already saved by
# this notebook stay valid — confirm whether to switch (any notebook that
# loads those weights would have to switch as well).
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    width_shift_range=0.05,
    height_shift_range=0.15,
    rotation_range=10,
    zoom_range=0.1,
)
validation_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)


def _flow_from_subdir(datagen, subdir, batch_size, shuffle):
    """Return a directory iterator over DATA_DIR/<subdir>.

    Images are resized to IMAGE_SIZE and labelled in "binary" class mode.
    The colour mode is left at the Keras default (no grayscale conversion).
    """
    return datagen.flow_from_directory(
        os.path.join(DATA_DIR, subdir),
        target_size=IMAGE_SIZE,
        batch_size=batch_size,
        class_mode="binary",
        shuffle=shuffle,
    )


# Training batches are shuffled; validation/test keep directory order.
train_generator = _flow_from_subdir(
    train_datagen, 'train', batch_size=TRAIN_BATCH_SIZE, shuffle=True)
validation_generator = _flow_from_subdir(
    validation_datagen, 'val', batch_size=EVAL_BATCH_SIZE, shuffle=False)
test_generator = _flow_from_subdir(
    test_datagen, 'test', batch_size=EVAL_BATCH_SIZE, shuffle=False)

# Number of classes (kept for reference; not used by the cells shown here).
num_classes = 2

Found 5216 images belonging to 2 classes.
Found 16 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


In [5]:
# Directory number: every run of the notebook writes into its own ./<n> folder.
def make_run_dir(base='.'):
    """Create and return the first free numbered run directory under ``base``.

    Candidates ``base/1``, ``base/2``, ... are tried in order. The directory
    is claimed by attempting ``os.mkdir`` directly rather than checking first,
    so a name that is already taken — by a directory *or* by a plain file, or
    by another process that got there in between — is skipped instead of
    raising.

    Args:
        base: Existing folder in which the numbered directory is created.

    Returns:
        ``(index, path)`` where ``path`` is ``'<base>/<index>'`` and now
        exists as a directory.

    Raises:
        OSError: for any failure other than "already exists" (for example a
            missing ``base`` or insufficient permissions).
    """
    index = 1
    while True:
        path = '{}/{}'.format(base, index)
        try:
            os.mkdir(path)
        except FileExistsError:
            index += 1   # name taken; try the next number
        else:
            return index, path


# `i` is the run number, `savedir` the folder later cells write into.
i, savedir = make_run_dir()

In [6]:
# --- Build: frozen ImageNet VGG16 convolutional base + small dense head -----
image_input = tf.keras.layers.Input(shape=(224, 224, 3))
pmodel = tf.keras.applications.vgg16.VGG16(
    weights='imagenet',
    include_top=False,
    input_tensor=image_input,
)
pmodel.trainable = False  # only the layers stacked on top of the base are trained

# NOTE(review): Dense(256) has no activation, so it feeds the sigmoid unit
# through dropout only — confirm this is intended. It is left as-is because
# only the weights are saved below and must match this architecture.
model = tf.keras.Sequential([
    pmodel,
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

# Binary cross-entropy is also tracked as a metric, next to accuracy, so the
# metric order after the loss is: binary_crossentropy, accuracy.
model.compile(
    loss=tf.keras.losses.binary_crossentropy,
    optimizer=tf.keras.optimizers.RMSprop(),
    metrics=[tf.keras.metrics.binary_crossentropy, 'accuracy'],
)

# --- Fit ---------------------------------------------------------------------
# NOTE(review): the *test* generator is passed as validation data, so every
# "val_*" entry in the history is measured on the test set; validation_generator
# is not used in this cell — confirm this is deliberate.
history = model.fit(
    train_generator,
    epochs=5,
    validation_data=test_generator,
)

# --- Evaluate and persist ----------------------------------------------------
# score == [loss, binary_crossentropy, accuracy]
score = model.evaluate(test_generator, verbose=0)
test_loss, test_accuracy = score[0], score[2]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

weights_path = savedir + '/dense-layer_weights.h5'
model.save_weights(weights_path)

Train for 53 steps, validate for 624 steps
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test loss: 1.121593779681344
Test accuracy: 0.88301283


In [7]:
# Display the evaluation result. Entries: 0 = total loss (cross-entropy plus
# any regularization penalty), 1 = binary cross-entropy metric, 2 = accuracy.
# No layer of the model is given a regularizer, so entries 0 and 1 coincide up
# to floating-point precision (see the output below).
score

[1.121593779681344, 1.121593, 0.88301283]

In [8]:
# Save the per-epoch training history as JSON next to the weights.
history_json = pd.DataFrame(history.history)
with open(savedir + '/history.json', 'w') as f:
    history_json.to_json(f)


def save_history_plot(train_values, test_values, title, ylabel, path):
    """Plot one train/test curve pair against the epoch index and save it.

    Args:
        train_values: Per-epoch values measured on the training data.
        test_values: Per-epoch values measured on the data passed to
            ``validation_data`` during fitting (labelled "Test" in the plot).
        title: Figure title.
        ylabel: Label of the y axis.
        path: File the figure is written to.

    The figure is closed after saving, so nothing is left open (and no empty
    figure is echoed as cell output).
    """
    fig, ax = plt.subplots()
    ax.plot(train_values)
    ax.plot(test_values)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Test'], loc='upper left')
    fig.savefig(path)
    plt.close(fig)


curves = history.history

# Training & validation accuracy
save_history_plot(
    curves['accuracy'], curves['val_accuracy'],
    'Model accuracy', 'Accuracy',
    savedir + '/accuracy.png',
)

# Training & validation loss (total loss, including any regularization term)
save_history_plot(
    curves['loss'], curves['val_loss'],
    'Model loss', 'Loss',
    savedir + '/loss.png',
)

# Training & validation loss without the regularization term
save_history_plot(
    curves['binary_crossentropy'], curves['val_binary_crossentropy'],
    'Model loss without regularization term', 'Loss without Regularization Term',
    savedir + '/loss-without-regularization.png',
)

# Regularization penalty = total loss minus plain cross-entropy.
# (The previous title called this a "reconstruction error", which this
# classifier does not have; the y-label typo "Penality" is fixed as well.)
save_history_plot(
    np.array(curves['loss']) - np.array(curves['binary_crossentropy']),
    np.array(curves['val_loss']) - np.array(curves['val_binary_crossentropy']),
    'Regularization penalty (loss - cross-entropy)', 'Penalty',
    savedir + '/regularization.png',
)


<Figure size 432x288 with 0 Axes>

In [9]:
# Print the layer-by-layer summary of the assembled model: output shapes and
# parameter counts, split into trainable and non-trainable totals.
model.summary();

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
dense (Dense)                (None, 256)               6422784   
_________________________________________________________________
dropout (Dropout)            (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 257       
Total params: 21,137,729
Trainable params: 6,423,041
Non-trainable params: 14,714,688
_________________________________________________________________
