In [30]:
import keras, os
import numpy as np
from keras.models import Model
from keras.layers import Dense
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16

In [2]:
path_to_train_folder = os.path.join(os.path.pardir, "data", "train_images")
path_to_test_folder = os.path.join(os.path.pardir, "data", "test_images")
path_to_validation_folder = os.path.join(os.path.pardir, "data", "valid_images")

## Use ImageDataGenerator to import all the images to the model. ImageDataGenerator will automatically label the data and map all the labels to its specific data.

In [12]:
trdata = ImageDataGenerator()
traindata = trdata.flow_from_directory(directory=path_to_train_folder,target_size=(224,224))
tsdata = ImageDataGenerator()
testdata = tsdata.flow_from_directory(directory=path_to_test_folder, target_size=(224,224))
valdata = ImageDataGenerator()
validationdata = tsdata.flow_from_directory(directory=path_to_validation_folder, target_size=(224,224))

Found 3200 images belonging to 16 classes.
Found 1600 images belonging to 16 classes.
Found 1600 images belonging to 16 classes.


## Import VGG16 from keras with pre-trained weights which was trained on imagenet

In [5]:
vggmodel = VGG16(weights='imagenet', include_top=True)

In [6]:
vggmodel.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

## Use the first 19 layers of the model as they are, no training needed

In [7]:
for layers in (vggmodel.layers)[:19]:
    print(layers)
    layers.trainable = False

<keras.engine.input_layer.InputLayer object at 0x000001E71829C588>
<keras.layers.convolutional.Conv2D object at 0x000001E718280748>
<keras.layers.convolutional.Conv2D object at 0x000001E7182A4488>
<keras.layers.pooling.MaxPooling2D object at 0x000001E7182A8B88>
<keras.layers.convolutional.Conv2D object at 0x000001E7183B4FC8>
<keras.layers.convolutional.Conv2D object at 0x000001E718639A08>
<keras.layers.pooling.MaxPooling2D object at 0x000001E71828B748>
<keras.layers.convolutional.Conv2D object at 0x000001E718644A88>
<keras.layers.convolutional.Conv2D object at 0x000001E7182B96C8>
<keras.layers.convolutional.Conv2D object at 0x000001E7182B9BC8>
<keras.layers.pooling.MaxPooling2D object at 0x000001E71836FB48>
<keras.layers.convolutional.Conv2D object at 0x000001E71836F7C8>
<keras.layers.convolutional.Conv2D object at 0x000001E71835ED48>
<keras.layers.convolutional.Conv2D object at 0x000001E718340188>
<keras.layers.pooling.MaxPooling2D object at 0x000001E7182F1808>
<keras.layers.convoluti

## The last dense layer of the model should be a 16 unit softmax dense layer - because there are 16 typs of documents

In [8]:
X= vggmodel.layers[-2].output
predictions = Dense(16, activation="softmax")(X)
model_final = Model(input = vggmodel.input, output = predictions)

  This is separate from the ipykernel package so we can avoid doing imports until


In [20]:
model_final.compile(loss = "categorical_crossentropy", optimizer = optimizers.SGD(lr=0.0001, momentum=0.9), metrics=["accuracy"]) 

In [21]:
model_final.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0   

In [48]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
checkpoint = ModelCheckpoint("vgg16_1.h5", monitor='val_accuracy', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1)
early = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=40, verbose=1, mode='auto')
model_final.fit_generator(generator= traindata, steps_per_epoch= 2, epochs= 10, validation_data = validationdata, validation_steps=1, callbacks=[checkpoint,early])

Epoch 1/10

Epoch 00001: val_accuracy improved from -inf to 0.56250, saving model to vgg16_1.h5
Epoch 2/10

Epoch 00002: val_accuracy improved from 0.56250 to 0.68750, saving model to vgg16_1.h5
Epoch 3/10

Epoch 00003: val_accuracy did not improve from 0.68750
Epoch 4/10

Epoch 00004: val_accuracy did not improve from 0.68750
Epoch 5/10

Epoch 00005: val_accuracy did not improve from 0.68750
Epoch 6/10

Epoch 00006: val_accuracy did not improve from 0.68750
Epoch 7/10

Epoch 00007: val_accuracy did not improve from 0.68750
Epoch 8/10

Epoch 00008: val_accuracy did not improve from 0.68750
Epoch 9/10

Epoch 00009: val_accuracy did not improve from 0.68750
Epoch 10/10

Epoch 00010: val_accuracy did not improve from 0.68750


<keras.callbacks.callbacks.History at 0x1e7a99e1148>