<h1>Creating a CNN to Find Forged Documents with very little data</h1>

<h2 style="color:red"> Training Phase </h2>

<h4>Import Statements</h4>

In [9]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K

<h4> Having a common image size for all documents </h4>

In [10]:
img_width, img_height = 150, 150

<h4> Splitting the Dataset into Training and Testing Dataset</h4>

In [18]:
train_data_dir = '/home/sreeram_0xb5e/Desktop/Classy/data/train'
validation_data_dir = '/home/sreeram_0xb5e/Desktop/Classy/data/validation'
nb_train_samples = 151
nb_validation_samples = 20
epochs = 50
batch_size = 16

In [19]:
if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
else:
    input_shape = (img_width, img_height, 3)

<h4> Designing and Training a CNN (Convolutional Neural Network) with five layers </h4>

In [20]:
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(Activation('tanh'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (2, 2)))
model.add(Activation('tanh'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('tanh'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (2, 2)))
model.add(Activation('tanh'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('tanh'))
model.add(MaxPooling2D(pool_size=(2, 2)))

<h4>Generalising the Input images by performing basic operations on them</h4>

In [21]:
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('tanh'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

<h4>Compiling the Neural network with a Keras optimiser</h4>

In [22]:
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

<h4>Choosing a Augmentation configuration for Training and Testing datasets</h4>

In [23]:
# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

# this is the augmentation configuration we will use for testing:
# only rescaling
test_datagen = ImageDataGenerator(rescale=1. / 255)


<h4>Choosing the Parameters and training the CNN </h4>

In [24]:
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')

validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')

Found 705 images belonging to 2 classes.
Found 39 images belonging to 2 classes.


In [25]:
model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f99a57570d0>

<h4> Saving the Model to Classify the Forged Documnents </h4>

In [None]:
model.save('large_savepoint.h5')

<h2 style="color:red"> Testing Phase </h2>

<h4>Import Statements</h4>

In [28]:
from keras.models import load_model
import cv2
import numpy as np
from keras import backend as K

<h4>Setting parameters for the input images</h4>

In [29]:
img_width =  150
img_height = 150
if K.image_data_format() == 'channels_first':
    input_shape = (1,3, img_width, img_height)
else:
    input_shape = (1,img_width, img_height, 3)


<h4>Pre-Processing input images</h4>

In [45]:
#processing input image

file=cv2.imread('/home/sreeram_0xb5e/Desktop/Classy/data/validation/real/10.jpg')
file=cv2.resize(file,(150,150))
file=np.array(file).reshape(input_shape)

<h4>Pre-Processing input images</h4>

In [46]:
#Predicting the input image
model = load_model('savepoint.h5')
yFit = model.predict(file, batch_size = 1)
print(yFit)

[[ 0.80295151]]
