# **'Fraudify.py' - A Convolutional Neural Network based signature verification program**




---


## *With reference to the work of multiple authors, which helped in the implementation and/or understanding:*


*  Real-Forge-Signature-Detection - GitHub (akhiilkasare, 2020)


*   Image Classification Using CNN -Understanding Computer Vision - (Analytics Vidhya 2021)



*   CEDAR Dataset 
*   CNN for Human Activity Recognition - GitHub (aqibsaeed, 2019)


*     IUST Pattern Recognition Projects - GitHub (Parsa Abbasi 2021)

*   Handwritten Signature - Classification - GitHub (Yasha, G., 2021)


*  S3: pytorch Speaker verification - test model - GitHub (VERMA , G, K., 2021)
 
---











---
## *Phase 1: CNN Fraudify.py application*  

Creating a CNN-based Static Signature Verification application to comparatively assess and analyse its accuracy and optimisation, in comparison to the Phase 2 SCNN-based Fraudify application.


In [1]:
#Importing all required libraries for the System


import pandas as pd
import numpy as np
import skimage.io as sk
from skimage import img_as_ubyte
from skimage.io import imread
from scipy import spatial
from tensorflow.keras.layers import Dense, Flatten, Input, Lambda, MaxPooling2D, Conv2D, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img
from keras.models import Sequential
from PIL import Image
import cv2
import matplotlib.pyplot as plt

import tensorflow as tf
import datetime, os

**Checking if GPU is used**

In [2]:
# Stop the notebook early unless TensorFlow reports the first GPU device.

import tensorflow as tf

device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
  print('Found GPU at: {}'.format(device_name))
else:
  raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


**Mounting google drive**

In [3]:
# Mount Google Drive inside the Colab runtime at /content/gdrive so that the
# dataset folders stored on the drive can be read by path.
from google.colab import drive
drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [4]:
# Google Drive locations of the CEDAR image directories used for testing
# and for training respectively.

test_path = '/content/gdrive/MyDrive/FYP/Test/CEDAR_1'
train_path = '/content/gdrive/MyDrive/FYP/Train/CEDAR_1'

In [5]:
# Pre-processing constants: every image is resized to the same square
# resolution so that all data instances share identical dimensions.
Image_Width = Image_Height = 512
Image_Size = (Image_Width, Image_Height)

# The data instances are RGB, hence three colour channels.
Image_Channel = 3

# Number of images per batch.
batch_size = 64

**Creating the CNN Model**

In [6]:
# CNN model: five convolutional stages followed by a fully connected head.
# Every stage is Conv2D (3x3 kernel, ReLU) -> BatchNormalization -> 2x2 max
# pooling, with the filter count doubling from 32 up to 512.
# The head uses two Dense layers (per the original author's note, based on
# Yoshua Bengio's recommendation): a 256-unit hidden layer and a 2-unit
# softmax output, one unit per class.
# Dropout (rate 0.25) is applied in the head only, to reduce the chance of
# overfitting: it randomly zeroes the outgoing edges of hidden units at each
# training update. Per-stage dropout and an additional 256-filter stage are
# left disabled.

model = Sequential([
    # Stage 1 - also declares the input shape of the network.
    Conv2D(32, (3, 3), activation='relu',
           input_shape=(Image_Width, Image_Height, Image_Channel)),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 2
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 3
    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 4
    Conv2D(256, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 5 (labelled "Conv layer 6" in earlier revisions of this notebook)
    Conv2D(512, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    # Fully connected head
    Flatten(),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.25),
    Dense(2, activation='softmax'),
])

# Two-class softmax output trained with categorical cross-entropy and Adam.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Print a layer-by-layer summary of the model built above.
model.summary()

**Establishing Hyper-parameters**

In [8]:
# Training callbacks:
#  * EarlyStopping stops training once the validation loss has gone 10
#    consecutive epochs without improving.
#  * ReduceLROnPlateau multiplies the learning rate by 0.2 (never going below
#    0.00025) when validation accuracy has not improved for 5 epochs.
#  * TensorBoard (currently disabled) would add further diagnostic charts and
#    graphs for analysis.
# NOTE(review): training below runs for 10 epochs, so a patience of 10 leaves
# no room for EarlyStopping to fire - confirm this is intended.

from keras.callbacks import EarlyStopping, ReduceLROnPlateau

early_stop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    verbose=0,
    mode='auto',
)
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    patience=5,
    verbose=1,
    factor=0.2,
    min_lr=0.00025,
)

#logdir = os.path.join("logs" , datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
#tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

# Append tensorboard_callback here once the two lines above are enabled.
callbacks = [
    early_stop,
    learning_rate_reduction,
]

**Training data generating - Preprocessing and Data Augmenting**

In [9]:
# Training-image pipeline: pixel values are scaled into the 0-1 range and
# random augmentation (rotation, shear, zoom, shifts, horizontal flip) is
# applied to every batch.

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)

In [10]:
# Stream the CEDAR training images (1269 images across the genuine and forged
# classes; the train/test split is 8:2) from the training directory.
# The directory and batch size come from the constants declared earlier in
# the notebook (train_path, batch_size) rather than being repeated here, so
# they only need changing in one place.
# Batch size controls the accuracy of the estimate of the error gradient when
# training neural networks; 64 was found to give the best validation accuracy.
# Labels are one-hot encoded (class_mode='categorical') to match the softmax
# output of the model.

train_generator = train_datagen.flow_from_directory(train_path,
                                              target_size=Image_Size,
                                              batch_size=batch_size,
                                              class_mode='categorical')

Found 1269 images belonging to 2 classes.


**Testing data generating - Preprocessing (No Data Augmentation)**

In [11]:
# Test images are only rescaled into the 0-1 range; no augmentation is
# applied to them.

test_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [12]:
# Stream the CEDAR testing images (306 images across the genuine and forged
# classes; the train/test split is 8:2) from the testing directory.
# The directory and batch size come from the constants declared earlier in
# the notebook (test_path, batch_size).
#
# shuffle=False is essential: flow_from_directory shuffles by default, which
# would make the order of model.predict(test_generator) differ from the order
# of test_generator.classes. Every metric computed from those two arrays
# (accuracy, F1, confusion matrix) is only valid when the generator yields
# the images in a fixed, directory order.

test_generator = test_datagen.flow_from_directory(test_path,
                                                  target_size=Image_Size,
                                                  batch_size=batch_size,
                                                  class_mode='categorical',
                                                  shuffle=False)

Found 306 images belonging to 2 classes.


**Training data against model**

In [None]:
# Train the model. Through tuning, 10 epochs were found to give a
# consistently growing validation accuracy.
# One epoch is a full pass over each generator, and the callbacks defined
# earlier are attached to the run.

epochs = 10

history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=epochs,
    validation_data=test_generator,
    validation_steps=len(test_generator),
    callbacks=callbacks,
)

In [14]:
#Loading the Tensorboard magic tool, for more graphs and histograms on each section within the training.

#%load_ext tensorboard
#%tensorboard --logdir logs

In [15]:
# Save the trained model to disk in HDF5 format as
# 'TrainedSignature_CNNmodel.h5'. The file name must be identical to the one
# passed to load_model() in the next cell, otherwise that cell fails (or
# silently loads a stale file from an earlier run).

from tensorflow.keras.models import load_model

model.save('TrainedSignature_CNNmodel.h5')

In [40]:
# Reload the trained model from its HDF5 file on disk.
# NOTE(review): the file name must match the one written by model.save() - confirm.

model = load_model('TrainedSignature_CNNmodel.h5')

In [None]:
# Run the model over every image yielded by the test generator and display
# the raw per-class outputs of the softmax layer.
pred = model.predict(test_generator)
pred

In [None]:
# Turn the per-class probabilities into a single predicted class per image by
# selecting the class with the highest probability.
# argmax returns the index of the largest value along the given axis; with
# axis=1 it works row by row, so each row of 'pred' (one image) yields one
# class index.

import numpy as np

pred = pred.argmax(axis=1)

# Displaying the result shows one predicted label for each data instance in
# the test set.

pred

# **MAKE PREDICTIONS**

In [39]:
# Load a single signature from the test set for a stand-alone prediction.
# The image is resized on load to 512x512, the same size used for the
# training and testing data.

from tensorflow.keras.preprocessing import image

img = image.load_img(
    '/content/gdrive/MyDrive/FYP/Test/CEDAR_1/forged/forgeries_10_13.png',
    target_size=(512, 512),
)



In [None]:
# Convert the loaded PIL image into a NumPy array and display it.

x = image.img_to_array(img)
x

In [None]:
# Display the shape of x: expected to be (512, 512, 3), i.e. the 512x512
# target size plus the 3 RGB colour channels.

x.shape

In [None]:
# Prepare the single image exactly like the data the model was trained and
# validated on: scale the pixel values into the 0-1 range (the generators use
# rescale=1./255), then add a leading batch dimension because the model
# expects a batch of images rather than one image.
#
# ResNet50's preprocess_input is deliberately NOT applied here. It reorders
# the channels to BGR and subtracts the ImageNet channel means, which suits
# ImageNet-pretrained ResNet50 weights but not this custom CNN; applying it
# to 0-1 scaled pixels feeds the network values it never saw in training and
# makes the prediction meaningless.
# Displaying img_data.shape shows the additional batch dimension.

x = x/255

x = np.expand_dims(x, axis=0)
img_data = x
img_data.shape

In [23]:
# Run the model on the single prepared image and display the per-class
# output of the softmax layer.

model.predict(img_data)

array([[1., 0.]], dtype=float32)

In [24]:
# Softmax gives one probability per class; keep the index of the most
# probable class for the single image.

a = model.predict(img_data).argmax(axis=1)

In [None]:
# Class index 1 is reported as a genuine signature, anything else as forged.
print("The signature is Genuine" if a == 1 else "The signature is Forged")

# **METRICS OF EVALUATION**

In [26]:
# Importing metrics
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import confusion_matrix, classification_report

from sklearn.metrics import precision_recall_fscore_support
import seaborn as sn

%matplotlib inline
from sklearn.metrics import confusion_matrix
import itertools

In [None]:
from tensorflow.keras.utils import plot_model
# Render the model architecture, with shapes and layer names, to model_plot.png.
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

In [None]:
# Evaluate the model on the test generator and report the resulting loss and
# accuracy (the two values returned for the compiled loss and metric).

score = model.evaluate(test_generator)
for label, value in zip(('Validation loss:', 'Validation accuracy:'), score):
    print(label, value)

In [29]:
# Ground-truth class index of every test image, as stored on the generator.
# NOTE(review): these only line up with model.predict(test_generator) when the
# generator yields images in a fixed (unshuffled) order - confirm.
test_true_labels = test_generator.classes

***Calculating Accuracy and F1- Scores***

In [None]:
# Overall accuracy and class-weighted F1-score of the predicted labels
# against the true labels of the test set.
acc = accuracy_score(y_true=test_true_labels, y_pred=pred)
f1 = f1_score(y_true=test_true_labels, y_pred=pred, average='weighted')
print('Test results:', 'accuracy=', acc, ', Weighted F1-score=', f1)

***Confusion Matrix with Percentages***

In [None]:
# Macro-averaged precision, recall and F1, followed by a heatmap of the
# confusion matrix normalised over the true labels (each row sums to 1).
# Rows and columns are labelled with the class indices "0" and "1".

print("P,R,F1:", precision_recall_fscore_support(test_true_labels, pred, average='macro'))
df_cm = pd.DataFrame(
    confusion_matrix(test_true_labels, pred, normalize='true'),
    index=list("01"),
    columns=list("01"),
)
plt.figure(figsize=(10, 7))
sn.heatmap(df_cm, annot=True)

In [32]:
# Un-normalised confusion matrix (raw counts) of true labels against
# predicted labels, stored in 'cm' for plotting.

cm = confusion_matrix(y_true=test_true_labels, y_pred= pred )

In [33]:
def plot_confusion_matrix(cm, classes,
                        normalize=False,
                        title='Confusion matrix',
                        cmap=plt.cm.Blues):
    """
    Print and plot a confusion matrix.

    Args:
        cm: Confusion matrix as a NumPy array; rows are labelled as the true
            classes and columns as the predicted classes on the plot.
        classes: Tick labels for the axes, one per class, in matrix order.
        normalize: When True, every row is divided by its row sum before the
            matrix is printed and drawn.
        title: Title shown above the plot.
        cmap: Matplotlib colour map used for the cells.
    """
    # Normalise BEFORE drawing, so that the cell colours and the colour bar
    # describe the same numbers that are written into the cells.
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than the midpoint get white text, lighter ones black text.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Normalised values are rounded to two decimals to keep them readable;
        # raw values are written exactly as they are.
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
            horizontalalignment="center",
            color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [34]:
# Tick labels for the confusion matrix, listed in class-index order.
# flow_from_directory numbers the classes alphabetically by folder name, so
# the 'forged' folder is class 0, and the single-image prediction cell
# reports class 1 as genuine. The labels therefore have to be Forged first,
# Genuine second; the reverse order mislabels both axes of the plot.
# NOTE(review): assumes the genuine folder's name sorts after 'forged' -
# confirm with test_generator.class_indices.

cm_plot_labels = ['Forged Signatures', 'Genuine Signatures']

In [None]:
# Draw the raw-count confusion matrix with the class labels defined above.
plot_confusion_matrix(cm=cm, classes=cm_plot_labels, title='Phase 1 Confusion Matrix')

In [None]:
# Plot the training loss recorded for each epoch. The validation loss and the
# training/validation accuracy curves can be added by enabling the
# commented-out lines below.

plt.figure(figsize=(10,7))
plt.plot(history.history['loss'], label='Training loss')
#plt.plot(history.history['val_loss'], label='Val loss')
#plt.plot(history.history['accuracy'], label='Training acc')
#plt.plot(history.history['val_accuracy'], label='val acc')
plt.title("Training Loss and Accuracy on CEDAR Dataset")
plt.xlabel("Epochs")
plt.legend()
# Save BEFORE showing: once plt.show() has displayed the figure it is
# released, and a later savefig would write out an empty image.
plt.savefig('lossval_loss')
plt.show()

In [None]:
from tensorflow.keras.utils import plot_model
# Render the model architecture, with shapes and layer names, to model_plot.png.
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

In [38]:
#Loading the Tensorboard magic tool, for more graphs and histograms on each section within the training.

#%load_ext tensorboard
#%tensorboard --logdir logs