# Run the two sections in separate notebooks

# Section-1 (Cross Validation Training)

In [None]:
import tensorflow as tf
import numpy as np
import os
import matplotlib.pyplot as plt

In [None]:
# Decode one training image so its native dimensions can be inspected.
sample_path = '../input/signature-forgery-detection/Signatures_2/Train_Set/001/Real/001_02.PNG'
sample = tf.io.read_file(sample_path)
# The file is a PNG, so use the format-detecting decoder rather than decode_jpeg.
sample = tf.io.decode_image(sample, expand_animations=False)

# Target (height, width, channels) that load_img produces for every image.
shape = (400,800,3)

# Keep this as the last expression of the cell; otherwise the notebook never
# displays the shape that was computed.
sample.shape

In [None]:
def load_img(path, target_shape=None):
  """Read an image file and return it as a fixed-size uint8 array.

  Args:
    path: Path of the image file to load.
    target_shape: Optional (height, width, channels) of the returned array.
      Defaults to the notebook-level `shape`.

  Returns:
    A numpy uint8 array of shape `target_shape`.
  """
  if target_shape is None:
    target_shape = shape
  height, width, channels = target_shape

  image_file = tf.io.read_file(path)
  # decode_image detects the file format; `channels` forces a consistent depth
  # so grayscale or RGBA files do not end up with a different last dimension.
  image = tf.io.decode_image(image_file, channels=channels, expand_animations=False)
  # np.resize only repeats/truncates the flattened pixel buffer, which scrambles
  # any image whose size differs from the target. tf.image.resize rescales the
  # picture itself (and is a no-op for images that already match).
  image = tf.image.resize(image, (height, width))
  # tf.image.resize returns float32; go back to uint8 to keep memory use low.
  return tf.cast(tf.round(image), tf.uint8).numpy()

In [None]:
# Delete every regular file whose name ends in ".DS_Store" under the current directory.
# NOTE(review): the images are read from ../input, which this command does not
# search; confirm whether this cleanup still has any effect on the dataset.
!find . -name "*.DS_Store" -type f -delete

In [None]:
train_path = '../input/signature-forgery-detection/Signatures_2/Train_Set/'
dataset = []   # list of [image_a, image_b] pairs
targets = []   # 0. for genuine/genuine pairs, 1. for genuine/forged pairs

real_count = 0
forged_count = 0

# Sort the listings so the pair order (and therefore any index-based split of
# the resulting arrays) is the same on every run.
persons = sorted(os.listdir(train_path))
for person in persons:
  path = os.path.join(train_path, person)
  # Skip stray files (e.g. ".DS_Store") that are not per-person folders.
  if not os.path.isdir(path):
    continue
  real = os.path.join(path, 'Real/')
  real_files = sorted(os.listdir(real))
  fraud = os.path.join(path, 'Forged/')
  fraud_files = sorted(os.listdir(fraud))

  # Decode each file once per person instead of once per pair; the pairs below
  # then share references to the same arrays.
  real_imgs = [load_img(os.path.join(real, name)) for name in real_files]
  fraud_imgs = [load_img(os.path.join(fraud, name)) for name in fraud_files]

  # Genuine/genuine: every ordered pair of two different real signatures.
  for j, img1 in enumerate(real_imgs):
    for k, img2 in enumerate(real_imgs):
      if j == k:
        continue
      real_count += 1
      dataset.append([img1, img2])
      targets.append(0.)

  # Genuine/forged: every real signature against every forgery. The indices
  # come from two different lists, so equal indices are not "the same image"
  # and must not be skipped.
  for img1 in real_imgs:
    for img2 in fraud_imgs:
      forged_count += 1
      dataset.append([img1, img2])
      targets.append(1.)

In [None]:
# Number of genuine/genuine pairs, then number of genuine/forged pairs.
print(real_count, forged_count, sep='\n')

In [None]:
def Siamese_Model(input_shape=(400,800,3)):
  """Build the two-input Siamese classifier.

  Both inputs pass through one shared convolutional encoder that ends in a
  128-unit dense layer. The element-wise absolute difference of the two
  encodings feeds a single sigmoid unit.

  Args:
    input_shape: Shape of each input image (without the batch dimension).

  Returns:
    An uncompiled `tf.keras.Model` taking `[input_one, input_two]`.
  """
  layers = tf.keras.layers

  input_one = layers.Input(shape=input_shape)
  input_two = layers.Input(shape=input_shape)

  # Shared encoder: three conv + average-pool stages, then a dense projection.
  cnn = tf.keras.models.Sequential([
      layers.Conv2D(32, (3,3), activation='relu', padding='same'),
      layers.AveragePooling2D((2,2)),
      layers.Conv2D(64, (3,3), activation='relu', padding='same'),
      layers.AveragePooling2D((2,2)),
      layers.Conv2D(64, (3,3), activation='relu', padding='same'),
      layers.AveragePooling2D((2,2)),
      layers.Flatten(),
      layers.Dropout(0.3),
      layers.Dense(128),
  ])

  # Element-wise |a - b| between the two encodings.
  distance_layer = layers.Lambda(lambda pair: abs(pair[0]-pair[1]))
  encoded_one = cnn(input_one)
  encoded_two = cnn(input_two)
  l1_distance = distance_layer([encoded_one, encoded_two])

  final_out = layers.Dense(1, activation='sigmoid')(l1_distance)
  return tf.keras.Model([input_one, input_two], final_out)

In [None]:
# Convert the Python lists to arrays so pairs can be selected by index arrays
# (dataset[idx, 0] / dataset[idx, 1]) during cross-validation.
dataset, targets = np.array(dataset), np.array(targets)

In [None]:
from sklearn.model_selection import KFold

# 5-fold cross-validation over the pair indices; a fresh model is trained for
# every fold and its [loss, accuracy] on the held-out fold is recorded.
kf = KFold(n_splits=5, shuffle=True, random_state=32)
cvscores = []

for train, val in kf.split(dataset, targets):
  train, val = np.array(train), np.array(val)

  model = Siamese_Model()
  model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
  model.fit(
      [dataset[train,0], dataset[train,1]],
      targets[train],
      epochs=15,
      verbose=0,
  )

  score = model.evaluate(
      [dataset[val,0], dataset[val,1]],
      targets[val],
      verbose=0,
  )
  print(score)
  cvscores.append(score)

  # Clear Keras' global state before the next fold builds its own model.
  tf.keras.backend.clear_session()

In [None]:
cvscores = np.array(cvscores)
# Each row is an evaluate() result: column 0 is the loss and column 1 is the
# 'accuracy' metric, so despite its name `errors` holds per-fold accuracies.
errors = cvscores[:,1]
mean, median, std = np.mean(errors), np.median(errors), np.std(errors)
print('Mean  Median  Std')
print(mean,median,std)

In [None]:
# Clear Keras' global state once cross-validation is finished.
tf.keras.backend.clear_session()

# Section-2  (Normal Training)

In [None]:
import tensorflow as tf
import numpy as np
import os
import matplotlib.pyplot as plt

In [None]:
# Decode one training image so its native dimensions can be inspected.
sample_path = '../input/signature-forgery-detection/Signatures_2/Train_Set/001/Real/001_02.PNG'
sample = tf.io.read_file(sample_path)
# The file is a PNG, so use the format-detecting decoder rather than decode_jpeg.
sample = tf.io.decode_image(sample, expand_animations=False)

# Target (height, width, channels) that load_img produces for every image.
shape = (400,800,3)

# Keep this as the last expression of the cell; otherwise the notebook never
# displays the shape that was computed.
sample.shape

In [None]:
def load_img(path, target_shape=None):
  """Read an image file and return it as a fixed-size uint8 array.

  Args:
    path: Path of the image file to load.
    target_shape: Optional (height, width, channels) of the returned array.
      Defaults to the notebook-level `shape`.

  Returns:
    A numpy uint8 array of shape `target_shape`.
  """
  if target_shape is None:
    target_shape = shape
  height, width, channels = target_shape

  image_file = tf.io.read_file(path)
  # decode_image detects the file format; `channels` forces a consistent depth
  # so grayscale or RGBA files do not end up with a different last dimension.
  image = tf.io.decode_image(image_file, channels=channels, expand_animations=False)
  # np.resize only repeats/truncates the flattened pixel buffer, which scrambles
  # any image whose size differs from the target. tf.image.resize rescales the
  # picture itself (and is a no-op for images that already match).
  image = tf.image.resize(image, (height, width))
  # tf.image.resize returns float32; go back to uint8 to keep memory use low.
  return tf.cast(tf.round(image), tf.uint8).numpy()

In [None]:
# Delete every regular file whose name ends in ".DS_Store" under the current directory.
# NOTE(review): the images are read from ../input, which this command does not
# search; confirm whether this cleanup still has any effect on the dataset.
!find . -name "*.DS_Store" -type f -delete

In [None]:
# Sampling budget per person: the dataset-building loops run pairs//2 random
# draws for each person, and every draw appends three image pairs.
pairs = 20 # KEEP 10 FOR CROSS VALIDATION

In [None]:
train_path = '../input/signature-forgery-detection/Signatures_2/Train_Set'
dataset = []   # list of [image_a, image_b] pairs
targets = []   # 0. for genuine/genuine pairs, 1. for genuine/forged pairs
persons = os.listdir(train_path)
for person in persons:
  path = os.path.join(train_path,person)
  # Skip stray files (e.g. ".DS_Store") that are not per-person folders.
  if not os.path.isdir(path):
    continue
  real = os.path.join(path,'Real/')
  real_files = os.listdir(real)
  fraud = os.path.join(path,'Forged/')
  fraud_files = os.listdir(fraud)
  for j in range(pairs//2):

    # Two *different* genuine signatures: drawing them independently could pick
    # the same file twice and produce a trivial (image, image) pair.
    ind1, ind2 = np.random.choice(len(real_files), size=2, replace=False)
    # np.random.randint excludes its upper bound, so pass len(...) itself;
    # len(...)-1 would make the last file unreachable.
    ind3 = np.random.randint(0, len(fraud_files))

    img1 = load_img(os.path.join(real,real_files[ind1]))
    img2 = load_img(os.path.join(real,real_files[ind2]))
    img3 = load_img(os.path.join(fraud,fraud_files[ind3]))

    # One genuine/genuine pair and two genuine/forged pairs per draw.
    dataset.append([img1,img2])
    dataset.append([img1,img3])
    dataset.append([img2,img3])

    targets.append(0.)
    targets.append(1.)
    targets.append(1.)

In [None]:
def Siamese_Model(input_shape=(400,800,3)):
  """Build the two-input Siamese classifier.

  Both inputs pass through one shared convolutional encoder that ends in a
  128-unit dense layer. The element-wise absolute difference of the two
  encodings feeds a single sigmoid unit.

  Args:
    input_shape: Shape of each input image (without the batch dimension).

  Returns:
    An uncompiled `tf.keras.Model` taking `[input_one, input_two]`.
  """
  layers = tf.keras.layers

  input_one = layers.Input(shape=input_shape)
  input_two = layers.Input(shape=input_shape)

  # Shared encoder: three conv + average-pool stages, then a dense projection.
  cnn = tf.keras.models.Sequential([
      layers.Conv2D(32, (3,3), activation='relu', padding='same'),
      layers.AveragePooling2D((2,2)),
      layers.Conv2D(64, (3,3), activation='relu', padding='same'),
      layers.AveragePooling2D((2,2)),
      layers.Conv2D(64, (3,3), activation='relu', padding='same'),
      layers.AveragePooling2D((2,2)),
      layers.Flatten(),
      layers.Dropout(0.3),
      layers.Dense(128),
  ])

  # Element-wise |a - b| between the two encodings.
  distance_layer = layers.Lambda(lambda pair: abs(pair[0]-pair[1]))
  encoded_one = cnn(input_one)
  encoded_two = cnn(input_two)
  l1_distance = distance_layer([encoded_one, encoded_two])

  final_out = layers.Dense(1, activation='sigmoid')(l1_distance)
  return tf.keras.Model([input_one, input_two], final_out)

In [None]:
# Convert the Python lists to arrays so the two sides of every pair can be
# sliced out as dataset[:, 0] and dataset[:, 1].
dataset, targets = np.array(dataset), np.array(targets)

In [None]:
# Multiply the learning rate by 0.2 after 5 epochs without val_loss improvement.
# min_lr must lie below the optimizer's starting rate (Adam(0.001) at compile
# time): with min_lr=0.001 the rate is already at its floor and the callback
# can never reduce it.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                              patience=5, min_lr=1e-5)

In [None]:
model = Siamese_Model()
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train on both sides of every pair; 10% of the pairs are held out for validation.
history = model.fit(
    [dataset[:,0], dataset[:,1]],
    targets,
    epochs=30,
    callbacks=[reduce_lr],
    validation_split=0.1,
)

In [None]:
history_dict = history.history
acc = history_dict['accuracy']
val_acc = history_dict['val_accuracy']
loss = history_dict['loss']
val_loss = history_dict['val_loss']

# Accuracy and loss live on different scales, so give each its own panel and
# label every curve; four anonymous lines on one axis cannot be told apart.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))

ax_acc.plot(acc, label='train')
ax_acc.plot(val_acc, label='validation')
ax_acc.set(title='Accuracy', xlabel='epoch', ylabel='accuracy')
ax_acc.legend()

ax_loss.plot(loss, label='train')
ax_loss.plot(val_loss, label='validation')
ax_loss.set(title='Loss', xlabel='epoch', ylabel='binary cross-entropy')
ax_loss.legend()

plt.show()

# Testing Manually

In [None]:
sample1 = load_img('../input/signature-forgery-detection/Signatures_2/Test_Set/006/Real/11_054.png') # True Signature of a person
sample2 = load_img('../input/signature-forgery-detection/Signatures_2/Test_Set/006/Forged/01_0102054.PNG') # Forged version of the signature
sample3 = load_img('../input/signature-forgery-detection/Signatures_2/Test_Set/006/Forged/01_0207054.PNG') # Another Forged Version of the Signature

# Add the leading batch dimension that model.predict expects.
sample1 = np.expand_dims(sample1,0)
sample2 = np.expand_dims(sample2,0)
sample3 = np.expand_dims(sample3,0)

# Only the (genuine, second forgery) pair is scored here; pass sample2 instead
# of sample3 to check the other forgery.
raw_score = model.predict([sample1,sample3])
ans = np.around(raw_score,decimals=2)  # rounded copy, for display only

# Threshold the unrounded score: rounding first turns e.g. 0.496 into 0.5 and
# flips the verdict. The model was trained with 0 = genuine pair, 1 = forged pair.
if raw_score.item() < 0.5:
  print("Genuine Signature")
else:
  print("Fraud Signature")

# Test Accuracy

In [None]:
# This cell samples from the held-out test set, so name the path accordingly.
test_path = '../input/signature-forgery-detection/Signatures_2/Test_Set'
dataset = []   # list of [image_a, image_b] pairs
targets = []   # 0. for genuine/genuine pairs, 1. for genuine/forged pairs
persons = os.listdir(test_path)
for person in persons:
  path = os.path.join(test_path,person)
  # Skip stray files (e.g. ".DS_Store") that are not per-person folders.
  if not os.path.isdir(path):
    continue
  real = os.path.join(path,'Real/')
  real_files = os.listdir(real)
  fraud = os.path.join(path,'Forged/')
  fraud_files = os.listdir(fraud)
  for j in range(pairs//2):

    # Two *different* genuine signatures: drawing them independently could pick
    # the same file twice and produce a trivial (image, image) pair.
    ind1, ind2 = np.random.choice(len(real_files), size=2, replace=False)
    # np.random.randint excludes its upper bound, so pass len(...) itself;
    # len(...)-1 would make the last file unreachable.
    ind3 = np.random.randint(0, len(fraud_files))

    img1 = load_img(os.path.join(real,real_files[ind1]))
    img2 = load_img(os.path.join(real,real_files[ind2]))
    img3 = load_img(os.path.join(fraud,fraud_files[ind3]))

    # One genuine/genuine pair and two genuine/forged pairs per draw.
    dataset.append([img1,img2])
    dataset.append([img1,img3])
    dataset.append([img2,img3])

    targets.append(0.)
    targets.append(1.)
    targets.append(1.)

In [None]:
# Convert the sampled test pairs and labels from Python lists to arrays so the
# two sides of every pair can be sliced out for evaluation.
dataset, targets = np.array(dataset), np.array(targets)

In [None]:
# evaluate() returns [loss, accuracy] because the model was compiled with
# metrics=['accuracy']; report the accuracy entry.
score = model.evaluate([dataset[:,0], dataset[:,1]], targets)
print("Test Accuracy ", score[1], sep='')

* Memory is a limiting factor when working with images, especially in Siamese networks, where every example is a pair of images.
* So, cross-validation training is done on 15 sample pairs (genuine and forged) from each person in the train set.
* Normal training is done on 30 sample pairs (genuine and forged) from each person in the train set.
* The batch size is left at its default value because of the memory limit.
* If more RAM is available, the overall accuracy can be greatly improved by taking more samples from the training set.