In [1]:
  # Step 1: Import Libraries
import os
import glob
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense, Lambda, Dropout, BatchNormalization, MaxPooling2D
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf

In [2]:
# Step 2: Download Dataset (Make sure you have Kaggle API set up)
import kagglehub
# dataset_download returns the local directory that holds the downloaded files.
# Build the dataset path from that return value instead of hard-coding one
# machine's cache location, so the notebook keeps working for other users,
# cache directories and dataset versions.
path = kagglehub.dataset_download("divyanshrai/handwritten-signatures")
dataset_path = os.path.join(path, 'Dataset_Signature_Final', 'Dataset')

Downloading from https://www.kaggle.com/api/v1/datasets/download/divyanshrai/handwritten-signatures?dataset_version_number=2...


100%|██████████| 370M/370M [00:08<00:00, 47.3MB/s]

Extracting files...





In [3]:
# Step 3: Load Paths
real_signs = []
forged_signs = []

for i in range(1, 5):  # Iterate through dataset1, dataset2, dataset3, dataset4
    real_path = os.path.join(dataset_path, f'dataset{i}', 'real', '*.*')
    forged_path = os.path.join(dataset_path, f'dataset{i}', 'forge', '*.*')

    # glob returns matches in arbitrary, filesystem-dependent order. Sorting
    # makes the path lists (and therefore any seeded split of them) identical
    # from run to run and from machine to machine.
    real_signs.extend(sorted(glob.glob(real_path)))
    forged_signs.extend(sorted(glob.glob(forged_path)))

# Fail here with an actionable message rather than later with an obscure
# "n_samples=0" style error when the dataset directory is wrong.
if not real_signs or not forged_signs:
    raise FileNotFoundError(
        f"No signature images found under {dataset_path!r} "
        f"(real: {len(real_signs)}, forged: {len(forged_signs)})"
    )

In [4]:
# Step 4: Split Data into Train, Validation, and Test Sets
# Each class is split on its own: 70% goes to training, and the held-out 30%
# is then halved into a validation part and a test part.
real_train, real_temp = train_test_split(real_signs, random_state=42, test_size=0.3)
forged_train, forged_temp = train_test_split(forged_signs, random_state=42, test_size=0.3)

real_val, real_test = train_test_split(real_temp, random_state=42, test_size=0.5)
forged_val, forged_test = train_test_split(forged_temp, random_state=42, test_size=0.5)

# In every split the real samples come first, followed by the forged ones.
train_paths = [*real_train, *forged_train]
val_paths = [*real_val, *forged_val]
test_paths = [*real_test, *forged_test]

# Labels follow the same ordering: 1 marks a real signature, 0 a forged one.
train_labels = [1 for _ in real_train] + [0 for _ in forged_train]
val_labels = [1 for _ in real_val] + [0 for _ in forged_val]
test_labels = [1 for _ in real_test] + [0 for _ in forged_test]

In [5]:
# Step 5: Preprocessing Function
def preprocess_image(image_path):
    """Read one signature image and turn it into a binarised 128x128 RGB array.

    Pipeline: read from disk -> grayscale -> 5x5 Gaussian blur -> fixed
    threshold at 127 -> resize to 128x128 -> replicate the single channel
    into three channels.

    Parameters
    ----------
    image_path : str
        Path of the image file to read.

    Returns
    -------
    numpy.ndarray
        Array of shape (128, 128, 3).

    Raises
    ------
    ValueError
        If the file cannot be read or decoded as an image.
    """
    image = cv2.imread(image_path)
    # cv2.imread does not raise on a missing or undecodable file; it returns
    # None, which would otherwise surface as an opaque error in cvtColor.
    if image is None:
        raise ValueError(f"Could not read image file: {image_path}")

    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Blur before thresholding so isolated noisy pixels do not survive binarisation.
    noise_removed = cv2.GaussianBlur(gray_image, (5, 5), 0)
    _, binary_image = cv2.threshold(noise_removed, 127, 255, cv2.THRESH_BINARY)
    resized_image = cv2.resize(binary_image, (128, 128))
    rgb_image = cv2.cvtColor(resized_image, cv2.COLOR_GRAY2RGB)
    return rgb_image

def load_data(image_paths):
    """Preprocess every path in ``image_paths`` and return the results as one array.

    The images are processed in the order given, so row ``k`` of the result
    corresponds to ``image_paths[k]``.
    """
    processed = []
    for image_path in image_paths:
        processed.append(preprocess_image(image_path))
    return np.array(processed)

# Load each split, cast it to float32 and divide by 255 so the pixel values
# are scaled down before being fed to the networks.
X_train = load_data(train_paths).astype('float32') / 255.0
X_val = load_data(val_paths).astype('float32') / 255.0
X_test = load_data(test_paths).astype('float32') / 255.0

## Step 6: Define Models for Comparison

In [6]:
# Custom CNN Model
def create_custom_cnn(input_shape):
    """Build a small two-block CNN that outputs one sigmoid probability.

    Architecture: Conv(32) -> MaxPool -> Conv(64) -> MaxPool -> Flatten ->
    Dense(128) -> Dropout(0.5) -> Dense(1, sigmoid).

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, without the batch dimension.

    Returns
    -------
    An uncompiled Keras ``Sequential`` model.
    """
    model = Sequential([
        # Declare the input with an explicit Input layer. Passing `input_shape`
        # to the first Conv2D is discouraged for Sequential models in Keras 3
        # and triggers a UserWarning.
        Input(shape=input_shape),
        Conv2D(32, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])
    return model

In [7]:
# VGG16 Model with Transfer Learning
from tensorflow.keras.applications import VGG16

def create_vgg16_model(input_shape):
    """Stack a binary classification head on an ImageNet-initialised VGG16 base.

    The VGG16 base is created without its original top layers. Nothing in this
    function changes the base's ``trainable`` flag.

    Returns an uncompiled Keras ``Sequential`` model ending in a single
    sigmoid unit.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
    # Classification head applied to the flattened convolutional features.
    head = [
        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    return Sequential([backbone, *head])

In [8]:
# Siamese Network Model
def create_siamese_model(input_shape):
    """Build the encoder that maps one image to a 128-dimensional vector.

    The encoder is a single 5x5 convolution (64 filters, ReLU) followed by
    flattening and a linear Dense(128) projection.
    """
    image_in = Input(shape=input_shape)

    conv_maps = Conv2D(64, (5, 5), activation='relu')(image_in)
    flat_features = Flatten()(conv_maps)
    embedding = Dense(128)(flat_features)

    return Model(inputs=image_in, outputs=embedding)

def siamese_network(input_shape):
    """Build a two-input model that scores a pair of images with one sigmoid unit.

    Both inputs go through the *same* encoder instance (shared weights). The
    element-wise absolute difference of the two encodings feeds a
    Dense(1, sigmoid) output.
    """
    encoder = create_siamese_model(input_shape)

    left_in = Input(shape=input_shape)
    right_in = Input(shape=input_shape)

    # Calling one encoder object twice is what ties the weights of both branches.
    left_code = encoder(left_in)
    right_code = encoder(right_in)

    abs_diff = Lambda(lambda pair: tf.abs(pair[0] - pair[1]))([left_code, right_code])
    similarity = Dense(1, activation='sigmoid')(abs_diff)

    return Model(inputs=[left_in, right_in], outputs=similarity)

## Step 7: Train and Evaluate Each Model

In [9]:
def train_and_evaluate(model_name):
    """Build, train and test one of the single-image classifiers.

    Uses the notebook-level arrays ``X_train`` / ``X_val`` / ``X_test`` and
    the matching ``train_labels`` / ``val_labels`` / ``test_labels``.

    Parameters
    ----------
    model_name : str
        ``"Custom CNN"`` or ``"VGG16"``.

    Returns
    -------
    The history object returned by ``model.fit``.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported names.
    """
    if model_name == "Custom CNN":
        model = create_custom_cnn((128, 128, 3))
    elif model_name == "VGG16":
        model = create_vgg16_model((128, 128, 3))
    else:
        # Without this branch an unknown name left `model` unbound and the
        # function died later with an unrelated-looking UnboundLocalError.
        raise ValueError(
            f"Unknown model name {model_name!r}; expected 'Custom CNN' or 'VGG16'"
        )

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=0.00001), loss='binary_crossentropy', metrics=['accuracy'])

    # Keras expects array-like targets, so convert the Python label lists.
    y_train = np.array(train_labels)
    y_val = np.array(val_labels)
    y_test = np.array(test_labels)

    # Train the model
    history = model.fit(X_train,
                        y_train,
                        validation_data=(X_val, y_val),
                        epochs=20,
                        batch_size=32,
                        verbose=1)

    # Evaluate on test data
    test_loss, test_accuracy = model.evaluate(X_test, y_test)

    print(f"{model_name} Test Accuracy: {test_accuracy * 100:.2f}%")

    return history

In [11]:
# Train Custom CNN and VGG16 models
# Each call builds the named model, trains it for 20 epochs, prints its test
# accuracy and returns the fit history. The histories are kept so the
# learning curves can be plotted later.
custom_cnn_history=train_and_evaluate("Custom CNN")
vgg16_history=train_and_evaluate("VGG16")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 1s/step - accuracy: 0.5252 - loss: 0.7041 - val_accuracy: 0.5745 - val_loss: 0.6813
Epoch 2/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 872ms/step - accuracy: 0.5751 - loss: 0.6853 - val_accuracy: 0.5745 - val_loss: 0.6799
Epoch 3/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 864ms/step - accuracy: 0.5191 - loss: 0.6938 - val_accuracy: 0.5745 - val_loss: 0.6739
Epoch 4/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 845ms/step - accuracy: 0.5977 - loss: 0.6670 - val_accuracy: 0.5745 - val_loss: 0.6714
Epoch 5/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 820ms/step - accuracy: 0.5776 - loss: 0.6731 - val_accuracy: 0.5745 - val_loss: 0.6685
Epoch 6/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 731ms/step - accuracy: 0.5646 - loss: 0.6772 - val_accuracy: 0.5745 - val_loss: 0.6681
Epoch 7/20
[1m14/14[0m 

In [12]:
def create_pairs(X, y):
    """Build every unordered pair of samples and label it same-class / different-class.

    Parameters
    ----------
    X : array-like, shape (n, ...)
        Samples; the first axis indexes samples.
    y : array-like, shape (n,)
        Class label of each sample.

    Returns
    -------
    pairs : numpy.ndarray, shape (n * (n - 1) / 2, 2, ...)
        ``pairs[k, 0]`` and ``pairs[k, 1]`` are the two members of pair ``k``.
        Pairs are ordered (0,1), (0,2), ..., (1,2), ... with i < j.
    labels : numpy.ndarray of int, shape (n * (n - 1) / 2,)
        1 when both members share a class label, otherwise 0.

    Notes
    -----
    The number of pairs grows quadratically with ``n``, and so does the
    memory needed for ``pairs``.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    # Index pairs (i, j) with i < j, in the order a nested
    # `for i ... for j in range(i + 1, n)` loop would visit them.
    first, second = np.triu_indices(len(X), k=1)

    # A single fancy-indexing step gathers both members of every pair at once.
    pairs = X[np.stack([first, second], axis=1)]
    labels = (y[first] == y[second]).astype(int)

    return pairs, labels

In [13]:
# Step 8: Evaluate Siamese Network Model (requires pairs creation)

def create_pairs(X, y):
    """Build every unordered pair of samples and label it same-class / different-class.

    Parameters
    ----------
    X : array-like, shape (n, ...)
        Samples; the first axis indexes samples.
    y : array-like, shape (n,)
        Class label of each sample.

    Returns
    -------
    pairs : numpy.ndarray, shape (n * (n - 1) / 2, 2, ...)
        ``pairs[k, 0]`` and ``pairs[k, 1]`` are the two members of pair ``k``.
        Pairs are ordered (0,1), (0,2), ..., (1,2), ... with i < j.
    labels : numpy.ndarray of int, shape (n * (n - 1) / 2,)
        1 when both members share a class label, otherwise 0.

    Notes
    -----
    The number of pairs grows quadratically with ``n``, and so does the
    memory needed for ``pairs``.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    # Index pairs (i, j) with i < j, in the order a nested
    # `for i ... for j in range(i + 1, n)` loop would visit them.
    first, second = np.triu_indices(len(X), k=1)

    # A single fancy-indexing step gathers both members of every pair at once.
    pairs = X[np.stack([first, second], axis=1)]
    labels = (y[first] == y[second]).astype(int)

    return pairs, labels

In [10]:
# The label variables are rebound to *pair* labels further down, so running this
# cell a second time would silently pair images with the wrong labels. Refuse to
# continue unless every label list still has one entry per image.
for _split, _images, _labels in (("train", X_train, train_labels),
                                 ("val", X_val, val_labels),
                                 ("test", X_test, test_labels)):
    if len(_labels) != len(_images):
        raise RuntimeError(
            f"{_split} labels no longer match the {_split} images "
            f"({len(_labels)} vs {len(_images)}); re-run the data split cell first"
        )

# Pair the images in their (128, 128, 3) layout. Flattening them to 128*128*3
# vectors yields inputs that do not match the (128, 128, 3) Input layers of the
# Siamese model, which Keras rejects as an incompatible input shape.
train_pairs, train_labels = create_pairs(X_train, train_labels)
val_pairs, val_labels = create_pairs(X_val, val_labels)

siamese_model = siamese_network((128, 128, 3))
siamese_model.compile(optimizer=Adam(learning_rate=0.00001), loss='binary_crossentropy', metrics=['accuracy'])

# Column 0 / column 1 of the pair array feed the two inputs of the model.
history_siamese = siamese_model.fit([train_pairs[:, 0], train_pairs[:, 1]], train_labels,
                                    validation_data=([val_pairs[:, 0], val_pairs[:, 1]], val_labels),
                                    epochs=20, batch_size=32)

# Evaluate Siamese Network on Test Data (Create Test Pairs)
test_pairs, test_labels = create_pairs(X_test, test_labels)

NameError: name 'create_pairs' is not defined

In [None]:
def evaluate_siamese_model(model, X, y_true):
    """Print a classification report and plot the confusion matrix for pair predictions.

    Parameters
    ----------
    model :
        Two-input model; its predictions are thresholded at 0.5.
    X : numpy.ndarray, shape (n_pairs, 2, ...)
        ``X[:, 0]`` and ``X[:, 1]`` are fed to the two model inputs.
    y_true : array-like, shape (n_pairs,)
        Pair labels: 1 when both members share a class, 0 otherwise.
    """
    scores = model.predict([X[:, 0], X[:, 1]])
    # Flatten so predictions are 1-D like y_true, whatever the shape of the
    # model's output (e.g. one column per sample for a single-unit head).
    y_pred = (scores > 0.5).astype("int32").ravel()
    print(classification_report(y_true, y_pred))

    # Pin the label order so the matrix is always 2x2 and its rows/columns
    # line up with the tick labels even if one class is absent.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

    # Pair labels encode "same class" vs "different class", not real vs forged,
    # so the axes are named accordingly.
    pair_names = ["Different class", "Same class"]
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=pair_names, yticklabels=pair_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()

# Print the classification report and draw the confusion matrix for the test pairs.
evaluate_siamese_model(siamese_model,test_pairs,test_labels)

In [None]:
# Step 9: Compare Results - Plot Learning Curves for Each Model

def plot_learning_curve(history, title):
    """Plot training and validation accuracy per epoch for one model.

    Parameters
    ----------
    history :
        Object whose ``history`` dict has ``'accuracy'`` and ``'val_accuracy'``
        lists (as returned by ``model.fit`` with an accuracy metric).
    title : str
        Model name shown in the figure title.
    """
    plt.figure(figsize=(10, 6))
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title(f'{title} Learning Curve')
    # Label the axes so the figure can be read on its own.
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

# One accuracy-vs-epoch figure per model. All three history variables must
# already exist in the kernel (i.e. all three trainings have completed).
plot_learning_curve(custom_cnn_history,"Custom CNN")
plot_learning_curve(vgg16_history,"VGG16")
plot_learning_curve(history_siamese,"Siamese Network")


In [None]:
def create_pairs(X, y):
    """Build every unordered pair of samples and label it same-class / different-class.

    Parameters
    ----------
    X : array-like, shape (n, ...)
        Samples; the first axis indexes samples.
    y : array-like, shape (n,)
        Class label of each sample.

    Returns
    -------
    pairs : numpy.ndarray, shape (n * (n - 1) / 2, 2, ...)
        ``pairs[k, 0]`` and ``pairs[k, 1]`` are the two members of pair ``k``.
        Pairs are ordered (0,1), (0,2), ..., (1,2), ... with i < j.
    labels : numpy.ndarray of int, shape (n * (n - 1) / 2,)
        1 when both members share a class label, otherwise 0.

    Notes
    -----
    The number of pairs grows quadratically with ``n``, and so does the
    memory needed for ``pairs``.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    # Index pairs (i, j) with i < j, in the order a nested
    # `for i ... for j in range(i + 1, n)` loop would visit them.
    first, second = np.triu_indices(len(X), k=1)

    # A single fancy-indexing step gathers both members of every pair at once.
    pairs = X[np.stack([first, second], axis=1)]
    labels = (y[first] == y[second]).astype(int)

    return pairs, labels

# Step 8: Evaluate Siamese Network Model (requires pairs creation)

# The label variables are rebound to *pair* labels further down, so running this
# code a second time would silently pair images with the wrong labels. Refuse to
# continue unless every label list still has one entry per image.
for _split, _images, _labels in (("train", X_train, train_labels),
                                 ("val", X_val, val_labels),
                                 ("test", X_test, test_labels)):
    if len(_labels) != len(_images):
        raise RuntimeError(
            f"{_split} labels no longer match the {_split} images "
            f"({len(_labels)} vs {len(_images)}); re-run the data split cell first"
        )

# Pair the images in their (128, 128, 3) layout. Flattening them to 128*128*3
# vectors yields inputs that do not match the (128, 128, 3) Input layers of the
# Siamese model, which Keras rejects as an incompatible input shape.
train_pairs, train_labels = create_pairs(X_train, train_labels)
val_pairs, val_labels = create_pairs(X_val, val_labels)

siamese_model = siamese_network((128, 128, 3))
siamese_model.compile(optimizer=Adam(learning_rate=0.00001), loss='binary_crossentropy', metrics=['accuracy'])

# Column 0 / column 1 of the pair array feed the two inputs of the model.
history_siamese = siamese_model.fit([train_pairs[:, 0], train_pairs[:, 1]], train_labels,
                                    validation_data=([val_pairs[:, 0], val_pairs[:, 1]], val_labels),
                                    epochs=20, batch_size=32)

# Evaluate Siamese Network on Test Data (Create Test Pairs)
test_pairs, test_labels = create_pairs(X_test, test_labels)

def evaluate_siamese_model(model, X, y_true):
    """Print a classification report and plot the confusion matrix for pair predictions.

    Parameters
    ----------
    model :
        Two-input model; its predictions are thresholded at 0.5.
    X : numpy.ndarray, shape (n_pairs, 2, ...)
        ``X[:, 0]`` and ``X[:, 1]`` are fed to the two model inputs.
    y_true : array-like, shape (n_pairs,)
        Pair labels: 1 when both members share a class, 0 otherwise.
    """
    scores = model.predict([X[:, 0], X[:, 1]])
    # Flatten so predictions are 1-D like y_true, whatever the shape of the
    # model's output (e.g. one column per sample for a single-unit head).
    y_pred = (scores > 0.5).astype("int32").ravel()
    print(classification_report(y_true, y_pred))

    # Pin the label order so the matrix is always 2x2 and its rows/columns
    # line up with the tick labels even if one class is absent.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

    # Pair labels encode "same class" vs "different class", not real vs forged,
    # so the axes are named accordingly.
    pair_names = ["Different class", "Same class"]
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=pair_names, yticklabels=pair_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()

# Print the classification report and draw the confusion matrix for the test pairs.
evaluate_siamese_model(siamese_model,test_pairs,test_labels)

# Step 9: Compare Results - Plot Learning Curves for Each Model

def plot_learning_curve(history, title):
    """Plot training and validation accuracy per epoch for one model.

    Parameters
    ----------
    history :
        Object whose ``history`` dict has ``'accuracy'`` and ``'val_accuracy'``
        lists (as returned by ``model.fit`` with an accuracy metric).
    title : str
        Model name shown in the figure title.
    """
    plt.figure(figsize=(10, 6))
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title(f'{title} Learning Curve')
    # Label the axes so the figure can be read on its own.
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

# One accuracy-vs-epoch figure per model. All three history variables must
# already exist in the kernel (i.e. all three trainings have completed).
plot_learning_curve(custom_cnn_history,"Custom CNN")
plot_learning_curve(vgg16_history,"VGG16")
plot_learning_curve(history_siamese,"Siamese Network")
