In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K
from sklearn.model_selection import train_test_split




In [2]:
base_path = "processed_signature/processed"
csv_path = os.path.join(base_path, "signature_dataset.csv")

df = pd.read_csv(csv_path)

print(df.head())


             image_path  person_id  label  split
0  train/031/05_031.png         31      0  train
1  train/031/07_031.png         31      0  train
2  train/031/11_031.png         31      0  train
3  train/031/08_031.png         31      0  train
4  train/031/12_031.png         31      0  train


In [3]:
def load_image(img_path):
    img = image.load_img(os.path.join(base_path, img_path), target_size=(224,224))
    img = image.img_to_array(img)
    img = img / 255.0
    return img


In [4]:
print(df.columns)


Index(['image_path', 'person_id', 'label', 'split'], dtype='str')


In [5]:
#Create Pairs for Siamese Training

def create_pairs(df):
    pairs = []
    labels = []

    grouped = df.groupby("person_id")

    for writer_id, group in grouped:
        genuine = group[group['label'] == 0]['image_path'].values
        forged  = group[group['label'] == 1]['image_path'].values

        # Genuine-Genuine pairs (label=1)
        for i in range(len(genuine)-1):
            img1 = load_image(genuine[i])
            img2 = load_image(genuine[i+1])
            pairs.append([img1, img2])
            labels.append(1)

        # Genuine-Forged pairs (label=0)
        for i in range(min(len(genuine), len(forged))):
            img1 = load_image(genuine[i])
            img2 = load_image(forged[i])
            pairs.append([img1, img2])
            labels.append(0)

    return np.array(pairs), np.array(labels)


In [6]:
pairs, labels = create_pairs(df)


In [7]:
# Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(
    pairs, labels, test_size=0.2, random_state=42
)


In [8]:
#Build Embedding Network (ResNet50)
def build_embedding_model():

    base_model = ResNet50(
        weights='imagenet',
        include_top=False,
        input_shape=(224,224,3)
    )

    base_model.trainable = False

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(256, activation='relu')(x)

    model = Model(base_model.input, x)
    return model



In [9]:
#Build Siamese Network
def euclidean_distance(vectors):
    x, y = vectors
    return K.sqrt(K.sum(K.square(x - y), axis=1, keepdims=True))


In [10]:
embedding_model = build_embedding_model()

input_a = Input(shape=(224,224,3))
input_b = Input(shape=(224,224,3))

embedding_a = embedding_model(input_a)
embedding_b = embedding_model(input_b)

distance = Lambda(euclidean_distance)([embedding_a, embedding_b])

output = Dense(1, activation='sigmoid')(distance)

siamese_model = Model(inputs=[input_a, input_b], outputs=output)

siamese_model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(0.0001),
    metrics=['accuracy']
)

siamese_model.summary()





In [11]:
#Train Model
siamese_model.fit(
    [X_train[:,0], X_train[:,1]],
    y_train,
    validation_data=([X_test[:,0], X_test[:,1]], y_test),
    epochs=10,
    batch_size=8
)


Epoch 1/10
[1m202/202[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m428s[0m 2s/step - accuracy: 0.5415 - loss: 0.9954 - val_accuracy: 0.4975 - val_loss: 0.9043
Epoch 2/10
[1m202/202[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m378s[0m 2s/step - accuracy: 0.5415 - loss: 0.8007 - val_accuracy: 0.4975 - val_loss: 0.8332
Epoch 3/10
[1m202/202[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m377s[0m 2s/step - accuracy: 0.5415 - loss: 0.7551 - val_accuracy: 0.4975 - val_loss: 0.8071
Epoch 4/10
[1m202/202[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m373s[0m 2s/step - accuracy: 0.5415 - loss: 0.7298 - val_accuracy: 0.4975 - val_loss: 0.7884
Epoch 5/10
[1m202/202[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m363s[0m 2s/step - accuracy: 0.5415 - loss: 0.7123 - val_accuracy: 0.4975 - val_loss: 0.7775
Epoch 6/10
[1m202/202[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m371s[0m 2s/step - accuracy: 0.5415 - loss: 0.6981 - val_accuracy: 0.4975 - val_loss: 0.7653
Epoch 7/10
[1m202/202

<keras.src.callbacks.history.History at 0x242f22016a0>

In [12]:
siamese_model.save("signature_siamese_model.h5")




#Customer Registration

In [31]:
#Customer Registration Function
def get_embedding(img_path):
    img = load_image(img_path)
    img = np.expand_dims(img, axis=0)
    embedding = embedding_model.predict(img)
    return embedding


In [32]:
customer_database = {}


In [33]:
def register_customer(customer_id, signature_paths):
    embeddings = []
    for path in signature_paths:
        emb = get_embedding(path)
        embeddings.append(emb)
    customer_database[customer_id] = embeddings


In [35]:
def verify_signature(customer_id, test_signature_path):

    test_embedding = get_embedding(test_signature_path)

    stored_embeddings = customer_database[customer_id]

    distances = []

    for emb in stored_embeddings:
        dist = np.linalg.norm(test_embedding - emb)
        distances.append(dist)

    avg_distance = np.mean(distances)

    threshold = 0.5  # You can tune this

    if avg_distance < threshold:
        print("Genuine Signature")
        print("Confidence:", round((1-avg_distance)*100,2), "%")
    else:
        print("Forged Signature")
        print("Confidence:", round(avg_distance*100,2), "%")


In [36]:
def get_embedding(img_path):
    img = image.load_img(img_path, target_size=(224,224))
    img = image.img_to_array(img)
    img = img / 255.0
    img = np.expand_dims(img, axis=0)

    embedding = embedding_model.predict(img)
    return embedding


In [37]:
import os

def register_customer(customer_id, folder_path):

    embeddings = []

    # Get all image files from folder
    image_files = [
        os.path.join(folder_path, file)
        for file in os.listdir(folder_path)
        if file.lower().endswith(('.png', '.jpg', '.jpeg'))
    ]

    if len(image_files) == 0:
        print("No images found in folder!")
        return

    for img_path in image_files:
        emb = get_embedding(img_path)
        embeddings.append(emb)

    customer_database[customer_id] = embeddings

    print(f"✅ Customer {customer_id} registered successfully.")
    print(f"Stored {len(embeddings)} signature samples.")


In [38]:
register_customer("C101", "processed")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 182ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 187ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 184ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 170ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 167ms/step
✅ Customer C101 registered successfully.
Stored 6 signature samples.


In [39]:
import numpy as np

def verify_signature(customer_id, test_signature_path, threshold=0.6):

    if customer_id not in customer_database:
        print("Customer not found!")
        return

    test_embedding = get_embedding(test_signature_path)

    stored_embeddings = customer_database[customer_id]

    distances = []

    for emb in stored_embeddings:
        dist = np.linalg.norm(test_embedding - emb)
        distances.append(dist)

    avg_distance = np.mean(distances)

    print("Average Distance:", avg_distance)

    if avg_distance < threshold:
        confidence = (1 - avg_distance/threshold) * 100
        print("✅ Genuine Signature")
        print("Confidence:", round(confidence,2), "%")
    else:
        confidence = (avg_distance/threshold - 1) * 100
        print("❌ Forged Signature")
        print("Confidence:", round(confidence,2), "%")


In [43]:
verify_signature(
    "C101",
    "more forg.jpeg"
)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 189ms/step
Average Distance: 0.66138506
❌ Forged Signature
Confidence: 10.23 %
