In [2]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Paths to the folders
org_folder = './data/full_org/'
forg_folder = './data/full_forg/'

# Lists to hold images, labels, and person IDs
images = []
labels = []
person_ids = []

# Function to extract person ID from filename
def get_person_id(filename):
    return int(filename.split('_')[1])

# Load original signatures
for filename in os.listdir(org_folder):
    img = cv2.imread(os.path.join(org_folder, filename), cv2.IMREAD_GRAYSCALE)
    if img is not None:
        img = cv2.resize(img, (128, 128))
        images.append(img)
        labels.append(1)  # Genuine label
        person_ids.append(get_person_id(filename))

# Load forged signatures
for filename in os.listdir(forg_folder):
    img = cv2.imread(os.path.join(forg_folder, filename), cv2.IMREAD_GRAYSCALE)
    if img is not None:
        img = cv2.resize(img, (128, 128))
        images.append(img)
        labels.append(0)  # Forged label
        person_ids.append(get_person_id(filename))

# Convert lists to numpy arrays
images = np.array(images)
labels = np.array(labels)
person_ids = np.array(person_ids)

# Normalize images
images = images / 255.0

# Encode person IDs to start from 0
le = LabelEncoder()
person_ids_encoded = le.fit_transform(person_ids)

# Split the data for both models
X_train, X_test, y_train, y_test, person_train, person_test = train_test_split(images, labels, person_ids_encoded, test_size=0.2, random_state=42)



In [4]:
from skimage.feature import hog

def extract_hog_features(images):
    hog_features = []
    for img in images:
        features = hog(img, orientations=8, pixels_per_cell=(16, 16), cells_per_block=(1, 1))
        hog_features.append(features)
    return np.array(hog_features)

features_train = extract_hog_features(X_train)
features_test = extract_hog_features(X_test)


**SVM**

In [5]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Train the SVM model for person classification
svm_person_model = SVC(kernel='linear')
svm_person_model.fit(features_train, person_train)

# Evaluate the model
person_pred = svm_person_model.predict(features_test)
person_accuracy = accuracy_score(person_test, person_pred)
print(f'Person Classification Accuracy: {person_accuracy}')


Person Classification Accuracy: 0.8768939393939394


In [7]:
# Train the SVM model for forgery detection
svm_forgery_model = SVC(kernel='linear')
svm_forgery_model.fit(features_train, y_train)

# Evaluate the model
forgery_pred = svm_forgery_model.predict(features_test)
forgery_accuracy = accuracy_score(y_test, forgery_pred)
print(f'Forgery Detection Accuracy: {forgery_accuracy}')


Forgery Detection Accuracy: 0.990530303030303


In [8]:
# Load and preprocess the test image
test_image_path = './data/full_org/original_5_4.png'
test_image = cv2.imread(test_image_path, cv2.IMREAD_GRAYSCALE)
test_image = cv2.resize(test_image, (128, 128))
test_image = test_image / 255.0

# Extract HOG features from the test image
test_feature = hog(test_image, orientations=8, pixels_per_cell=(16, 16), cells_per_block=(1, 1))
test_feature = np.array(test_feature).reshape(1, -1)

# Predict the person
predicted_person = svm_person_model.predict(test_feature)
print(f'This signature belongs to person ID: {predicted_person[0]+1}')

# Predict if the signature is genuine or forged
predicted_forgery = svm_forgery_model.predict(test_feature)
if predicted_forgery == 1:
    print("It's a Genuine Signature")
else:
    print("It's a Forged Signature")


This signature belongs to person ID: 5
It's a Genuine Signature


**Random Forest Classifier**

In [9]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Train the Random Forest model for person classification
rf_person_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_person_model.fit(features_train, person_train)

# Evaluate the model
person_pred = rf_person_model.predict(features_test)
person_accuracy = accuracy_score(person_test, person_pred)
print(f'Person Classification Accuracy with Random Forest: {person_accuracy}')

Person Classification Accuracy with Random Forest: 0.7537878787878788


In [10]:
# Train the Random Forest model for forgery detection
rf_forgery_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_forgery_model.fit(features_train, y_train)

# Evaluate the model
forgery_pred_rf = rf_forgery_model.predict(features_test)
forgery_accuracy_rf = accuracy_score(y_test, forgery_pred_rf)
print(f'Forgery Detection Accuracy (Random Forest): {forgery_accuracy_rf}')

Forgery Detection Accuracy (Random Forest): 0.9734848484848485


**XGBoost**

In [11]:
import xgboost as xgb

# Train the XGBoost model for person classification
xgb_person_model = xgb.XGBClassifier(n_estimators=100, random_state=42)
xgb_person_model.fit(features_train, person_train)

# Evaluate the model
person_pred_xgb = xgb_person_model.predict(features_test)
person_accuracy_xgb = accuracy_score(person_test, person_pred_xgb)
print(f'Person Classification Accuracy (XGBoost): {person_accuracy_xgb}')


Person Classification Accuracy (XGBoost): 0.7083333333333334


In [12]:
# Train the XGBoost model for forgery detection
xgb_forgery_model = xgb.XGBClassifier(n_estimators=100, random_state=42)
xgb_forgery_model.fit(features_train, y_train)

# Evaluate the model
forgery_pred_xgb = xgb_forgery_model.predict(features_test)
forgery_accuracy_xgb = accuracy_score(y_test, forgery_pred_xgb)
print(f'Forgery Detection Accuracy (XGBoost): {forgery_accuracy_xgb}')


Forgery Detection Accuracy (XGBoost): 0.9810606060606061


In [13]:
import joblib

joblib.dump(rf_person_model,'rfp.joblib')
joblib.dump(rf_forgery_model,'rff.joblib')
joblib.dump(svm_person_model,'svmp.joblib')
joblib.dump(svm_forgery_model,'svmf.joblib')
joblib.dump(xgb_person_model,'xgbp.joblib')
joblib.dump(xgb_forgery_model,'xgbf.joblib')

['xgbf.joblib']