## 📌 Step 1: Install & Import Required Libraries
We'll use OpenCV, NumPy, Pandas, Matplotlib, and scikit-image for image processing and visualization.


In [2]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.feature import hog
from skimage import io, color

# Ignore warnings for cleaner output
import warnings
warnings.filterwarnings("ignore")


## 📌 Step 2: Load Dataset Paths
We define the file paths for real and forged signatures.


In [None]:
# Root folder of the signature dataset (replace the placeholder with your own location)
DATASET_PATH = "your_path_here"  # Example: "C:/Users/John/Downloads/CEDAR_Signature_Dataset"

# Genuine and forged signatures live in two sub-folders of the root
REAL_PATH, FORGED_PATH = (
    os.path.join(DATASET_PATH, subfolder) for subfolder in ("full_org", "full_forg")
)

# Count the entries in each folder; os.listdir raises if a folder does not exist,
# so a wrong DATASET_PATH is caught here
real_entries = os.listdir(REAL_PATH)
print("Real signatures found:", len(real_entries))
forged_entries = os.listdir(FORGED_PATH)
print("Forged signatures found:", len(forged_entries))


# Signature Verification using Classical Machine Learning

## **Objective**
- Detect forged signatures using classical ML techniques.
- Extract relevant numerical features from images.
- Train and evaluate a binary classifier.

## **Dataset Information**
- **full_org/**: 1320 genuine signatures.
- **full_forg/**: 1320 forged signatures.
- **Total Writers**: 55

## **Approach**
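1. Load the dataset (file paths of the genuine and forged signature images).
2. Preprocess the data: read each image in grayscale, resize it to 64×64, and flatten it into a 4096-value pixel vector.
3. Train a logistic regression model using Scikit-learn.
4. Evaluate the model’s accuracy, confusion matrix, and classification report on a held-out 20% test set.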

---


In [1]:
import numpy as np
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [2]:
# Main dataset path. Defaults to the original local folder; set the
# SIGNATURE_DATASET_PATH environment variable to run the notebook on another
# machine without editing this cell.
DATASET_PATH = os.environ.get(
    "SIGNATURE_DATASET_PATH", r"C:\Users\johnp\Downloads\CSI\signatures"
)
# Sub-folders holding the genuine and forged signature images
ORG_PATH = os.path.join(DATASET_PATH, "full_org")
FORG_PATH = os.path.join(DATASET_PATH, "full_forg")
# Collect the PNG file paths.
# - os.listdir returns names in arbitrary order, so they are sorted: the row
#   order feeds the seeded train/test split later in the notebook and must be
#   identical on every machine for the results to be reproducible.
# - The extension check is case-insensitive so files named ".PNG" are not
#   silently skipped.
org_files = [
    os.path.join(ORG_PATH, f)
    for f in sorted(os.listdir(ORG_PATH))
    if f.lower().endswith('.png')
]
forg_files = [
    os.path.join(FORG_PATH, f)
    for f in sorted(os.listdir(FORG_PATH))
    if f.lower().endswith('.png')
]
print("Total Genuine Signatures:", len(org_files))
print("Total Forged Signatures:", len(forg_files))

Total Genuine Signatures: 1320
Total Forged Signatures: 1320


In [3]:
# Function to preprocess images
def preprocess_image(image_path, size=(64, 64)):
    """Load an image in grayscale, resize it, and return it as a flat vector.

    Parameters
    ----------
    image_path : str
        Path of the image file to read.
    size : tuple of int, optional
        Target ``(width, height)`` handed to ``cv2.resize``. Defaults to
        ``(64, 64)``, which yields a vector of 4096 values.

    Returns
    -------
    numpy.ndarray
        1-D array of ``size[0] * size[1]`` pixel values.

    Raises
    ------
    ValueError
        If OpenCV cannot read the file (missing, unreadable, or not a
        supported image format).
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # Load as grayscale
    # cv2.imread signals failure by returning None instead of raising; without
    # this check the error would surface later as an opaque cv2.resize failure.
    if img is None:
        raise ValueError(f"Could not read image: {image_path!r}")
    img = cv2.resize(img, size)  # Resize so every image has the same shape
    return img.flatten()  # Convert the 2-D image to a 1-D array

# Turn every signature file into one flattened pixel vector
org_data = list(map(preprocess_image, org_files))
forg_data = list(map(preprocess_image, forg_files))

# One row per image, with a label column marking the class (1 = Genuine, 0 = Forged)
df_org = pd.DataFrame(org_data).assign(label=1)
df_forg = pd.DataFrame(forg_data).assign(label=0)

# Stack genuine rows on top of forged rows under a fresh 0..n-1 index
df = pd.concat((df_org, df_forg), ignore_index=True)
print("Dataset Shape:", df.shape)


Dataset Shape: (2640, 4097)


In [4]:
# Every column except 'label' is a feature; 'label' itself is the target
X = df.drop('label', axis=1)
y = df.loc[:, 'label']

# Hold out 20% of the rows for testing, with a fixed seed and the class
# proportions preserved in both parts (stratify)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

print("Training set size:", X_train.shape)
print("Testing set size:", X_test.shape)


Training set size: (2112, 4096)
Testing set size: (528, 4096)


In [5]:
# Fit a logistic-regression classifier on the training split
# (fit returns the estimator itself, so construction and fitting can be chained)
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Predict a label for every held-out image
y_pred = model.predict(X_test)

# Share of test images whose predicted label matches the true label
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Model Accuracy:", accuracy)


Model Accuracy: 0.7840909090909091


In [6]:
# Confusion matrix: rows are the true labels, columns the predicted labels
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:\n", conf_matrix)

# Per-class precision, recall and F1 score
print(
    "Classification Report:\n",
    classification_report(y_true=y_test, y_pred=y_pred),
)


Confusion Matrix:
 [[202  62]
 [ 52 212]]
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.77      0.78       264
           1       0.77      0.80      0.79       264

    accuracy                           0.78       528
   macro avg       0.78      0.78      0.78       528
weighted avg       0.78      0.78      0.78       528



---
## **Done by John Poly**
