In [1]:
pip install opencv-python numpy torch torchvision scikit-image pillow scikit-learn joblib

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import cv2
import numpy as np
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from skimage.feature import hog
from PIL import Image
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
import joblib  # For saving intermediate files

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Dataset Path
root_dir = r"D:\Signature verification\Signature verification\full"

file_paths = []
labels = []

# Read images and labels
for class_name in os.listdir(root_dir):
    class_path = os.path.join(root_dir, class_name)
    if os.path.isdir(class_path):
        for file in os.listdir(class_path):
            if file.lower().endswith(('.png', '.jpg', '.jpeg')):
                file_paths.append(os.path.join(class_path, file))
                labels.append(class_name)

print(f"Total images: {len(file_paths)}")

# Load VGG16 for feature extraction
vgg16 = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1).to(device)
vgg16.eval()

# Remove classifier layers
vgg16_extractor = nn.Sequential(*list(vgg16.children())[:-1]).to(device)

# Image transformation (Resized to 224x224 for VGG16)
transform = transforms.Compose([
    transforms.Resize((224, 224)),  
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Process images in batches (Reduce memory usage)
batch_size = 20  

hog_features_list = []
vgg16_features_list = []

for i in range(0, len(file_paths), batch_size):
    print(f"Processing batch {i//batch_size + 1}/{(len(file_paths)//batch_size) + 1}")

    batch_hog = []
    batch_vgg16 = []

    for image_path in file_paths[i:i + batch_size]:
        # Load Image
        image = cv2.imread(image_path)
        if image is None:
            print(f"Warning: Could not load {image_path}")
            continue

        # Convert to grayscale for HOG
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Extract HOG Features (Reduce size for efficiency)
        hog_features = hog(gray, pixels_per_cell=(8, 8), cells_per_block=(1, 1), feature_vector=True)
        batch_hog.append(hog_features.astype(np.float16))  # Use float16 to save memory

        # Convert image for VGG16
        image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        image_pil = transform(image_pil).unsqueeze(0).to(device)

        # Extract VGG16 features
        with torch.no_grad():
            vgg16_feature = vgg16_extractor(image_pil).cpu().numpy().flatten()

        batch_vgg16.append(vgg16_feature.astype(np.float16))  # Use float16

    hog_features_list.extend(batch_hog)
    vgg16_features_list.extend(batch_vgg16)

    # Save intermediate features to disk (Reduce RAM usage)
    joblib.dump(batch_hog, f"hog_batch_{i//batch_size}.pkl")
    joblib.dump(batch_vgg16, f"vgg16_batch_{i//batch_size}.pkl")

# Convert lists to NumPy arrays
print(f"Extracted {len(hog_features_list)} HOG and {len(vgg16_features_list)} VGG16 features")

# Find max HOG feature length
max_hog_length = max(len(f) for f in hog_features_list)

# Convert to NumPy arrays
hog_features_padded = np.zeros((len(hog_features_list), max_hog_length), dtype=np.float16)
for i in range(len(hog_features_list)):
    hog_features_padded[i, :len(hog_features_list[i])] = hog_features_list[i]

vgg16_features_array = np.array(vgg16_features_list, dtype=np.float16)

# Concatenate HOG + VGG16 features
features = np.hstack((hog_features_padded, vgg16_features_array))

# Apply PCA to reduce dimensionality
pca = PCA(n_components=500)  # Reduce to 500 components for efficiency
features = pca.fit_transform(features)

# Save reduced features to disk
np.save("hog_vgg16_features_reduced.npy", features)

print("Feature extraction complete! Saved as 'hog_vgg16_features_reduced.npy'")


Total images: 4289
Processing batch 1/215
Processing batch 2/215
Processing batch 3/215
Processing batch 4/215
Processing batch 5/215
Processing batch 6/215
Processing batch 7/215
Processing batch 8/215
Processing batch 9/215
Processing batch 10/215
Processing batch 11/215
Processing batch 12/215
Processing batch 13/215
Processing batch 14/215
Processing batch 15/215
Processing batch 16/215
Processing batch 17/215
Processing batch 18/215
Processing batch 19/215
Processing batch 20/215
Processing batch 21/215
Processing batch 22/215
Processing batch 23/215
Processing batch 24/215
Processing batch 25/215
Processing batch 26/215
Processing batch 27/215
Processing batch 28/215
Processing batch 29/215
Processing batch 30/215
Processing batch 31/215
Processing batch 32/215
Processing batch 33/215
Processing batch 34/215
Processing batch 35/215
Processing batch 36/215
Processing batch 37/215
Processing batch 38/215
Processing batch 39/215
Processing batch 40/215
Processing batch 41/215
Proces

In [3]:
# Encode labels
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels[:len(features)])

# Split data
X_train, X_test, y_train, y_test = train_test_split(features, encoded_labels, test_size=0.2, random_state=42)

# Train Random Forest Classifier
rf_classifier = RandomForestClassifier(n_estimators=1000, random_state=42)
rf_classifier.fit(X_train, y_train)

# Make Predictions
y_pred = rf_classifier.predict(X_test)

# Evaluate Model
accuracy = accuracy_score(y_test, y_pred)
print(f"Random Forest Accuracy: {accuracy:.4f}")


Random Forest Accuracy: 0.9161


In [4]:
print(f"VGG16 feature vector size: {vgg16_features_array.shape[1]}")


VGG16 feature vector size: 25088


In [5]:
print(f"Max HOG feature length: {max_hog_length}")


Max HOG feature length: 149283


In [6]:
import joblib

# Save PCA model
joblib.dump(pca, "pca_model.pkl")
print("PCA model saved as 'pca_model.pkl'")

# Save Random Forest model
joblib.dump(rf_classifier, "random_forest_model.pkl")
print("Random Forest model saved as 'random_forest_model.pkl'")


PCA model saved as 'pca_model.pkl'
Random Forest model saved as 'random_forest_model.pkl'


In [7]:
from sklearn.metrics import classification_report

# Predict labels using the trained Random Forest model
y_pred = rf_classifier.predict(X_test)

# Generate and print the classification report
report = classification_report(y_test, y_pred)
print("Classification Report:\n", report)


Classification Report:
               precision    recall  f1-score   support

           0       0.83      1.00      0.91         5
           1       1.00      1.00      1.00         1
           2       1.00      1.00      1.00         7
           3       1.00      1.00      1.00         4
           4       0.83      1.00      0.91         5
           5       1.00      0.50      0.67         4
           6       0.80      1.00      0.89         4
           7       1.00      0.50      0.67         2
           8       0.73      1.00      0.84         8
           9       1.00      1.00      1.00         3
          10       0.88      1.00      0.93         7
          11       0.00      0.00      0.00         1
          12       1.00      1.00      1.00         6
          13       1.00      0.67      0.80         3
          14       0.75      1.00      0.86         3
          15       1.00      0.67      0.80         3
          16       0.45      1.00      0.62         5
   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
