## Imports


In [2]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem; the absolute paths used
# later in this notebook all start under this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [4]:
import os

# Working directory for the session. Adjust this path if your Drive is laid
# out differently (the project is expected under it — see the paths used below).
WORKING_DIR = '/content/drive/MyDrive'
os.chdir(WORKING_DIR)
print(os.getcwd())

/content/drive/MyDrive


In [11]:
import os, cv2, numpy as np, matplotlib.pyplot as plt, seaborn as sns, joblib
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, roc_auc_score, roc_curve, confusion_matrix

import sys, os

# Put the project's source folder on the import path so the helper modules
# below can be imported by bare name (presumably preprocess.py and
# features.py live in this folder — adjust if your layout differs).
SRC_DIR = os.path.abspath('/content/drive/MyDrive/Signature_Project/src')
sys.path.append(SRC_DIR)

from preprocess import preprocess_image
from features import extract_features

print("✅ Imports successful!")


✅ Imports successful!


## Paths

In [14]:
# Root of the signature dataset on the mounted Drive.
DATA_DIR = "/content/drive/MyDrive/Signature_Project/data/signatures"
# "full_org" holds the original (genuine) signatures and "full_forg" holds the
# forgeries. These two were previously swapped, which inverted every label
# (and therefore the meaning of FAR/FRR) downstream.
GENUINE_DIR = os.path.join(DATA_DIR, "full_org")
FORGED_DIR = os.path.join(DATA_DIR, "full_forg")

## Load images and labels

In [15]:
def load_dataset(genuine_dir=None, forged_dir=None):
    """Collect image file paths and their class labels.

    Parameters
    ----------
    genuine_dir : str, optional
        Folder of genuine signatures (label 1). Defaults to ``GENUINE_DIR``.
    forged_dir : str, optional
        Folder of forged signatures (label 0). Defaults to ``FORGED_DIR``.

    Returns
    -------
    X : list of str
        Paths of every file whose extension is .png/.jpg/.jpeg/.bmp
        (case-insensitive); genuine files first, then forged, each group in
        sorted filename order.
    y : numpy.ndarray
        Integer labels aligned with ``X`` (1 = genuine, 0 = forged).

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` if a folder is missing or unreadable.
    """
    if genuine_dir is None:
        genuine_dir = GENUINE_DIR
    if forged_dir is None:
        forged_dir = FORGED_DIR

    image_exts = ('.png', '.jpg', '.jpeg', '.bmp')
    X, y = [], []
    for path, label in [(genuine_dir, 1), (forged_dir, 0)]:
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and hence any seeded train/test split) reproducible.
        for f in sorted(os.listdir(path)):
            if f.lower().endswith(image_exts):
                X.append(os.path.join(path, f))
                y.append(label)
    # Explicit dtype so an empty dataset still yields an integer label array.
    return X, np.array(y, dtype=int)

# Gather every image path with its label, then report the class balance.
image_paths, labels = load_dataset()
n_genuine = sum(labels)  # labels are 1 for genuine, 0 for forged
n_forged = len(labels) - n_genuine
print(f"Loaded {len(image_paths)} images (genuine={n_genuine}, forged={n_forged})")

Loaded 2640 images (genuine=1320, forged=1320)


## Preprocess and extract features

In [None]:
# Run each image through the project's preprocessing step and then its
# feature extractor, keeping results in the same order as image_paths.
features = [extract_features(preprocess_image(path)) for path in image_paths]

# Stack the per-image features into one array; labels stay aligned by position.
X = np.array(features)
y = labels
print("Feature matrix shape:", X.shape)

## Train/test split

In [None]:
# Hold out 20% of the samples for testing. Stratifying on y keeps the
# class ratio the same in both splits; the fixed seed makes the split
# repeatable for a given sample order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

## Pipelines, training, evaluation, and model export


In [None]:
# STEP 5 — Define pipelines
# =============================================================
def make_pipeline(model):
    """Wrap ``model`` in a scale -> PCA -> classify pipeline.

    Parameters
    ----------
    model : estimator
        Classifier instance used as the final ``'clf'`` step. The instance
        itself is placed in the pipeline (it is not copied).

    Returns
    -------
    sklearn.pipeline.Pipeline
        Unfitted pipeline with steps ``'scaler'``, ``'pca'`` and ``'clf'``.
    """
    steps = [
        ('scaler', StandardScaler()),
        # A fractional n_components asks PCA to keep enough components to
        # explain that share (here 95%) of the variance.
        ('pca', PCA(n_components=0.95)),
        ('clf', model),
    ]
    return Pipeline(steps)

# Seed shared by every stochastic estimator below so that metrics — and the
# choice of "best" model — are repeatable from run to run. Same value as the
# seed used for the train/test split.
RANDOM_STATE = 42

# Candidate classifiers, keyed by the display name used in reports and plots.
# All use class_weight='balanced' to compensate for any class imbalance.
models = {
    "LogisticRegression": LogisticRegression(max_iter=2000, class_weight='balanced'),
    # probability=True enables predict_proba; its internal calibration
    # shuffles data, hence the seed.
    "SVM": SVC(kernel='rbf', probability=True, class_weight='balanced',
               random_state=RANDOM_STATE),
    "DecisionTree": DecisionTreeClassifier(max_depth=10, class_weight='balanced',
                                           random_state=RANDOM_STATE),
    # loss='log_loss' makes SGDClassifier a logistic model with predict_proba.
    "SGDClassifier": SGDClassifier(loss='log_loss', max_iter=1000, class_weight='balanced',
                                   random_state=RANDOM_STATE)
}

# =============================================================
# STEP 6 — Train and evaluate
# =============================================================
# Maps model name -> {"model": fitted pipeline, "AUC": float or None}.
results = {}
for name, model in models.items():
    print(f"\nTraining {name}...")
    pipe = make_pipeline(model)
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    # Column 1 of predict_proba is the score for class 1; models without
    # probability output get no AUC.
    y_prob = pipe.predict_proba(X_test)[:,1] if hasattr(pipe, "predict_proba") else None

    print(classification_report(y_test, y_pred))
    auc = roc_auc_score(y_test, y_prob) if y_prob is not None else None
    # Compare against None rather than truthiness: an AUC of exactly 0.0 is a
    # valid (if terrible) score and must still be reported.
    print(f"AUC: {auc:.4f}" if auc is not None else "No probability output for AUC.")

    cm = confusion_matrix(y_test, y_pred)
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
    plt.title(f"{name} Confusion Matrix")
    plt.xlabel("Predicted"); plt.ylabel("Actual")
    plt.show()

    results[name] = {"model": pipe, "AUC": auc}

# =============================================================
# STEP 7 — Compute FAR / FRR / EER (for best model)
# =============================================================
def _auc_or_zero(entry):
    """Ranking key for a results entry: its AUC, or 0 when missing/falsy."""
    return entry["AUC"] if entry["AUC"] else 0

# Pick the fitted pipeline with the highest test-set AUC.
best_model = max(results.values(), key=_auc_or_zero)["model"]

# Scores for class 1, swept over every threshold by roc_curve.
y_score = best_model.predict_proba(X_test)[:,1]
fpr, tpr, thr = roc_curve(y_test, y_score)

# With class 1 as the "accept" class: FAR is the false-positive rate and
# FRR is the miss rate (1 - true-positive rate).
FAR = fpr
FRR = 1 - tpr

# EER is taken at the threshold where FAR and FRR are closest, averaging the
# two rates there since they rarely coincide exactly on a finite sample.
rate_gap = np.abs(FAR - FRR)
eer_index = np.argmin(rate_gap)
EER = (FAR[eer_index] + FRR[eer_index]) / 2
print(f"Equal Error Rate (EER): {EER*100:.2f}%")

# ROC curve of the selected model.
plt.plot(FAR, 1-FRR, label='ROC')
plt.title('ROC Curve')
plt.xlabel('False Acceptance Rate')
plt.ylabel('True Acceptance Rate')
plt.legend()
plt.show()

# =============================================================
# STEP 8 — Save model
# =============================================================
# Save under the project folder using an absolute path. The working directory
# was set to /content/drive/MyDrive earlier, so the previous relative
# "../models" resolved to /content/drive/models — outside both the project
# and MyDrive.
MODEL_DIR = "/content/drive/MyDrive/Signature_Project/models"
MODEL_PATH = os.path.join(MODEL_DIR, "best_signature_model.joblib")
os.makedirs(MODEL_DIR, exist_ok=True)
joblib.dump(best_model, MODEL_PATH)
print(f"✅ Model saved at {MODEL_PATH}")
