# 🧠 Signature Verification — Notebook #4 (XGBoost, MLP, Voting)
In this notebook, we experiment with three additional models:
- XGBoost Classifier
- MLP Classifier (Neural Network)
- Voting Classifier (Ensemble of the above)

We evaluate the models using accuracy, F1 score, Equal Error Rate (EER), and a confusion matrix.

In [None]:
!pip install xgboost

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_curve, ConfusionMatrixDisplay
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier

# Use seaborn's 'whitegrid' style for the plots drawn in this notebook.
sns.set_style('whitegrid')

In [None]:
DATA_DIR = '/content/drive/My Drive/ProjectLabDataset'
TRAIN_FILE = f'{DATA_DIR}/mcytTraining.txt'
TEST_FILE = f'{DATA_DIR}/mcytTesting.txt'

# Column names applied to both files; each file's own first row is skipped on read.
cols = ['ID', 'SigID', 'X', 'Y', 'P', 'al', 'az', 'signatureOrigin']
numeric_cols = ['X', 'Y', 'P', 'al', 'az']
label_map = {'Genuine': 1, 'Forged': 0}
# Identifiers, the raw label text and 'al' are excluded from the feature matrix.
drop_cols = ['ID', 'SigID', 'signatureOrigin', 'al']


def load_signature_frame(path):
    """Read one signature file and return a validated frame with an integer `label`.

    Parameters
    ----------
    path : str
        Location of a comma-separated file laid out as described by `cols`.

    Returns
    -------
    pandas.DataFrame
        All columns in `cols` plus `label` (1 = Genuine, 0 = Forged). Rows whose
        model features or label could not be parsed are removed.

    Raises
    ------
    ValueError
        If no usable row remains after validation.
    """
    frame = pd.read_csv(path, names=cols, skiprows=1)
    frame.columns = frame.columns.str.strip()
    for col in numeric_cols:
        # Unparseable entries become NaN here and are filtered out below.
        frame[col] = pd.to_numeric(frame[col], errors='coerce')

    # Strip stray whitespace so values such as ' Genuine' still map to a label;
    # anything not in label_map becomes NaN and is filtered out below.
    origin = frame['signatureOrigin'].astype(str).str.strip()
    frame['label'] = origin.map(label_map)

    # Only the columns that reach the models (features + label) must be complete.
    required = [c for c in frame.columns if c not in drop_cols]
    clean = frame.dropna(subset=required).reset_index(drop=True)

    n_dropped = len(frame) - len(clean)
    if n_dropped:
        print(f'⚠️ {path}: dropped {n_dropped} of {len(frame)} rows with '
              f'unparseable features or unknown labels')
    if clean.empty:
        raise ValueError(f'No usable rows in {path!r}: check the column layout and label values')

    return clean.assign(label=clean['label'].astype(int))


train_df = load_signature_frame(TRAIN_FILE)
test_df = load_signature_frame(TEST_FILE)

X_train = train_df.drop(columns=drop_cols + ['label'])
y_train = train_df['label']
X_test = test_df.drop(columns=drop_cols + ['label'])
y_test = test_df['label']

In [None]:
# Fit the standardisation on the training split only, then apply that same
# fitted transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# `use_label_encoder` is intentionally not passed: it is deprecated and has no
# effect in current XGBoost releases (they only warn that the parameter is unused).
# The seed is pinned to the same value as the MLP so re-runs are reproducible.
xgb = XGBClassifier(eval_metric='logloss', random_state=42)
xgb.fit(X_train_scaled, y_train)
y_pred_xgb = xgb.predict(X_test_scaled)
# Column 1 holds the probability of class 1 (Genuine).
y_prob_xgb = xgb.predict_proba(X_test_scaled)[:, 1]

acc = accuracy_score(y_test, y_pred_xgb)
f1 = f1_score(y_test, y_pred_xgb)
print(f'✅ XGBoost — Accuracy: {acc:.3f}, F1: {f1:.3f}')

In [None]:
# Feed-forward network with two hidden layers (64 and 32 units), trained on the
# standardised features; fit() hands back the fitted estimator.
mlp = MLPClassifier(
    hidden_layer_sizes=(64, 32),
    max_iter=500,
    random_state=42,
).fit(X_train_scaled, y_train)

# Column 1 of predict_proba is the probability of class 1.
y_prob_mlp = mlp.predict_proba(X_test_scaled)[:, 1]
y_pred_mlp = mlp.predict(X_test_scaled)

acc, f1 = accuracy_score(y_test, y_pred_mlp), f1_score(y_test, y_pred_mlp)
print(f'✅ MLPClassifier — Accuracy: {acc:.3f}, F1: {f1:.3f}')

In [None]:
# Soft-voting ensemble built from the XGBoost and MLP estimators defined above,
# trained on the standardised training features.
voting = VotingClassifier(
    estimators=[('xgb', xgb), ('mlp', mlp)],
    voting='soft',
).fit(X_train_scaled, y_train)

# Column 1 of predict_proba is the probability of class 1.
y_prob_vote = voting.predict_proba(X_test_scaled)[:, 1]
y_pred_vote = voting.predict(X_test_scaled)

acc, f1 = accuracy_score(y_test, y_pred_vote), f1_score(y_test, y_pred_vote)
print(f'✅ VotingClassifier — Accuracy: {acc:.3f}, F1: {f1:.3f}')

In [None]:
def compute_eer(y_true, y_score):
    """Estimate the Equal Error Rate (EER) and the threshold at which it occurs.

    The EER is the operating point where the false-positive rate (FPR) equals
    the false-negative rate (FNR = 1 - TPR). It is approximated here by the ROC
    vertex at which |FPR - FNR| is smallest; the reported EER is the mean of
    FPR and FNR at that vertex.

    Parameters
    ----------
    y_true : array-like
        Binary ground-truth labels.
    y_score : array-like
        Scores for the positive class (higher means more likely positive).

    Returns
    -------
    tuple
        `(eer, threshold)`: the estimated EER and the score threshold of the
        selected ROC vertex.

    Raises
    ------
    ValueError
        If the rates are undefined at every threshold, which happens when
        `y_true` does not contain both classes.
    """
    # Keep every ROC vertex: by default roc_curve drops intermediate thresholds,
    # which coarsens the grid this nearest-vertex search runs on.
    fpr, tpr, thresholds = roc_curve(y_true, y_score, drop_intermediate=False)
    fnr = 1 - tpr
    gap = np.abs(fpr - fnr)
    if np.all(np.isnan(gap)):
        raise ValueError('EER is undefined: y_true must contain both positive and negative samples')
    idx = np.nanargmin(gap)
    eer = (fpr[idx] + fnr[idx]) / 2
    return eer, thresholds[idx]

# (display name, hard predictions, class-1 probabilities) for each trained model,
# so EER and the confusion matrix are reported for all of them, not only the ensemble.
model_outputs = [
    ('XGBoost', y_pred_xgb, y_prob_xgb),
    ('MLPClassifier', y_pred_mlp, y_prob_mlp),
    ('VotingClassifier', y_pred_vote, y_prob_vote),
]

# The voting classifier is last, so `eer` and `threshold` end up holding its values.
for model_name, _, y_prob in model_outputs:
    eer, threshold = compute_eer(y_test, y_prob)
    print(f'🔍 {model_name} EER = {eer:.3f} at threshold = {threshold:.3f}')

fig, axes = plt.subplots(1, len(model_outputs), figsize=(5 * len(model_outputs), 4))
for ax, (model_name, y_pred, _) in zip(axes, model_outputs):
    # Reuse the stored predictions instead of running each model on the test set again.
    ConfusionMatrixDisplay.from_predictions(
        y_test,
        y_pred,
        labels=[0, 1],
        display_labels=['Forged', 'Genuine'],  # matches label_map: 0 = Forged, 1 = Genuine
        ax=ax,
        colorbar=False,
    )
    ax.set_title(f'Confusion Matrix — {model_name}')
    ax.grid(False)  # the 'whitegrid' style would otherwise draw lines across the cells
fig.tight_layout()
plt.show()