## Build a fake or not table

In [None]:
from sign_utils import *
from pathlib import Path

def compare():
    dissim = []
    writers = pd.read_csv("signatures/writers.tsv", sep="\t", header=None)

    for writer in writers[0]:
        writer = f"{writer:03d}"
        print(writer)
        features = ["x", "y", "pressure", "penup", "azimuth", "inclination", "v_x", "v_y"]
        enrollment = pd.read_csv(f"signatures/enrollment/{writer}-g-01.tsv", sep ="\t", header=0)
        # Source - https://stackoverflow.com/a
        # Posted by Cina, modified by community. See post 'Timeline' for change history
        # Retrieved 2025-12-08, License - CC BY-SA 4.0
        enrollment = enrollment[features]
        normalized_enrollment=(enrollment-enrollment.min())/(enrollment.max()-enrollment.min())

        seq1 = normalized_enrollment.select_dtypes(include=["number"]).values
        for i in range(1,46):
            idx = f"{i:02d}"  # produces "01","02",...,"10",...,"45"
            verification = pd.read_csv(f"signatures/verification/{writer}-{idx}.tsv", sep ="\t", header=0)
            verification = verification[features]
            normalized_verification=(verification-verification.min())/(verification.max()-verification.min())
            seq2 = normalized_verification.select_dtypes(include=["number"]).values
            score = DTW(seq1, seq2, 0.5)
            dissim.append({'writer': writer, 'idx': idx, 'dissim': score})
    return pd.DataFrame(dissim)


In [None]:
dissim = compare()

In [None]:
dissim.to_csv("dissim.csv")

## Classifying

In [2]:
import pandas as pd

def classify_sign(top_k):
    # Load the dissimilarity CSV
    df = pd.read_csv('dissim.csv')

    # Assuming columns: writer_id, signature_id, dissimilarity
    results = []

    for writer, group in df.groupby('writer'):

        writer_idx = f"{writer:03d}"
        # Sort by dissimilarity (ascending)
        sorted_group = group.sort_values('dissim')
        # Top k as genuine
        genuine = sorted_group.head(top_k)
        # Rest as forgery
        forgery = sorted_group.iloc[top_k:]
        # Append results
        results.extend([(writer_idx + "-" + str(int(row['idx'])), 'genuine') for _, row in genuine.iterrows()])
        results.extend([(writer_idx + "-" + str(int(row['idx'])), 'forgery') for _, row in forgery.iterrows()])

    # Save to CSV
    output_df = pd.DataFrame(results, columns=['signature_id', 'classification'])
    output_df.to_csv('classified_signatures.csv', index=False)

Evaluate

In [3]:
import pandas as pd
from sklearn.metrics import precision_score, recall_score, average_precision_score

k_list = [10, 15, 18, 20, 25]

for k in k_list:

    classify_sign(k)

    # Load predictions
    pred_df = pd.read_csv('classified_signatures.csv')

    # Load ground truth
    gt_df = pd.read_csv('gt.tsv', sep='\t', names=['signature_id', 'ground_truth'])

    # Merge on signature_id
    merged = pd.merge(pred_df, gt_df, on='signature_id')

    # Convert labels to binary (genuine=1, forgery=0)
    y_true = merged['ground_truth'].map({'genuine': 1, 'forgery': 0})
    y_pred = merged['classification'].map({'genuine': 1, 'forgery': 0})

    # Calculate precision and recall
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    map_score = average_precision_score(y_true, y_pred)

    print(f"Top K = {k}")
    print(f'Precision: {precision:.3f}')
    print(f'Recall: {recall:.3f}')
    print(f'Mean Average Precision (MAP): {map_score:.3f}')

Top K = 10
Precision: 0.987
Recall: 0.478
Mean Average Precision (MAP): 0.705
Top K = 15
Precision: 0.969
Recall: 0.716
Mean Average Precision (MAP): 0.821
Top K = 18
Precision: 0.944
Recall: 0.830
Mean Average Precision (MAP): 0.859
Top K = 20
Precision: 0.918
Recall: 0.905
Mean Average Precision (MAP): 0.873
Top K = 25
Precision: 0.787
Recall: 0.969
Mean Average Precision (MAP): 0.776
