## Build a fake or not table

In [23]:
from sign_utils import DTW
import pandas as pd
import numpy as np

def compare(win_size=0.5):
    dissim = []
    writers = pd.read_csv("./writers.tsv", sep="\t", header=None)

    features = ["x", "y", "pressure", "v_x", "v_y"]

    for writer in writers[0]:
        writer = f"{int(writer):03d}"
        print("writer", writer)

        # --- load ALL 5 genuine enrollment signatures as templates ---
        enroll_seqs = []
        for g in range(1, 6):
            enr = pd.read_csv(f"./enrollment/{writer}-g-{g:02d}.tsv", sep="\t", header=0)
            enr = enr[features].apply(pd.to_numeric, errors="coerce").fillna(0.0)

            # Normalize with z-score
            enr_norm = (enr - enr.mean()) / (enr.std() + 1e-8)
            enroll_seqs.append(enr_norm.values)

        # --- verification: compute min DTW distance to any enrollment template ---
        for i in range(1, 46):
            idx = f"{i:02d}"
            ver = pd.read_csv(f"./verification/{writer}-{idx}.tsv", sep="\t", header=0)
            ver = ver[features].apply(pd.to_numeric, errors="coerce").fillna(0.0)

            # Normalize with z-score
            ver_norm = (ver - ver.mean()) / (ver.std() + 1e-8)
            seq2 = ver_norm.values

            # Use mean of all DTW distances
            score = np.mean([DTW(seq1, seq2, win_size) for seq1 in enroll_seqs])

            dissim.append({
                "writer": writer,
                "idx": idx,
                "signature_id": f"{writer}-{idx}",
                "dissim": float(score),
            })

    return pd.DataFrame(dissim)

In [24]:
dissim = compare(win_size=0.15)
dissim.to_csv("dissim.csv", index=False)
dissim.head()

writer 001
writer 002
writer 003
writer 004
writer 005
writer 006
writer 007
writer 008
writer 009
writer 010
writer 011
writer 012
writer 013
writer 014
writer 015
writer 016
writer 017
writer 018
writer 019
writer 020
writer 021
writer 022
writer 023
writer 024
writer 025
writer 026
writer 027
writer 028
writer 029
writer 030


Unnamed: 0,writer,idx,signature_id,dissim
0,1,1,001-01,71.834969
1,1,2,001-02,68.827057
2,1,3,001-03,220.977055
3,1,4,001-04,367.496447
4,1,5,001-05,72.564685


In [25]:
dissim.to_csv("dissim.csv")

In [26]:
from sklearn.metrics import average_precision_score
import pandas as pd

gt_df = pd.read_csv("gt.tsv", sep="\t", header=None, names=["signature_id", "ground_truth"])
df = pd.merge(dissim, gt_df, on="signature_id", how="inner")

y_true = df["ground_truth"].map({"genuine": 1, "forgery": 0}).values
y_score = (-df["dissim"]).values  # IMPORTANT: higher score => more genuine

ap = average_precision_score(y_true, y_score)
print("Average Precision (ranking on -dissim):", ap)


Average Precision (ranking on -dissim): 0.7155351922588026


## Classifying

In [27]:
import pandas as pd

def classify_sign(top_k):
    # Load the dissimilarity CSV
    df = pd.read_csv('dissim.csv')

    # Assuming columns: writer_id, signature_id, dissimilarity
    results = []

    for writer, group in df.groupby('writer'):

        writer_idx = f"{writer:03d}"
        # Sort by dissimilarity (ascending)
        sorted_group = group.sort_values('dissim')
        # Top k as genuine
        genuine = sorted_group.head(top_k)
        # Rest as forgery
        forgery = sorted_group.iloc[top_k:]
        # Append results
        results.extend([(writer_idx + "-" + str(int(row['idx'])), 'genuine') for _, row in genuine.iterrows()])
        results.extend([(writer_idx + "-" + str(int(row['idx'])), 'forgery') for _, row in forgery.iterrows()])

    # Save to CSV
    output_df = pd.DataFrame(results, columns=['signature_id', 'classification'])
    output_df.to_csv('classified_signatures.csv', index=False)

Evaluate

In [28]:
import pandas as pd
from sklearn.metrics import precision_score, recall_score, average_precision_score

k_list = [10, 15, 18, 20, 25]

for k in k_list:

    classify_sign(k)

    # Load predictions
    pred_df = pd.read_csv('classified_signatures.csv')

    # Load ground truth
    gt_df = pd.read_csv('gt.tsv', sep='\t', names=['signature_id', 'ground_truth'])

    # Merge on signature_id
    merged = pd.merge(pred_df, gt_df, on='signature_id')

    # Convert labels to binary (genuine=1, forgery=0)
    y_true = merged['ground_truth'].map({'genuine': 1, 'forgery': 0})
    y_pred = merged['classification'].map({'genuine': 1, 'forgery': 0})

    # Calculate precision and recall
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    map_score = average_precision_score(y_true, y_pred)

    print(f"Top K = {k}")
    print(f'Precision: {precision:.3f}')
    print(f'Recall: {recall:.3f}')
    print(f'Mean Average Precision (MAP): {map_score:.3f}')

Top K = 10
Precision: 0.996
Recall: 0.499
Mean Average Precision (MAP): 0.721
Top K = 15
Precision: 0.992
Recall: 0.743
Mean Average Precision (MAP): 0.852
Top K = 18
Precision: 0.984
Recall: 0.878
Mean Average Precision (MAP): 0.918
Top K = 20
Precision: 0.973
Recall: 0.965
Mean Average Precision (MAP): 0.954
Top K = 25
Precision: 0.779
Recall: 0.975
Mean Average Precision (MAP): 0.770
