## Build a fake or not table

In [None]:
from sign_utils import *
from pathlib import Path

def compare():
    dissim = []
    writers = pd.read_csv("writers.tsv", sep="\t", header=None)

    for writer in writers[0]:
        writer = f"{writer:03d}"
        print(writer)
        enrollment = pd.read_csv(f"enrollment/{writer}-g-01.tsv", sep ="\t", header=0)
        seq1 = enrollment.select_dtypes(include=["number"]).values
        for i in range(1,46):
            idx = f"{i:02d}"  # produces "01","02",...,"10",...,"45"
            verification = pd.read_csv(f"verification/{writer}-{idx}.tsv", sep ="\t", header=0)
            seq2 = verification.select_dtypes(include=["number"]).values
            score = DTW(seq1, seq2, 0.5)
            dissim.append({'writer': writer, 'idx': idx, 'dissim': score})
    return pd.DataFrame(dissim)


In [None]:
dissim = compare()

In [None]:
dissim.to_csv("Dissimilarities.csv")

## Classifying

In [None]:
import pandas as pd

def classify_sign(top_k):
    # Load the dissimilarity CSV
    df = pd.read_csv('dissim.csv')

    # Assuming columns: writer_id, signature_id, dissimilarity
    results = []

    for writer, group in df.groupby('writer'):

        writer_idx = f"{writer:03d}"
        # Sort by dissimilarity (ascending)
        sorted_group = group.sort_values('dissim')
        # Top k as genuine
        genuine = sorted_group.head(top_k)
        # Rest as forgery
        forgery = sorted_group.iloc[top_k:]
        # Append results
        results.extend([(writer_idx + "-" + str(int(row['idx'])), 'genuine') for _, row in genuine.iterrows()])
        results.extend([(writer_idx + "-" + str(int(row['idx'])), 'forgery') for _, row in forgery.iterrows()])

    # Save to CSV
    output_df = pd.DataFrame(results, columns=['signature_id', 'classification'])
    output_df.to_csv('classified_signatures.csv', index=False)

Evaluate

In [19]:
import pandas as pd
from sklearn.metrics import precision_score, recall_score, average_precision_score

classify_sign(15)

# Load predictions
pred_df = pd.read_csv('classified_signatures.csv')

# Load ground truth
gt_df = pd.read_csv('gt.tsv', sep='\t', names=['signature_id', 'ground_truth'])

# Merge on signature_id
merged = pd.merge(pred_df, gt_df, on='signature_id')

# Convert labels to binary (genuine=1, forgery=0)
y_true = merged['ground_truth'].map({'genuine': 1, 'forgery': 0})
y_pred = merged['classification'].map({'genuine': 1, 'forgery': 0})

# Calculate precision and recall
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
map_score = average_precision_score(y_true, y_pred)


print(f'Precision: {precision:.3f}')
print(f'Recall: {recall:.3f}')
print(f'Mean Average Precision (MAP): {map_score:.3f}')

Precision: 0.969
Recall: 0.716
Mean Average Precision (MAP): 0.821


In [20]:
merged

Unnamed: 0,signature_id,classification,ground_truth
0,001-43,genuine,genuine
1,001-26,genuine,genuine
2,001-13,genuine,genuine
3,001-11,genuine,genuine
4,001-29,genuine,genuine
...,...,...,...
1075,030-42,forgery,forgery
1076,030-11,forgery,forgery
1077,030-37,forgery,forgery
1078,030-21,forgery,forgery
