In [None]:
import os
import glob
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d

# Relative directory scanned for enrollment files ("<writer_id>-g-*.tsv");
# it is also the directory from which the list of writer IDs is derived.
enrollment_folder = "enrollment"
# Relative directory scanned for the files to be scored ("<writer_id>-*.tsv").
verification_folder = "verification"

def list_signature_files(folder, pattern):
    """Return the paths inside ``folder`` matching the glob ``pattern``, in sorted order."""
    matches = glob.glob(os.path.join(folder, pattern))
    matches.sort()
    return matches

def extract_xy_from_tsv(tsv_path):
    """Load a tab-separated file and return two of its columns as an (n_rows, 2) array.

    The file is parsed with pandas' default header handling, so its first
    line is consumed as column names. The columns at positions 1 and 2
    (0-based) are used as x and y respectively, in that order.
    """
    table = pd.read_csv(tsv_path, sep="\t")
    # Positional selection: the column names themselves are never inspected.
    columns = [table.iloc[:, idx].values for idx in (1, 2)]
    return np.stack(columns, axis=1)

def resample_signature(sig_xy, num_points=128):
    """Linearly resample a 2-D trajectory to a fixed number of samples.

    The input samples are placed at evenly spaced positions on a normalised
    [0, 1] axis (i.e. by sample index, not by arc length or timestamp) and
    each coordinate is linearly interpolated at ``num_points`` evenly spaced
    positions on that same axis.

    Parameters
    ----------
    sig_xy : array-like, shape (n_samples, 2)
        Column 0 is x, column 1 is y.
    num_points : int, default 128
        Number of samples in the output.

    Returns
    -------
    numpy.ndarray, shape (num_points, 2), dtype float

    Raises
    ------
    ValueError
        If ``sig_xy`` contains no samples.
    """
    sig_xy = np.asarray(sig_xy, dtype=float)
    n_samples = len(sig_xy)
    if n_samples == 0:
        raise ValueError("cannot resample an empty signature")
    if n_samples == 1:
        # A lone sample has no segment to interpolate along: its index axis
        # collapses to [0.] while the targets span [0, 1], which would be an
        # out-of-range lookup. Repeat the point instead.
        return np.repeat(sig_xy[:, :2], num_points, axis=0)

    t = np.linspace(0, 1, n_samples)
    t_resampled = np.linspace(0, 1, num_points)
    # np.interp performs the same piecewise-linear interpolation without
    # building an interpolator object per coordinate.
    x_interp = np.interp(t_resampled, t, sig_xy[:, 0])
    y_interp = np.interp(t_resampled, t, sig_xy[:, 1])
    return np.stack([x_interp, y_interp], axis=1)

def normalize_signature(sig_xy):
    """Standardise each column of a signature to zero mean and unit variance.

    Parameters
    ----------
    sig_xy : numpy.ndarray, shape (n_samples, n_columns)
        Statistics are computed per column (along axis 0).

    Returns
    -------
    numpy.ndarray
        ``(sig_xy - mean) / std`` per column. Columns that are constant are
        only centred (their divisor is set to 1), so they come out as zeros
        instead of dividing by zero.
    """
    mu = sig_xy.mean(axis=0)
    sigma = sig_xy.std(axis=0)
    # A constant column does not always yield std == 0 exactly: when the mean
    # is not exactly representable (e.g. a column of 0.1), rounding leaves a
    # std around 1e-17, and dividing by it would turn the column into +/-1
    # noise. Treat any spread at rounding-error scale relative to the mean as
    # "no spread". Exact zeros are still caught because 0 <= tolerance.
    tolerance = 16 * np.finfo(float).eps * np.abs(mu)
    sigma = np.where(sigma <= tolerance, 1.0, sigma)
    return (sig_xy - mu) / sigma

def load_signatures(folder, writer_id, is_enrollment=False):
    """Load, resample and normalise every signature file of one writer.

    Files are selected in ``folder`` with the glob ``"<writer_id>-g-*.tsv"``
    when ``is_enrollment`` is true and ``"<writer_id>-*.tsv"`` otherwise.

    Returns a dict mapping each file's base name (extension stripped) to its
    signature array after resampling to 128 points and normalisation.
    """
    if is_enrollment:
        pattern = f"{writer_id}-g-*.tsv"
    else:
        pattern = f"{writer_id}-*.tsv"

    def _prepare(path):
        # read -> fixed-length resample -> per-column normalisation
        raw_xy = extract_xy_from_tsv(path)
        return normalize_signature(resample_signature(raw_xy, num_points=128))

    return {
        os.path.splitext(os.path.basename(path))[0]: _prepare(path)
        for path in list_signature_files(folder, pattern)
    }

def get_all_writer_ids(enrollment_folder):
    """Return the sorted, de-duplicated writer IDs found in ``enrollment_folder``.

    A writer ID is the part of a ``*.tsv`` file name that precedes the first
    hyphen, which is the same convention ``load_signatures`` relies on when it
    builds its ``"<writer_id>-..."`` glob patterns. Taking the prefix up to the
    hyphen (rather than a fixed number of characters) keeps IDs of any length
    intact. Files whose name contains no hyphen cannot be matched by those
    patterns and are therefore ignored.
    """
    files = list_signature_files(enrollment_folder, "*.tsv")
    names = (os.path.basename(path) for path in files)
    return sorted({name.partition("-")[0] for name in names if "-" in name})

# Discover the writers from the enrollment folder, then load both signature
# sets for each of them, keyed by writer ID.
writer_ids = get_all_writer_ids(enrollment_folder)

enrollment_data, verification_data = {}, {}

for writer_id in writer_ids:
    enrollment_data[writer_id] = load_signatures(
        enrollment_folder, writer_id, is_enrollment=True
    )
    verification_data[writer_id] = load_signatures(
        verification_folder, writer_id, is_enrollment=False
    )


In [None]:


from dtaidistance import dtw

def dtw_distance_2d(sig1, sig2):
    """
    Axis-wise DTW dissimilarity between two 2D signatures.

    Column 0 (x) and column 1 (y) are aligned independently with
    ``dtw.distance`` and the two distances are added together.
    Each signature: shape (num_points, 2)
    """
    return sum(dtw.distance(sig1[:, axis], sig2[:, axis]) for axis in (0, 1))



# For every verification signature, keep the smallest axis-wise DTW distance
# to any of the same writer's enrollment signatures.
# Layout: {writer_id: {verification_sig_id: min_dtw, ...}}
dissimilarity_scores = {}

for writer_id in writer_ids:
    references = list(enrollment_data[writer_id].values())
    dissimilarity_scores[writer_id] = {
        verif_id: min([dtw_distance_2d(verif_sig, ref) for ref in references])
        for verif_id, verif_sig in verification_data[writer_id].items()
    }

# Sanity check: show the first writer's signatures ranked by ascending score
writer_example = writer_ids[0]
sorted_scores = sorted(
    dissimilarity_scores[writer_example].items(),
    key=lambda pair: pair[1],
)
print(f"Classic DTW (X+Y) scores for writer {writer_example}:")
for sig_id, score in sorted_scores:
    print(f"{sig_id}: {score:.4f}")



Classic DTW (X+Y) scores for writer 031:
031-24: 2.9892
031-23: 3.3009
031-05: 3.3500
031-41: 3.4155
031-18: 3.6298
031-11: 3.6468
031-34: 3.7409
031-36: 3.8756
031-15: 3.9114
031-28: 3.9117
031-31: 4.0415
031-02: 4.0790
031-14: 4.0875
031-33: 4.1292
031-25: 4.2284
031-26: 4.2382
031-06: 4.2643
031-35: 4.2716
031-22: 4.2965
031-39: 4.2999
031-10: 4.3858
031-07: 4.4266
031-01: 4.6216
031-45: 4.7091
031-32: 4.8317
031-04: 4.8434
031-12: 4.9149
031-30: 5.4484
031-40: 5.8002
031-17: 6.1365
031-43: 6.1452
031-38: 6.2048
031-21: 6.6401
031-09: 6.7501
031-03: 6.9539
031-37: 7.0601
031-20: 7.4213
031-19: 7.5768
031-13: 8.0754
031-16: 8.4684
031-27: 9.7753
031-44: 10.1644
031-42: 10.4354
031-29: 11.0916
031-08: 11.1593


In [None]:
output_file = "test.tsv"

# One tab-separated row per writer:
#   writer_id  sig1  score1  sig2  score2  ...
# with the (signature, score) pairs ordered by ascending score.
with open(output_file, "w") as f:
    for writer_id, scores in dissimilarity_scores.items():
        ranked = sorted(scores.items(), key=lambda item: item[1])
        fields = [writer_id]
        fields.extend(
            cell
            for sig_id, score in ranked
            for cell in (sig_id, f"{score:.6f}")  # scores written with 6 decimals
        )
        f.write("\t".join(fields) + "\n")

print(f"Saved results to {output_file}")


Saved results to test.tsv
