In [None]:
import os
import csv
import numpy as np
from PIL import Image
import imagehash
import pandas as pd
from hexhamming import hamming_distance_string

In [None]:
# Perceptual hash algorithms evaluated by the pipeline; each name is used
# both to select the hashing routine and to build the CSV file names.
hash_types = ["average_hash", "phash", "dhash", "whash"]

# Folder that holds the per-image hash CSVs, and the folders that receive
# the derived Hamming-distance and probability tables.
hamming_folder = "Hashes"
output_hamming_folder = "HammingDistances"
prob_folder = "Probabilities"

# (image directory, output CSV path prefix) pairs; the hash type and ".csv"
# are appended to the prefix when the hashes are written.
input_folders = [
    ("Images/Genuine_Minutiae/Minutiae", "Hashes/Genuine"),
    ("Images/Impostor_Minutiae/Minutiae", "Hashes/Impostor"),
]

# Expose Image.ANTIALIAS as an alias of the LANCZOS filter when the installed
# Pillow does not define it.
# NOTE(review): presumably needed by a dependency that still references the
# old attribute name -- confirm.
if not hasattr(Image, "ANTIALIAS"):
    Image.ANTIALIAS = Image.Resampling.LANCZOS

def hashCalculation(hashTechnique, input_filepaths, output_loc):
    """Hash every image with one perceptual hash and write the results to CSV.

    Args:
        hashTechnique: Name of the imagehash function to apply; one of
            "average_hash", "phash", "dhash" or "whash".
        input_filepaths: Iterable of image file paths to hash.
        output_loc: Path of the CSV file to create (overwritten if present).

    The CSV has the header ``F_id,Hash``. ``F_id`` is ``"f_"`` followed by
    the file name without its extension, and ``Hash`` is the string form of
    the computed hash.

    When ``hashTechnique`` is not supported, a message is printed for each
    input file, no image is opened, and the CSV contains only the header.
    """
    supported = ("average_hash", "phash", "dhash", "whash")
    # Resolve the hashing function once rather than re-dispatching per file.
    # The lookup is skipped for unsupported names so nothing is called.
    hash_func = getattr(imagehash, hashTechnique) if hashTechnique in supported else None

    rows = []
    for filepath in input_filepaths:
        if hash_func is None:
            print("Please input a correct hash technique listed")
            continue
        stem = os.path.splitext(os.path.basename(filepath))[0]
        # The context manager releases the image's file handle as soon as the
        # hash is computed instead of leaving it to garbage collection.
        with Image.open(filepath) as img:
            digest = hash_func(img)
        rows.append(["f_" + stem, str(digest)])

    with open(output_loc, 'w', newline="") as f:
        writer = csv.writer(f)
        writer.writerow(['F_id', 'Hash'])
        writer.writerows(rows)

def calculateHammingDistance(hash1, hash2):
    """Return the Hamming distance between two hash strings.

    Thin wrapper that forwards both arguments unchanged to
    ``hexhamming.hamming_distance_string`` and returns its result.
    """
    distance = hamming_distance_string(hash1, hash2)
    return distance

def tableforGenuine(hamming_distances, threshold):
    """Return the fraction of distances that are at or below ``threshold``.

    Args:
        hamming_distances: Sized collection of numeric distances (list,
            tuple, NumPy array, pandas Series, ...).
        threshold: Inclusive upper bound a distance must satisfy to count.

    Returns:
        ``matches / total`` as a float, or ``0`` when the collection is empty.
    """
    total = len(hamming_distances)
    # Use len() rather than truthiness: truth-testing a multi-element NumPy
    # array or pandas Series raises ValueError.
    if total == 0:
        return 0
    matches = sum(1 for dist in hamming_distances if dist <= threshold)
    return matches / total

def tableforImpostor(hamming_distances, threshold):
    """Return the fraction of distances that are at or above ``threshold``.

    Args:
        hamming_distances: Sized collection of numeric distances (list,
            tuple, NumPy array, pandas Series, ...).
        threshold: Inclusive lower bound a distance must satisfy to count.

    Returns:
        ``matches / total`` as a float, or ``0`` when the collection is empty.
    """
    total = len(hamming_distances)
    # Use len() rather than truthiness: truth-testing a multi-element NumPy
    # array or pandas Series raises ValueError.
    if total == 0:
        return 0
    matches = sum(1 for dist in hamming_distances if dist >= threshold)
    return matches / total

# Stage 1: make sure the output folders exist, then write one hash CSV per
# (input folder, hash type) combination.
os.makedirs(output_hamming_folder, exist_ok=True)
os.makedirs(prob_folder, exist_ok=True)

for input_dir, output_prefix in input_folders:
    if not os.path.exists(input_dir):
        # Create the expected folder so the user only has to drop images in.
        os.makedirs(input_dir)
        print(f"Created directory: {input_dir}. Please add .jpg images to this directory.")
        continue

    # Sort the paths: os.listdir returns entries in arbitrary order, and the
    # Hamming-distance stage pairs Genuine and Impostor rows by row index, so
    # the CSV row order must be deterministic.
    input_filepaths = sorted(
        os.path.join(input_dir, f)
        for f in os.listdir(input_dir)
        if f.lower().endswith(".jpg")
    )

    if not input_filepaths:
        print(f"No .jpg images found in the input directory {input_dir}.")
        continue

    # os.makedirs("") raises, so only create a parent folder when the prefix
    # actually has one.
    output_dir = os.path.dirname(output_prefix)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for hashTechnique in hash_types:
        output_csv = f"{output_prefix}_{hashTechnique}.csv"
        hashCalculation(hashTechnique, input_filepaths, output_csv)

# Stage 2: for every hash type, derive two Hamming-distance tables from the
# hash CSVs and write them to ``output_hamming_folder``.
for hash_type in hash_types:
    genuine_path = f"{hamming_folder}/Genuine_{hash_type}.csv"
    impostor_path = f"{hamming_folder}/Impostor_{hash_type}.csv"
    output_genuine_hd = f"{output_hamming_folder}/Genuine_{hash_type}.csv"
    output_impostor_hd = f"{output_hamming_folder}/Impostor_{hash_type}.csv"

    # The hashing stage skips folders that are missing or empty, so the hash
    # CSVs may not exist; skip this hash type instead of crashing.
    missing_inputs = [path for path in (genuine_path, impostor_path) if not os.path.isfile(path)]
    if missing_inputs:
        print(f"Skipping Hamming distances for {hash_type}: missing {', '.join(missing_inputs)}")
        continue

    # Read the hashes as text: a column whose hex digests happen to contain
    # only decimal digits would otherwise be inferred as integers, losing
    # leading zeros and breaking the string-based distance function.
    genuine_df = pd.read_csv(genuine_path, dtype={"Hash": str})
    impostor_df = pd.read_csv(impostor_path, dtype={"Hash": str})

    # Plain lists avoid a pandas .loc lookup per cell inside the loops below.
    genuine_ids = genuine_df["F_id"].tolist()
    genuine_hashes = genuine_df["Hash"].tolist()
    impostor_hashes = impostor_df["Hash"].tolist()

    # "Genuine" table: row i of the Genuine CSV against row i of the Impostor
    # CSV; zip stops at the shorter of the two files.
    # NOTE(review): pairing is purely positional -- confirm that matching
    # rows of the two CSVs are meant to be compared.
    genuine_hd_results = [
        [f_id, calculateHammingDistance(genuine_hash, impostor_hash)]
        for f_id, genuine_hash, impostor_hash in zip(genuine_ids, genuine_hashes, impostor_hashes)
    ]
    with open(output_genuine_hd, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['F_id', 'Hamming Distance'])
        writer.writerows(genuine_hd_results)

    # "Impostor" table: every unordered pair of distinct rows from the
    # Genuine CSV.
    genuine_records = list(zip(genuine_ids, genuine_hashes))
    impostor_hd_results = []
    for i, (first_id, first_hash) in enumerate(genuine_records):
        for second_id, second_hash in genuine_records[i + 1:]:
            hd = calculateHammingDistance(first_hash, second_hash)
            impostor_hd_results.append([first_id, second_id, hd])

    with open(output_impostor_hd, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['F_id1', 'F_id2', 'Hamming Distance'])
        writer.writerows(impostor_hd_results)

# Stage 3: for every hash type, turn the Hamming-distance tables into
# per-threshold frequency tables and write them to ``prob_folder``.
thresholdList = [3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36]

for hash_type in hash_types:
    genuine_hd_path = f"{output_hamming_folder}/Genuine_{hash_type}.csv"
    impostor_hd_path = f"{output_hamming_folder}/Impostor_{hash_type}.csv"
    genuine_prob_path = f"{prob_folder}/Genuine_{hash_type}.csv"
    impostor_prob_path = f"{prob_folder}/Impostor_{hash_type}.csv"

    # Earlier stages may have produced nothing for this hash type (e.g. the
    # image folders were empty); skip it instead of crashing on a missing file.
    missing_inputs = [path for path in (genuine_hd_path, impostor_hd_path) if not os.path.isfile(path)]
    if missing_inputs:
        print(f"Skipping probabilities for {hash_type}: missing {', '.join(missing_inputs)}")
        continue

    genuine_hd_df = pd.read_csv(genuine_hd_path)
    impostor_hd_df = pd.read_csv(impostor_hd_path)

    genuine_distances = genuine_hd_df["Hamming Distance"].tolist()
    impostor_distances = impostor_hd_df["Hamming Distance"].tolist()

    # threshold -> fraction of distances <= threshold (genuine) or
    # >= threshold (impostor), rounded to four decimal places.
    genuine_probs = {thr: round(tableforGenuine(genuine_distances, thr), 4) for thr in thresholdList}
    impostor_probs = {thr: round(tableforImpostor(impostor_distances, thr), 4) for thr in thresholdList}

    with open(genuine_prob_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Threshold', 'Frequency of Fingerprints'])
        writer.writerows(genuine_probs.items())

    with open(impostor_prob_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Threshold', 'Frequency of Fingerprints'])
        writer.writerows(impostor_probs.items())

print("\n*** Processed Completed ***")