<a href="https://colab.research.google.com/github/PhilipGunjari12/Projects/blob/main/Fingerprint_Based_Cryptographic_Key_Generation_new.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTANT: SOME KAGGLE DATA SOURCES ARE PRIVATE
# RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES.
# NOTE(review): login() presumably asks for Kaggle credentials interactively;
# run it before any kagglehub download call.
import kagglehub
kagglehub.login()


In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

# NOTE(review): the path returned here is stored but the dataset cells read
# from hard-coded "/kaggle/input/..." folders instead - confirm those folders
# exist in the environment where this notebook runs.
gpkchvp_data_sets_path = kagglehub.dataset_download('gpkchvp/data-sets')

print('Data source import complete.')


**Install packages**

In [None]:
!pip install datasketch
!pip install reedsolo
!pip install cryptography

**Import libraries**

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import math
import reedsolo
import hashlib
from collections import defaultdict
import tensorflow as tf
from datasketch import MinHash
from cryptography.hazmat.primitives.asymmetric import ec
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.kdf.hkdf import HKDF
from cryptography.hazmat.primitives.hashes import SHA256
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.asymmetric import ec
from cryptography.hazmat.backends import default_backend
from tensorflow.keras.applications.mobilenet_v3 import preprocess_input
from tensorflow.keras.applications import MobileNetV3Small
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Input


**Contents Of DB1_A DATASET**

In [None]:
# Folders that hold the DB1_A and DB1_B fingerprint images (hard-coded
# absolute paths; both end with a trailing slash).
dataset_path_a = "/kaggle/input/data-sets/74034_3_En_4_MOESM1_ESM/FVC2004/Dbs/DB1_A/"
dataset_path_b = "/kaggle/input/data-sets/74034_3_En_4_MOESM1_ESM/FVC2004/Dbs/DB1_B/"
def extract_two_numbers(filename):
    """Return the two leading integers of a name shaped like ``"<a>_<b>.ext"``.

    The result is a tuple ``(a, b)`` and is used as a numeric sort key, so
    that e.g. ``"10_1.tif"`` sorts after ``"2_1.tif"``.
    """
    pieces = filename.split('_')
    first_number = int(pieces[0])
    # Drop everything from the first dot onwards (the file extension).
    second_number = int(pieces[1].partition('.')[0])
    return first_number, second_number

# Collect the .tif files of DB1_A and order them numerically by the two
# numbers embedded in each filename.
sorted_filenames_a = [
    name for name in os.listdir(dataset_path_a) if name.endswith('.tif')
]
sorted_filenames_a.sort(key=extract_two_numbers)

print("Contents of dataset folder:", sorted_filenames_a)

**Contents Of DB1_B DATASET**

In [None]:
# Collect the .tif files of DB1_B and order them numerically by the two
# numbers embedded in each filename.
sorted_filenames_b = [
    name for name in os.listdir(dataset_path_b) if name.endswith('.tif')
]
sorted_filenames_b.sort(key=extract_two_numbers)

print("Contents of dataset folder:", sorted_filenames_b)

**Load DB1_A & DB1_B Datasets**

In [None]:
def load_rgb_images_from_files(file_list, folder_path, img_size=(128, 128)):
    """Load the listed images as 3-channel arrays resized to ``img_size``.

    Each file is read in grayscale, resized, and converted to three channels
    with ``cv2.COLOR_GRAY2RGB``.

    Args:
        file_list: Filenames to load, relative to ``folder_path``.
        folder_path: Directory that contains the files.
        img_size: Size passed to ``cv2.resize``.

    Returns:
        ``np.ndarray`` stacking the loaded images in ``file_list`` order.
        Files that cannot be read are skipped, so the result may hold fewer
        entries than ``file_list``; a warning names the skipped files because
        callers that pair ``file_list`` with the result positionally would
        otherwise be silently misaligned.
    """
    import warnings

    images = []
    skipped = []
    for filename in file_list:
        img_gray = cv2.imread(os.path.join(folder_path, filename), cv2.IMREAD_GRAYSCALE)
        if img_gray is None:
            # The read failed; remember the name so the caller is told.
            skipped.append(filename)
            continue
        img_gray = cv2.resize(img_gray, img_size)
        images.append(cv2.cvtColor(img_gray, cv2.COLOR_GRAY2RGB))

    if skipped:
        warnings.warn(
            f"Skipped {len(skipped)} unreadable image(s) in {folder_path!r}: {skipped}; "
            "the returned array is shorter than file_list.",
            stacklevel=2,
        )
    return np.array(images)

# Load DB1_A and divide the pixel values by 255.
X = load_rgb_images_from_files(sorted_filenames_a, dataset_path_a) / 255.0
print(f"Loaded {len(X)} RGB images with shape {X.shape}")

# Same for DB1_B.
Y = load_rgb_images_from_files(sorted_filenames_b, dataset_path_b) / 255.0
print(f"Loaded {len(Y)} RGB images with shape {Y.shape}")


**Display DB1_A DATASET**

In [None]:

# Group the DB1_A images by the person ID that prefixes each filename.
images_by_person_a = defaultdict(list)
for filename, image in zip(sorted_filenames_a, X):
    images_by_person_a[filename.split('_')[0]].append(image)

# Plot all images of the first person only; raise the limit in the check
# below to display more people.
for shown, (person_id, person_images) in enumerate(images_by_person_a.items()):
    if shown >= 1:
        print("\nChange the number to display more persons...\n")
        break

    n_images = len(person_images)
    print(f"\nPerson ID: {person_id} | Total Images: {n_images}")

    plt.figure(figsize=(15, 2))
    for position, image in enumerate(person_images, start=1):
        plt.subplot(1, n_images, position)
        plt.imshow(image)
        plt.axis('off')
        plt.title(f"{position}")
    plt.suptitle(f" Fingerprints of Person {person_id}")
    plt.show()


**Display DB1_B DATASET**

In [None]:

# Group the DB1_B images by the person ID that prefixes each filename.
images_by_person_b = defaultdict(list)
for filename, image in zip(sorted_filenames_b, Y):
    images_by_person_b[filename.split('_')[0]].append(image)

# Plot all images of the first person only; raise the limit in the check
# below to display more people.
for shown, (person_id, person_images) in enumerate(images_by_person_b.items()):
    if shown >= 1:
        print("\nChange the number to display more persons...\n")
        break

    n_images = len(person_images)
    print(f"\nPerson ID: {person_id} | Total Images: {n_images}")

    plt.figure(figsize=(15, 2))
    for position, image in enumerate(person_images, start=1):
        plt.subplot(1, n_images, position)
        plt.imshow(image)
        plt.axis('off')
        plt.title(f"{position}")
    plt.suptitle(f" Fingerprints of Person {person_id}")
    plt.show()


**MobileNetV3 Model for Feature Extraction**

In [None]:

def build_mobilenetv3_feature_extractor(input_shape=(128, 128, 3)):
    """Build and compile a MobileNetV3Small network with a 64-unit ReLU head.

    Args:
        input_shape: Shape of one input image, forwarded to
            ``MobileNetV3Small``. Previously this argument was ignored and
            ``(128, 128, 3)`` was always used.

    Returns:
        A compiled Keras ``Model`` (Adam optimizer, MSE loss, MAE metric)
        whose output is the 64-unit dense layer placed after global average
        pooling of the backbone output.
    """
    # weights=None: the backbone starts from random initialization.
    # NOTE(review): Keras' MobileNetV3 reportedly embeds its own input
    # rescaling by default (include_preprocessing) - confirm that feeding
    # images already divided by 255 is intended.
    base_model = MobileNetV3Small(input_shape=input_shape, include_top=False, weights=None)
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(64, activation='relu')(x)
    model = Model(inputs=base_model.input, outputs=x)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model


# Module-level model instance used by the training and feature-extraction cells.
model = build_mobilenetv3_feature_extractor()
model.summary()


**Train DB1_A Dataset**

In [None]:
# One 64-value target row per DB1_A image, drawn from np.random.rand with no
# seed set in this cell, so the targets are unrelated to the image content and
# change on every run.
# NOTE(review): the trained weights (and anything derived from them) are
# presumably not reproducible across runs - confirm this is intended.
y = np.random.rand(len(X), 64)
model.fit(X, y, epochs=200, batch_size=16, verbose=1)

**Train DB1_B Dataset**

In [None]:
# One 64-value target row per DB1_B image, drawn from np.random.rand with no
# seed set in this cell. This call keeps fitting the same `model` object.
# NOTE(review): random targets carry no identity information - confirm that
# fitting to them is intended.
y2 = np.random.rand(len(Y), 64)
model.fit(Y, y2, epochs=200, batch_size=16, verbose=1)

**Feature vectors of DB1_A**

In [None]:
def extract_feature_vector(image_rgb):
    """Return the flattened output of the module-level ``model`` for one image.

    Args:
        image_rgb: A 3-channel image. Integer-typed images are divided by 255
            before prediction; floating-point images are passed through
            unchanged.

    Returns:
        1-D array holding the model output for the image.
    """
    img = cv2.resize(image_rgb, (128, 128))
    img = img.reshape(1, 128, 128, 3)
    # The images stored in this notebook were already divided by 255 when they
    # were loaded; dividing again would feed the model inputs 255x smaller
    # than the ones it was fitted on. Only rescale raw integer pixel data.
    if np.issubdtype(img.dtype, np.integer):
        img = img / 255.0
    return model.predict(img, verbose=0).flatten()

# Run every DB1_A image through the feature extractor and keep, per person,
# one array with a row per image.
person_features_a = defaultdict(list)

for person_id, person_images in images_by_person_a.items():
    person_features_a[person_id] = np.array(
        [extract_feature_vector(image) for image in person_images]
    )

# Print the vectors of the first person as a sample.
sample_id = list(person_features_a)[0]
sample_vectors = person_features_a[sample_id]
print(f"\nFeature Vectors for Person {sample_id} | Total: {len(sample_vectors)}")
for number, vec in enumerate(sample_vectors, start=1):
    print(f"  Feature Vector {number}: {vec}\n")


**Feature vectors of DB1_B**

In [None]:
def extract_feature_vector(image_rgb):
    """Return the flattened output of the module-level ``model`` for one image.

    Args:
        image_rgb: A 3-channel image. Integer-typed images are divided by 255
            before prediction; floating-point images are passed through
            unchanged.

    Returns:
        1-D array holding the model output for the image.
    """
    img = cv2.resize(image_rgb, (128, 128))
    img = img.reshape(1, 128, 128, 3)
    # The images stored in this notebook were already divided by 255 when they
    # were loaded; dividing again would feed the model inputs 255x smaller
    # than the ones it was fitted on. Only rescale raw integer pixel data.
    if np.issubdtype(img.dtype, np.integer):
        img = img / 255.0
    return model.predict(img, verbose=0).flatten()

# Run every DB1_B image through the feature extractor and keep, per person,
# one array with a row per image.
person_features_b = defaultdict(list)

for person_id, person_images in images_by_person_b.items():
    person_features_b[person_id] = np.array(
        [extract_feature_vector(image) for image in person_images]
    )

# Print the vectors of the first person as a sample.
sample_id = list(person_features_b)[0]
sample_vectors = person_features_b[sample_id]
print(f"\nFeature Vectors for Person {sample_id} | Total: {len(sample_vectors)}")
for number, vec in enumerate(sample_vectors, start=1):
    print(f"  Feature Vector {number}: {vec}\n")


**Feature Quantization and Secure Key Binding with Reed-Solomon Error Correction**

In [None]:
def quantize_features(fv, decimals=2):
    """Round every value of ``fv`` to ``decimals`` decimal places."""
    rounded = np.around(fv, decimals)
    return rounded

# Reed-Solomon codec that appends 32 error-correction bytes to each message.
rs = reedsolo.RSCodec(32)


def _features_to_bytes(feature_vector):
    """Quantize a feature vector and map it to at most 223 byte values."""
    quantized = quantize_features(feature_vector)
    # Scale by 100, clamp to the byte range, and keep at most 223 values.
    return bytearray(np.clip((quantized * 100).astype(int), 0, 255).tolist()[:223])


def enroll(feature_vector):
    """Bind a fresh random 16-byte key to ``feature_vector``.

    The key is Reed-Solomon encoded and XOR-ed byte by byte with the
    quantized features to form the helper data.

    Args:
        feature_vector: Numeric feature array for the enrolling sample.

    Returns:
        Tuple ``(helper, key_hash)``: the helper ``bytearray`` and the
        SHA-256 hex digest of the random key.

    Raises:
        ValueError: If the feature vector yields fewer bytes than the
            codeword; the XOR would otherwise silently drop codeword bytes
            and the key could never be recovered.
    """
    data_bytes = _features_to_bytes(feature_vector)
    key = os.urandom(16)
    codeword = rs.encode(key)
    if len(data_bytes) < len(codeword):
        raise ValueError(
            f"feature vector provides {len(data_bytes)} bytes but the "
            f"codeword needs {len(codeword)}"
        )
    helper = bytearray(a ^ b for a, b in zip(codeword, data_bytes))
    return helper, hashlib.sha256(key).hexdigest()


def authenticate(feature_vector, helper, stored_hash):
    """Check whether ``feature_vector`` unlocks the key bound in ``helper``.

    Args:
        feature_vector: Numeric feature array for the probing sample.
        helper: Helper data returned by ``enroll``.
        stored_hash: SHA-256 hex digest returned by ``enroll``.

    Returns:
        ``True`` when the recovered key hashes to ``stored_hash``; ``False``
        when it does not, when the feature vector is too short to cover the
        helper, or when Reed-Solomon decoding fails.
    """
    data_bytes = _features_to_bytes(feature_vector)
    if len(data_bytes) < len(helper):
        # zip() would truncate the codeword; such a probe cannot match.
        return False
    codeword = bytearray(a ^ b for a, b in zip(helper, data_bytes))
    try:
        # decode() is indexed with [0] to take the message part of its result.
        recovered_key = rs.decode(codeword)[0]
    except reedsolo.ReedSolomonError:
        # Too many differing bytes for the code to correct.
        return False
    return hashlib.sha256(recovered_key).hexdigest() == stored_hash


**Key Generation Function**

In [None]:
def generate_stable_key(feature_vector, num_hashes=149):
    """Derive a SHA-256 hex key from a feature vector via MinHash.

    The vector is min-max normalized, each value is formatted with six
    decimals and fed to a ``MinHash`` with ``num_hashes`` permutations, the
    MinHash digest is Reed-Solomon encoded (10 error-correction symbols), and
    the encoded bytes are hashed with SHA-256.

    Returns:
        Tuple ``(stable_key, encoded_digest)``: the hex digest and the
        Reed-Solomon encoded MinHash digest it was computed from.

    NOTE(review): SHA-256 is applied after the Reed-Solomon encoding, so any
    change in the MinHash digest presumably yields an unrelated key - confirm
    the encoding step serves its intended purpose.
    """
    values = np.array(feature_vector)
    lowest = values.min()
    # The small epsilon keeps the division defined for constant vectors.
    values = (values - lowest) / (values.max() - lowest + 1e-8)

    sketch = MinHash(num_perm=num_hashes)
    for value in values:
        sketch.update(format(value, ".6f").encode("utf-8"))

    codec = reedsolo.RSCodec(10)
    encoded_digest = codec.encode(sketch.digest())

    return hashlib.sha256(encoded_digest).hexdigest(), encoded_digest


**Generate Stable Keys for DB1_A**

In [None]:

# Derive one key per DB1_A feature vector, grouped by person.
stable_keys_per_person_a = defaultdict(list)

for person_id, vectors in person_features_a.items():
    for vector in vectors:
        # Only the hex key is kept; the encoded digest is discarded.
        stable_keys_per_person_a[person_id].append(generate_stable_key(vector)[0])

print(f" Generated stable keys for {len(stable_keys_per_person_a)} persons.\n")

# Show the keys of the first person as a sample.
sample_id = list(stable_keys_per_person_a)[0]
sample_keys = stable_keys_per_person_a[sample_id]
print(f" Person ID: {sample_id} | Total Keys: {len(sample_keys)}")
for number, key in enumerate(sample_keys, start=1):
    print(f"  Key {number}: {key}")


**Generate Stable Keys for DB1_B**

In [None]:
# Derive one key per DB1_B feature vector, grouped by person.
stable_keys_per_person_b = defaultdict(list)

for person_id, vectors in person_features_b.items():
    for vector in vectors:
        # Only the hex key is kept; the encoded digest is discarded.
        stable_keys_per_person_b[person_id].append(generate_stable_key(vector)[0])

print(f" Generated stable keys for {len(stable_keys_per_person_b)} persons.\n")

# Show the keys of the first person as a sample.
sample_id = list(stable_keys_per_person_b)[0]
sample_keys = stable_keys_per_person_b[sample_id]
print(f" Person ID: {sample_id} | Total Keys: {len(sample_keys)}")
for number, key in enumerate(sample_keys, start=1):
    print(f"  Key {number}: {key}")


**Hamming Distance for Genuine Fingerprint Pairs**

In [None]:

from itertools import combinations

def hamming_distance(hex1, hex2):
    """Return the number of differing bits between two hexadecimal strings.

    Both strings are parsed as integers, so leading zeros do not matter and
    the inputs may have different lengths. The count is the number of set
    bits in their XOR, which stays correct for values wider than 256 bits
    (a character-by-character comparison of padded strings would mis-align
    such inputs).

    Raises:
        ValueError: If either argument is not valid hexadecimal.
    """
    return bin(int(hex1, 16) ^ int(hex2, 16)).count("1")

print("\n Genuine Hamming Distances (same person):\n")

# For every DB1_A person, compare each unordered pair of that person's keys.
for person_id, keys in stable_keys_per_person_a.items():
    print(f" Person {person_id}:")

    numbered_keys = enumerate(keys, start=1)
    for (first_no, first_key), (second_no, second_key) in combinations(numbered_keys, 2):
        hd = hamming_distance(first_key, second_key)
        print(f"   Key {first_no} vs Key {second_no} → Hamming Distance: {hd}")



**Impostor Hamming Distances (first key of different persons)**

In [None]:
print("\n Impostor Hamming Distances (first key of different persons):\n")

# First key of every DB1_B person that has at least one key.
first_keys = {
    person: person_keys[0]
    for person, person_keys in stable_keys_per_person_b.items()
    if person_keys
}

# Compare each unordered pair of different persons.
for left, right in combinations(first_keys.items(), 2):
    left_id, left_key = left
    right_id, right_key = right
    print(f" Person {left_id} vs Person {right_id} → Hamming Distance: {hamming_distance(left_key, right_key)}")


**Mapping Of DB1_A & DB1_B IDs**

In [None]:
# Re-key DB1_A persons whose numeric ID is 1..10 as "101".."110"; persons
# outside that range are left out.
standardized_keys_a = {
    str(100 + int(original_id)): person_keys
    for original_id, person_keys in stable_keys_per_person_a.items()
    if 1 <= int(original_id) <= 10
}


In [None]:
print("\n Remapped person IDs in DB1_A:", list(standardized_keys_a))
print(" Person IDs in DB1_B:", list(stable_keys_per_person_b))

# IDs present on both sides, in sorted order, with their key counts.
common_ids = sorted(standardized_keys_a.keys() & stable_keys_per_person_b.keys())
print(f" Common IDs: {common_ids}\n")

for pid in common_ids:
    count_a = len(standardized_keys_a[pid])
    count_b = len(stable_keys_per_person_b[pid])
    print(f" Person {pid}: A = {count_a} keys, B = {count_b} keys")


**EER Computation**

In [None]:
def compute_eer_and_print(genuine_distances, impostor_distances, key_length):
    """Print a FAR/FRR table over all thresholds and the closest-to-equal point.

    Every integer threshold between the smallest and the largest observed
    distance is tried. FAR is the share of impostor distances at or below the
    threshold; FRR is the share of genuine distances above it. The first
    threshold with the smallest ``|FAR - FRR|`` is reported, and the EER is
    printed as the mean of FAR and FRR at that threshold.

    Args:
        genuine_distances: List of integer distances between matching pairs.
        impostor_distances: List of integer distances between non-matching pairs.
        key_length: Value shown in the printed headings only.

    Returns:
        None. If either list is empty, a notice is printed and nothing else.
    """
    if not (genuine_distances and impostor_distances):
        print(f" Couldn't compute EER for key length {key_length} (empty distance list)")
        return

    all_distances = genuine_distances + impostor_distances
    lowest, highest = min(all_distances), max(all_distances)
    n_genuine = len(genuine_distances)
    n_impostor = len(impostor_distances)

    best_rates = None
    best_gap = None
    best_threshold = None

    print(f"\n EER Calculation for Key Length = {key_length} bits:\n")
    print("Threshold |   FAR (%) |   FRR (%) | |FAR - FRR|")

    for threshold in range(lowest, highest + 1):
        far = sum(1 for d in impostor_distances if d <= threshold) / n_impostor
        frr = sum(1 for d in genuine_distances if d > threshold) / n_genuine
        gap = abs(far - frr)

        print(f"{threshold:9} | {far*100:9.2f} | {frr*100:9.2f} | {gap:.4f}")

        # Strict comparison: on ties the earliest threshold is kept.
        if best_gap is None or gap < best_gap:
            best_rates, best_gap, best_threshold = (far, frr), gap, threshold

    best_far, best_frr = best_rates
    print(f"\n Equal Error Rate (EER) for Key Length {key_length}:")
    print(f"   → Threshold = {best_threshold}")
    print(f"   → FAR = {best_far*100:.2f}%, FRR = {best_frr*100:.2f}%")
    print(f"   → EER = {(best_far*100 + best_frr*100)/2:.2f}%")

# Genuine pairs: every DB1_A key against every DB1_B key of the same person.
genuine_distances = [
    hamming_distance(k1, k2)
    for pid in common_ids
    for k1 in standardized_keys_a[pid]
    for k2 in stable_keys_per_person_b[pid]
]

# Impostor pairs: first DB1_A key of one person against the first DB1_B key
# of a different person, once per unordered pair of IDs.
impostor_distances = [
    hamming_distance(standardized_keys_a[pid1][0], stable_keys_per_person_b[pid2][0])
    for pid1, pid2 in combinations(common_ids, 2)
]

# The keys are SHA-256 hex digests and the Hamming distance is taken over all
# of their bits, so the reported key length is 256 (it was labelled 128).
current_key_length = hashlib.sha256().digest_size * 8
compute_eer_and_print(genuine_distances, impostor_distances, current_key_length)


**Entropy Computation**

In [None]:

def shannon_entropy(p):
    """Return the binary Shannon entropy, in bits, of probability ``p``.

    The degenerate probabilities 0 and 1 yield 0.0 (the logarithm is not
    evaluated for them).
    """
    if p in (0, 1):
        return 0.0
    q = 1 - p
    return -p * math.log2(p) - q * math.log2(q)

def compute_entropy_from_keys(hex_keys):
    """Compute per-bit Shannon entropy across a collection of hex keys.

    Each key is expanded to a binary string left-padded to 256 characters.
    For every bit position, the fraction of keys with a '1' there is turned
    into a binary entropy value.

    Returns:
        Tuple ``(total, per_bit)``: the sum of the per-position entropies and
        the list of per-position entropies.
    """
    bit_strings = [bin(int(hex_key, 16))[2:].zfill(256) for hex_key in hex_keys]

    # zip(*...) transposes the keys: one tuple per bit position.
    per_bit = [
        shannon_entropy(column.count('1') / len(column))
        for column in zip(*bit_strings)
    ]

    return sum(per_bit), per_bit

# Flatten the per-person key lists into one list per dataset.
all_keys_a = []
for person_keys in stable_keys_per_person_a.values():
    all_keys_a.extend(person_keys)

all_keys_b = []
for person_keys in stable_keys_per_person_b.values():
    all_keys_b.extend(person_keys)

entropy_a, bitwise_a = compute_entropy_from_keys(all_keys_a)
entropy_b, bitwise_b = compute_entropy_from_keys(all_keys_b)

# Totals first, then the mean entropy per bit position (256 positions).
print(f" DB1_A Total Entropy: {entropy_a:.4f} bits")
print(f" DB1_B Total Entropy: {entropy_b:.4f} bits")
print(f" Avg Bitwise Entropy A: {sum(bitwise_a)/256:.4f}")
print(f" Avg Bitwise Entropy B: {sum(bitwise_b)/256:.4f}")