# In [None]:
import hashlib
import time
import pandas as pd
from collections import Counter
import math
import psutil

# Define hash functions
def sha256(data):
    """Return the raw SHA-256 digest (32 bytes) of the bytes-like *data*."""
    hasher = hashlib.sha256(data)
    return hasher.digest()

def sha512(data):
    """Return the raw SHA-512 digest (64 bytes) of the bytes-like *data*."""
    hasher = hashlib.sha512(data)
    return hasher.digest()

def sha3_256(data):
    """Return the raw SHA3-256 digest (32 bytes) of the bytes-like *data*."""
    hasher = hashlib.sha3_256(data)
    return hasher.digest()

def sha3_512(data):
    """Return the raw SHA3-512 digest (64 bytes) of the bytes-like *data*."""
    hasher = hashlib.sha3_512(data)
    return hasher.digest()

def md5(data):
    """Return the raw MD5 digest (16 bytes) of the bytes-like *data*."""
    hasher = hashlib.md5(data)
    return hasher.digest()

# Registry of the hash functions under evaluation, keyed by display name.
# Insertion order is the order in which the functions are benchmarked.
hash_functions = dict(
    (
        ('SHA-256', sha256),
        ('SHA-512', sha512),
        ('SHA-3-256', sha3_256),
        ('SHA-3-512', sha3_512),
        ('MD5', md5),
    )
)
# Load the dataset
# NOTE: this runs as soon as the cell/module executes. The path is an absolute,
# machine-specific Windows location, so it must be edited before running elsewhere.
file_path = r'C:\Users\REPA IT SOLUTION\Downloads\modified_votes_dataset.csv'  # Raw string so backslashes are not treated as escapes; adjust the path if needed
votes_df = pd.read_csv(file_path)  # DataFrame passed to run_multiple_evaluations at the bottom of the file

# Generate combined string for hashing
def generate_combined_string(votes_df):
    """Render the whole DataFrame as text and return it encoded as bytes.

    The text is whatever ``DataFrame.to_string()`` produces (header, index
    and all rows); it is encoded with ``str.encode()``'s default codec.
    """
    rendered_table = votes_df.to_string()
    return rendered_table.encode()

# Metrics calculation functions
def measure_speed(hash_func, data):
    """Return the elapsed time, in seconds, of a single ``hash_func(data)`` call.

    Args:
        hash_func: Callable taking the bytes to hash.
        data: Bytes-like payload passed to ``hash_func``.

    Returns:
        Elapsed seconds as a float (always >= 0).
    """
    # perf_counter() is monotonic and uses the highest-resolution clock
    # available; time.time() can jump (clock adjustments) and is too coarse
    # on some platforms to time a single hash call.
    started = time.perf_counter()
    hash_func(data)
    return time.perf_counter() - started

def test_collisions(hash_func, votes_df, num_samples=10000):
    """Count hash collisions among randomly sampled rows of *votes_df*.

    Each draw takes one random row (``votes_df.sample()``), renders it with
    ``to_string()`` and hashes the encoded text. Rows are drawn independently,
    so the same row can come up many times; hashing an identical input twice
    is NOT a collision and is ignored. A collision is counted once for every
    distinct input whose digest was already produced by a different input.

    Args:
        hash_func: Callable mapping bytes to a hashable digest.
        votes_df: DataFrame to sample rows from.
        num_samples: Number of random draws.

    Returns:
        Number of genuine collisions observed (int).
    """
    inputs_by_digest = {}
    collisions = 0
    for _ in range(num_samples):
        sample = votes_df.sample().to_string().encode()
        digest = hash_func(sample)
        known_inputs = inputs_by_digest.setdefault(digest, set())
        if sample in known_inputs:
            # Same row drawn again: identical input, not a collision.
            continue
        if known_inputs:
            # A different input already produced this digest.
            collisions += 1
        known_inputs.add(sample)
    return collisions


# The ``test_`` prefix matches pytest's discovery pattern; this is a metric
# helper, not a test case, so opt it out of collection.
test_collisions.__test__ = False

def per_image_resistance(hash_func, original, modified):
    """Report whether *original* and *modified* hash to different digests.

    Returns ``True`` when the two digests differ and ``False`` when they are
    equal. This only checks that the change in input changed the output; it
    does not attempt to invert the hash.
    """
    digest_before, digest_after = hash_func(original), hash_func(modified)
    return digest_before != digest_after

def second_per_image_resistance(hash_func, votes_df, num_hashes=1000):
    """Return the seconds spent hashing *num_hashes* randomly sampled rows.

    Each sample is one random row of *votes_df* rendered with ``to_string()``
    and encoded to bytes. Samples are prepared before the timer starts so the
    result reflects ``hash_func`` only; the pandas sampling and formatting
    cost would otherwise dominate and make all hash functions look alike.

    Args:
        hash_func: Callable taking the bytes to hash.
        votes_df: DataFrame to sample rows from.
        num_hashes: Number of rows to sample and hash.

    Returns:
        Total elapsed hashing time in seconds as a float (always >= 0).
    """
    samples = [votes_df.sample().to_string().encode() for _ in range(num_hashes)]

    # perf_counter() is monotonic and high-resolution, unlike time.time().
    started = time.perf_counter()
    for sample in samples:
        hash_func(sample)
    return time.perf_counter() - started

def calculate_entropy(hash_func, data):
    """Return the Shannon entropy, in bits, of the byte values in a digest.

    The digest of *data* is treated as a sequence of byte values and the
    entropy of their empirical frequency distribution is computed. The value
    is not normalised: for a digest of n bytes it is at most log2(n) (for
    n <= 256), e.g. 5 bits for a 32-byte digest.
    """
    digest = hash_func(data)
    digest_len = len(digest)
    occurrences_per_value = Counter(digest).values()
    return -sum(
        occurrences / digest_len * math.log2(occurrences / digest_len)
        for occurrences in occurrences_per_value
    )

def memory_test(hash_func, data):
    """Return the change in this process's RSS, in MiB, across one hash call.

    The resident set size is read immediately before and after
    ``hash_func(data)``; the difference (which may be zero or negative) is
    converted from bytes to MiB.
    """
    bytes_per_mib = 1024 * 1024
    current_process = psutil.Process()
    rss_before = current_process.memory_info().rss  # RSS before hashing
    hash_func(data)
    rss_after = current_process.memory_info().rss  # RSS after hashing
    return (rss_after - rss_before) / bytes_per_mib

def efficiency_metric(speed, collisions, entropy, memory_usage):
    """Combine the individual metrics into a single score (higher is better).

    The score is the reciprocal of a sum of penalties: elapsed time, number
    of collisions, entropy shortfall ``1 - entropy`` and memory growth.

    The entropy shortfall and the memory term are clamped at zero. Without
    the clamp an entropy above 1 (e.g. an un-normalised value in bits) or a
    negative memory delta turns into a negative penalty, which can push the
    denominator to zero or below and yield a huge or negative score. For
    ``entropy <= 1`` and ``memory_usage >= 0`` the result is unchanged.

    Args:
        speed: Elapsed hashing time in seconds.
        collisions: Number of collisions observed.
        entropy: Entropy of the digest.
        memory_usage: Memory growth attributed to hashing.

    Returns:
        The efficiency score as a float.
    """
    entropy_penalty = max(0.0, 1 - entropy)
    memory_penalty = max(0.0, memory_usage)
    # The small epsilon avoids division by zero when every penalty is 0.
    return 1 / (speed + collisions + entropy_penalty + memory_penalty + 1e-10)

# Main function to run all metrics for each hash function
def run_multiple_evaluations(votes_df, iterations=100):
    """Benchmark every registered hash function *iterations* times.

    For each iteration and each entry of ``hash_functions`` this collects the
    speed, collision, digest-difference, batch-timing, entropy, memory and
    efficiency metrics, prints the per-run record, and finally prints the
    full results table and writes it to 'hash_functions_results.csv' in the
    current working directory. Nothing is returned.

    Args:
        votes_df: DataFrame holding the dataset to hash.
        iterations: Number of times the whole benchmark is repeated.
    """
    # The whole table as bytes, plus a copy with a suffix appended so the two
    # payloads differ for the digest-difference check.
    full_payload = generate_combined_string(votes_df)
    tampered_payload = full_payload + b"modified"

    records = []
    separator = "-" * 50

    for index in range(iterations):
        round_no = index + 1
        print(f"Iteration {round_no} Results:")
        for algo_name, algo in hash_functions.items():
            # Metrics are gathered in a fixed order; the two sampling-based
            # ones draw random rows from votes_df.
            elapsed = measure_speed(algo, full_payload)
            collision_count = test_collisions(algo, votes_df)
            digests_differ = per_image_resistance(algo, full_payload, tampered_payload)
            batch_seconds = second_per_image_resistance(algo, votes_df)
            digest_entropy = calculate_entropy(algo, full_payload)
            rss_delta = memory_test(algo, full_payload)
            score = efficiency_metric(elapsed, collision_count, digest_entropy, rss_delta)

            record = {
                'Iteration': round_no,
                'Hash Function': algo_name,
                'Speed (s)': elapsed,
                'Collisions': collision_count,
                'Per-Image Resistance': digests_differ,
                'Second-per-Image Resistance (s)': batch_seconds,
                'Entropy': digest_entropy,
                'Memory Usage (MiB)': rss_delta,
                'Efficiency Score': score
            }
            records.append(record)
            print(record)
        print(separator)  # Visual break between iterations

    # One row per (iteration, hash function) pair.
    results_df = pd.DataFrame(records)

    print("\nAll Results:")
    print(results_df)

    # Persist the table without the DataFrame index column.
    results_df.to_csv('hash_functions_results.csv', index=False)
    print("Results saved to 'hash_functions_results.csv'.")

# Run the evaluation 500 times and display results
# NOTE: this executes immediately when the cell/module runs. With the default
# sample counts, every hash function draws 10000 + 1000 random rows per
# iteration, so 500 iterations over all registered functions is a long job.
# Output is printed and written to 'hash_functions_results.csv' in the
# current working directory.
run_multiple_evaluations(votes_df, iterations=500)

