# Hash functions

In [28]:
import hashlib
import time
import random
import string
from scipy import stats

# Ask the user for the text to hash and keep it as UTF-8 bytes, since the
# hashlib constructors used below are fed bytes rather than str.
data = input("Enter data to hash: ").encode('utf-8')

# 1. Let's explore the hashes generated by the most popular functions.

In [29]:
def functions_presentation(data):
    """Print the hexadecimal digest of ``data`` for six hashlib algorithms.

    One line is printed per algorithm, in this order: MD5, SHA-1, SHA-256,
    SHA-512, SHA-3-256 and SHA-3-512.

    Args:
        data: The bytes to hash.
    """
    # (label shown to the reader, hashlib constructor) in presentation order.
    algorithms = (
        ("MD5", hashlib.md5),
        ("SHA-1", hashlib.sha1),
        ("SHA-256", hashlib.sha256),      # SHA-2 family
        ("SHA-512", hashlib.sha512),      # SHA-2 family
        ("SHA-3-256", hashlib.sha3_256),  # SHA-3 family
        ("SHA-3-512", hashlib.sha3_512),  # SHA-3 family
    )
    for label, constructor in algorithms:
        digest = constructor(data).hexdigest()
        print(f"{label} Hash: {digest}")
    
# Show all six digests for the bytes stored in `data` by the input cell.
functions_presentation(data)

MD5 Hash: 8d777f385d3dfec8815d20f7496026dc
SHA-1 Hash: a17c9aaa61e80a1bf71d0d850af4e5baa9800bbd
SHA-256 Hash: 3a6eb0790f39ac87c94f3856b2dd2c5d110e6811602261a9a923d3bb23adc8b7
SHA-512 Hash: 77c7ce9a5d86bb386d443bb96390faa120633158699c8844c30b13ab0bf92760b7e4416aea397db91b4ac0e5dd56b8ef7e4b066162ab1fdc088319ce6defc876
SHA-3-256 Hash: efda893aa850b0c0e61f33325615b9d93bcf6b42d60d8f5d37ebc720fd4e3daf
SHA-3-512 Hash: ceca4daf960c2bbfb4a9edaca9b8137a801b65bae377e0f534ef9141c8684c0fedc1768d1afde9766572846c42b935f61177eaf97d355fa8dc2bca3fecfa754d


# 2. And measure their performance in terms of speed.

In [30]:
def generate_random_string(min_length, max_length):
    """Return a random string of ASCII letters and digits.

    The length is drawn uniformly from ``min_length`` to ``max_length``
    (both inclusive), then each character is picked independently.

    Args:
        min_length: Smallest allowed length.
        max_length: Largest allowed length.

    Returns:
        The generated string.
    """
    alphabet = string.ascii_letters + string.digits
    size = random.randint(min_length, max_length)
    characters = []
    for _ in range(size):
        characters.append(random.choice(alphabet))
    return ''.join(characters)

def time_measurement(hash_function, num_strings = 1000, min_length = 1, max_length = 10000, strings = None):
    """Measure how long ``hash_function`` takes to hash a batch of strings.

    Args:
        hash_function: A hashlib-style constructor; it is called with a bytes
            object and ``.hexdigest()`` is called on the result.
        num_strings: How many random strings to generate (ignored when
            ``strings`` is given).
        min_length: Minimum length of each generated string (ignored when
            ``strings`` is given).
        max_length: Maximum length of each generated string (ignored when
            ``strings`` is given).
        strings: Optional iterable of ``str`` to hash instead of generating
            random ones, so several hash functions can be timed on the very
            same inputs.

    Returns:
        Elapsed time in seconds as a float.
    """
    if strings is None:
        strings = [generate_random_string(min_length, max_length) for _ in range(num_strings)]

    # Encode before starting the clock so that only hashing is measured.
    payloads = [text.encode('utf-8') for text in strings]

    # perf_counter is the high-resolution clock intended for short intervals;
    # time.time() can tick too coarsely for measurements this small.
    start_time = time.perf_counter()
    for payload in payloads:
        hash_function(payload).hexdigest()
    return time.perf_counter() - start_time

def time_comparision():
    """Benchmark six hashlib algorithms and print the elapsed time of each.

    One line is printed per algorithm, in this order: MD5, SHA-1, SHA-256,
    SHA-512, SHA-3-256 and SHA-3-512.

    ``time_measurement`` builds its inputs with the ``random`` module, so the
    generator state is captured once and restored before every measurement.
    Each algorithm therefore hashes exactly the same strings, and the printed
    times are comparable.
    """
    benchmarks = (
        ("MD5", hashlib.md5),
        ("SHA-1", hashlib.sha1),
        ("SHA-256", hashlib.sha256),
        ("SHA-512", hashlib.sha512),
        ("SHA-3-256", hashlib.sha3_256),
        ("SHA-3-512", hashlib.sha3_512),
    )

    rng_state = random.getstate()
    for label, hash_function in benchmarks:
        # Rewind the generator so this run regenerates the same inputs.
        random.setstate(rng_state)
        elapsed = time_measurement(hash_function)
        print(label + " Time:", elapsed)

# Run the benchmark and print one timing line per hash function.
time_comparision()

MD5 Time: 0.020796537399291992
SHA-1 Time: 0.01561594009399414
SHA-256 Time: 0.03124213218688965
SHA-512 Time: 0.015603303909301758
SHA-3-256 Time: 0.031243085861206055
SHA-3-512 Time: 0.05622744560241699


# 3. Now, verify if the hash of a very short word matches a hash from a rainbow table.

In [31]:
# Hash a short, common word so the digest can be looked up afterwards.
test = b'test'
md5_short_hash = hashlib.md5(test).hexdigest()
print(f"MD5 Hash of a short word: {md5_short_hash}")

MD5 Hash of a short word: 098f6bcd4621d373cade4e832627b4f6


<img src="MD5hash.jpg" alt="Description">

Even ChatGPT recognizes this hash, leading us to conclude that short passwords are undoubtedly unsafe.

# 4. MD5 safety

There were a few notable examples of MD5 collisions:

* Flame malware collision: In 2012, the Flame malware was discovered to have used a chosen-prefix MD5 collision to forge a Microsoft code-signing certificate, which allowed its components to pass as legitimately signed software.

* Certificate authority attack: In 2008, researchers demonstrated a practical collision attack against MD5 that allowed them to create a rogue certificate authority (CA) certificate with the same MD5 hash as a legitimate CA certificate. This could potentially be used to issue fraudulent SSL certificates.

* FastColl tool: In 2007, researchers developed the FastColl tool, which could generate MD5 collisions in less than a minute on a standard PC. This tool highlighted the practicality of collision attacks against MD5.

# 5. Examining collisions

Let's examine if we can detect collisions within the first 12 bits for the SHA3_512 function.

In [32]:
# Function to generate SHA-3-512 hash and extract first 12 bits
def sha3_512_hash_first_12_bits(data):
    """Return the first 12 bits of the SHA-3-512 digest of ``data``.

    Args:
        data: Text to hash; it is encoded with the default ``str.encode()``
            encoding before hashing.

    Returns:
        A 3-character lowercase hexadecimal string (3 hex digits = 12 bits).
    """
    leading_bytes = hashlib.sha3_512(data.encode()).digest()[:2]
    # Two bytes give four hex digits; keep only the first three.
    return leading_bytes.hex()[:3]

# Generate inputs and compute hashes
# Experiment size: 2**12 inputs, i.e. as many inputs as there are 12-bit prefixes.
min_length = 10
max_length = 10000
num_strings = 2**12
inputs = [generate_random_string(min_length, max_length) for _ in range(num_strings)]

# Group the inputs by their 12-bit hash prefix.
hashed_values = {}
for input_data in inputs:
    hash_value = sha3_512_hash_first_12_bits(input_data)
    hashed_values.setdefault(hash_value, []).append(input_data)

# A collision is any prefix shared by more than one input.
collisions = [bucket for bucket in hashed_values.values() if len(bucket) > 1]

# Only the number of colliding prefixes is reported; to inspect the colliding
# inputs themselves, iterate over `collisions` and print each bucket.
if not collisions:
    print("No collisions found.")
else:
    print("There are", len(collisions), "collisions.")


There are 1090 collisions.


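Collisions are inevitable when only the first 12 bits of a 512-bit hash are considered: 4096 inputs are spread over just 2^12 = 4096 possible prefixes. The observed count is in line with what a uniformly random assignment would give (about 4096 · (1 − 2/e) ≈ 1082 prefixes shared by two or more inputs), which is exactly the behaviour expected from a good hash function. Moreover, the likelihood of collisions diminishes exponentially as we consider more leading bits.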

# 6. Strict Avalanche Criterion

In [33]:
def string_to_binary(input_string):
    """Return the code points of ``input_string`` as one string of '0'/'1'.

    Each character contributes its code point in binary, zero-padded to at
    least 8 digits.

    Args:
        input_string: The text to convert.

    Returns:
        The concatenated binary digits.
    """
    pieces = []
    for char in input_string:
        pieces.append(f"{ord(char):08b}")
    return ''.join(pieces)

def binary_to_string(binary_representation):
    """Turn a sequence of binary digits back into text.

    Args:
        binary_representation: An iterable of '0'/'1' strings (for example a
            list of single characters); the pieces are joined before decoding.

    Returns:
        A string with one character per 8-bit group, where each character is
        ``chr`` of the group's integer value.
    """
    bits = ''.join(binary_representation)
    characters = []
    # Walk the bit string one 8-bit group at a time.
    for start in range(0, len(bits), 8):
        characters.append(chr(int(bits[start:start + 8], 2)))
    return ''.join(characters)

def hash_to_binary(hash_string):
    """Convert a hexadecimal digest string into a string of binary digits.

    Args:
        hash_string: The digest as hexadecimal text.

    Returns:
        The binary digits, left-padded with zeros to 4 digits per hex
        character so that leading zero nibbles are preserved.
    """
    width = len(hash_string) * 4
    digits = bin(int(hash_string, 16))[2:]
    return digits.rjust(width, '0')

def sac_test(num_tests):
    """Empirically check the Strict Avalanche Criterion for SHA3-256.

    For every trial a random alphanumeric string is generated, one randomly
    chosen bit of it is inverted, and both versions are hashed with SHA3-256.
    The fraction of digest bits that differ is recorded; for a good hash
    function it should be close to 0.5.

    The average fraction is printed, together with how many individual trials
    fell outside / inside the 0.45-0.55 band. With a 256-bit digest the
    per-trial fraction has a standard deviation of about 0.03, so roughly one
    trial in nine lands outside that band by chance alone. The overall verdict
    therefore depends only on the average.

    Args:
        num_tests: Number of trials to run; must be at least 1.

    Returns:
        True when the average fraction of flipped bits is within 0.05 of 0.5,
        otherwise False.

    Raises:
        ValueError: If ``num_tests`` is smaller than 1.
    """
    if num_tests < 1:
        raise ValueError("num_tests must be at least 1")

    failed = 0
    passed = 0
    total_probability = 0
    for _ in range(num_tests):
        original_input = generate_random_string(min_length=10, max_length=100)
        binary_input = list(string_to_binary(original_input))
        index_to_flip = random.randint(0, len(binary_input) - 1)
        binary_input[index_to_flip] = '1' if binary_input[index_to_flip] == '0' else '0'
        modified_input = binary_to_string(binary_input)

        # Encode with latin-1 so every character becomes exactly one byte.
        # Flipping a character's top bit yields a code point >= 128, which
        # UTF-8 would expand to two bytes, so the inputs would differ by
        # more than one bit.
        hash_original = hashlib.sha3_256(original_input.encode('latin-1')).hexdigest()
        hash_modified = hashlib.sha3_256(modified_input.encode('latin-1')).hexdigest()

        hash_original_binary = hash_to_binary(hash_original)
        hash_modified_binary = hash_to_binary(hash_modified)

        length = len(hash_original_binary)
        # Count the digest bits that differ.
        flipped_bits = sum(
            bit_a != bit_b
            for bit_a, bit_b in zip(hash_original_binary, hash_modified_binary)
        )
        probability = flipped_bits / length
        total_probability += probability
        if abs(probability - 0.5) > 0.05:  # Allow for a 5% margin of error
            failed += 1
        else:
            passed += 1

    average_probability = total_probability / num_tests
    print("Average probability of flipping:", average_probability)
    print("Number of individual tests failed:", failed)
    print("Number of individual tests passed:", passed)
    return abs(average_probability - 0.5) < 0.05

# Run the avalanche experiment and report the overall verdict.
num_tests = 1000
print("SAC test passed." if sac_test(num_tests) else "SAC test failed.")


Average probability of flipping: 0.50152734375
Number of individual tests failed: 116
Number of individual tests passed: 884
SAC test passed.


SHA3-256 (the function hashed in the test above) passes the SAC test, because the average probability of an output bit flipping is almost exactly 50%.

# 7. Salting

The role of salting in hashing is to add a unique, random value to each plaintext before hashing, enhancing security by mitigating against rainbow table attacks and increasing entropy. This means even if two plaintexts are identical, their hashes will be different due to the unique salt.

**Example:**

1. **Without salt:**
   - `Hash("password123")` = `482c811da5d5b4bc6d497ffa98491e38`

2. **With salt:**
   - Salt = `abc123`
   - `Hash("password123" + "abc123")` = `f5000cd08879a020bc7d62d174798a7a`
