# Hash algorithm avalanche property

Create a Python program that computes the MD5, SHA-1 and SHA-256 hash sums of the contents of a given text file, and then computes the same hash sums again for the contents of the same file after changing the first character read from that text file to “A”.

Then, for each hashing algorithm, compare the original hash digest bit by bit with the hash digest of the modified file and print what percentage of bits changed.

You can assume that the file will never be empty and that the first character will never be “A”.

In [None]:
import sys
from cryptography.hazmat.primitives import hashes
import argparse



In [None]:

def bytes_to_int(text: bytes) -> int:
    """Interpret *text* as an unsigned big-endian integer.

    An empty byte string yields ``0``.
    """
    value = int.from_bytes(text, byteorder="big")
    return value


def int_to_bytes(num):
    """Encode *num* as the shortest unsigned big-endian byte string.

    *num* is first converted with ``int()``. Zero encodes to the empty
    byte string because its bit length is 0.
    """
    value = int(num)
    # Round the bit length up to a whole number of bytes.
    byte_count = (value.bit_length() + 7) // 8
    return value.to_bytes(byte_count, "big")


In [None]:

def xor(a: bytes, b: bytes):
    """Return the byte-wise XOR of *a* and *b*.

    The inputs are paired with ``zip``, so the result is as long as the
    shorter of the two; any excess bytes of the longer input are ignored.
    """
    result = bytearray()
    for left, right in zip(a, b):
        result.append(left ^ right)
    return bytes(result)


def get_diff(a: bytes, b: bytes) -> int:
    """Return the number of bit positions at which *a* and *b* differ.

    The inputs are compared byte by byte. If they differ in length, only
    the overlapping prefix (the length of the shorter input) is compared.

    The bits are counted per byte with ``bin(...).count("1")`` rather than
    ``int.bit_count()``, which is only available on Python 3.10 and later.
    """
    return sum(bin(left ^ right).count("1") for left, right in zip(a, b))


def _digest(algorithm, data: bytes) -> bytes:
    """Return the digest of *data* computed with *algorithm*.

    *algorithm* is a hash algorithm instance from
    ``cryptography.hazmat.primitives.hashes`` (for example
    ``hashes.SHA256()``).
    """
    hasher = hashes.Hash(algorithm)
    hasher.update(data)
    return hasher.finalize()


def main(arguments):
    """Print the share of digest bits that flip when the first character becomes "A".

    Parses *arguments* (a list of command-line tokens without the program
    name), takes the value of ``-f``/``--file``, and hashes it with MD5,
    SHA-1 and SHA-256 twice: once unchanged and once with its first
    character replaced by ``"A"``. For each algorithm one line is printed
    with the percentage of digest bits that differ between the two digests.

    Nothing is printed when the option is missing or empty.

    NOTE(review): the option value itself is encoded and hashed; it is not
    opened as a file path, although the task statement in this notebook
    describes hashing a file's contents. Confirm which is intended.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        "-f", "--file", help="String to be digest", type=str)

    args = parser.parse_args(arguments)

    if not args.file:
        return

    text: str = args.file
    original = text.encode()
    # Swap the first *character* before encoding. Replacing the first byte
    # of the encoded form would leave stray UTF-8 continuation bytes behind
    # whenever the first character is not ASCII.
    modified = ("A" + text[1:]).encode()

    # Label printed for each algorithm, paired with its constructor.
    algorithms = (
        ("MD5", hashes.MD5),
        ("SHA1", hashes.SHA1),
        ("SHA256", hashes.SHA256),
    )

    for label, make_algorithm in algorithms:
        digest_orig = _digest(make_algorithm(), original)
        digest_changed = _digest(make_algorithm(), modified)
        changed_bits = get_diff(digest_orig, digest_changed)
        # The digest length is in bytes; multiply by 8 for the total bit count.
        percentage = changed_bits / (len(digest_orig) * 8) * 100
        print(f"{label}:\t{percentage: .3f}%")




In [None]:
# Emulate a command-line invocation from inside the notebook: element 0
# stands in for the program name, the rest are the options given to main().
sys.argv = ["", "-f", "Hello, World!"]
main(sys.argv[1:])