# Brute-forcing hashed passwords

In this exercise you will notice how the method of storing a password may slow down an attacker.


Create a Python program that does the following ten times, using each of the hashing algorithms MD5, SHA-1, SHA-256:

● For each of the row and column combinations shown in the table below, randomly select a password of the specified length decided by the row and consisting of any of the characters decided by the column.

● Compute the hash for this password.

● In a loop, iterate through all possible passwords for the case you are testing.

    ○ Hash each password and compare against the test hash.

    ○ When you find a match, stop.

● Compute the amount of time it took to find a match.

![Alt text](./assign3_q8.png)

How long, on average, did it take to find a match for a random password in each case?

How many total symbols are in each input set?

Try the same process as above using the scrypt hashing algorithm and a password of length 4 drawn from the set of all printable characters. What do you notice?

When your program finishes, it should print the average time it took to find a match for a random password in each case, in a table laid out like the one shown above, for each hashing algorithm. In total you should print 3 tables, one for each algorithm.

No statistics are required to be printed for the scrypt hashing algorithm.

In [None]:
import datetime
from multiprocessing import Manager, Pool
from multiprocessing.managers import ListProxy
import os
import secrets
import string
import timeit
from itertools import product
from statistics import mean
from typing import Iterable

import tabulate
from cryptography.hazmat.primitives.hashes import MD5, SHA1, SHA256, Hash, HashAlgorithm
from cryptography.hazmat.primitives.kdf.scrypt import Scrypt
from cryptography.exceptions import InvalidKey

# Verbosity of progress output:
#   >= 1 prints the average time of every (algorithm, charset, length) case,
#   >= 2 additionally prints the time of every single brute-force run,
#   >= 3 additionally prints start/finish timestamps of every run.
DEBUG = 1
# When true, main() also runs the (much slower) scrypt benchmark.
RUN_SCRYPT = False
# Shortest password length benchmarked by main() (inclusive).
MIN_LENGTH = 1

# Longest password length benchmarked by main() (inclusive).
MAX_LENGTH = 4

In [None]:



def bruteforce_hash(pass_hash: bytes, search_space: Iterable[bytes], algorithm: HashAlgorithm):
    """Find the candidate in ``search_space`` whose digest equals ``pass_hash``.

    Args:
        pass_hash: Digest of the password being searched for.
        search_space: Candidate passwords, tried in iteration order.
        algorithm: Hash algorithm instance used to digest every candidate.

    Returns:
        None, as soon as a candidate's digest matches ``pass_hash``.

    Raises:
        ValueError: If no candidate in ``search_space`` produces ``pass_hash``.
    """
    verbose = DEBUG >= 3
    if verbose:
        start = datetime.datetime.now()
        print(f"Process started at {start.strftime('%H:%M:%S.%f')}")

    for guess in search_space:
        # A hash context cannot be reused after finalize(), so a new one is
        # created for every guess.
        digest = Hash(algorithm)
        digest.update(guess)
        if digest.finalize() != pass_hash:
            continue

        if verbose:
            end = datetime.datetime.now()
            print(f"Process finished at {end.strftime('%H:%M:%S.%f')}. Time taken: {end-start}")
        return None

    # Only reached when the whole search space was exhausted without a match.
    raise ValueError("Password not found")


def hashing_process(search_space: list[bytes], algorithm: HashAlgorithm):
    """Time one brute-force search for a randomly chosen password.

    A password is picked at random from ``search_space``, hashed with
    ``algorithm``, and then recovered again with ``bruteforce_hash``.

    Args:
        search_space: All candidate passwords for the case under test.
        algorithm: Hash algorithm instance used for the target and the search.

    Returns:
        The duration of the search as reported by ``timeit.timeit``.
    """
    digest = Hash(algorithm)
    target_password = secrets.choice(search_space)
    digest.update(target_password)
    pass_hash = digest.finalize()

    def crack():
        # Only the search itself is timed; picking and hashing the target
        # happens above, outside the measurement.
        return bruteforce_hash(pass_hash, search_space, algorithm)

    time_taken = timeit.timeit(crack, number=1)

    if DEBUG >= 2:
        print(f"Process finished taking: {time_taken}")

    return time_taken


def benchmark_hashing(algorithm: HashAlgorithm, charset: dict[str, str], lengths: Iterable[int], iterations: int = 10):
    """Measure the average brute-force time for every charset/length pair.

    Args:
        algorithm: Hash algorithm instance to benchmark.
        charset: Maps a display name to the string of characters passwords
            may be built from.
        lengths: Password lengths to benchmark, processed in iteration order.
        iterations: Number of random passwords cracked per case.

    Returns:
        A dict mapping each charset name to a list of average times, one
        entry per length in the order ``lengths`` was iterated.
    """
    averages = {}

    for pw_length in lengths:
        for label, alphabet in charset.items():
            # The full search space is materialised once and shared by all
            # iterations of this case.
            candidates = ["".join(combo).encode()
                          for combo in product(alphabet, repeat=pw_length)]

            timings = []
            for _ in range(iterations):
                timings.append(hashing_process(candidates, algorithm))

            average = mean(timings)
            averages.setdefault(label, []).append(average)

            if DEBUG >= 1:
                print(f"{algorithm.name} over {label} of length {pw_length} took {average} on average")

    return averages


def bruteforce_scrypt(key: bytes, search_space: Iterable, salt: bytes, id):
    """Find the candidate in ``search_space`` that scrypt-derives to ``key``.

    Args:
        key: Derived key of the password being searched for.
        search_space: Candidate passwords, tried in iteration order.
        salt: Salt that was used to derive ``key``.
        id: Label of this run, used only in the debug output.

    Returns:
        None, as soon as a candidate verifies against ``key``.

    Raises:
        ValueError: If no candidate in ``search_space`` verifies.
    """
    verbose = DEBUG >= 3
    if verbose:
        start = datetime.datetime.now()
        print(f"Process {id} started at {start.strftime('%H:%M:%S.%f')}")

    for guess in search_space:
        # Positional arguments are (salt, length, n, r, p); a new instance is
        # built for every guess because a Scrypt object is single-use.
        kdf = Scrypt(salt, 32, 2**14, 8, 1)
        try:
            kdf.verify(guess, key)
        except InvalidKey:
            # Wrong guess: move on to the next candidate.
            continue

        if verbose:
            end = datetime.datetime.now()
            print(f"Process {id} finished at {end.strftime('%H:%M:%S.%f')}. Time taken: {end-start}")
        return None

    # Only reached when the whole search space was exhausted without a match.
    raise ValueError("Password not found")


def scrypt_process(queue: ListProxy, search_space: list[bytes], id: int):
    """Time one scrypt brute-force search and record the duration.

    A random salt is generated, a password is picked at random from
    ``search_space`` and derived with scrypt, and the password is then
    recovered again with ``bruteforce_scrypt``.

    Args:
        queue: Shared list the measured duration is appended to.
        search_space: All candidate passwords for the case under test.
        id: Label of this run, used only in the debug output.

    Returns:
        None; the result is delivered through ``queue``.
    """
    salt = os.urandom(16)
    # Positional arguments are (salt, length, n, r, p).
    kdf = Scrypt(salt, 32, 2**14, 8, 1)
    target_password = secrets.choice(search_space)
    key = kdf.derive(target_password)

    def crack():
        # Only the search itself is timed; salt generation and key derivation
        # of the target happen above, outside the measurement.
        return bruteforce_scrypt(key, search_space, salt, id)

    time_taken = timeit.timeit(crack, number=1)

    if DEBUG >= 2:
        print(f"Process {id} finished taking: {time_taken}")

    queue.append(time_taken)


def benchmark_scrypt(charset: dict[str, str], lengths: Iterable[int], iterations: int = 10, cores: int | None = None):
    result = {}

    for num_char in lengths:
        for ss_name, search_space in charset.items():
            search_space = [
                "".join(word).encode() for word in product(search_space, repeat=num_char)]
            with Manager() as manager, Pool(processes=cores) as pool:
                times = manager.list()
                processes = [pool.apply_async(func=scrypt_process, args=(times, search_space, id+1))
                             for id in range(iterations)]

                [process.wait() for process in processes]

                if result.get(ss_name, []) == []:
                    result[ss_name] = []

                result[ss_name].append(mean(times))
                if DEBUG >= 1:
                    print(
                        f"scrypt over {ss_name} of length {num_char} took {mean(times)} on average")
    return result


def main():
    """Run the hash benchmarks and print one result table per algorithm.

    All MD5, SHA1 and SHA256 benchmarks are completed first; only then are
    the tables printed. When ``RUN_SCRYPT`` is true, a scrypt benchmark over
    printable passwords of length 4 is run and printed afterwards.
    """
    charset = {
        "Lowercase": string.ascii_lowercase,
        "Lower + Upper case": string.ascii_letters,
        "Letters + Numbers": string.ascii_letters + string.digits,
        "Printable": string.printable,
    }
    algorithms = {"MD5": MD5, "SHA1": SHA1, "SHA256": SHA256}

    # Row labels of every table: the benchmarked password lengths.
    lengths = range(MIN_LENGTH, MAX_LENGTH + 1)
    column_titles = ['#char', *charset]

    benchmarks = []
    for alg_name, algorithm in algorithms.items():
        timings = benchmark_hashing(algorithm(), charset, lengths)
        benchmarks.append((alg_name, timings))

    for alg_name, benchmark in benchmarks:
        print(f"{alg_name}:")
        table = tabulate.tabulate(
            benchmark,
            headers=column_titles,
            showindex=lengths,
            tablefmt="fancy_grid",
            floatfmt=".10f",
        )
        print(table)
        print()

    if not RUN_SCRYPT:
        return

    print("scrypt:")
    scrypt_timings = benchmark_scrypt({"Printable": string.printable}, (4,))
    print(tabulate.tabulate(
        scrypt_timings,
        headers=['#char', 'Printable'],
        showindex=(4,),
        tablefmt="fancy_grid",
        floatfmt=".10f",
    ))


# Run the benchmarks only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()