# Understanding SHA-256 from First Principles

This is a project where we're going to figure out, from scratch, how to build the SHA-256 algorithm: a one-way cryptographic hash function that converts any input, such as `Hi!`, into a 256-bit (32-byte) hash: `ca51ce1fb15acc6d69b8a5700256172fcc507e02073e6f19592e341bd6508ab8`.

**NOTE:** This is a beginner-friendly tutorial where I'm assuming you have _some_ Python experience, but are unfamiliar with some functions, which I will explain along the way.

# Full Hashing Algorithm

# UTF-8 Version

In [None]:
import struct

# Round constants for SHA-256: 64 32-bit words, one per compression round
# (sha256() adds K[j] into the j-th round of its main loop).
K = [
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
]

# Initial hash values: the eight 32-bit words that sha256() uses as its
# starting state before the first chunk is compressed.
H = [
    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
]

def rightrotate(value, shift):
    """Rotate ``value`` to the right by ``shift`` bit positions in a 32-bit word.

    Bits pushed off the low end re-enter at the high end. ``value`` is
    expected to fit in 32 bits already.
    """
    # Bits that stay in the word, moved down by ``shift`` places.
    kept = value >> shift
    # Bits that fell off the bottom, moved to the top and trimmed to 32 bits.
    wrapped = (value << (32 - shift)) & 0xFFFFFFFF
    return kept | wrapped

def sha256(message):
    """Compute the SHA-256 digest of a text message.

    The text is encoded as UTF-8, padded to a multiple of 512 bits, and
    compressed one 64-byte chunk at a time. Each call works on its own copy
    of the module-level initial values ``H``, so the function can be called
    any number of times and always returns the same digest for the same
    input.

    Args:
        message: The text to hash (a ``str``).

    Returns:
        The digest as a 64-character lowercase hexadecimal string.
    """
    mask = 0xFFFFFFFF  # keeps every sum inside 32 bits

    # Pre-processing: pad the message
    data = bytearray(message, 'utf-8')  # Convert to bytearray using UTF-8 encoding
    bit_length = len(data) * 8  # Length of the unpadded message in bits
    data.append(0x80)  # A single '1' bit followed by seven '0' bits
    # Zero-fill so that the 8-byte length field appended next ends exactly on
    # a 64-byte (512-bit) boundary.
    data.extend(bytes((56 - len(data)) % 64))
    data += struct.pack('>Q', bit_length)  # Original length as a 64-bit big-endian integer

    # Work on a private copy of the initial hash values. Accumulating into
    # the global H would leave it modified after the first call, so every
    # later call would start from the wrong state and return a wrong digest.
    state = list(H)

    # Process the message in successive 512-bit chunks
    for start in range(0, len(data), 64):
        # Break the chunk into sixteen 32-bit big-endian words
        W = list(struct.unpack('>16I', data[start:start + 64]))

        # Extend the first 16 words into the remaining 48 words
        for j in range(16, 64):
            s0 = rightrotate(W[j - 15], 7) ^ rightrotate(W[j - 15], 18) ^ (W[j - 15] >> 3)
            s1 = rightrotate(W[j - 2], 17) ^ rightrotate(W[j - 2], 19) ^ (W[j - 2] >> 10)
            W.append((W[j - 16] + s0 + W[j - 7] + s1) & mask)

        # Initialize working variables to the current hash value
        a, b, c, d, e, f, g, h = state

        # Compression function main loop
        for j in range(64):
            S1 = rightrotate(e, 6) ^ rightrotate(e, 11) ^ rightrotate(e, 25)
            # ~e is negative in Python, but AND-ing with the non-negative g
            # keeps only g's 32 bits, so the result is the intended value.
            ch = (e & f) ^ (~e & g)
            temp1 = (h + S1 + ch + K[j] + W[j]) & mask
            S0 = rightrotate(a, 2) ^ rightrotate(a, 13) ^ rightrotate(a, 22)
            maj = (a & b) ^ (a & c) ^ (b & c)
            temp2 = (S0 + maj) & mask

            h = g
            g = f
            f = e
            e = (d + temp1) & mask
            d = c
            c = b
            b = a
            a = (temp1 + temp2) & mask

        # Add the compressed chunk to the current hash value
        state = [
            (word + delta) & mask
            for word, delta in zip(state, (a, b, c, d, e, f, g, h))
        ]

    # Produce the final hash value (big-endian) as a 256-bit number
    return ''.join(f'{x:08x}' for x in state)

# Example usage: hash the tutorial's sample message and print its digest
message = "Hi!"
hash_value = sha256(message)
print(f"SHA-256 Hash of '{message}': {hash_value}")

SHA-256 Hash of 'Hi!': ca51ce1fb15acc6d69b8a5700256172fcc507e02073e6f19592e341bd6508ab8


In [None]:
import struct

# Round constants for SHA-256: 64 32-bit words, one per compression round
# (sha256() adds K[j] into the j-th round of its main loop).
K = [
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
]

# Initial hash values: the eight 32-bit words that sha256() uses as its
# starting state before the first chunk is compressed.
H = [
    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
]

def rightrotate(value, shift):
    """Rotate ``value`` to the right by ``shift`` bit positions in a 32-bit word.

    Bits pushed off the low end re-enter at the high end. ``value`` is
    expected to fit in 32 bits already.
    """
    # Bits that stay in the word, moved down by ``shift`` places.
    kept = value >> shift
    # Bits that fell off the bottom, moved to the top and trimmed to 32 bits.
    wrapped = (value << (32 - shift)) & 0xFFFFFFFF
    return kept | wrapped

def sha256(message):
    """Compute the SHA-256 digest of an ASCII text message.

    The text is encoded as ASCII, padded to a multiple of 512 bits, and
    compressed one 64-byte chunk at a time. Each call works on its own copy
    of the module-level initial values ``H``, so the function can be called
    any number of times and always returns the same digest for the same
    input.

    Args:
        message: The text to hash (a ``str`` containing only ASCII
            characters).

    Returns:
        The digest as a 64-character lowercase hexadecimal string.

    Raises:
        UnicodeEncodeError: If ``message`` contains a non-ASCII character.
    """
    mask = 0xFFFFFFFF  # keeps every sum inside 32 bits

    # Pre-processing: pad the message
    data = bytearray(message, 'ascii')  # Convert to bytearray
    bit_length = len(data) * 8  # Length of the unpadded message in bits
    data.append(0x80)  # A single '1' bit followed by seven '0' bits
    # Zero-fill so that the 8-byte length field appended next ends exactly on
    # a 64-byte (512-bit) boundary.
    data.extend(bytes((56 - len(data)) % 64))
    data += struct.pack('>Q', bit_length)  # Original length as a 64-bit big-endian integer

    # Work on a private copy of the initial hash values. Accumulating into
    # the global H would leave it modified after the first call, so every
    # later call would start from the wrong state and return a wrong digest.
    state = list(H)

    # Process the message in successive 512-bit chunks
    for start in range(0, len(data), 64):
        # Break the chunk into sixteen 32-bit big-endian words
        W = list(struct.unpack('>16I', data[start:start + 64]))

        # Extend the first 16 words into the remaining 48 words
        for j in range(16, 64):
            s0 = rightrotate(W[j - 15], 7) ^ rightrotate(W[j - 15], 18) ^ (W[j - 15] >> 3)
            s1 = rightrotate(W[j - 2], 17) ^ rightrotate(W[j - 2], 19) ^ (W[j - 2] >> 10)
            W.append((W[j - 16] + s0 + W[j - 7] + s1) & mask)

        # Initialize working variables to the current hash value
        a, b, c, d, e, f, g, h = state

        # Compression function main loop
        for j in range(64):
            S1 = rightrotate(e, 6) ^ rightrotate(e, 11) ^ rightrotate(e, 25)
            # ~e is negative in Python, but AND-ing with the non-negative g
            # keeps only g's 32 bits, so the result is the intended value.
            ch = (e & f) ^ (~e & g)
            temp1 = (h + S1 + ch + K[j] + W[j]) & mask
            S0 = rightrotate(a, 2) ^ rightrotate(a, 13) ^ rightrotate(a, 22)
            maj = (a & b) ^ (a & c) ^ (b & c)
            temp2 = (S0 + maj) & mask

            h = g
            g = f
            f = e
            e = (d + temp1) & mask
            d = c
            c = b
            b = a
            a = (temp1 + temp2) & mask

        # Add the compressed chunk to the current hash value
        state = [
            (word + delta) & mask
            for word, delta in zip(state, (a, b, c, d, e, f, g, h))
        ]

    # Produce the final hash value (big-endian) as a 256-bit number
    return ''.join(f'{x:08x}' for x in state)

# Example usage: hash the tutorial's sample message and print its digest
message = "Hi!"
hash_value = sha256(message)
print(f"SHA-256 Hash of '{message}': {hash_value}")

SHA-256 Hash of 'Hi!': ca51ce1fb15acc6d69b8a5700256172fcc507e02073e6f19592e341bd6508ab8


Constants and Initial Hash Values: The constants K and initial hash values H are defined as per the SHA-256 specification.

rightrotate Function: This function performs a right rotation on a 32-bit integer.
sha256 Function:

Padding: The message is padded to ensure its length is a multiple of 512 bits.

Processing: The message is processed in 512-bit chunks. Each chunk is extended into a 64-word message schedule array W.

Compression: The main loop of the compression function updates the working variables and the hash values.

Final Hash: The final hash value is produced by concatenating the hash values in hexadecimal format.

### Input and Output

During this tutorial, we're going to use `Hi!` as the input message, but you can make it anything you want.

In [None]:
# Sample message hashed throughout the tutorial. The raw-string prefix has no
# effect here because the text contains no backslashes.
sha_256_input = r"Hi!"

#### SHA-256: The _EASY_ Way.

The easiest way to get the SHA-256 Hash is to use `hashlib`.

In [None]:
import hashlib

def quick_sha256(text):
    """Return the SHA-256 hex digest of ``text``, computed by ``hashlib``.

    The string is encoded as UTF-8 before hashing; the result is a
    64-character lowercase hexadecimal string.
    """
    hasher = hashlib.sha256()
    hasher.update(text.encode('utf-8'))
    return hasher.hexdigest()

# Reference digest from hashlib; the hand-written implementation is expected
# to reproduce this value for the same input.
correct_sha_256_output_hash = quick_sha256(sha_256_input)

# Show the input next to the digest we are aiming for.
print(f"SHA-256 INPUT: {sha_256_input}")
print("\nIf our algorithm is correct, this should be the output hash:\n")
print(f"SHA-256 OUTPUT: {correct_sha_256_output_hash}")


SHA-256 INPUT: Hi!

If our algorithm is correct, this should be the output hash:

SHA-256 OUTPUT: ca51ce1fb15acc6d69b8a5700256172fcc507e02073e6f19592e341bd6508ab8
