# Computational Theory Assessment: Winter 25/26

In [27]:
import numpy as np
import math
import struct
from typing import Generator
from typing import List, Tuple

## Problem 1: Binary Words and Operations

In [28]:
# Main SHA-256 functions
def Parity(x, y, z):
    """
    Bitwise parity of three 32-bit words.

    Each result bit is 1 when an odd number of the corresponding input
    bits are 1, and 0 otherwise; this is exactly x XOR y XOR z.

    Parameters:
        x (int): First 32-bit integer
        y (int): Second 32-bit integer
        z (int): Third 32-bit integer

    Returns:
        numpy.uint32: x XOR y XOR z
    """
    # Coerce all operands first so the XOR chain is carried out on uint32 values.
    words = [np.uint32(operand) for operand in (x, y, z)]
    result = words[0]
    for word in words[1:]:
        result = result ^ word
    return result

def Ch(x, y, z):
    """
    Bitwise "choose": x selects, per bit, between y and z.

    Where a bit of x is 1 the result takes the bit from y; where it is 0
    the result takes the bit from z. Computed as
    (x AND y) XOR (NOT x AND z).

    Parameters:
        x (int): 32-bit integer used as selector
        y (int): 32-bit integer chosen when x=1
        z (int): 32-bit integer chosen when x=0

    Returns:
        numpy.uint32: (x AND y) XOR (NOT x AND z)
    """
    selector, if_set, if_clear = (np.uint32(word) for word in (x, y, z))
    taken_from_y = selector & if_set
    # ~ on a uint32 stays within 32 bits, so no extra masking is needed here.
    taken_from_z = ~selector & if_clear
    return taken_from_y ^ taken_from_z

def Maj(x, y, z):
    """
    Bitwise majority vote of three 32-bit words.

    A result bit is 1 when at least two of the three corresponding input
    bits are 1. Computed as (x AND y) XOR (x AND z) XOR (y AND z).

    Parameters:
        x (int): First 32-bit integer
        y (int): Second 32-bit integer
        z (int): Third 32-bit integer

    Returns:
        numpy.uint32: Majority value at each bit position
    """
    a, b, c = map(np.uint32, (x, y, z))
    # The three pairwise ANDs, combined with XOR, give the majority bit.
    both_ab = a & b
    both_ac = a & c
    both_bc = b & c
    return both_ab ^ both_ac ^ both_bc

In [29]:
def Sigma0(x):
    """
    SHA-256 uppercase Sigma0: ROTR-2(x) XOR ROTR-13(x) XOR ROTR-22(x).

    Parameters:
        x (int): 32-bit integer input

    Returns:
        numpy.uint32: The three right-rotations of x XORed together
    """
    word = np.uint32(x)

    def rotate_right(amount):
        # Re-wrap in uint32 after combining the shifts to enforce the 32-bit limit.
        return np.uint32((word >> amount) | (word << (32 - amount)))

    result = rotate_right(2)
    for amount in (13, 22):
        result = result ^ rotate_right(amount)
    return result

def Sigma1(x):
    """
    SHA-256 uppercase Sigma1: ROTR-6(x) XOR ROTR-11(x) XOR ROTR-25(x).

    Parameters:
        x (int): 32-bit integer input

    Returns:
        numpy.uint32: The three right-rotations of x XORed together
    """
    word = np.uint32(x)

    def rotate_right(amount):
        # Re-wrap in uint32 after combining the shifts to enforce the 32-bit limit.
        return np.uint32((word >> amount) | (word << (32 - amount)))

    result = rotate_right(6)
    for amount in (11, 25):
        result = result ^ rotate_right(amount)
    return result

def sigma0(x):
    """
    SHA-256 lowercase sigma0: ROTR-7(x) XOR ROTR-18(x) XOR SHR-3(x).

    Two right-rotations and one plain right shift (which discards the
    shifted-out bits) are XORed together.

    Parameters:
        x (int): 32-bit integer input

    Returns:
        numpy.uint32: Result of the three operations XORed together
    """
    word = np.uint32(x)

    def rotate_right(amount):
        # Re-wrap in uint32 after combining the shifts to enforce the 32-bit limit.
        return np.uint32((word >> amount) | (word << (32 - amount)))

    shifted = np.uint32(word >> 3)
    return rotate_right(7) ^ rotate_right(18) ^ shifted

def sigma1(x):
    """
    SHA-256 lowercase sigma1: ROTR-17(x) XOR ROTR-19(x) XOR SHR-10(x).

    Two right-rotations and one plain right shift (which discards the
    shifted-out bits) are XORed together.

    Parameters:
        x (int): 32-bit integer input

    Returns:
        numpy.uint32: Result of the three operations XORed together
    """
    word = np.uint32(x)

    def rotate_right(amount):
        # Re-wrap in uint32 after combining the shifts to enforce the 32-bit limit.
        return np.uint32((word >> amount) | (word << (32 - amount)))

    shifted = np.uint32(word >> 10)
    return rotate_right(17) ^ rotate_right(19) ^ shifted

In [30]:
"""
## Testing and Verification

We test all 7 required functions with representative 32-bit values 
to verify correctness according to the SHA-256 specification.
"""

# Smoke test for the seven Problem 1 helpers. Nothing is asserted: each line
# prints one call's result as 8 zero-padded hex digits so it can be compared
# by eye with a hand-worked value.
print("=== Testing Required SHA-256 Functions ===\n")

# Three-input functions. The Ch call uses an all-ones selector, so by Ch's
# definition every result bit comes from the second argument.
print("1. Testing Parity, Ch, and Maj functions:")
print(f"Parity(0x00000001, 0x00000001, 0x00000000) = {Parity(0x00000001, 0x00000001, 0x00000000):08x}")
print(f"Ch(0xFFFFFFFF, 0x12345678, 0x87654321) = {Ch(0xFFFFFFFF, 0x12345678, 0x87654321):08x}")
print(f"Maj(0xF0F0F0F0, 0xFF00FF00, 0x0F0F0F0F) = {Maj(0xF0F0F0F0, 0xFF00FF00, 0x0F0F0F0F):08x}")
print()

# All four sigma functions are fed the same word so their outputs can be
# compared side by side.
print("2. Testing Sigma functions:")
test_val = 0x12345678
print(f"Sigma0(0x12345678) = {Sigma0(test_val):08x}")
print(f"Sigma1(0x12345678) = {Sigma1(test_val):08x}")
print(f"sigma0(0x12345678) = {sigma0(test_val):08x}")
print(f"sigma1(0x12345678) = {sigma1(test_val):08x}")

=== Testing Required SHA-256 Functions ===

1. Testing Parity, Ch, and Maj functions:
Parity(0x00000001, 0x00000001, 0x00000000) = 00000000
Ch(0xFFFFFFFF, 0x12345678, 0x87654321) = 12345678
Maj(0xF0F0F0F0, 0xFF00FF00, 0x0F0F0F0F) = ff00ff00

2. Testing Sigma functions:
Sigma0(0x12345678) = 66146474
Sigma1(0x12345678) = 3561abda
sigma0(0x12345678) = e7fce6ee
sigma1(0x12345678) = a1f78649


## Problem 2: Fractional Parts of Cube Roots

In [31]:
def primes(n):
    """
    Return the first n prime numbers (used for the SHA-256 constants).

    A Sieve of Eratosthenes is run over a numpy boolean array whose size
    comes from an upper-bound estimate for the n-th prime; the scan stops
    as soon as n primes have been collected.

    Parameters:
        n (int): Number of prime numbers to generate

    Returns:
        list: First n prime numbers in ascending order

    Raises:
        ValueError: If n is not a positive integer
    """
    if not (isinstance(n, int) and n > 0):
        raise ValueError("n must be a positive integer")

    if n == 1:
        return [2]

    # Upper limit for the sieve: a fixed value for small n, otherwise
    # n * (ln n + ln ln n) plus a small margin.
    limit = 20 if n < 6 else int(n * (np.log(n) + np.log(np.log(n)))) + 10

    # True marks a number that has not (yet) been ruled out as prime.
    is_candidate = np.ones(limit + 1, dtype=bool)
    is_candidate[:2] = False  # 0 and 1 are not prime numbers

    collected = []
    candidate = 2
    while candidate <= limit and len(collected) < n:
        if is_candidate[candidate]:
            collected.append(candidate)
            if len(collected) < n:
                # Strike out multiples starting at candidate**2 with one strided slice.
                is_candidate[candidate * candidate::candidate] = False
        candidate += 1

    return collected

# Demonstration run for primes(): for several request sizes, print the
# sequence, how many values came back, and the largest one.
print("=== Prime Generation for SHA-256 Constants ===\n")

# Test with critical values: 1, 5, 10, and the required 64 primes
# (1 takes the early return, 5 the fixed small-n limit, 10 and 64 the
# estimated limit inside primes()).
test_counts = [1, 5, 10, 64]
for count in test_counts:
    prime_sequence = primes(count)
    print(f"First {count} primes: {prime_sequence}")
    print(f"Count: {len(prime_sequence)}, Largest: {prime_sequence[-1]}")
    
    # Verify we can convert to 32-bit numpy array for SHA-256
    if count == 64:
        prime_array = np.array(prime_sequence, dtype=np.uint32)
        print(f"Verification: Array shape={prime_array.shape}, Type={prime_array.dtype}")
    
    # Blank line between the reports for successive counts.
    print()

=== Prime Generation for SHA-256 Constants ===

First 1 primes: [2]
Count: 1, Largest: 2

First 5 primes: [2, 3, 5, 7, 11]
Count: 5, Largest: 11

First 10 primes: [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]
Count: 10, Largest: 29

First 64 primes: [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311]
Count: 64, Largest: 311
Verification: Array shape=(64,), Type=uint32



In [32]:
def extract_fractional_bits(value, num_bits=32):
    """
    Extract the first num_bits bits of the fractional part of value.

    SHA-256 round constants are defined as the first 32 bits of the
    fractional parts of cube roots. The fractional part is isolated in
    float64, scaled by 2**num_bits and truncated to an unsigned integer.

    Parameters:
        value (float): Number to extract bits from. Expected to be
            non-negative; negative input is not handled specially.
        num_bits (int): Number of bits to extract, 0 to 32 inclusive
            (default 32 for SHA-256)

    Returns:
        np.uint32: First num_bits of the fractional part as a 32-bit integer

    Raises:
        ValueError: If num_bits is outside the range 0..32. The result is
            a uint32, so more than 32 bits cannot be represented.
    """
    # Reject widths that would overflow uint32 instead of letting the cast
    # wrap or fail in a version-dependent way.
    if not 0 <= num_bits <= 32:
        raise ValueError(f"num_bits must be between 0 and 32, got {num_bits}")

    # Isolate the fractional part; np.modf returns (fractional, integral).
    fractional, _ = np.modf(np.float64(value))

    # Scale the fractional part into [0, 2**num_bits). Multiplying by a
    # power of two is exact in binary floating point.
    scaled = fractional * np.float64(2 ** num_bits)

    # Convert to 32-bit unsigned integer (truncates the remaining fraction).
    result = np.uint32(scaled)

    # Mask with a uint32 operand so the result dtype stays uint32 regardless
    # of how NumPy promotes Python integers.
    bitmask = np.uint32((1 << num_bits) - 1)
    return result & bitmask

# Demonstration of extract_fractional_bits(): for a few small primes, print the
# float64 cube root and the 32 bits extracted from its fractional part, in hex
# and in decimal.
print("=== Fractional Bit Extraction Test ===\n")

# Test with first four primes as specified in FIPS 180-4
test_primes = [2, 3, 5, 7]
for prime in test_primes:
    # Cube root computed as prime ** (1/3) in 64-bit floating point
    cube_root = np.power(prime, 1.0/3.0, dtype=np.float64)
    
    # Extract first 32 bits of fractional part
    fractional_bits = extract_fractional_bits(cube_root, 32)
    
    print(f"Prime {prime}:")
    print(f"  Cube root: {cube_root:.12f}")
    print(f"  Fractional bits (hex): 0x{fractional_bits:08x}")
    print(f"  Fractional bits (dec): {fractional_bits}")
    print()

=== Fractional Bit Extraction Test ===

Prime 2:
  Cube root: 1.259921049895
  Fractional bits (hex): 0x428a2f98
  Fractional bits (dec): 1116352408

Prime 3:
  Cube root: 1.442249570307
  Fractional bits (hex): 0x71374491
  Fractional bits (dec): 1899447441

Prime 5:
  Cube root: 1.709975946677
  Fractional bits (hex): 0xb5c0fbcf
  Fractional bits (dec): 3049323471

Prime 7:
  Cube root: 1.912931182772
  Fractional bits (hex): 0xe9b5dba5
  Fractional bits (dec): 3921009573



In [33]:
def compute_sha256_constants():
    """
    Compute the 64 SHA-256 round constants K (FIPS 180-4 Section 4.2.2).

    For each of the first 64 primes, the cube root is taken in float64 and
    the first 32 bits of its fractional part are extracted.

    Returns:
        tuple: (hex_strings, k_array) where:
            hex_strings: List of 64 constants as 8-digit lowercase hex strings
            k_array: Numpy uint32 array holding the same 64 constants
    """
    exponent = 1.0 / 3.0

    # One constant per prime: the fractional bits of the prime's cube root.
    constant_values = [
        extract_fractional_bits(np.power(prime, exponent, dtype=np.float64), 32)
        for prime in primes(64)
    ]

    # Text form for display and comparison against the published table.
    hex_constants = [f"{constant:08x}" for constant in constant_values]

    # Array form with uint32 dtype for use in the compression rounds.
    K256 = np.array(constant_values, dtype=np.uint32)

    return hex_constants, K256

# Run the constant computation once and report basic properties of the
# resulting array. The module-level names hex_constants and K256 defined here
# are reused by later cells.
print("=== SHA-256 Constant Computation ===\n")

# Compute all 64 SHA-256 constants
hex_constants, K256 = compute_sha256_constants()

# Size, dtype and memory footprint come straight from the ndarray attributes.
print("Computed Constants Array (K256):")
print(f"  Elements: {K256.shape[0]}")
print(f"  Data Type: {K256.dtype} (32-bit unsigned integer)")
print(f"  Memory: {K256.nbytes} bytes")
print(f"  First: 0x{K256[0]:08x}, Last: 0x{K256[-1]:08x}")
print()

# Header and 40-character rule for the table printed further down.
print("Constants in Standard SHA-256 Format:")
print("-" * 40)

def format_hex_constants(constants, per_line=8):
    """
    Lay out string constants in rows of per_line items.

    Parameters:
        constants (list): Strings to lay out, in order
        per_line (int): Number of items per row (default 8)

    Returns:
        str: Items separated by single spaces within a row, rows separated
            by newlines; an empty string when constants is empty
    """
    rows = []
    for start in range(0, len(constants), per_line):
        row_items = constants[start:start + per_line]
        rows.append(" ".join(row_items))
    return "\n".join(rows)

# Print the constants as an 8-column table, followed by how many were printed.
print(format_hex_constants(hex_constants))
print(f"\nTotal: {len(hex_constants)} constants")

# Note: K256 is now available as a numpy uint32 array for Problem 4
# Use K256[t] in compression round t: T1 = h + Σ1(e) + Ch(e,f,g) + K256[t] + W[t]

=== SHA-256 Constant Computation ===

Computed Constants Array (K256):
  Elements: 64
  Data Type: uint32 (32-bit unsigned integer)
  Memory: 256 bytes
  First: 0x428a2f98, Last: 0xc67178f2

Constants in Standard SHA-256 Format:
----------------------------------------
428a2f98 71374491 b5c0fbcf e9b5dba5 3956c25b 59f111f1 923f82a4 ab1c5ed5
d807aa98 12835b01 243185be 550c7dc3 72be5d74 80deb1fe 9bdc06a7 c19bf174
e49b69c1 efbe4786 0fc19dc6 240ca1cc 2de92c6f 4a7484aa 5cb0a9dc 76f988da
983e5152 a831c66d b00327c8 bf597fc7 c6e00bf3 d5a79147 06ca6351 14292967
27b70a85 2e1b2138 4d2c6dfc 53380d13 650a7354 766a0abb 81c2c92e 92722c85
a2bfe8a1 a81a664b c24b8b70 c76c51a3 d192e819 d6990624 f40e3585 106aa070
19a4c116 1e376c08 2748774c 34b0bcb5 391c0cb3 4ed8aa4a 5b9cca4f 682e6ff3
748f82ee 78a5636f 84c87814 8cc70208 90befffa a4506ceb bef9a3f7 c67178f2

Total: 64 constants


In [34]:
# SHA-256 constants from FIPS PUB 180-4 Section 4.2.2
# These 64 hexadecimal values represent the first 32 bits of fractional parts
# of cube roots of the first 64 prime numbers
# Stored as lowercase 8-digit hex strings, the same format hex_constants uses,
# so the two lists can be compared with plain string equality.
FIPS_CONSTANTS = [
    '428a2f98', '71374491', 'b5c0fbcf', 'e9b5dba5', '3956c25b', '59f111f1', '923f82a4', 'ab1c5ed5',
    'd807aa98', '12835b01', '243185be', '550c7dc3', '72be5d74', '80deb1fe', '9bdc06a7', 'c19bf174',
    'e49b69c1', 'efbe4786', '0fc19dc6', '240ca1cc', '2de92c6f', '4a7484aa', '5cb0a9dc', '76f988da',
    '983e5152', 'a831c66d', 'b00327c8', 'bf597fc7', 'c6e00bf3', 'd5a79147', '06ca6351', '14292967',
    '27b70a85', '2e1b2138', '4d2c6dfc', '53380d13', '650a7354', '766a0abb', '81c2c92e', '92722c85',
    'a2bfe8a1', 'a81a664b', 'c24b8b70', 'c76c51a3', 'd192e819', 'd6990624', 'f40e3585', '106aa070',
    '19a4c116', '1e376c08', '2748774c', '34b0bcb5', '391c0cb3', '4ed8aa4a', '5b9cca4f', '682e6ff3',
    '748f82ee', '78a5636f', '84c87814', '8cc70208', '90befffa', 'a4506ceb', 'bef9a3f7', 'c67178f2'
]

print("=== FIPS 180-4 Standard Verification ===\n")

print("Comparing Generated vs Reference Constants:")
print("-" * 50)

# Verify each generated constant against the FIPS standard
# NOTE: zip() stops at the shorter list, so a hex_constants list with fewer
# than 64 entries would be reported only through the match total below.
match_count = 0
for index, (generated, reference) in enumerate(zip(hex_constants, FIPS_CONSTANTS)):
    status = "PASS" if generated == reference else "FAIL"
    if generated == reference:
        match_count += 1
    print(f"{index:2d}. {status} {generated} | {reference}")

print(f"\nMatch Summary: {match_count}/64 constants correct")

# The expected total (64) is hard-coded in the summary and in the branch below.
if match_count == 64:
    print("\n VALIDATION PASSED: All constants match FIPS 180-4 standard")
    
    # Additional numpy array validation
    print("\n=== Array Properties Validation ===")
    
    # Convert reference to numpy array for direct comparison
    reference_array = np.array([int(h, 16) for h in FIPS_CONSTANTS], dtype=np.uint32)
    
    # Compare generated K256 array with reference using numpy equality
    if np.array_equal(K256, reference_array):
        print("Array comparison passed")
        print(f"Shape: {K256.shape}")
        print(f"Type: {K256.dtype}")
        print(f"Memory: {K256.nbytes} bytes")
        
        # Min, max and (truncated) mean of the constants, shown in hex.
        print("\nStatistical Properties")
        print(f"Minimum: 0x{K256.min():08x} (decimal: {K256.min()})")
        print(f"Maximum: 0x{K256.max():08x} (decimal: {K256.max()})")
        print(f"Mean:    0x{int(K256.mean()):08x}")
    else:
        print("Array comparison failed")
        
else:
    print(f"\nVALIDATION FAILED: {64 - match_count} mismatches")
    print("\nFirst few mismatches:")
    # List at most five differing positions to keep the failure report short.
    mismatch_count = 0
    for i, (gen, ref) in enumerate(zip(hex_constants, FIPS_CONSTANTS)):
        if gen != ref and mismatch_count < 5:
            print(f"  Index {i}: Generated {gen}, Expected {ref}")
            mismatch_count += 1


=== FIPS 180-4 Standard Verification ===

Comparing Generated vs Reference Constants:
--------------------------------------------------
 0. PASS 428a2f98 | 428a2f98
 1. PASS 71374491 | 71374491
 2. PASS b5c0fbcf | b5c0fbcf
 3. PASS e9b5dba5 | e9b5dba5
 4. PASS 3956c25b | 3956c25b
 5. PASS 59f111f1 | 59f111f1
 6. PASS 923f82a4 | 923f82a4
 7. PASS ab1c5ed5 | ab1c5ed5
 8. PASS d807aa98 | d807aa98
 9. PASS 12835b01 | 12835b01
10. PASS 243185be | 243185be
11. PASS 550c7dc3 | 550c7dc3
12. PASS 72be5d74 | 72be5d74
13. PASS 80deb1fe | 80deb1fe
14. PASS 9bdc06a7 | 9bdc06a7
15. PASS c19bf174 | c19bf174
16. PASS e49b69c1 | e49b69c1
17. PASS efbe4786 | efbe4786
18. PASS 0fc19dc6 | 0fc19dc6
19. PASS 240ca1cc | 240ca1cc
20. PASS 2de92c6f | 2de92c6f
21. PASS 4a7484aa | 4a7484aa
22. PASS 5cb0a9dc | 5cb0a9dc
23. PASS 76f988da | 76f988da
24. PASS 983e5152 | 983e5152
25. PASS a831c66d | a831c66d
26. PASS b00327c8 | b00327c8
27. PASS bf597fc7 | bf597fc7
28. PASS c6e00bf3 | c6e00bf3
29. PASS d5a79147 | d5

## Problem 3: Padding

In [35]:
def block_parse(msg: bytes) -> Generator[bytes, None, None]:
    """
    Pad a message per SHA-256 and yield it as 512-bit blocks.

    The padded message is built as:
    1. the original message bytes,
    2. a single '1' bit (the byte 0x80),
    3. k zero bits such that ℓ + 1 + k ≡ 448 mod 512,
    4. the original length in bits as a 64-bit big-endian integer,
    and is then handed out 64 bytes at a time.

    Parameters:
        msg (bytes): Input message to pad and parse

    Yields:
        bytes: Next 512-bit block (64 bytes) of padded message
    """
    block_size = 64
    bit_length = 8 * len(msg)

    # Message followed by the mandatory 0x80 marker byte.
    padded = bytearray(msg)
    padded.append(0x80)

    # Zero fill so that exactly 8 bytes remain free at the end of the last
    # block (56 bytes = 448 bits into that block).
    zero_fill = (56 - len(padded)) % block_size
    padded.extend(bytes(zero_fill))

    # Original message length in bits, 64-bit big-endian.
    padded.extend(struct.pack('>Q', bit_length))

    offset = 0
    while offset < len(padded):
        yield bytes(padded[offset:offset + block_size])
        offset += block_size

In [36]:
"""
## Testing the block_parse Generator

Test 5 different message lengths to verify correct padding.
"""


# Printed checks of block_parse() at message lengths on either side of the
# 55/56-byte boundary, where the 0x80 marker and the 8-byte length field stop
# fitting in one 64-byte block together with the message.
print("=== Testing block_parse() with 5 Different Message Lengths ===\n")

# Test 1: Empty message
# The only block must start with the 0x80 marker and end with a zero length.
print("Test 1: Empty message (0 bytes)")
blocks = list(block_parse(b''))
print(f"  Blocks: {len(blocks)}")
print(f"  First byte: 0x{blocks[0][0]:02x} (0x80 = '1' bit)")
print(f"  Last 8 bytes: {blocks[0][-8:].hex()} (length = 0)")
print()

# Test 2: "abc" 
# Shows the start of the block and the trailing length field, which is
# decoded back to an integer with the same '>Q' format block_parse packs it with.
print("Test 2: 'abc' (3 bytes, FIPS standard example)")
blocks = list(block_parse(b'abc'))
print(f"  Blocks: {len(blocks)}")
block_hex = blocks[0].hex()
print(f"  Block starts: {block_hex[:32]}...")
print(f"  Block ends: ...{block_hex[-16:]}")
print(f"  Length field: {struct.unpack('>Q', blocks[0][-8:])[0]} bits")
print()

# Test 3: Exactly 55 bytes
# 55 + 1 marker byte + 8 length bytes = 64, so no zero fill is needed.
print("Test 3: Exactly 55 bytes (fits in one block)")
msg = b'A' * 55
blocks = list(block_parse(msg))
combined = b''.join(blocks)
print(f"  Blocks: {len(blocks)}")
# Find 0x80 in the combined padded message
# (the message bytes are all 'A', so the first 0x80 found is the padding marker)
pos = combined.find(b'\x80')
print(f"  0x80 position: byte {pos}")
if pos == 55:
    print(f"  Correct: 0x80 at position 55 (end of message)")
else:
    print(f"  ERROR: 0x80 at wrong position {pos}")
print()

# Test 4: Exactly 56 bytes
# One byte more than Test 3: the length field no longer fits in the first block.
print("Test 4: Exactly 56 bytes (requires two blocks)")
msg = b'B' * 56
blocks = list(block_parse(msg))
combined = b''.join(blocks)
print(f"  Blocks: {len(blocks)}")
print(f"  Block 1 has message: {blocks[0][:56].hex()[:16]}...")
# Find 0x80 in the combined padded message
pos = combined.find(b'\x80')  
print(f"  0x80 position: byte {pos}")
if pos == 56:
    print(f"  Correct: 0x80 at position 56 (start of block 2)")
else:
    print(f"  ERROR: 0x80 at wrong position {pos}")
print()

# Test 5: Long message (100 bytes)
# Checks that every yielded block is exactly 64 bytes long.
print("Test 5: Long message (100 bytes)")
msg = b'C' * 100
blocks = list(block_parse(msg))
print(f"  Blocks: {len(blocks)}")
print(f"  All blocks 64 bytes: {all(len(b) == 64 for b in blocks)}")
print(f"  Total padded length: {len(blocks) * 64} bytes")
print()
print()
print()

"""
## Demonstration of Generator Behavior

Show that block_parse() yields blocks one at a time.

logic:
48656c6c6f2c2057 = Hex for "Hello, W" 

48 = H, 65 = e, 6c = l, 6c = l, 6f = o, 2c = ,, 20 = space, 57 = W

0000000000000068 = Length in bits (104 bits = 13 bytes × 8)
"""


# Iterate the generator directly (without list()) and report each block as it
# is yielded: its size plus its first and last 8 bytes in hex.
print("=== Demonstrating Generator Behavior ===\n")

print("Processing 'Hello, World!':")
gen = block_parse(b'Hello, World!')

for i, block in enumerate(gen):
    print(f"  Yielded block {i+1}: {len(block)} bytes")
    print(f"    First 8 bytes: {block[:8].hex()}")
    print(f"    Last 8 bytes: {block[-8:].hex()}")


=== Testing block_parse() with 5 Different Message Lengths ===

Test 1: Empty message (0 bytes)
  Blocks: 1
  First byte: 0x80 (0x80 = '1' bit)
  Last 8 bytes: 0000000000000000 (length = 0)

Test 2: 'abc' (3 bytes, FIPS standard example)
  Blocks: 1
  Block starts: 61626380000000000000000000000000...
  Block ends: ...0000000000000018
  Length field: 24 bits

Test 3: Exactly 55 bytes (fits in one block)
  Blocks: 1
  0x80 position: byte 55
  Correct: 0x80 at position 55 (end of message)

Test 4: Exactly 56 bytes (requires two blocks)
  Blocks: 2
  Block 1 has message: 4242424242424242...
  0x80 position: byte 56
  Correct: 0x80 at position 56 (start of block 2)

Test 5: Long message (100 bytes)
  Blocks: 2
  All blocks 64 bytes: True
  Total padded length: 128 bytes



=== Demonstrating Generator Behavior ===

Processing 'Hello, World!':
  Yielded block 1: 64 bytes
    First 8 bytes: 48656c6c6f2c2057
    Last 8 bytes: 0000000000000068


In [37]:
# Helper function
def block_to_words(block: bytes) -> List[int]:
    """
    Split a 512-bit block into sixteen 32-bit big-endian words.

    Parameters:
        block (bytes): 64-byte block, e.g. as yielded by block_parse()

    Returns:
        List[int]: 16 unsigned 32-bit words, in block order

    Raises:
        ValueError: If block is not exactly 64 bytes long
    """
    if len(block) != 64:
        raise ValueError(f"Block must be 64 bytes (512 bits), got {len(block)} bytes")

    # '>16I' decodes sixteen consecutive big-endian unsigned 32-bit integers.
    return list(struct.unpack('>16I', block))

# Test it
# Quick check of block_to_words(): pad a short message, take the first block
# from the generator, and show the first and last of its 16 words. For this
# single-block message the last word is the low half of the 64-bit length field.
print("\n=== Testing block_to_words for Problem 4 Integration ===")
test_block = next(block_parse(b'test'))
words = block_to_words(test_block)
print(f"Converted block to {len(words)} words")
print(f"First word: 0x{words[0]:08x}")
print(f"Last word: 0x{words[-1]:08x}")


=== Testing block_to_words for Problem 4 Integration ===
Converted block to 16 words
First word: 0x74657374
Last word: 0x00000020


## Problem 4: Hashes

In [51]:
"""
Problem 4: SHA-256 Hash Computation Implementation

Implementation of the SHA-256 hash function according to FIPS PUB 180-4 Section 6.2.2.
This function computes the next hash value given current hash and message block.

Background

SHA-256 is a cryptographic hash function that processes messages in 512-bit blocks.
The `hash(current, block)` function implements the SHA-256 compression function
(Section 6.2.2), which is the core transformation that updates the hash state.
"""

# Required Import
# import numpy as np
# import struct
# from typing import List, Tuple

# Import functions from previous problems
# These imports assume YOU HAVE ALREADY RUN cells from Problems 1-3 in the notebook
try:
    # Use existing implementations from Problem 1.
    # This succeeds only when those names already exist in __main__, i.e.
    # the Problem 1 cells have been run in this session.
    from __main__ import Ch, Maj, Sigma0, Sigma1, sigma0, sigma1
except ImportError:
    # Fallback definitions if run independently.
    # They mirror the Problem 1 versions: inputs are coerced to uint32 and
    # every rotation is re-wrapped in uint32, so results are limited to 32 bits
    # exactly as in Problem 1 (plain Python ints would otherwise grow past
    # 32 bits on the left shifts).
    def _rotr32(x, n):
        """Rotate the uint32 word x right by n bits (0 < n < 32)."""
        return np.uint32((x >> n) | (x << (32 - n)))

    def Ch(x, y, z):
        """Choose function: (x AND y) XOR (NOT x AND z)"""
        x, y, z = np.uint32(x), np.uint32(y), np.uint32(z)
        return (x & y) ^ (~x & z)

    def Maj(x, y, z):
        """Majority function: (x AND y) XOR (x AND z) XOR (y AND z)"""
        x, y, z = np.uint32(x), np.uint32(y), np.uint32(z)
        return (x & y) ^ (x & z) ^ (y & z)

    def Sigma0(x):
        """Uppercase Sigma0: ROTR-2(x) XOR ROTR-13(x) XOR ROTR-22(x)"""
        x = np.uint32(x)
        return _rotr32(x, 2) ^ _rotr32(x, 13) ^ _rotr32(x, 22)

    def Sigma1(x):
        """Uppercase Sigma1: ROTR-6(x) XOR ROTR-11(x) XOR ROTR-25(x)"""
        x = np.uint32(x)
        return _rotr32(x, 6) ^ _rotr32(x, 11) ^ _rotr32(x, 25)

    def sigma0(x):
        """Lowercase sigma0: ROTR-7(x) XOR ROTR-18(x) XOR SHR-3(x)"""
        x = np.uint32(x)
        return _rotr32(x, 7) ^ _rotr32(x, 18) ^ np.uint32(x >> 3)

    def sigma1(x):
        """Lowercase sigma1: ROTR-17(x) XOR ROTR-19(x) XOR SHR-10(x)"""
        x = np.uint32(x)
        return _rotr32(x, 17) ^ _rotr32(x, 19) ^ np.uint32(x >> 10)

"""
## Constants Integration

For SHA-256 compression, we need the K256 constants from Problem 2.
These are the first 32 bits of fractional parts of cube roots of first 64 primes.
"""

# Import K256 constants from Problem 2
# (reuses the array computed earlier when it already exists in __main__)
try:
    from __main__ import K256
except ImportError:
    # 64 constants
    # Literal fallback table; the values are the same 64 words as the
    # FIPS_CONSTANTS reference list in Problem 2, written as integers.
    K256 = np.array([
        0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 
        0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 
        0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, 
        0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 
        0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 
        0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 
        0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, 
        0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 
        0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 
        0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 
        0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
    ], dtype=np.uint32)
    
# SHA-256 Initial Hash Values (H⁰) from FIPS 180-4 Section 5.3.3
# Eight uint32 words; this shape and dtype are what hash() requires of its
# `current` argument.
INITIAL_HASH = np.array([
    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
], dtype=np.uint32)

In [52]:
"""
Core Implementation: SHA-256 Compression Function

The `hash(current, block)` function implements the SHA-256 compression function
as specified in Section 6.2.2 of FIPS 180-4.

Algorithm Overview:
1. Prepare the message schedule {Wₜ} from the 512-bit block
2. Initialize working variables a-h with the current hash value
3. Perform 64 rounds of transformation
4. Compute the next hash by adding working variables to current hash

All operations are performed modulo 2³² using 32-bit unsigned integers.
"""

def prepare_message_schedule(block: bytes) -> np.ndarray:
    """
    Build the 64-word message schedule {Wₜ} for one 512-bit block.

    Step 1 of Section 6.2.2:
    - W₀..W₁₅ are the block's sixteen big-endian 32-bit words
    - W₁₆..W₆₃ are Wₜ = σ₁(Wₜ₋₂) + Wₜ₋₇ + σ₀(Wₜ₋₁₅) + Wₜ₋₁₆ (mod 2³²)

    Parameters:
        block (bytes): 64-byte (512-bit) message block

    Returns:
        numpy.ndarray: Array of 64 uint32 words (W₀ to W₆₃)

    Raises:
        ValueError: If block is not exactly 64 bytes long
    """
    if len(block) != 64:
        raise ValueError(f"Block must be 64 bytes (512 bits), got {len(block)} bytes")

    W = np.zeros(64, dtype=np.uint32)

    # Decode all sixteen big-endian words in one call, then store them.
    for t, word in enumerate(struct.unpack('>16I', block)):
        W[t] = np.uint32(word)

    # Expansion: sum in Python ints (no numpy overflow warnings), then reduce
    # modulo 2**32 before storing.
    for t in range(16, 64):
        terms = (sigma1(W[t - 2]), W[t - 7], sigma0(W[t - 15]), W[t - 16])
        total = sum(int(term) for term in terms)
        W[t] = np.uint32(total % (1 << 32))

    return W


def hash(current: np.ndarray, block: bytes) -> np.ndarray:
    """
    SHA-256 compression step: fold one 512-bit block into the hash state.

    Implements Section 6.2.2 SHA-256 Hash Computation (FIPS PUB 180-4).
    Note that this module-level name shadows Python's built-in hash().

    Parameters:
        current (numpy.ndarray): Current hash value as 8 uint32 values
                                 (H₀⁽ⁱ⁻¹⁾ to H₇⁽ⁱ⁻¹⁾)
        block (bytes): Next 512-bit message block (64 bytes)

    Returns:
        numpy.ndarray: Next hash value as 8 uint32 values (H₀⁽ⁱ⁾ to H₇⁽ⁱ⁾)

    Raises:
        ValueError: If current is not a uint32 ndarray of shape (8,), or
            block is not exactly 64 bytes long

    Algorithm:
        1. Prepare message schedule {Wₜ}
        2. Initialize a-h with current hash
        3. For t = 0 to 63:
            T1 = h + Σ₁(e) + Ch(e,f,g) + Kₜ + Wₜ
            T2 = Σ₀(a) + Maj(a,b,c)
            (a, b, c, d, e, f, g, h) = (T1 + T2, a, b, c, d + T1, e, f, g)
        4. Compute: H₀⁽ⁱ⁾ = a + H₀⁽ⁱ⁻¹⁾, ..., H₇⁽ⁱ⁾ = h + H₇⁽ⁱ⁻¹⁾
    All additions are modulo 2³².
    """
    mask32 = 0xFFFFFFFF

    # Input validation
    state_ok = (
        isinstance(current, np.ndarray)
        and current.shape == (8,)
        and current.dtype == np.uint32
    )
    if not state_ok:
        raise ValueError("current must be a numpy array of 8 uint32 values")

    if len(block) != 64:
        raise ValueError(f"block must be 64 bytes (512 bits), got {len(block)} bytes")

    # Step 1: message schedule
    W = prepare_message_schedule(block)

    # Step 2: working variables start as a copy of the incoming state
    a, b, c, d, e, f, g, h = current.copy()

    # Step 3: 64 rounds. Sums are taken in Python ints and masked to 32 bits.
    for t in range(64):
        T1 = np.uint32(
            (int(h) + int(Sigma1(e)) + int(Ch(e, f, g)) + int(K256[t]) + int(W[t])) & mask32
        )
        T2 = np.uint32((int(Sigma0(a)) + int(Maj(a, b, c))) & mask32)

        # The right-hand side is evaluated in full before any name is rebound,
        # which gives the simultaneous update the round requires.
        a, b, c, d, e, f, g, h = (
            np.uint32((int(T1) + int(T2)) & mask32),
            a,
            b,
            c,
            np.uint32((int(d) + int(T1)) & mask32),
            e,
            f,
            g,
        )

    # Step 4: add the working variables back into the incoming state
    next_hash = np.zeros(8, dtype=np.uint32)
    for index, working in enumerate((a, b, c, d, e, f, g, h)):
        next_hash[index] = np.uint32((int(working) + int(current[index])) & mask32)

    return next_hash

In [54]:
"""
Testing the hash() Function

Direct tests of the hash() function with known values to verify correctness.
These tests focus specifically on the compression function, not the full SHA-256.
"""

def bytes_to_hash_hex(hash_array: np.ndarray) -> str:
    """
    Render a sequence of hash words as one lowercase hexadecimal string.

    Every word is written as eight zero-padded hex digits and the pieces are
    joined in array order, so an 8-word hash state gives 64 characters.

    Parameters:
        hash_array (numpy.ndarray): Array of 8 uint32 hash values

    Returns:
        str: Hexadecimal string (8 characters per word, words concatenated)
    """
    hex_words = [format(word, '08x') for word in hash_array]
    return ''.join(hex_words)


# Probe the running namespace for block_parse (from Problem 3). The
# integration tests further down only run when the import succeeds.
try:
    from __main__ import block_parse
except ImportError:
    BLOCK_PARSE_AVAILABLE = False
    print("Note: block_parse from Problem 3 not available for integration tests")
else:
    BLOCK_PARSE_AVAILABLE = True


print("=== Basic Functionality Tests ===", "", sep="\n")

# Test 1: apply hash() once to an all-zero 64-byte block, starting from the
# initial hash value, and report whether the state changed
print("Test 1: Initial hash with zero block", "-" * 40, sep="\n")

zero_block = b'\x00' * 64  # reused by the error-handling checks in Test 6
result = hash(INITIAL_HASH.copy(), zero_block)

print(
    f"Initial hash: {bytes_to_hash_hex(INITIAL_HASH)}",
    f"Zero block  : {'00'*64}...",
    f"Result      : {bytes_to_hash_hex(result)}",
    f"Hash changed: {not np.array_equal(INITIAL_HASH, result)}",
    "",
    sep="\n",
)

# Test 2: hashing the same block from the same starting state twice must
# produce the same result
print("Test 2: Determinism (same inputs → same output)", "-" * 40, sep="\n")

test_block = bytes([0x41]) * 64  # 64 copies of ASCII 'A'; reused by Test 3
result1, result2 = [hash(INITIAL_HASH.copy(), test_block) for _ in range(2)]

print(
    f"Block      : {'41'*8}... (64 bytes of 'A')",
    f"Result 1   : {bytes_to_hash_hex(result1)}",
    f"Result 2   : {bytes_to_hash_hex(result2)}",
    f"Identical  : {np.array_equal(result1, result2)}",
    "",
    sep="\n",
)

# Test 3: Sensitivity to input changes
# Two experiments: (a) flip one bit of the starting hash state, (b) flip one
# bit of the message block. In both cases the outputs are compared to show
# how far a single-bit change spreads.
print("Test 3: Avalanche effect (small changes → big differences)")
print("-" * 40)

# (a) Same block, starting states that differ in exactly one bit
current1 = INITIAL_HASH.copy()
current2 = INITIAL_HASH.copy()
current2[0] ^= 0x00000001  # Flip the least significant bit of the first word

result1 = hash(current1, test_block)
result2 = hash(current2, test_block)

print(f"Current hash 1: {bytes_to_hash_hex(current1)}")
print(f"Current hash 2: {bytes_to_hash_hex(current2)} (1 bit flipped)")
print(f"Result 1      : {bytes_to_hash_hex(result1)}")
print(f"Result 2      : {bytes_to_hash_hex(result2)}")
# The comparison walks the eight 32-bit words of each result, so the count
# is a number of words (not bytes) and is labelled accordingly.
print(f"Words different: {sum(r1 != r2 for r1, r2 in zip(result1, result2))}/8 words")
print()

# (b) Same starting state, blocks that differ in exactly one bit
block1 = test_block
block2 = bytearray(test_block)
block2[0] ^= 0x01  # Flip the least significant bit of the first byte
block2 = bytes(block2)

result1 = hash(INITIAL_HASH.copy(), block1)
result2 = hash(INITIAL_HASH.copy(), block2)

# Hex strings are computed once and reused for display and comparison
result1_hex = bytes_to_hash_hex(result1)
result2_hex = bytes_to_hash_hex(result2)

# Show the first 8 bytes of the blocks that were actually hashed, rather
# than hard-coded hex, so the preview cannot drift from the data
print(f"Block 1: {block1[:8].hex()}...")
print(f"Block 2: {block2[:8].hex()}... (first byte changed)")
print(f"Result 1: {result1_hex}")
print(f"Result 2: {result2_hex}")
print(f"Hex chars different: {sum(c1 != c2 for c1, c2 in zip(result1_hex, result2_hex))}/64")
print()

# Test 4: Integration with block_parse (Problem 3)
# Chains hash() over the blocks that block_parse yields for b'abc' and
# compares the final state with the known SHA-256 digest of "abc".
if not BLOCK_PARSE_AVAILABLE:
    print(
        "Test 4: Skipped - block_parse not available",
        "(Integration with Problem 3 would be tested here)",
        "",
        sep="\n",
    )
else:
    print("Test 4: Integration with Problem 3 (block_parse)", "-" * 40, sep="\n")

    # Collect every block produced for the "abc" message
    abc_blocks = list(block_parse(b'abc'))
    print(f"'abc' message produces {len(abc_blocks)} block(s)")

    # Feed each block through hash(), carrying the state forward
    current = INITIAL_HASH.copy()
    for i, block in enumerate(abc_blocks):
        current = hash(current, block)
        print(f"  After block {i+1}: {bytes_to_hash_hex(current)}")

    # Known SHA-256 of "abc" versus the value computed above
    expected_abc = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
    computed_abc = bytes_to_hash_hex(current)

    print(
        f"Expected SHA-256('abc'): {expected_abc}",
        f"Computed SHA-256('abc'): {computed_abc}",
        f"Match: {computed_abc == expected_abc}",
        "",
        sep="\n",
    )

# Test 5: Multi-block processing demonstration
# With block_parse available, a 100-byte message is hashed block by block and
# checked against hashlib; otherwise two fixed blocks are chained by hand.
print("Test 5: Multi-block processing demonstration", "-" * 40, sep="\n")

# Create a message that will span 2 blocks
long_message = b'X' * 100  # 100 bytes > 64 bytes, so 2 blocks

if not BLOCK_PARSE_AVAILABLE:
    print("Using simulated 2-block processing...")
    # Stand-in blocks for the simulated run
    block_a = b'A' * 64
    block_b = b'B' * 64

    # First block starts from the initial hash value
    hash_after_block1 = hash(INITIAL_HASH.copy(), block_a)
    print(f"After block 1: {bytes_to_hash_hex(hash_after_block1)[:16]}...")

    # Second block starts from the result of the first
    hash_after_block2 = hash(hash_after_block1.copy(), block_b)
    print(f"After block 2: {bytes_to_hash_hex(hash_after_block2)[:16]}...")

    print("(Note: This demonstrates chaining, not a specific known result)")
else:
    blocks = list(block_parse(long_message))
    print(f"Message of {len(long_message)} bytes produces {len(blocks)} block(s)")

    # Carry the hash state forward, showing it before and after each block
    current = INITIAL_HASH.copy()
    for i, block in enumerate(blocks):
        before = bytes_to_hash_hex(current)
        current = hash(current, block)
        after = bytes_to_hash_hex(current)
        print(f"  Block {i+1}: {before[:16]}... → {after[:16]}...")

    # Verify with hashlib
    import hashlib
    expected = hashlib.sha256(long_message).hexdigest()
    computed = bytes_to_hash_hex(current)

    print(
        f"Expected: {expected}",
        f"Computed: {computed}",
        f"Match: {computed == expected}",
        sep="\n",
    )
print()

# Test 6: Edge cases and error handling
# Each case passes one malformed argument to hash() and reports whether the
# expected exception was raised.
print("Test 6: Error handling and edge cases", "-" * 40, sep="\n")

# Current hash with the wrong number of words (3 instead of 8)
try:
    bad_current = np.array([1, 2, 3], dtype=np.uint32)
    hash(bad_current, zero_block)
except ValueError as e:
    print(f"Correctly rejected wrong size: {e}")
else:
    print("Should have raised ValueError for wrong size")

# Block that is shorter than 64 bytes
try:
    bad_block = b'too short'
    hash(INITIAL_HASH.copy(), bad_block)
except ValueError as e:
    print(f"Correctly rejected wrong block size: {e}")
else:
    print("Should have raised ValueError for wrong block size")

# Current hash of the right length but with a float dtype; either a
# ValueError or a TypeError counts as a rejection
try:
    bad_current = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0])
    hash(bad_current, zero_block)
except (ValueError, TypeError) as e:
    print(f"Correctly rejected wrong dtype: {e}")
else:
    print("Should have raised ValueError for wrong dtype")

print("", "All direct tests of hash() function completed", sep="\n")

=== Basic Functionality Tests ===

Test 1: Initial hash with zero block
----------------------------------------
Initial hash: 6a09e667bb67ae853c6ef372a54ff53a510e527f9b05688c1f83d9ab5be0cd19
Zero block  : 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000...
Result      : da5698be17b9b46962335799779fbeca8ce5d491c0d26243bafef9ea1837a9d8
Hash changed: True

Test 2: Determinism (same inputs → same output)
----------------------------------------
Block      : 4141414141414141... (64 bytes of 'A')
Result 1   : 6cb7244102f65790145141e105d2c3d387a1d91e9ceda13a4a3d25dd87beea05
Result 2   : 6cb7244102f65790145141e105d2c3d387a1d91e9ceda13a4a3d25dd87beea05
Identical  : True

Test 3: Avalanche effect (small changes → big differences)
----------------------------------------
Current hash 1: 6a09e667bb67ae853c6ef372a54ff53a510e527f9b05688c1f83d9ab5be0cd19
Current hash 2: 6a09e666bb67ae853c6ef372a54ff53a510e527f9b05688c1f8

## Problem 5: Passwords