<a href="https://colab.research.google.com/github/Supreetha20/DATABASE-SECURITY-LAB-1129M010-/blob/main/exp2d.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import math
import struct

# Type alias, moved to module level for correct scope
DigestArray = list[int]

# The MD5union class definition from the user's input, with corrected indentation.
# Note: This class currently serves as an empty placeholder as the MD5 logic
# is implemented through module-level functions, consistent with the user's original structure.
class MD5union:
    def __init__(self):
        self.w = 0
        self.b = bytearray(4)

# MD5 helper functions (F, G, H, I) - these are standard MD5 logical functions.
# They take an array (abcd) where abcd[1], abcd[2], abcd[3] are X, Y, Z.
# Explicit masking `& 0xFFFFFFFF` is applied for 32-bit arithmetic.
def func0(abcd: DigestArray) -> int:  # F function: (X & Y) | (~X & Z)
    X, Y, Z = abcd[1] & 0xFFFFFFFF, abcd[2] & 0xFFFFFFFF, abcd[3] & 0xFFFFFFFF
    return ((X & Y) | ((~X & 0xFFFFFFFF) & Z)) & 0xFFFFFFFF

def func1(abcd: DigestArray) -> int:  # G function: (X & Z) | (Y & ~Z)
    X, Y, Z = abcd[1] & 0xFFFFFFFF, abcd[2] & 0xFFFFFFFF, abcd[3] & 0xFFFFFFFF
    return ((X & Z) | (Y & (~Z & 0xFFFFFFFF))) & 0xFFFFFFFF

def func2(abcd: DigestArray) -> int:  # H function: X ^ Y ^ Z
    X, Y, Z = abcd[1] & 0xFFFFFFFF, abcd[2] & 0xFFFFFFFF, abcd[3] & 0xFFFFFFFF
    return (X ^ Y ^ Z) & 0xFFFFFFFF

def func3(abcd: DigestArray) -> int:  # I function: Y ^ (X | ~Z)
    X, Y, Z = abcd[1] & 0xFFFFFFFF, abcd[2] & 0xFFFFFFFF, abcd[3] & 0xFFFFFFFF
    return (Y ^ (X | (~Z & 0xFFFFFFFF))) & 0xFFFFFFFF

# Function to calculate the sine-based constants (K table) for MD5.
# All constants are 32-bit unsigned integers.
def calctable(k: DigestArray) -> DigestArray:
    pwr = 2 ** 32
    for i in range(64):
        s = abs(math.sin(1 + i))
        k[i] = int(s * pwr) & 0xFFFFFFFF  # Ensure value stays within 32-bit unsigned range
    return k

# Function for 32-bit left circular shift (rotate left).
# Ensures the input and output are treated as 32-bit unsigned.
def rol(r: int, N: int) -> int:
    r &= 0xFFFFFFFF  # Ensure r is 32-bit
    return ((r << N) | (r >> (32 - N))) & 0xFFFFFFFF

# The core MD5 hash function.
def md5(msg: str, mlen: int) -> DigestArray:
    # Initial hash values (A, B, C, D) as defined in RFC 1321
    h0 = [0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476]

    # Functions for each round (F, G, H, I)
    ff = [func0, func1, func2, func3]

    # Constants for determining message word (Mj) access order in each round
    M_indices = [1, 5, 3, 7]
    O_offsets = [0, 1, 5, 0]

    # Rotation amounts for each of the four rounds
    rot0 = [7, 12, 17, 22]
    rot1 = [5, 9, 14, 20]
    rot2 = [4, 11, 16, 23]
    rot3 = [6, 10, 15, 21]
    rots = [rot0, rot1, rot2, rot3]

    # Precompute K constants (sine table) used in each step
    kspace = [0] * 64
    k = calctable(kspace)

    # Initialize the working hash variables with h0 values
    h = h0.copy()

    # MD5 Padding: The message is padded to be a multiple of 512 bits (64 bytes).
    # 1. Append a single '1' bit (represented as 0x80 byte).
    # 2. Append zero bits until the message length (in bits) is 448 (mod 512).
    # 3. Append the original message length (in bits) as a 64-bit little-endian integer.

    original_bit_len = mlen * 8
    # Use 'latin-1' encoding as MD5 operates on a stream of bytes; each char -> one byte.
    msg_bytes = bytearray(msg.encode('latin-1'))

    # Step 1: Append 0x80
    msg_bytes.append(0x80)

    # Step 2: Append zero bytes
    current_len_mod_64 = len(msg_bytes) % 64
    num_zeros_needed = (56 - current_len_mod_64 + 64) % 64 # Ensure positive result
    msg_bytes.extend(b'\x00' * num_zeros_needed)

    # Step 3: Append original message length (64 bits, little-endian)
    msg_bytes.extend(original_bit_len.to_bytes(8, byteorder='little'))

    # Process the padded message in 64-byte (512-bit) blocks
    num_blocks = len(msg_bytes) // 64

    for block_idx in range(num_blocks):
        block = msg_bytes[block_idx*64 : (block_idx+1)*64]

        # Initialize A, B, C, D for this block with the current hash values
        a, b, c, d = h[0], h[1], h[2], h[3]

        # Main loop for 4 rounds (each round has 16 operations)
        for p in range(4): # p iterates from 0 to 3, representing the four rounds
            fctn = ff[p]
            rotn_values = rots[p] # Rotation amounts for the current round
            m_idx_param = M_indices[p]
            o_offset_param = O_offsets[p]

            for q in range(16): # q iterates from 0 to 15, representing 16 steps in a round
                # Determine the index `g` of the message word (Mj) to use in this step
                g_msg_word_idx = (m_idx_param * q + o_offset_param) % 16

                # Extract the 32-bit message word (Mj) from the current block (little-endian)
                Mj = struct.unpack("<I", block[g_msg_word_idx * 4 : (g_msg_word_idx + 1) * 4])[0]

                # Get the specific rotation amount `s_rot` for this step
                s_rot = rotn_values[q % 4]

                # Calculate the intermediate value that gets rotated and added
                # This is A + F(B,C,D) + Mj + K[i]
                # fctn([0,b,c,d]) correctly passes B, C, D as arguments to the F, G, H, I functions
                f_val_component_to_rotate = (a + fctn([0, b, c, d]) + Mj + k[q + 16 * p]) & 0xFFFFFFFF

                # Calculate the new 'B' register value for this step
                # This corresponds to: B + rol(A + F(B,C,D) + Mj + K[i], s)
                new_b = (b + rol(f_val_component_to_rotate, s_rot)) & 0xFFFFFFFF

                # Rotate the registers (A, B, C, D) for the next step.
                # The MD5 rotation is: A <- D, B <- calculated_value, C <- B, D <- C.
                # This matches the user's original logic: `abcd = [abcd[3], f, abcd[1], abcd[2]]`
                a, b, c, d = d, new_b, b, c

        # After all 64 operations, add this block's final A, B, C, D to the main hash sum.
        # All additions are modulo 2^32.
        h[0] = (h[0] + a) & 0xFFFFFFFF
        h[1] = (h[1] + b) & 0xFFFFFFFF
        h[2] = (h[2] + c) & 0xFFFFFFFF
        h[3] = (h[3] + d) & 0xFFFFFFFF

    return h

def main():
    # Test cases for MD5 to verify correctness
    msg_fox = "The quick brown fox jumps over the lazy dog"
    d_fox = md5(msg_fox, len(msg_fox))
    print("MD5 ENCRYPT (\"The quick brown fox jumps over the lazy dog\"):")
    # Expected MD5 hash: 9e107d9d372bb6826bd81d3542a419d6
    print("".join(f"{val:08x}" for val in d_fox))

    msg_hello = "Hello World!"
    d_hello = md5(msg_hello, len(msg_hello))
    print("\nMD5 ENCRYPT (\"Hello World!\"):")
    # Expected MD5 hash: 3e25960a79dbc69b674cd4ec67a72c65
    print("".join(f"{val:08x}" for val in d_hello))

    msg_empty = "" # Empty string
    d_empty = md5(msg_empty, len(msg_empty))
    print("\nMD5 ENCRYPT (empty string):")
    # Expected MD5 hash: d41d8cd98f00b204e9800998ecf8427e
    print("".join(f"{val:08x}" for val in d_empty))

# Call main function to run the examples when the script is executed
if __name__ == "__main__":
    main()


MD5 ENCRYPT ("The quick brown fox jumps over the lazy dog"):
9d7d109e82b62b37351dd86bd619a442

MD5 ENCRYPT ("Hello World!"):
876207ed36862e53921e845e8c0dc5bf

MD5 ENCRYPT (empty string):
d98c1dd404b2008f980980e97e42f8ec
