# MSc Data Science and Artificial Intelligence
# DSM070 Blockchain Programming Coursework
# MD2 Cryptographic Hash Function
# ('abc' Input)

## Program: 50% - Essay: 50%

## Step 1: Set-up Before Padding

In [1]:
# set the substitution table
S = [41, 46, 67, 201, 162, 216, 124, 1, 61, 54, 84, 161, 236, 240, 6, 19,
     98, 167, 5, 243, 192, 199, 115, 140, 152, 147, 43, 217, 188, 76, 130, 202,
     30, 155, 87, 60, 253, 212, 224, 22, 103, 66, 111, 24, 138, 23, 229, 18,
     190, 78, 196, 214, 218, 158, 222, 73, 160, 251, 245, 142, 187, 47, 238, 122,
     169, 104, 121, 145, 21, 178, 7, 63, 148, 194, 16, 137, 11, 34, 95, 33,
     128, 127, 93, 154, 90, 144, 50, 39, 53, 62, 204, 231, 191, 247, 151, 3,
     255, 25, 48, 179, 72, 165, 181, 209, 215, 94, 146, 42, 172, 86, 170, 198,
     79, 184, 56, 210, 150, 164, 125, 182, 118, 252, 107, 226, 156, 116, 4, 241,
     69, 157, 112, 89, 100, 113, 135, 32, 134, 91, 207, 101, 230, 45, 168, 2,
     27, 96, 37, 173, 174, 176, 185, 246, 28, 70, 97, 105, 52, 64, 126, 15,
     85, 71, 163, 35, 221, 81, 175, 58, 195, 92, 249, 206, 186, 197, 234, 38,
     44, 83, 13, 110, 133, 40, 132, 9, 211, 223, 205, 244, 65, 129, 77, 82,
     106, 220, 55, 200, 108, 193, 171, 250, 36, 225, 123, 8, 12, 189, 177, 74,
     120, 136, 149, 139, 227, 99, 232, 109, 233, 203, 213, 254, 59, 0, 29, 57,
     242, 239, 183, 14, 102, 88, 208, 228, 166, 119, 114, 248, 235, 117, 75, 10,
     49, 68, 80, 180, 143, 237, 31, 26, 219, 153, 141, 51, 159, 17, 131, 20]

In [2]:
# set the input to abc
msg = "abc"
block_size = 16

# convert the string to byte list
msg_bytes = list(str.encode(msg))

## Step 2: Append Padding Bytes

> Padding was used to get a full block size to the encrypted data

In [3]:
# append i bytes of the vlaue i, so here for abc 13 bytes will be appended with values 13
if len(msg_bytes) % 16 == 0:
    missing_bytes = 16 
else:
    missing_bytes = 16-(len(msg_bytes) % 16)
while True:
    msg_bytes.append(missing_bytes)
    if len(msg_bytes) % 16 == 0:
        break    

In [4]:
# set the block numbers which was used when iterating through more than 1 block
block_no = int(len(msg_bytes) / block_size)

In [5]:
print("Message before padding: ", list(str.encode(msg)))
print("Length of message before padding: " + str(len(msg)), '\n')

print("Extended message after padding: ", msg_bytes)
print("Length after padding: " + str(missing_bytes), '\n')

print("Padded message becomes congruent to zero: ", len(msg_bytes) % 16)
print("Blocks nummber: " + str(block_no))

Message before padding:  [97, 98, 99]
Length of message before padding: 3 

Extended message after padding:  [97, 98, 99, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13]
Length after padding: 13 

Padded message becomes congruent to zero:  0
Blocks nummber: 1


In [6]:
# set a byte_array function to print the byte block values as a table
def byte_array():
    
    # print block number in the first row
    print('0\t1\t2\t3\t4\t5\t6\t7\t8\t9\t10\t11\t12\t13\t14\t15')

    for i in range(block_no):
        line_abb = "" 
        
        for j in range(block_size):          
            line_abb = line_abb + str(msg_bytes[i * block_size + j]) + '\t'
        
        print(line_abb)

# get the abc array by block
byte_array()

0	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15
97	98	99	13	13	13	13	13	13	13	13	13	13	13	13	13	


## Step 3: Append Checksum

In [7]:
# clear the checksum to be zero
padded_checksum = block_size * [0]

# generate the checksum
l = 0
for i in range(block_no):
    for j in range(block_size):
        x = msg_bytes[i * block_size + j]
        l = S[(x ^ l)] 
        l = l ^ padded_checksum[j]
        padded_checksum[j] = l

# add the checksum to the padded message
msg_bytes = msg_bytes + padded_checksum
block_no = block_no + 1

byte_array()

0	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15
97	98	99	13	13	13	13	13	13	13	13	13	13	13	13	13	
25	226	157	27	115	4	54	142	89	90	39	111	48	47	87	204	


## Step 4: Initialize MD Buffer

In [8]:
# initialize the message digest to zero using 3 blocks of 16-byte length
md2_digest = [0] * 3 * block_size

# Step 5: Process Message in 16-Byte Blocks

In [9]:
# generate the hashing
for i in range(block_no):    
    for j in range(block_size):
        md2_digest[block_size + j] = msg_bytes[i * block_size + j]
        md2_digest[2 * block_size + j] = md2_digest[block_size + j] ^ md2_digest[j]
        
    pre_byte = 0
    
    # iterate for 18 rounds
    for k in range(18):
        for l in range(3 * block_size):
            pre_byte = md2_digest[l] ^ S[pre_byte]
            md2_digest[l] = pre_byte
        pre_byte = (pre_byte + k) % len(S)

for x in range(int(len(md2_digest) / block_size)):
    print(''.join(map(lambda y: str(y) + '\t', md2_digest[x: x + block_size])))

218	133	59	13	63	136	217	155	48	40	58	105	230	222	214	187	
133	59	13	63	136	217	155	48	40	58	105	230	222	214	187	146	
59	13	63	136	217	155	48	40	58	105	230	222	214	187	146	101	


# Step 6: Output

In [10]:
# calculate the abc hash and take the first 16 bytes of the digest
# convert the list with bytes to hexadecimal format string
result = "".join([format(x, '02x') for x in md2_digest[:16]])
print("Expected:\n", "abc hash:", result, "\n")
print("Actual:\n", "abc hash:", "da853b0d3f88d99b30283a69e6ded6bb" )

# verify equality of expected and actual abc hash 
result == str("da853b0d3f88d99b30283a69e6ded6bb")

Expected:
 abc hash: da853b0d3f88d99b30283a69e6ded6bb 

Actual:
 abc hash: da853b0d3f88d99b30283a69e6ded6bb


True

# References:
- Kaliski, Burt (April 1992). The MD2 Message-Digest Algorithm. IETF. p. 3. doi:10.17487/RFC1319. RFC 1319. Retrieved 22 November 2014
- MD2 (Hashes and message digests) - YouTube. [online]. Available from: https://www.youtube.com/watch?v=BTf2zsvjvS0&t=370 [Accessed May 8, 2022].