# MSc Data Science and Artificial Intelligence
# DSM070 Blockchain Programming Coursework
# MD2 Cryptographic Hash Function
# (Testing Suite Input)

## Program: 50% - Essay: 50%

In [12]:
# set the substitution table
S = [41, 46, 67, 201, 162, 216, 124, 1, 61, 54, 84, 161, 236, 240, 6, 19,
     98, 167, 5, 243, 192, 199, 115, 140, 152, 147, 43, 217, 188, 76, 130, 202,
     30, 155, 87, 60, 253, 212, 224, 22, 103, 66, 111, 24, 138, 23, 229, 18,
     190, 78, 196, 214, 218, 158, 222, 73, 160, 251, 245, 142, 187, 47, 238, 122,
     169, 104, 121, 145, 21, 178, 7, 63, 148, 194, 16, 137, 11, 34, 95, 33,
     128, 127, 93, 154, 90, 144, 50, 39, 53, 62, 204, 231, 191, 247, 151, 3,
     255, 25, 48, 179, 72, 165, 181, 209, 215, 94, 146, 42, 172, 86, 170, 198,
     79, 184, 56, 210, 150, 164, 125, 182, 118, 252, 107, 226, 156, 116, 4, 241,
     69, 157, 112, 89, 100, 113, 135, 32, 134, 91, 207, 101, 230, 45, 168, 2,
     27, 96, 37, 173, 174, 176, 185, 246, 28, 70, 97, 105, 52, 64, 126, 15,
     85, 71, 163, 35, 221, 81, 175, 58, 195, 92, 249, 206, 186, 197, 234, 38,
     44, 83, 13, 110, 133, 40, 132, 9, 211, 223, 205, 244, 65, 129, 77, 82,
     106, 220, 55, 200, 108, 193, 171, 250, 36, 225, 123, 8, 12, 189, 177, 74,
     120, 136, 149, 139, 227, 99, 232, 109, 233, 203, 213, 254, 59, 0, 29, 57,
     242, 239, 183, 14, 102, 88, 208, 228, 166, 119, 114, 248, 235, 117, 75, 10,
     49, 68, 80, 180, 143, 237, 31, 26, 219, 153, 141, 51, 159, 17, 131, 20]

In [13]:
block_size = 16

# define an MD2 function to iterate thrugh all the test suite messages
def MD2(msg):

    # convert the string to byte list
    msg_bytes = list(str.encode(msg))
    
    # append i bytes of the vlaue i
    if len(msg_bytes) % 16 == 0:
        missing_bytes = 16 
    else:
        missing_bytes = 16-(len(msg_bytes) % 16)
    while True:
        msg_bytes.append(missing_bytes)
        if len(msg_bytes) % 16 == 0:
            break     

    # set the block numbers which was used when iterating through more than 1 block
    block_no = int(len(msg_bytes) / block_size)

    
    # clear the checksum to be zero
    padded_checksum = block_size * [0]
    
    # generate the checksum
    l = 0
    for i in range(block_no):
        for j in range(block_size):
            x = msg_bytes[i * block_size + j]
            l = S[(x ^ l)] 
            l = l ^ padded_checksum[j]
            padded_checksum[j] = l

    # add the checksum to the padded message
    msg_bytes = msg_bytes + padded_checksum
    block_no = block_no + 1

    # initialize the message digest to zero using 3 blocks of 16-byte length
    md2_digest = [0] * 3 * block_size


    # generate the hashing
    for i in range(block_no):    
        for j in range(block_size):
            md2_digest[block_size + j] = msg_bytes[i * block_size + j]
            md2_digest[2 * block_size + j] = md2_digest[block_size + j] ^ md2_digest[j]

        pre_byte = 0
        
        # iterate for 18 rounds
        for k in range(18):
            for l in range(3 * block_size):
                pre_byte = md2_digest[l] ^ S[pre_byte]
                md2_digest[l] = pre_byte
            pre_byte = (pre_byte + k) % len(S)

    # calculate the hash and take the first 16 bytes of the digest
    # convert the list with bytes to hexadecimal format string
    return "" .join([format(x, '02x') for x in md2_digest[:16]])

In [14]:
test_suite = {
("") : "8350e5a3e24c153df2275c9f80692773",
("a") : "32ec01ec4a6dac72c0ab96fb34c0b5d1",
("abc") : "da853b0d3f88d99b30283a69e6ded6bb",
("message digest") : "ab4f496bfb2a530b219ff33031fe06b0",
("abcdefghijklmnopqrstuvwxyz") : "4e8ddff3650292ab5a4108c3aa47940b",
("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") :
"da33def2a42df13975352846c30338cd",
("12345678901234567890123456789012345678901234567890123456789012345678901234567890") : 
"d5976f79d83d3a0dc9806c3c66f3efd8"
}

# verify equality of all expected and actual hash messages
for k, v in test_suite.items():
    print(k,":")
    print("Expected:", MD2(k))
    print("Actual  :", v, "\n")
    print("-------------------------------------------")

 :
Expected: 8350e5a3e24c153df2275c9f80692773
Actual  : 8350e5a3e24c153df2275c9f80692773 

-------------------------------------------
a :
Expected: 32ec01ec4a6dac72c0ab96fb34c0b5d1
Actual  : 32ec01ec4a6dac72c0ab96fb34c0b5d1 

-------------------------------------------
abc :
Expected: da853b0d3f88d99b30283a69e6ded6bb
Actual  : da853b0d3f88d99b30283a69e6ded6bb 

-------------------------------------------
message digest :
Expected: ab4f496bfb2a530b219ff33031fe06b0
Actual  : ab4f496bfb2a530b219ff33031fe06b0 

-------------------------------------------
abcdefghijklmnopqrstuvwxyz :
Expected: 4e8ddff3650292ab5a4108c3aa47940b
Actual  : 4e8ddff3650292ab5a4108c3aa47940b 

-------------------------------------------
ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 :
Expected: da33def2a42df13975352846c30338cd
Actual  : da33def2a42df13975352846c30338cd 

-------------------------------------------
12345678901234567890123456789012345678901234567890123456789012345678901234567890 :
E

# Full Report for all the Test Suite Messages:

In [15]:
# set a byte_array function to print the byte block values as a table
def byte_array():
    
    # print block number in the first row
    print('0\t1\t2\t3\t4\t5\t6\t7\t8\t9\t10\t11\t12\t13\t14\t15')

    for i in range(block_no):
        line_abb = "" 
        
        for j in range(block_size):          
            line_abb = line_abb + str(msg_bytes[i * block_size + j]) + '\t'
        
        print(line_abb)

In [16]:
block_size = 16

# define an MD2 function to iterate thrugh all the test suite messages
def MD2(msg):
    
    # set those variables as global so it can be called out of the function
    global msg_bytes
    global block_no
    global msg_bytes

    # convert the string to byte list
    msg_bytes = list(str.encode(msg))
    
    # append i bytes of the vlaue i
    if len(msg_bytes) % 16 == 0:
        missing_bytes = 16 
    else:
        missing_bytes = 16-(len(msg_bytes) % 16)
    while True:
        msg_bytes.append(missing_bytes)
        if len(msg_bytes) % 16 == 0:
            break     

    # set the block numbers which was used when iterating through more than 1 block
    block_no = int(len(msg_bytes) / block_size)

    print("Message before padding: ", list(str.encode(msg)))
    print("Length of message before padding: " + str(len(msg)), '\n')

    print("Extended message after padding: ", msg_bytes)
    print("Length after padding: " + str(missing_bytes), '\n')

    print("Blocks nummber: " + str(block_no))
    
    # get the array by block
    print(byte_array()) 

    # clear the checksum to be zero
    padded_checksum = block_size * [0]
    
    # generate the checksum
    l = 0
    for i in range(block_no):
        for j in range(block_size):
            x = msg_bytes[i * block_size + j]
            l = S[(x ^ l)] 
            l = l ^ padded_checksum[j]
            padded_checksum[j] = l

    # add the checksum to the padded message
    msg_bytes = msg_bytes + padded_checksum
    block_no = block_no + 1

    # initialize the message digest to zero using 3 blocks of 16-byte length
    md2_digest = [0] * 3 * block_size

    # generate the hashing
    for i in range(block_no):    
        for j in range(block_size):
            md2_digest[block_size + j] = msg_bytes[i * block_size + j]
            md2_digest[2 * block_size + j] = md2_digest[block_size + j] ^ md2_digest[j]

        pre_byte = 0
        
        # iterate for 18 rpunds
        for k in range(18):
            for l in range(3 * block_size):
                pre_byte = md2_digest[l] ^ S[pre_byte]
                md2_digest[l] = pre_byte
            pre_byte = (pre_byte + k) % len(S)

    # calculate the hash and take the first 16 bytes of the digest
    # convert the list with bytes to hexadecimal format string
    return "" .join([format(x, '02x') for x in md2_digest[:16]])

In [17]:
test_suite = {
("") : "8350e5a3e24c153df2275c9f80692773",
("a") : "32ec01ec4a6dac72c0ab96fb34c0b5d1",
("abc") : "da853b0d3f88d99b30283a69e6ded6bb",
("message digest") : "ab4f496bfb2a530b219ff33031fe06b0",
("abcdefghijklmnopqrstuvwxyz") : "4e8ddff3650292ab5a4108c3aa47940b",
("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") :
"da33def2a42df13975352846c30338cd",
("12345678901234567890123456789012345678901234567890123456789012345678901234567890") : 
"d5976f79d83d3a0dc9806c3c66f3efd8"
}

for k, v in test_suite.items():
    print(k,":")
    print("Expected:", MD2(k))
    print("Actual  :", v, "\n")
    print("-------------------------------------------")

 :
Message before padding:  []
Length of message before padding: 0 

Extended message after padding:  [16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16]
Length after padding: 16 

Blocks nummber: 1
0	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15
16	16	16	16	16	16	16	16	16	16	16	16	16	16	16	16	
None
Expected: 8350e5a3e24c153df2275c9f80692773
Actual  : 8350e5a3e24c153df2275c9f80692773 

-------------------------------------------
a :
Message before padding:  [97]
Length of message before padding: 1 

Extended message after padding:  [97, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15]
Length after padding: 15 

Blocks nummber: 1
0	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15
97	15	15	15	15	15	15	15	15	15	15	15	15	15	15	15	
None
Expected: 32ec01ec4a6dac72c0ab96fb34c0b5d1
Actual  : 32ec01ec4a6dac72c0ab96fb34c0b5d1 

-------------------------------------------
abc :
Message before padding:  [97, 98, 99]
Length of message before padding: 3 

Extended message after padding:  [97, 98, 9

# References:
- Kaliski, Burt (April 1992). The MD2 Message-Digest Algorithm. IETF. p. 3. doi:10.17487/RFC1319. RFC 1319. Retrieved 22 November 2014.
- MD2 (Hashes and message digests) - YouTube. [online]. Available from: https://www.youtube.com/watch?v=BTf2zsvjvS0&t=370 [Accessed May 8, 2022].