In [18]:
import sys
import pandas as pd
import numpy as np
sys.path.append("../../../../2021/src/otp/")
from otp_utils import (
    as_integers,
    as_binary_strings,
    as_binary_string,
    print_as_binary,
    xor
)

In [11]:
# Quick check of the imported helper: the recorded output lists one integer per character of the input.
as_integers("wena JP")

[119, 101, 110, 97, 32, 74, 80]

In [12]:
# 32 rendered as a zero-padded 8-bit binary string.
format(32, "08b")

'00100000'

In [13]:
# 48 rendered as a zero-padded 8-bit binary string.
format(48, "08b")

'00110000'

In [14]:
# 64 rendered as a zero-padded 8-bit binary string.
format(64, "08b")

'01000000'

In [15]:
# 96 rendered as a zero-padded 8-bit binary string.
format(96, "08b")

'01100000'

In [8]:
# 127 rendered as a zero-padded 8-bit binary string.
format(127, "08b")

'01111111'

In [19]:
# Left-rotation amount for each of the 64 steps: four rounds of 16 steps,
# each round repeating its own group of four shifts.
_SHIFTS_POR_PASO = (
    [7, 12, 17, 22] * 4
    + [5, 9, 14, 20] * 4
    + [4, 11, 16, 23] * 4
    + [6, 10, 15, 21] * 4
)

# Additive constant for each step: floor(2**32 * |sin(i + 1)|), kept to 32 bits.
_CONSTANTES_SENO = [
    int(np.floor(2**32 * abs(np.sin(i + 1)))) & 0xFFFFFFFF for i in range(64)
]


def h_prima(H: int, mensaje: "bytes | bytearray") -> int:
    """Apply the 64-step MD5 compression function to one message block.

    Args:
        H: 128-bit state packed as four 32-bit words with ``a`` in bits
            96-127, ``b`` in bits 64-95, ``c`` in bits 32-63 and ``d`` in
            bits 0-31.
        mensaje: Bytes-like block, expected to be 64 bytes long. It is split
            into little-endian 32-bit words and words 0..15 are used; a block
            yielding fewer than 16 words raises ``IndexError``.

    Returns:
        The updated state as an ``int``, packed in the OPPOSITE word order to
        the input: ``a`` in bits 0-31, ``b`` in bits 32-63, ``c`` in bits
        64-95 and ``d`` in bits 96-127. Serialising the result as 16
        little-endian bytes therefore yields ``a || b || c || d``. A caller
        that chains several blocks must reverse the word order before passing
        the result back in as ``H``.
    """
    mascara = 0xFFFFFFFF

    # Unpack the incoming state, most significant word first.
    a0 = (H >> 96) & mascara
    b0 = (H >> 64) & mascara
    c0 = (H >> 32) & mascara
    d0 = H & mascara

    # Split the block into little-endian 32-bit words.
    M = [
        int.from_bytes(mensaje[pos:pos + 4], byteorder='little')
        for pos in range(0, len(mensaje), 4)
    ]

    A, B, C, D = a0, b0, c0, d0
    for j in range(64):
        # Each round has its own mixing function and message-word schedule.
        # ``~`` yields a negative Python int; the masking below folds the
        # result back into 32 bits.
        if j <= 15:
            F = (B & C) | ((~B) & D)
            g = j
        elif j <= 31:
            F = (D & B) | ((~D) & C)
            g = (5 * j + 1) % 16
        elif j <= 47:
            F = B ^ C ^ D
            g = (3 * j + 5) % 16
        else:
            F = C ^ (B | (~D))
            g = (7 * j) % 16

        F = (F + A + _CONSTANTES_SENO[j] + M[g]) & mascara
        desplazamiento = _SHIFTS_POR_PASO[j]
        # 32-bit left rotation of F; the mask discards bits shifted past bit 31.
        rotado = ((F << desplazamiento) | (F >> (32 - desplazamiento))) & mascara
        A, D, C, B = D, C, B, (B + rotado) & mascara

    # Add the block result to the incoming state, word by word, modulo 2**32.
    a0 = (a0 + A) & mascara
    b0 = (b0 + B) & mascara
    c0 = (c0 + C) & mascara
    d0 = (d0 + D) & mascara

    return a0 + (b0 << 32) + (c0 << 64) + (d0 << 96)

In [20]:
def md5_to_hex(digest):
    """Render a 128-bit integer digest as 32 lowercase hexadecimal characters.

    The integer is serialised as 16 little-endian bytes and those bytes are
    written out in order, so the least significant byte of ``digest`` produces
    the first two hex characters.
    """
    return digest.to_bytes(16, 'little').hex()

In [38]:
def custom_md5(m: str, h0: int) -> str:
    """Hash ``m`` with MD5, using ``h0`` as the first word of the initial state.

    Args:
        m: Message to hash; it is encoded as UTF-8 before hashing.
        h0: Value used for the first initial state word. The other three
            words are the fixed constants below. ``h_prima`` keeps only 32
            bits of each word, and with ``h0 = 0x67452301`` the result is the
            standard MD5 digest.

    Returns:
        The digest as a 32-character lowercase hexadecimal string.
    """
    mask32 = 0xFFFFFFFF

    # Padding: a single 0x80 byte, then zeros until the length is 56 mod 64,
    # then the original length in bits as an 8-byte little-endian integer.
    message = bytearray(m, 'utf-8')  # mutable copy of the input
    orig_len_in_bits = (8 * len(message)) & 0xffffffffffffffff
    message.append(0x80)
    while len(message) % 64 != 56:
        message.append(0)
    message += orig_len_in_bits.to_bytes(8, byteorder='little')

    # Initial state in the layout h_prima reads: first word in the top 32 bits.
    b0 = 0xefcdab89
    c0 = 0x98badcfe
    d0 = 0x10325476
    H = (h0 << 96) + (b0 << 64) + (c0 << 32) + d0

    # Padding guarantees at least one 64-byte block, so `digest` is always set.
    digest = 0
    for i in range(0, len(message), 64):
        digest = h_prima(H, message[i: i + 64])
        # h_prima returns the words in the reverse order to the one it reads
        # (first word in the LOW 32 bits). Repack before the next block so
        # messages spanning several blocks chain the state correctly.
        H = (
            ((digest & mask32) << 96)
            | (((digest >> 32) & mask32) << 64)
            | (((digest >> 64) & mask32) << 32)
            | ((digest >> 96) & mask32)
        )

    # md5_to_hex expects the layout h_prima returns, not the repacked one.
    return md5_to_hex(digest)

In [90]:
# Load the (hash, message) table; header=None because the column names are supplied here.
df = pd.read_csv("../../../../2021/tareas/tarea1/mensajes_pregunta_3/mensajes_pregunta_3.csv", header=None, names=["indice", "mensajes"])

# Membership set built once, instead of calling .unique() on every iteration.
hashes_conocidos = set(df["indice"])

count = 0
mensajes = []
while True:
    # The initial state word is 16207084 * 100 plus a running counter.
    # Hash once per step and reuse the value for both the test and the lookup.
    digest = custom_md5("fcjimenez@uc.cl", 16207084 * 100 + count)
    if digest not in hashes_conocidos:
        # Stop at the first counter value whose hash is not in the table.
        break
    # .item() requires exactly one matching row and raises otherwise.
    mensajes.append(df.loc[df["indice"] == digest, "mensajes"].item())
    count += 1

In [91]:
# Show the messages collected by the loop above, in the order their hashes were matched.
mensajes

['01111010000001100001011000000000001010000001011101001101010111100000101000000111',
 '00110011000111010101001100000100001011010101100101000000010000110000101000011011',
 '00110100000000110001101000011111001110000101011001001111010100010100100000000001',
 '00111111010011100001011100000011001011110100001001001001010101010100010000011000',
 '00111011000111000000101001001100001111000100010101001011010111110100110001001011',
 '00110101000010000101001100011000001001000101001000000100010101100100101100000110',
 '00101001000001110001010100000101001011110101011001010000010110010100010100000111',
 '01111010000000010001010101001100001011010101100100000100010110000100001100011011',
 '00101110000000010000000100000101001011110101011001001000000100000100110000000110',
 '00111001000110100101110101001100000011010101100101000000000100000100010100001000',
 '01111010000110100001101100001101001110000001011101001011010100110100100000001110',
 '001010010000011100011100000000100110110001100100010010010101100

In [92]:
# Number of consecutive counter values whose hash was found in the CSV.
len(mensajes)

200