# Pregunta 3

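A continuación se importan, desde la librería estándar de ``python``, las funciones de hash que serán de utilidad. Luego se definen funciones auxiliares que envuelven funciones de hash conocidas (MD5 y SHA-256) para que reciban y retornen strings, junto con una función de hash falsa que retorna su entrada sin modificar (útil para testing):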

In [157]:
# Standard library
from hashlib import md5, sha256

# Auxiliary functions

# Wrapper for MD5 hash function
def hash_md5(string):
    """Return the hexadecimal MD5 digest of ``string``.

    The string is converted to bytes with ``str.encode()`` (default encoding)
    before being hashed.
    """
    digest = md5()
    digest.update(string.encode())
    return digest.hexdigest()

# Wrapper for SHA-256 hash function
def hash_sha256(string):
    return sha256(string.encode()).hexdigest()

# Fake hash function that returns the same string (for testing)
def hash_fake(string):
    """Identity "hash": return ``string`` unchanged.

    Handy for tests, because concatenated node values stay human-readable.
    """
    return string

La implementación del Merkle Tree se basa en objetos que representan a cada nodo, cada uno guardando una referencia a su hermano y a su padre, además del lado que le corresponde (izquierda, ``'i'``, o derecha, ``'d'``) y su valor de hash. La construcción se realiza Bottom-Up (desde las hojas hasta la raíz), utilizando el algoritmo BFS y considerando el caso especial de los nodos sin hermano, que deben duplicarse para poder formar un par. Teniendo todo esto, la obtención de la raíz y de la prueba de pertenencia es directa: simplemente se debe recorrer el árbol desde las hojas hacia arriba, registrando (en el caso de la prueba) el hash y el lado del hermano de cada nodo visitado. La clase se define a continuación:

In [158]:
# Merkle Tree representation
class MerkleTree:
    """Binary Merkle tree built bottom-up over the hashes of a collection of strings.

    Every leaf stores ``hash_func(s)`` for one input string and every inner
    node stores ``hash_func(left.value + right.value)``. When a level holds an
    odd number of nodes, its last node is paired with a duplicate of itself.
    A tree over a single string therefore has the root
    ``hash_func(h + h)`` with ``h = hash_func(s)``.
    """

    # Object representation for each node
    class Node:
        """One node of the tree, linked to its parent and to its sibling."""

        def __init__(self, value, level=0):
            self.value = value    # Hash stored in this node
            self.level = level    # 0 for leaves, +1 for every step towards the root
            self.parent = None    # Parent node; stays None for the root
            self.sibling = None   # Node sharing the same parent; stays None for the root
            self.side = ''        # 'i' = left child, 'd' = right child, '' = root

    # Construct tree
    def __init__(self, strings, hash_func):
        """
        Arguments:
            strings: The set of strings S to be represented by the tree
                (any non-empty iterable of strings).
            hash_func: An arbitrary cryptographic hash function. Its results
                must support concatenation with ``+``.
        Raises:
            ValueError: If ``strings`` is empty.
        """
        self.hash = hash_func
        self.leaves = [self.Node(self.hash(s)) for s in strings]
        if not self.leaves:
            raise ValueError('MerkleTree requires at least one string')

        # Bottom-up generation, one whole level at a time
        row = list(self.leaves)
        while True:
            if len(row) % 2:
                # Lonely last node: pair it with a duplicate of itself
                lonely = row[-1]
                row.append(self.Node(lonely.value, level=lonely.level))
            row = [self._join(left, right) for left, right in zip(row[::2], row[1::2])]
            # Checked after joining so that a single leaf still gets a parent
            if len(row) == 1:
                break

    # Create the parent of two adjacent nodes and link the three of them
    def _join(self, left, right):
        parent = self.Node(self.hash(left.value + right.value), level=left.level + 1)
        left.parent = right.parent = parent
        left.sibling, right.sibling = right, left
        left.side, right.side = 'i', 'd'
        return parent

    # Get root of the tree
    def get_root(self):
        """Return the hash stored in the root, reached by climbing from the first leaf."""
        current = self.leaves[0]
        while current.parent:
            current = current.parent
        return current.value

    # Obtain necessary values to verify an item of the tree
    def get_proof_for(self, item):
        """Return the membership proof for ``item``.

        The proof is a list of ``(sibling_hash, sibling_side)`` tuples ordered
        from the leaf level up to the level just below the root, where
        ``sibling_side`` is ``'i'`` (sibling on the left) or ``'d'`` (sibling
        on the right). Returns ``None`` when the hash of ``item`` does not
        match any leaf. If several leaves share the hash, the first is used.
        """
        hashed_item = self.hash(item)
        current = next((leaf for leaf in self.leaves if leaf.value == hashed_item), None)
        if current is None:
            return None
        proof = []
        while current.parent:
            sibling = current.sibling
            proof.append((sibling.value, sibling.side))
            current = current.parent
        return proof

    # Visual representation of the tree
    def __str__(self):
        """Render one line per level: ``L(left) R(right)`` pairs, then ``ROOT(value)``."""
        lines = []
        row = self.leaves
        level = 0
        while row:
            # Left children sit at even positions; each one reports its own pair
            lefts = row[::2]
            if lefts[0].parent is None:
                lines.append(f'L{level}: ROOT({lefts[0].value})')
                break
            pairs = ''.join(f'L({node.value}) R({node.sibling.value}) ' for node in lefts)
            lines.append(f'L{level}: {pairs}')
            row = [node.parent for node in lefts]
            level += 1
        return '\n'.join(lines)

La siguiente función permite verificar si un elemento es una de las hojas de un árbol, dado que se conoce la raíz de dicho árbol y se tiene una prueba de pertenencia. La idea es reconstruir la raíz del árbol mediante la información de la prueba y, finalmente, comparar la raíz obtenida con la conocida. En caso de que sean iguales, el elemento pertenece al árbol. En caso contrario, se puede concluir que el elemento (o la prueba) fue alterado y ya no contiene información válida.

In [159]:
# Verify proof for a given item and tree root
def verify(root, item, proof, hash_func):
    """
    Check a membership proof by rebuilding the root from ``item`` upwards.

    Arguments:
        root: The root of a Merkle Tree.
        item: An arbitrary string.
        proof: An alleged proof that item is part of a Merkle Tree with the previous root,
            given as an iterable of ``(sibling_hash, side)`` pairs ordered from the leaf
            level upwards. ``None`` (no proof available) is accepted and always rejected.
        hash_func: An arbitrary cryptographic hash function.
    Returns:
        correct: Whether the proof is correct or not.
    """
    # Without a proof there is nothing to reconstruct, so the item cannot be verified
    if proof is None:
        return False

    # Reconstruct root using the proof
    hash_value = hash_func(item)
    for sibling, side in proof:
        if side == 'd':
            # Sibling is the right child: the running hash goes first
            hash_value = hash_func(hash_value + sibling)
        else:
            # Any other side is treated as a left sibling
            hash_value = hash_func(sibling + hash_value)

    # Compare computed root with the real root, if they are equal then the proof was correct
    return hash_value == root

A modo de testing, a continuación se utiliza un Merkle Tree con hash SHA-256 para detectar las modificaciones realizadas a un mensaje:

In [160]:
# Test Merkle Tree
if __name__ == '__main__':
    # Demo: build a SHA-256 tree over the genuine fragments, then check every
    # received fragment against the proof of the genuine fragment in that position.
    # Alternative inputs kept for manual experiments:
    #strings = ['s1', 's2', 's3', 's4', 's5', 's6']
    #strings = ['bitcoin', 'ethereum', 'tether', 'binance', 'usdc']
    strings = ['This i', 's a legit m', 'essage that is', 'very import', 'ant']
    corrupted_strings = ['This i', 's a fake m', 'essage that is', 'not import', 'ant']

    # Merkle Tree over the genuine fragments (print(tree) shows it level by level)
    tree = MerkleTree(strings, hash_sha256)
    root = tree.get_root()

    # A fragment is valid only if its hash rebuilds the known root
    for item, corrupted_item in zip(strings, corrupted_strings):
        proof = tree.get_proof_for(item)
        result = verify(root, corrupted_item, proof, hash_sha256)
        print(f'REAL: {item}, OBTAINED: {corrupted_item}, VALID: {result}')

REAL: This i, OBTAINED: This i, VALID: True
REAL: s a legit m, OBTAINED: s a fake m, VALID: False
REAL: essage that is, OBTAINED: essage that is, VALID: True
REAL: very import, OBTAINED: not import, VALID: False
REAL: ant, OBTAINED: ant, VALID: True
