## Test Serializer

In [None]:
import json, os
from ipld import marshal, unmarshal
import rlp
import json
import base64

In [None]:
class LakatSerializer:
    """Namespace for the IPLD-based (de)serialization helpers.

    Both helpers are static: they take no ``self``/``cls`` argument, so they
    can be called on the class or on an instance alike.
    """

    @staticmethod
    def serialize(o) -> bytes:
        """Return the result of ``marshal`` (from ``ipld``) applied to ``o``."""
        return marshal(o)

    @staticmethod
    def deserialize(data: bytes) -> object:
        """Return the result of ``unmarshal`` (from ``ipld``) applied to ``data``."""
        return unmarshal(data)
    
class BytesEncoder(json.JSONEncoder):
    """JSON encoder that emits ``bytes`` values as Base64 text."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        ``bytes`` become their Base64 encoding as a ``str``; anything else is
        handed to the base class, which raises ``TypeError``.
        """
        if not isinstance(obj, bytes):
            return super().default(obj)
        encoded = base64.b64encode(obj)
        return encoded.decode('utf-8')
    

def custom_json_decoder(obj):
    """
    Custom decoder function for JSON objects (usable as a ``json`` ``object_hook``).

    Replaces, in place, each direct string value of a dict with the bytes it
    decodes to when the string is strictly valid Base64; every other value is
    left untouched. Non-dict inputs are returned unchanged.

    NOTE: the scheme is inherently ambiguous -- a plain string that happens to
    be valid Base64 (for example ``"abcd"`` or ``""``) is also turned into
    bytes. Strict validation only narrows the set of such false positives.

    :param obj: Object produced by the JSON parser.
    :return: The same object, with Base64 string values converted to bytes.
    """
    if not isinstance(obj, dict):
        return obj
    for key, value in obj.items():
        if not isinstance(value, str):
            continue
        try:
            # validate=True rejects strings containing characters outside the
            # Base64 alphabet instead of silently discarding them, so ordinary
            # text such as "ab cd" is no longer mangled into bytes.
            obj[key] = base64.b64decode(value, validate=True)
        except (ValueError, TypeError):
            # Not Base64: keep the original string.
            pass
    return obj

def serialize(obj):
    """
    Turn a Python object into UTF-8 encoded JSON bytes.

    The JSON text is produced with ``BytesEncoder`` as the encoder class.

    :param obj: Python object to serialize.
    :return: Bytes representation of the object.
    """
    return json.dumps(obj, cls=BytesEncoder).encode('utf-8')

def unserialize(bytes_obj):
    """
    Parse UTF-8 encoded JSON bytes back into a Python object.

    Every decoded JSON object is passed through ``custom_json_decoder``.

    :param bytes_obj: Bytes object to unserialize.
    :return: The reconstructed Python object.
    """
    text = bytes_obj.decode('utf-8')
    return json.loads(text, object_hook=custom_json_decoder)


def jsondump(obj, file):
    """Write ``obj`` to the open file ``file`` as JSON indented by 2 spaces.

    Encoding goes through ``BytesEncoder``.
    """
    json.dump(obj, file, indent=2, cls=BytesEncoder)


def jsonload(file):
    """Read JSON from the open file ``file``, using ``custom_json_decoder`` as object hook."""
    loaded = json.load(file, object_hook=custom_json_decoder)
    return loaded

In [5]:
# 20 fruit names, one for each initial letter from 'a' to 't'
fruits = [
    'apple', 'banana', 'cherry', 'dragonfruit', 'elderberry',
    'fig', 'grape', 'honeydew', 'imbe', 'jackfruit',
    'kiwi', 'lemon', 'mango', 'nectarine', 'orange',
    'papaya', 'quince', 'raspberry', 'strawberry', 'tangerine',
]

In [12]:
import cbor2
import hashlib
import multihash
import base58
import cid

def compute_multihash(data):
    """Return the Base58-encoded SHA-256 multihash of ``data``.

    ``data`` is hashed with SHA-256, the digest is passed to
    ``multihash.encode`` under the ``'sha2-256'`` code, and the result is
    Base58-encoded with ``base58.b58encode``.
    """
    digest = hashlib.sha256(data).digest()
    wrapped = multihash.encode(digest, 'sha2-256')
    return base58.b58encode(wrapped)

# Serialize the objects using CBOR

bob_serialized = cbor2.dumps({"name": "Bob", "age": 32, "likes":fruits})
charlie_serialized = cbor2.dumps({"name": "Charlie", "age": 36})

print('bob_serialized:', bob_serialized)
# Compute the base58-encoded multihashes
bob_multihash = compute_multihash(bob_serialized)
charlie_multihash = compute_multihash(charlie_serialized)

print('bob_multihash:', bob_multihash)
# Show only the first 5 characters of the Base58 text
print('bob_multihash (decoded):', bob_multihash.decode()[0:5])
# Create CIDs from the base58-encoded multihashes
# NOTE(review): Bob's CID is built from just the first 5 bytes of the Base58
# text rather than the full multihash -- looks like an experiment; confirm
# before reusing this value.
bob_cid = cid.make_cid(1, 'cbor', bob_multihash[0:5])
# Charlie's CID uses the single-argument form with the full Base58 multihash.
charlie_cid = cid.make_cid(charlie_multihash)

# Print the CIDs
print("Bob's CID:", bob_cid)
print("Charlie's CID:", charlie_cid)


bob_serialized: b'\xa3dnamecBobcage\x18 elikes\x94eapplefbananafcherrykdragonfruitjelderberrycfigegrapehhoneydewdimbeijackfruitdkiwielemonemangoinectarineforangefpapayafquinceiraspberryjstrawberryitangerine'
bob_multihash: b'QmQgkr1e1kP3R6H1nPeCobjhC6iT9isDezUdL8qShmL85S'
bob_multihash (decoded): QmQgk
Bob's CID: z3tyXPDz4z
Charlie's CID: QmbCFxvC1bFes6MQePeKqTZJ52VMyehYGj7aDByCLQmyMg


In [42]:
# Hash function name, in the form passed to multihash.encode().
ALGORITHM = "sha2-256"
# Text encoding applied to strings before hashing (a codec name, despite the
# "FUNCTION" in the constant's name).
ENCODING_FUNCTION = "utf-8"
# Only referenced by the commented-out branch-id helpers in this cell.
MH_BRANCHID_PREFIX_LENGTH = 8

def get_hashing_algorithm(algorithm):
    """Translate a ``'sha2-<n>'`` style algorithm name into ``'sha<n>'``.

    Names beginning with ``'sha2-'`` become ``'sha'`` followed by the integer
    found after the first dash (``'sha2-256'`` -> ``'sha256'``); any other
    name is returned unchanged. A non-numeric second component raises
    ``ValueError``.
    """
    if not algorithm.startswith('sha2-'):
        return algorithm
    bits = int(algorithm.split('-')[1])
    return f'sha{bits}'

def __get_multihash_from_bytes(bytes_data):
    """Hash ``bytes_data`` and return the result of ``multihash.encode`` on the digest.

    The hash function is chosen by the module-level ``ALGORITHM`` constant,
    translated to a ``hashlib`` constructor name by ``get_hashing_algorithm``.

    :param bytes_data: Bytes to hash.
    :return: Value returned by ``multihash.encode(digest, ALGORITHM)``.
    :raises AttributeError: if ``hashlib`` has no attribute with that name.
    """
    hashlib_algorithm = get_hashing_algorithm(ALGORITHM)
    # getattr() is the idiomatic way to look up an attribute by name; it
    # replaces the direct call to the module's __getattribute__ dunder.
    hash_constructor = getattr(hashlib, hashlib_algorithm)
    digest = hash_constructor(bytes_data).digest()
    return multihash.encode(digest, ALGORITHM)

def get_multihash(data: str):
    """Encode ``data`` with ``ENCODING_FUNCTION`` and return the multihash of those bytes."""
    return __get_multihash_from_bytes(data.encode(ENCODING_FUNCTION))

# def _get_branch_modified_multihash_from_prefix_length(data: str, branchId: str, prefix_length: int):
#     multihash = get_multihash(data)
#     return multihash[0:2] + b'\x00' + multihash[3:]

# def get_branch_modified_multihash(data: str, branchId: str):
#     return _get_branch_modified_multihash_from_prefix_length(data, branchId, MH_BRANCHID_PREFIX_LENGTH)

# sha256_hash, encoded_multihash

In [36]:
# Scratch cell: compute the raw SHA-256 digest of b'blabla'.
# hashlib.__getattribute__('sha256')('hallo'.encode()).digest()
# hashlib.algorithms_guaranteed
sha256_hash = hashlib.sha256(b'blabla').digest()
# type(sha256_hash),
# Bare name as the last statement so the notebook displays the digest bytes.
sha256_hash

b'\xcc\xad\xd9\x9b\x16\xcd= \x0c"\xd6\xdbE\xd8\xb6c\x0e\xf3\xd96vq\'4~\xc8\xa7j\xb9\x92\xc2\xea'

In [37]:
import hashlib

# Syllable alphabet: the syllable at index d stands for the digit d in
# base len(syllables).
syllables = ['kec', 'num', 'her', 'lar', 'mis', 'ron', 'siv', 'tac', 'yul', 'zum']

def hash_to_syllables(data):
    """Spell the SHA-256 hash of the string ``data`` as dash-joined syllables.

    The digest is read as a big-endian integer and written in base
    ``len(syllables)``, one syllable per digit, least-significant digit
    first. An integer value of zero produces an empty string.
    """
    digest = hashlib.sha256(data.encode()).digest()
    remaining = int.from_bytes(digest, byteorder='big')

    base = len(syllables)
    parts = []
    while remaining > 0:
        parts.append(syllables[remaining % base])
        remaining //= base

    return '-'.join(parts)

# Example usage: print the dash-joined syllable form of the SHA-256 hash of
# "Hello, world!"
data = "Hello, world!"
syllable_representation = hash_to_syllables(data)
print(syllable_representation)


num-num-mis-zum-zum-siv-ron-num-her-zum-ron-mis-num-tac-kec-her-lar-tac-lar-tac-num-mis-lar-siv-lar-lar-lar-zum-siv-zum-lar-tac-mis-siv-siv-siv-yul-mis-lar-her-mis-tac-ron-kec-her-kec-yul-mis-ron-tac-kec-siv-lar-tac-ron-kec-num-tac-kec-lar-yul-yul-mis-her-zum-lar-tac-her-kec-mis-num-yul-num-lar-lar-her-her


In [38]:
import itertools

vowels = 'aeiou'
consonants = 'bcdfghjklmnpqrstvwxyz'

# Generate CVCVC words
def generate_cvcvc_words():
    """Return every consonant-vowel-consonant-vowel-consonant word as a list.

    Words come out in ``itertools.product`` order, i.e. the last letter
    varies fastest.
    """
    return [''.join(word) for word in itertools.product(consonants, vowels, consonants, vowels, consonants)]

cvcvc_words = generate_cvcvc_words()

# Forward mapping: Number to Word
def number_to_word(number):
    """Return the CVCVC word stored at index ``number`` of ``cvcvc_words``.

    :param number: Index in the range ``0 .. len(cvcvc_words) - 1``.
    :return: The five-letter word at that index.
    :raises IndexError: if ``number`` is negative or past the end of the list.
    """
    if number < 0:
        # A negative index would silently count from the end of the list and
        # break the one-to-one number <-> word correspondence.
        raise IndexError(f"number must be non-negative, got {number}")
    return cvcvc_words[number]

# Reverse mapping: Word to Number
def word_to_number(word):
    """Return the number encoded by the five-letter CVCVC ``word``.

    Each letter is looked up in its group (``consonants`` or ``vowels``) and
    the five positions are combined as mixed-radix digits, first letter most
    significant. Raises ``ValueError`` when ``word`` does not unpack into
    exactly five letters or a letter is missing from its group.
    """
    c1, v1, c2, v2, c3 = word
    slots = (
        (c1, consonants),
        (v1, vowels),
        (c2, consonants),
        (v2, vowels),
        (c3, consonants),
    )
    number = 0
    # Horner evaluation: same value as the sum of index * place-weight terms.
    for letter, group in slots:
        number = number * len(group) + group.index(letter)
    return number

# Example usage: map a number to its word and back, then print all three so
# the round trip can be checked by eye.
num = 12345
word = number_to_word(num)
reversed_num = word_to_number(word)

print(f"Number: {num}, Word: {word}, Reversed Number: {reversed_num}")


Number: 12345, Word: caqix, Reversed Number: 12345


In [40]:
# Sanity check: the word list should hold one entry per letter combination
# of the C-V-C-V-C pattern (21 consonants, 5 vowels).
len(cvcvc_words), 21*5*21*5*21

(231525, 231525)

In [56]:
import itertools

vowels = 'aeiou'
consonants = 'bcdfghjklmnpqrstvwxyz'

# Generate CVCVC words
def generate_cvcvc_words():
    """Return every consonant-vowel-consonant-vowel-consonant word as a list.

    Words come out in ``itertools.product`` order, i.e. the last letter
    varies fastest.
    """
    return [''.join(word) for word in itertools.product(consonants, vowels, consonants, vowels, consonants)]

cvcvc_words = generate_cvcvc_words()

# Number of distinct CVCVC words; used as the radix of the word-sequence encoding
words_count = len(cvcvc_words)

# Forward mapping: Number to Word Sequence
def number_to_word_sequence(number):
    """Write a non-negative integer as dash-joined CVCVC words.

    The number is expressed in base ``words_count``; each digit is replaced
    by the word at that index of ``cvcvc_words``, most-significant word first.

    :param number: Non-negative integer to encode.
    :return: Dash-joined word sequence; ``0`` maps to the single word
        ``cvcvc_words[0]`` so that it can be decoded again.
    :raises ValueError: if ``number`` is negative.
    """
    if number < 0:
        raise ValueError(f"number must be non-negative, got {number}")
    word_sequence = []
    # Emit at least one digit so that 0 is encoded as a word instead of an
    # empty string, which the reverse mapping could not parse.
    while True:
        number, idx = divmod(number, words_count)
        word_sequence.append(cvcvc_words[idx])
        if number == 0:
            break
    return '-'.join(reversed(word_sequence))

def get_index(letter, group):
    """Return the position of ``letter`` in ``group``; ``ValueError`` if absent."""
    position = group.index(letter)
    return position

def single_word_to_number(word):
    """Return the number encoded by one five-letter CVCVC ``word``.

    The letters are treated as mixed-radix digits (first letter most
    significant), each digit being the letter's position in ``consonants``
    or ``vowels``. Raises ``ValueError`` when ``word`` does not unpack into
    exactly five letters or a letter is missing from its group.
    """
    c1, v1, c2, v2, c3 = word
    groups = (consonants, vowels, consonants, vowels, consonants)
    total = 0
    # Horner evaluation: same value as the sum of index * place-weight terms.
    for letter, group in zip((c1, v1, c2, v2, c3), groups):
        total = total * len(group) + get_index(letter, group)
    return total

# Reverse mapping: Word Sequence to Number
def word_sequence_to_number(word_sequence):
    """Decode a dash-joined CVCVC word sequence into its integer.

    The words are read left to right as base-``words_count`` digits, the
    first word being the most significant.
    """
    total = 0
    for token in word_sequence.split('-'):
        total *= words_count
        total += single_word_to_number(token)
    return total

# Example usage: encode a number as a word sequence, decode it again, and
# print all three so the round trip can be checked by eye.
num = 123456789
word_sequence = number_to_word_sequence(num)
reversed_num = word_sequence_to_number(word_sequence)

print(f"Number: {num}, Word Sequence: {word_sequence}, Reversed Number: {reversed_num}")


Number: 123456789, Word Sequence: bahal-gumut, Reversed Number: 123456789


In [49]:
def hash_to_int(data: str, crop: int = 10) -> tuple[int, bytes]:
    """Hash ``data`` with SHA-256 and read a prefix of the digest as an integer.

    :param data: Text to hash; encoded with ``str.encode()`` defaults.
    :param crop: Number of leading digest bytes to keep. Values larger than
        the 32-byte digest keep the whole digest.
    :return: ``(hash_int, cropped_hash)`` -- the big-endian integer value of
        the kept bytes, and the kept bytes themselves.
    """
    # Hash the data using SHA-256
    hash_digest = hashlib.sha256(data.encode()).digest()
    cropped_hash = hash_digest[:crop]
    # Convert the cropped hash to a (possibly large) integer
    hash_int = int.from_bytes(cropped_hash, byteorder='big')
    return hash_int, cropped_hash

In [54]:
# Keep the first 30 bytes of the SHA-256 digest of 'hallo' ...
hash_int, cropped_hash = hash_to_int('hallo', crop=30)
# ... and display them as hex next to the word-sequence form of the same value.
cropped_hash.hex(), number_to_word_sequence(hash_int)

('d3751d33f9cd5049c4af2b462735457e4d3baf130bcbb87f389e349fbaeb',
 'badir-suroc-yelil-hobod-wifij-lakuw-rotev-zoyah-gexaj-cepos-tanix-xunad-pumon-wijak')

In [66]:
# Scratch cell: Base58 round trip of a multihash.
data = b'hallo'
sha256_hash = hashlib.sha256(data).digest()

# Result is discarded (not assigned, and not the last expression of the cell).
base58.b58encode(sha256_hash)
# Use multihash to encode the hash in multihash format
encoded_multihash = multihash.encode(sha256_hash, 'sha2-256')

# Base58 encode the multihash
a = base58.b58encode(encoded_multihash).decode('utf-8')
# b is not used again in this cell.
b = a.encode('utf-8')
c = base58.b58decode(a)
# Display the decoded bytes next to the original multihash for comparison.
c, encoded_multihash

(b"\x12 \xd3u\x1d3\xf9\xcdPI\xc4\xaf+F'5E~M;\xaf\x13\x0b\xcb\xb8\x7f8\x9e4\x9f\xba\xeb \xb9",
 b"\x12 \xd3u\x1d3\xf9\xcdPI\xc4\xaf+F'5E~M;\xaf\x13\x0b\xcb\xb8\x7f8\x9e4\x9f\xba\xeb \xb9")

In [69]:
# Scratch cell: check the type of a raw SHA-256 digest.
data = b'hallo'
sha256_hash = hashlib.sha256(data).digest()
# base58.b58encode(sha256_hash)
type(sha256_hash)

bytes

In [68]:
import numpy as np
# Logarithm of 256**4 in base words_count, i.e. the (fractional) number of
# CVCVC words needed to span 4 bytes.
4* np.log(256) / np.log(words_count)

1.7956536638176726

In [93]:
import cbor2
import hashlib
import multihash
import base58
import cid

def compute_multihash(data):
    """Hash ``data`` with SHA-256 and return its multihash in Base58 form.

    The digest is passed to ``multihash.encode`` with the ``'sha2-256'``
    code, and the result is Base58-encoded with ``base58.b58encode``.
    """
    return base58.b58encode(
        multihash.encode(hashlib.sha256(data).digest(), 'sha2-256')
    )

# Serialize the objects using CBOR

bob_serialized = cbor2.dumps({"name": "Bob", "age": 32, "likes":fruits})
charlie_serialized = cbor2.dumps({"name": "Charlie", "age": 36})

# Compute the base58-encoded multihashes
bob_multihash = compute_multihash(bob_serialized)
charlie_multihash = compute_multihash(charlie_serialized)

# Same multihash for Bob, but kept as raw bytes (no Base58 step)
bob_sha256_hash = hashlib.sha256(bob_serialized).digest()
bob_encoded_multihash = multihash.encode(bob_sha256_hash, 'sha2-256')

print(bob_multihash, bob_encoded_multihash)
# Create two CIDs for comparison: one from the Base58 text, one from the raw
# multihash bytes.
# NOTE(review): the recorded output shows bob_cid.multihash holding the
# Base58 text itself, so the raw-bytes variant is presumably the intended
# input to make_cid -- confirm against the cid library documentation.
bob_cid = cid.make_cid(1, 'cbor', bob_multihash)
bob_cid_not_encoded = cid.make_cid(1, 'cbor', bob_encoded_multihash)

print(bob_cid, bob_cid_not_encoded)
# Print the CIDs
# print("Bob's CID:", bob_cid)
# print("Bob's CID not encoded:", bob_cid_not_encoded)
# Display the CID object together with its multihash and codec attributes.
bob_cid, bob_cid.multihash, bob_cid.codec



b'QmQgkr1e1kP3R6H1nPeCobjhC6iT9isDezUdL8qShmL85S' b'\x12 "\xdd\xed\xa9k\xfc\xe9\xd2\xe4\xae\xea\x11\xa9 o\xdc\xa9/\x7fx\xf3\x15\xbeBi\xd9\x1a"\xa9\x1c.\xf5'
z3obfhD5TwPHFAzezqGaZcfc24on6nHZeWDgqjyyFrnQ5DjEvp3Mez53Kow51SuHBY zadi4X2DiXgkh1pkRWSFPttRoYDPCDDM8SCMEoy4bsc66B1Ka


(CIDv1(version=1, codec=cbor, multihash=b'QmQgkr1e1kP3R6H1nPeC..'),
 b'QmQgkr1e1kP3R6H1nPeCobjhC6iT9isDezUdL8qShmL85S',
 'cbor')

In [103]:
# # cid.is_cid(bob_cid_not_encoded)
# # cid.cid(bob_cid)
# [ c for c in multihash.constants.HASH_CODES.keys() if 'c' in c.lower()]
# multicodec
# Display the CBOR encodings of several empty / falsy values side by side.
cbor2.dumps({}),cbor2.dumps(dict()), cbor2.dumps(None), cbor2.dumps(""), cbor2.dumps(False), cbor2.dumps([]), cbor2.dumps(set())

(b'\xa0', b'\xa0', b'\xf6', b'`', b'\xf4', b'\x80', b'\xd9\x01\x02\x80')