# Binary data conversion

Binary data conversion transforms typed data to and from a type-neutral binary (byte) representation.

## Converting data to binary format
Python is able to convert typed data into a standard non-typed byte array format through various methods.

In [1]:
# bytearray() converts string data to a byte array; the encoding
# argument selects how the characters are turned into bytes
string_to_encode = "A text input for encoding"
encoding_bytes = bytearray(string_to_encode, "utf-8")
encoded_report = "The encoded form of {0} is {1} .".format(string_to_encode, encoding_bytes)
print(encoded_report)

# decode() is the reverse step: it rebuilds the text from the byte array
# when given the encoding that was used to produce it
decoded_string = encoding_bytes.decode("utf-8")
decoded_report = "The decoded form of {0} is {1} .".format(encoding_bytes, decoded_string)
print(decoded_report)

The encoded form of A text input for encoding is bytearray(b'A text input for encoding') .
The decoded form of bytearray(b'A text input for encoding') is A text input for encoding .


Other data types are usually converted to binary format with the `pack` and `unpack` functions of the `struct` module. These functions convert typed data (such as float, int or boolean) to binary format and back.

In [2]:
# importing the necessary module
import struct

# struct.pack converts a value to bytes according to a format string.
# The format characters used in this cell are:
#
#   ?  boolean
#   h  short
#   l  long
#   i  int
#   f  float
#   q  long long int
#
# NOTE: the format strings carry no byte-order prefix, so struct works in
# native mode and the printed bytes may differ between platforms.

pack_data = [True, -12, 90000, 256, -30.1573, 20102020]
pack_flags = ["?", "h", "l", "i", "f", "q"]

# pair every sample value with the format character that describes it
pack_information = zip(pack_data, pack_flags)

pack_report = "Current packed data {0} using packing format string {1} is {2}"
for current_pack_data, current_pack_flags in pack_information:
    pack_byte_array = struct.pack(current_pack_flags, current_pack_data)
    print(pack_report.format(current_pack_data, current_pack_flags, pack_byte_array))

Current packed data True using packing format string ? is b'\x01'
Current packed data -12 using packing format string h is b'\xf4\xff'
Current packed data 90000 using packing format string l is b'\x90_\x01\x00'
Current packed data 256 using packing format string i is b'\x00\x01\x00\x00'
Current packed data -30.1573 using packing format string f is b"'B\xf1\xc1"
Current packed data 20102020 using packing format string q is b'\x84\xbb2\x01\x00\x00\x00\x00'


In [3]:
# a repeat count in front of a format character packs several values of
# the same type in one call ("4h" stands for four shorts)
integer_values = (0, 1, 2, 3)
packed_integer_data = struct.pack("4h", *integer_values)
print("The packed format for {0}, {1}, {2}, {3} is {4} .".format(*integer_values, packed_integer_data))

# values of different types are packed by listing one format character per value
heterogenous_values = (0, True, -2500, 1.23675)
packed_heterogenous_data = struct.pack("h ? l f", *heterogenous_values)
print("The packed format for {0}, {1}, {2}, {3} is {4} .".format(*heterogenous_values, packed_heterogenous_data))

The packed format for 0, 1, 2, 3 is b'\x00\x00\x01\x00\x02\x00\x03\x00' .
The packed format for 0, True, -2500, 1.23675 is b'\x00\x00\x01\x00<\xf6\xff\xff\xd3M\x9e?' .


In [4]:
# Packing a string requires two steps: the text is encoded to bytes first,
# and the "s" format is given the number of BYTES to store. The count is
# taken from the encoded value rather than from the text, because a
# character outside ASCII occupies more than one byte in UTF-8 and a
# character count would make struct.pack cut the data short.
string_to_pack = "A text input for packing"
encoded_string_to_pack = bytearray(string_to_pack, encoding="utf-8")
format_string = "{0}s".format(len(encoded_string_to_pack))
packed_string = struct.pack(format_string, encoded_string_to_pack)
print("The packed format for {0} is {1} .".format(string_to_pack, packed_string))

The packed format for A text input for packing is b'A text input for packing' .


The struct module allows calculating the size of the packed data using the calcsize function.

In [5]:
# calcsize reports how many bytes a given format string occupies once packed
pack_format_string = "20s 20s q ?"
calculated_size = struct.calcsize(pack_format_string)
print("The calculated data size for {0} is {1} .".format(pack_format_string, calculated_size))

The calculated data size for 20s 20s q ? is 49 .


In [6]:
# the building blocks shown above can be combined to pack a complex record
product_instance = dict(
    name="Personal Laptop",
    sku="PX23332223",
    cost_EUR=1000.50,
    is_available=False,
)


def pack_product_data(product):
    """Pack a product dictionary into a self-describing binary record.

    The record starts with a header of three "q" values (total record size,
    byte length of the name, byte length of the SKU), followed by the name
    bytes, the SKU bytes, the cost packed as "f" and the availability flag
    packed as "?".

    Args:
        product: mapping with the keys "name" (str), "sku" (str),
            "cost_EUR" (number) and "is_available" (bool).

    Returns:
        bytes: the packed record.
    """
    # encode the text first: the "s" fields must be sized by the number of
    # bytes, which is larger than the number of characters for non-ASCII
    # text; sizing by characters would truncate the encoded value
    name_bytes = product["name"].encode("utf-8")
    sku_bytes = product["sku"].encode("utf-8")
    len_name = len(name_bytes)
    len_sku = len(sku_bytes)

    # calculate packing parameters
    format_string = "q q q {0}s {1}s f ?".format(len_name, len_sku)
    length_data = struct.calcsize(format_string)

    # pack data using the calculated format string
    packed_data = struct.pack(
        format_string,
        # product header data containing various data lengths
        length_data,
        len_name,
        len_sku,
        # product effective data
        name_bytes,
        sku_bytes,
        product["cost_EUR"],
        product["is_available"]
    )

    return packed_data


# pack the sample product and show the resulting bytes next to the source record
product_instance_packed_data = pack_product_data(product_instance)

packed_instance_report = "The packed instance data for the product \n{0} \nis \n{1}"
print(packed_instance_report.format(product_instance, product_instance_packed_data))

The packed instance data for the product 
{'name': 'Personal Laptop', 'sku': 'PX23332223', 'cost_EUR': 1000.5, 'is_available': False} 
is 
b'9\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00Personal LaptopPX23332223\x00\x00\x00\x00 zD\x00'


In [7]:
# the same record layout can be used to pack an entire collection

product_instances = [
    {'name': 'Personal Laptop', 'sku': 'PX23332223', 'cost_EUR': 1000.5, 'is_available': False},
    {'name': 'Traveling Bag', 'sku': 'TBW33311209', 'cost_EUR': 20, 'is_available': True},
    {'name': 'Winter coat', 'sku': 'PCW98798324', 'cost_EUR': 100.75, 'is_available': True},
]

# All records are appended to one buffer, one after the other.
# The loop variable is deliberately not called `product_instance`, so the
# sample record with that name defined in an earlier cell is not overwritten
# when this cell runs. extend() appends in place instead of building a new
# bytearray on every iteration.
product_instances_packed_data = bytearray()

for current_product in product_instances:
    product_instances_packed_data.extend(pack_product_data(current_product))

print("The packed instances data for the products \n{0} \nis \n{1}".format(product_instances, product_instances_packed_data))

The packed instances data for the products 
[{'name': 'Personal Laptop', 'sku': 'PX23332223', 'cost_EUR': 1000.5, 'is_available': False}, {'name': 'Traveling Bag', 'sku': 'TBW33311209', 'cost_EUR': 20, 'is_available': True}, {'name': 'Winter coat', 'sku': 'PCW98798324', 'cost_EUR': 100.75, 'is_available': True}] 
is 
bytearray(b'9\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00Personal LaptopPX23332223\x00\x00\x00\x00 zD\x005\x00\x00\x00\x00\x00\x00\x00\r\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00Traveling BagTBW33311209\x00\x00\xa0A\x015\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00Winter coatPCW98798324\x00\x00\x00\x80\xc9B\x01')


## Converting data from binary format
By using the same struct module, Python is able to convert the byte arrays back to typed data. 

In [8]:
# struct.unpack reverses struct.pack: it converts packed bytes back to typed values
def unpack_product_data(byte_array):
    """Rebuild one product dictionary from the start of a packed buffer.

    The buffer is read as a header of three "q" values (record size, name
    length, SKU length) followed by the name bytes, the SKU bytes, an "f"
    cost and a "?" availability flag. Bytes located after that first record
    are not read.

    Args:
        byte_array: bytes-like buffer that starts with a packed product.

    Returns:
        tuple: (record size read from the header, product dictionary).
    """
    # the fixed-size header tells how long the variable-size fields are
    header_layout = struct.Struct("q q q")
    header_end = header_layout.size
    data_size, name_size, sku_size = header_layout.unpack(byte_array[:header_end])

    # the body layout is only known once the header has been read
    body_layout = struct.Struct("{0}s {1}s f ?".format(name_size, sku_size))
    body_end = header_end + body_layout.size
    raw_name, raw_sku, cost_EUR, is_available = body_layout.unpack(byte_array[header_end:body_end])

    # text fields come back as bytes and are decoded to str
    product = dict(
        name=raw_name.decode("UTF-8"),
        sku=raw_sku.decode("UTF-8"),
        cost_EUR=cost_EUR,
        is_available=is_available,
    )

    return data_size, product

# unpack the bytes held in product_instance_packed_data and report the
# rebuilt record together with the size stored in its header
data_size, product = unpack_product_data(product_instance_packed_data)

unpacked_instance_report = "The unpacked instance data is:\n{0} \nhaving a size of \n{1} bytes."
print(unpacked_instance_report.format(product, data_size))

The unpacked instance data is:
{'name': 'Personal Laptop', 'sku': 'PX23332223', 'cost_EUR': 1000.5, 'is_available': False} 
having a size of 
57 bytes.


In [10]:
unpacked_product_instances = []
current_index = 0

# Walk the buffer record by record until it is fully explored. Every record
# header stores the record size, which is used as the distance to the start
# of the next record.
while current_index < len(product_instances_packed_data):
    data_length, unpacked_product_instance = unpack_product_data(product_instances_packed_data[current_index:])
    if data_length <= 0:
        # a non-positive size would never advance the index, so the loop
        # would not terminate; stop instead of spinning on corrupt data
        raise ValueError("Invalid record size {0} at offset {1}".format(data_length, current_index))
    unpacked_product_instances.append(unpacked_product_instance)
    current_index += data_length

print("The unpacked product instances are: \n{0}".format(unpacked_product_instances))

The unpacked product instances are: 
[{'name': 'Personal Laptop', 'sku': 'PX23332223', 'cost_EUR': 1000.5, 'is_available': False}, {'name': 'Traveling Bag', 'sku': 'TBW33311209', 'cost_EUR': 20.0, 'is_available': True}, {'name': 'Winter coat', 'sku': 'PCW98798324', 'cost_EUR': 100.75, 'is_available': True}]
