<a href="https://colab.research.google.com/github/remjw/data/blob/master/binary-file-example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Text File versus Binary File

## Numerical Values

In [None]:
# A text file stores human-readable characters (ASCII / Unicode), so the
# list is saved as the characters of its printed form.
data = [15, 24, 255]

# Text-mode write of str(data).
with open('data.txt', 'w') as text_out:
  text_out.write(str(data))

# Text-mode read; print exactly what was stored.
with open('data.txt', 'r') as text_in:
  print(text_in.read())

[15, 24, 255]


In [None]:
# A binary file stores raw bytes: each byte is 8 binary digits (bits),
# which is written as 2 hexadecimal digits.

data = [15, 24, 255]

# Convert to bytes: bytes() takes an iterable of ints, each in range(256),
# and produces one byte per int.
buffer = bytes(data)
print(buffer)

# Binary write ('wb'): the bytes are stored as-is, with no text encoding.
with open("binary.txt", "wb") as f:
  f.write(buffer)

# Binary read ('rb'): read() returns a bytes object rather than a str.
with open('binary.txt', 'rb') as f:
  content = f.read()

print(type(content), content)

b'\x0f\x18\xff'
<class 'bytes'> b'\x0f\x18\xff'


## Number Bases

* base-2 (binary): digits 0 - 1

* base-10 (decimal): digits 0 - 9

* base-16 (hexadecimal): digits 0 - 9, then a - f to denote 10 - 15

`\x0f` = $0\times16^1 + 15\times16^0$ = 15

`\x18` = $1\times16^1 + 8\times16^0$ = 24

`\xff` = $15\times16^1 + 15\times16^0$ = 255

## Bytes to Hex to Decimal

In [31]:
# Step 1: render the single byte as its two-digit hexadecimal text.
to_hex = b'\x0f'.hex()
print(to_hex)

# Step 2: parse that hexadecimal text as a base-16 integer.
to_decimal = int(to_hex, 16)
print(to_decimal)


0f
15


# Characters

In [39]:
# char to ASCII code: ord() returns the integer code of a single character
ord('a'), ord('A'), ord('\n'), ord(' ')

(97, 65, 10, 32)

In [71]:
# ASCII code to char: chr() is the inverse of ord()
chr(97), chr(65), chr(10), chr(32)

('a', 'A', '\n', ' ')

In [68]:
# ASCII codes in binary: bin() returns a str with a '0b' prefix and
# no leading zeros, so the results have different lengths
bin(ord('a')), bin(ord('A')), bin(ord('\n')), bin(ord(' '))

('0b1100001', '0b1000001', '0b1010', '0b100000')

In [69]:
# render a code as a fixed-width string of 0s and 1s:
# [2:] drops the '0b' prefix, zfill(8) left-pads with zeros to 8 digits
bin(ord('a'))[2:].zfill(8)

'01100001'

In [73]:
# Encode text as one long string of 0s and 1s, 8 digits per character.
def chars_to_binary(chars):
  """Return the binary digits of every character's code, concatenated.

  Each code from ord() is written in base 2 and left-padded with zeros to
  at least 8 digits. Padding never truncates, so a character whose code is
  above 255 produces more than 8 digits.
  """
  pieces = []
  for ch in chars:
    pieces.append(format(ord(ch), '08b'))
  return ''.join(pieces)

# Take a string of 1s and 0s holding 8-bit character codes.
# Decode to chars
def binary_to_chars(bits):
  """Decode a string of '0'/'1' digits into text, 8 digits per character.

  Args:
    bits: concatenated 8-digit binary codes, e.g. '0110000101100010'.

  Returns:
    The decoded string ('' for empty input), or None when len(bits) is
    not a multiple of 8.

  Raises:
    ValueError: if an 8-character group cannot be parsed as base 2.
  """
  if len(bits) % 8 != 0:
    return None
  # Walk the input in 8-digit steps instead of recursing: empty input
  # decodes to '' and long inputs cannot exhaust the recursion limit.
  return ''.join(chr(int(bits[i:i + 8], 2)) for i in range(0, len(bits), 8))

In [72]:
# Round-trip test: text -> bit string -> file -> bit string -> text

raw = "abc"
# Encode as a str of '0'/'1' characters (a text rendering of the bits,
# not a bytes object).
binary_string = chars_to_binary(raw)
print(type(binary_string), binary_string)

# Save the digits in text mode.
with open("binary.txt", "w") as bits_out:
  bits_out.write(binary_string)

# Load the digits back as text.
with open('binary.txt', 'r') as bits_in:
  content = bits_in.read()

print(type(content), content)

# Decode; as the cell's last expression, the result is displayed.
binary_to_chars(content)

<class 'str'> 011000010110001001100011
<class 'str'> 011000010110001001100011


'abc'