In [1]:
import numpy as np
import matplotlib.pyplot as plt
from bitarray import bitarray


In [46]:
def read_raw_binary_bit(filepath, bitsize=8):
    """Read a file and return its bits as a list of '0'/'1' strings.

    The whole file is loaded into a ``bitarray`` and cut into consecutive
    groups of ``bitsize`` bits; each group is rendered with ``to01()``.

    Args:
        filepath: Path of the file to read (opened in binary mode).
        bitsize: Number of bits per chunk; must be a positive integer.

    Returns:
        A list of strings made of '0' and '1' characters, in file order.
        Every string is ``bitsize`` characters long except possibly the
        last one, which is shorter when the file's bit count is not a
        multiple of ``bitsize``.

    Raises:
        ValueError: If ``bitsize`` is less than 1.
    """
    # Reject non-positive sizes up front: a negative step makes range()
    # yield nothing, which would silently return [] and hide the mistake.
    if bitsize < 1:
        raise ValueError(f"bitsize must be a positive integer, got {bitsize!r}")

    bits = bitarray()
    with open(filepath, 'rb') as f:
        bits.fromfile(f)
    return [bits[start:start + bitsize].to01()
            for start in range(0, len(bits), bitsize)]

def read_raw_binary_byte(filepath, bytesize=1):
    """Print a hex/binary dump of a file and return its bytes as bit strings.

    The file is read in full and split into consecutive chunks of
    ``bytesize`` bytes. For every chunk one row is printed containing:
    the chunk's starting offset (zero-padded *decimal*, 8 digits), the
    bytes in hex, and the same bytes as 8-bit binary groups.

    Args:
        filepath: Path of the file to read (opened in binary mode).
        bytesize: Number of bytes per chunk/row; must be a positive integer.

    Returns:
        A list with one string per chunk: the chunk's bytes rendered as
        8-bit binary and concatenated without separators. The last string
        is shorter when the file size is not a multiple of ``bytesize``.

    Raises:
        ValueError: If ``bytesize`` is less than 1.
    """
    # Reject non-positive sizes up front: a negative step makes range()
    # yield nothing, which would silently return [] and hide the mistake.
    if bytesize < 1:
        raise ValueError(f"bytesize must be a positive integer, got {bytesize!r}")

    with open(filepath, "rb") as f:
        data = f.read()

    # A full row of hex needs "xx " per byte minus the trailing space.
    # The floor of 29 keeps the previous layout for bytesize <= 10, while
    # larger rows now get enough padding for a short final row to line up.
    hex_width = max(29, 3 * bytesize - 1)

    to_return = []
    for i in range(0, len(data), bytesize):
        chunk = data[i:i+bytesize]
        hex_part = " ".join(f"{b:02x}" for b in chunk)
        bin_part = " ".join(f"{b:08b}" for b in chunk)
        print(f"{i:08}  {hex_part:<{hex_width}}  {bin_part}")
        to_return.append("".join(f"{b:08b}" for b in chunk))
    return to_return


In [47]:
# Split the sample file into 32-bit groups, then report how many groups
# there are and show the groups themselves.
data = read_raw_binary_bit(filepath="data/secret.jpg", bitsize=32)
print(f"{len(data)} bitchunk total")
print(data)


18 bitchunk total
['01000001001000000111000101110101', '01101001011000110110101100100000', '01100010011100100110111101110111', '01101110001000000110011001101111', '01111000001000000110101001110101', '01101101011100000110010101100100', '00100000011011110111010101110100', '00100000011011110110011000100000', '01100010011001010110010000001010', '01000001001000000111000101110101', '01101001011000110110101100100000', '01100010011100100110111101110111', '01101110001000000110011001101111', '01111000001000000110101001110101', '01101101011100000110010101100100', '00100000011011110111010101110100', '00100000011011110110011000100000', '01100010011001010110010000001010']


In [48]:
# Dump the sample file 10 bytes per row (the function prints the rows),
# then report the row count and the collected bit strings after a blank line.
data = read_raw_binary_byte(filepath="data/secret.jpg", bytesize=10)
print(f"\n{len(data)} bytechunk total")
print(data)


00000000  41 20 71 75 69 63 6b 20 62 72  01000001 00100000 01110001 01110101 01101001 01100011 01101011 00100000 01100010 01110010
00000010  6f 77 6e 20 66 6f 78 20 6a 75  01101111 01110111 01101110 00100000 01100110 01101111 01111000 00100000 01101010 01110101
00000020  6d 70 65 64 20 6f 75 74 20 6f  01101101 01110000 01100101 01100100 00100000 01101111 01110101 01110100 00100000 01101111
00000030  66 20 62 65 64 0a 41 20 71 75  01100110 00100000 01100010 01100101 01100100 00001010 01000001 00100000 01110001 01110101
00000040  69 63 6b 20 62 72 6f 77 6e 20  01101001 01100011 01101011 00100000 01100010 01110010 01101111 01110111 01101110 00100000
00000050  66 6f 78 20 6a 75 6d 70 65 64  01100110 01101111 01111000 00100000 01101010 01110101 01101101 01110000 01100101 01100100
00000060  20 6f 75 74 20 6f 66 20 62 65  00100000 01101111 01110101 01110100 00100000 01101111 01100110 00100000 01100010 01100101
00000070  64 0a                          01100100 00001010

8 bytechunk total
['010

In [None]:
# Left panel: the chunks in file order. Right panel: how often each
# distinct chunk occurs. `data` is whatever list of bit strings the most
# recently executed reader cell produced.
fig, ax = plt.subplots(1, 2, figsize=[12, 5])

ax[0].plot(data)
ax[0].set(title="Chunk sequence", xlabel="Chunk index", ylabel="Chunk bit string")

values, counts = np.unique(data, return_counts=True)
ax[1].bar(values, counts)
ax[1].set(title="Chunk frequency", xlabel="Chunk bit string", ylabel="Count")
# The bit-string tick labels are long, so rotate them to keep them legible.
ax[1].tick_params(axis='x', labelrotation=90)

plt.show()
