# Binary to DNA Conversion
We'll assume the ordering of the characters in the string implies their relative magnitude. In much the same way a decimal or binary system works, i.e. "01" or "0123456789", where 0 < 1 in a binary system.

In [71]:
import numpy as np

from baseconvert import base
from math import ceil, log
from PIL import Image

In [33]:
DNA_NUCLEOTIDES = "ATCG" # [::-1]

In [52]:
# First, we convert values from decimal to binary
# Then, we convert values from binary to quaternary
# Finally, we map the quaternary numbers to nucleotides
def numberToBase(n: int, b: int) -> str:
    if n == 0:
        return "0"
    digits = []
    while n:
        digits.append(int(n % b))
        n //= b
    return "".join(list(map(str, digits[::-1])))


def quartenaryToNucleotide(q) -> str:
    nucleotides = []
    for digit in q:
        ndx = int(digit)
        nucleotides.append(DNA_NUCLEOTIDES[ndx])
    return "".join(nucleotides)


def nucleotideToQuartenary(n) -> str:
    quartenary = []
    for digit in n:
        quartenary.append(DNA_NUCLEOTIDES.index(digit))
    return "".join(map(str, quartenary))

In [53]:
BINARY = 2
QUARTENARY = 4
DECIMAL = 10
num = 1141

toBin = numberToBase(num, BINARY)
toQuar = numberToBase(num, QUARTENARY)

In [77]:
quartenaryToNucleotide(toQuar)
num_from_dna = nucleotideToQuartenary("TGGG")
num = numberToBase(int(num), 10)
base(list(map(int, num)), 4, 10)

(1, 2, 7)

In [27]:
# Read in image file of us as binary. Each element is a number from 0-255 representing the intensity
# of the respective color channel (red, blue, green)
data = ""
with open("images/us.jpg", "rb") as dashley_img:
    data = dashley_img.read()
    
# Now, we iterate across our container and convert each decimal value to that of
base4_data = []
for d in data:
    # Converts each base 10 value to base 4 (with padded zeros where necessary)
    base4_data.append(
        numberToBase(d, QUARTENARY).zfill(4)
    )
    
# Join string together then convert to nucleotide sequence
base4_data = "".join(base4_data)
data_as_dna = quartenaryToNucleotide(base4_data)

In [93]:
# For every four characters, add that to an array 
original_data = []
for begin in range(len(data_as_dna) // 4):
    nucleotide_sequence = data_as_dna[begin : begin+4]
    nucleotide_numeric = nucleotideToQuartenary(nucleotide_sequence)
    conversion_step = base(list(map(int, nucleotide_numeric)), 4, 10)
    original_numeric = int("".join(map(str, conversion_step)))
    original_data.append(original_numeric)

In [94]:
original_data = np.asarray(original_data)

In [102]:
len(original_data)

15178

In [110]:
bytes(original_data)[0:10]

b'\xff\x00\x00\x00\x00\x00\x00\x00\xff\x00'

In [111]:
data[0:10]

b'\xff\xd8\xff\xe0\x00\x10JFIF'