In [None]:
#import statements
import lzma
import gzip
import bz2
import binascii

In [None]:
def bzip_compression(data):
    """Compress a bytes-like payload with bzip2.

    Args:
        data: Bytes-like object passed straight to ``bz2.compress``.

    Returns:
        The bzip2-compressed payload as ``bytes``.
    """
    return bz2.compress(data)


In [None]:
def gzip_compression(data):
    """Compress a bytes-like payload with gzip.

    Args:
        data: Bytes-like object passed straight to ``gzip.compress``.

    Returns:
        The gzip-compressed payload as ``bytes``.
    """
    return gzip.compress(data)

In [None]:
def lzma_compression(data):
    """Compress a bytes-like payload with LZMA.

    Args:
        data: Bytes-like object passed straight to ``lzma.compress``.

    Returns:
        The LZMA-compressed payload as ``bytes``.
    """
    return lzma.compress(data)


In [None]:
def bzip_decompression(compressed_data):
    """Inflate a bzip2 payload.

    Args:
        compressed_data: Bytes-like bzip2 stream.

    Returns:
        The decompressed ``bytes`` produced by ``bz2.decompress``.
    """
    restored = bz2.decompress(compressed_data)
    return restored

In [None]:
def gzip_decompression(compressed_data):
    """Inflate a gzip payload.

    Args:
        compressed_data: Bytes-like gzip stream.

    Returns:
        The decompressed ``bytes`` produced by ``gzip.decompress``.
    """
    restored = gzip.decompress(compressed_data)
    return restored

In [None]:
def lzma_decompression(compressed_data):
    """Inflate an LZMA payload.

    Args:
        compressed_data: Bytes-like LZMA stream.

    Returns:
        The decompressed ``bytes`` produced by ``lzma.decompress``.
    """
    restored = lzma.decompress(compressed_data)
    return restored

In [None]:
def data_base64(data):
    """Encode binary data as base64.

    Args:
        data: Bytes-like object passed to ``binascii.b2a_base64``.

    Returns:
        The base64 text as ``bytes``, exactly as ``binascii.b2a_base64``
        returns it (that call appends a trailing newline by default).
    """
    return binascii.b2a_base64(data)

In [None]:
def data_reconvert(reconvert):
    """Decode base64 text back into binary data.

    Args:
        reconvert: Base64 payload passed to ``binascii.a2b_base64``.

    Returns:
        The decoded ``bytes``.
    """
    decoded = binascii.a2b_base64(reconvert)
    return decoded


In [None]:
# Creating tree nodes for Huffman Coding
class NodeTree:
    """Internal node of a Huffman tree: a pair of left/right children.

    The children are stored exactly as passed in; no type is enforced here.
    """

    def __init__(self, left=None, right=None):
        self.left, self.right = left, right

    def children(self):
        """Return the ``(left, right)`` children as a tuple."""
        return self.left, self.right

    def nodes(self):
        """Return the same ``(left, right)`` tuple as ``children``."""
        return self.left, self.right

    def __str__(self):
        """Render as ``<left>_<right>`` using ``str`` of each child."""
        return str(self.left) + '_' + str(self.right)


# Walk a Huffman tree and build the symbol -> bit-string code table
def huffman_code_tree(node, left=True, binString=''):
    """Build the symbol -> bit-string table for a Huffman (sub)tree.

    Args:
        node: Either a leaf symbol (a ``str``) or an internal node exposing
            ``children()`` that returns a ``(left, right)`` pair.
        left: Unused; kept so existing positional callers keep working.
        binString: Bit prefix accumulated on the path from the root.

    Returns:
        A dict mapping every leaf symbol below ``node`` to its code. Going to
        the left child appends ``'0'``, going to the right child appends
        ``'1'``. A tree that is just one leaf is given the code ``'0'``.
    """
    if isinstance(node, str):
        # A leaf reached with an empty prefix means the whole tree is a single
        # symbol. An empty code would make the encoded output empty (and
        # therefore undecodable), so assign a one-bit code instead.
        return {node: binString or '0'}

    left_child, right_child = node.children()
    codes = {}
    codes.update(huffman_code_tree(left_child, True, binString + '0'))
    codes.update(huffman_code_tree(right_child, False, binString + '1'))
    return codes


def Huffman_Compress(string, outGraph=False):
    """Build a Huffman code table for the symbols of ``string``.

    Args:
        string: Iterable of symbols (normally a ``str``) to analyse.
        outGraph: When truthy, print a ``Char | Huffman code`` table.

    Returns:
        A dict mapping each distinct symbol to its bit-string code, as
        produced by ``huffman_code_tree`` for the root of the tree. Empty
        input yields an empty dict.
    """
    # Count how often each symbol occurs.
    freq = {}
    for c in string:
        freq[c] = freq.get(c, 0) + 1

    # Most frequent first, so the two rarest entries sit at the tail.
    freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)

    if freq:
        nodes = freq
        while len(nodes) > 1:
            # Merge the two least frequent entries under a new internal node.
            (key1, c1) = nodes[-1]
            (key2, c2) = nodes[-2]
            nodes = nodes[:-2]  # slicing copies, so ``freq`` stays intact
            nodes.append((NodeTree(key1, key2), c1 + c2))
            nodes = sorted(nodes, key=lambda x: x[1], reverse=True)

        huffmanCode = huffman_code_tree(nodes[0][0])
    else:
        # Nothing to encode: there is no root node to walk.
        huffmanCode = {}

    if outGraph:
        print(' Char | Huffman code ')
        print('----------------------')
        for (char, frequency) in freq:
            print(' %-4r |%12s' % (char, huffmanCode[char]))

    return huffmanCode

In [None]:
def toBinary(String, GraphKeys):
    """Encode ``String`` as a bit string using a symbol -> code table.

    Args:
        String: Iterable of symbols to encode.
        GraphKeys: Mapping from each symbol to its code; every symbol of
            ``String`` must be present (a missing one raises ``KeyError``).

    Returns:
        A ``(bits, sense)`` tuple. ``bits`` always has even length: when the
        concatenated codes have odd length a single ``'0'`` is appended and
        ``sense`` is ``'Odd'``; otherwise ``sense`` is ``'Even'``.
    """
    bits = "".join(str(GraphKeys[symbol]) for symbol in String)
    if len(bits) % 2 == 0:
        return bits, 'Even'
    # Pad to a whole number of 2-bit groups and flag that padding happened.
    return bits + str(0), 'Odd'

In [None]:
def toDNA(From_Binary_Encoding):
    """Translate a bit string into DNA bases, two bits per base.

    Args:
        From_Binary_Encoding: ``(bits, sense)`` tuple; ``bits`` is read in
            consecutive pairs and ``sense`` is passed through unchanged.

    Returns:
        A ``(dna, sense)`` tuple where ``00 -> A``, ``01 -> T``, ``10 -> G``
        and ``11 -> C``. Pairs containing anything other than ``'0'``/``'1'``
        contribute no base.
    """
    bits, sense = From_Binary_Encoding
    base_for_pair = {
        ('0', '0'): 'A',
        ('0', '1'): 'T',
        ('1', '0'): 'G',
        ('1', '1'): 'C',
    }

    bases = []
    for start in range(0, len(bits), 2):
        first = bits[start]
        if first not in ('0', '1'):
            # No mapping can match, and the second bit is never inspected.
            continue
        # Indexing (not slicing) keeps the IndexError on an odd-length input.
        bases.append(base_for_pair.get((first, bits[start + 1]), ''))

    return ''.join(bases), sense

In [None]:
!pip install cryptography
from cryptography.fernet import Fernet

# Fernet instance created by the most recent Encrypt() call. Decrypt() needs
# the very same instance (i.e. the same key), so it is kept at module level.
encryption_type = None


def Encrypt(String):
    """Encrypt ``String`` with a freshly generated Fernet key.

    Every call generates a new key and replaces the module-level
    ``encryption_type``, so only tokens from the latest call can be passed
    to ``Decrypt``.

    Args:
        String: Text to encrypt; it is encoded as UTF-8 first.

    Returns:
        The Fernet token as a ``str``.
    """
    global encryption_type

    key = Fernet.generate_key()
    encryption_type = Fernet(key)
    token = encryption_type.encrypt(bytes(String, encoding='utf8'))

    # Decode instead of calling str() on the bytes: str(b'...') yields the
    # repr "b'...'", which is not a valid token and cannot be decrypted.
    return token.decode('ascii')


def Decrypt(String):
    """Decrypt a token produced by the most recent ``Encrypt`` call.

    Args:
        String: Fernet token as ``str`` or ``bytes``.

    Returns:
        The decrypted payload as ``bytes`` (the UTF-8 encoding of the text
        that was passed to ``Encrypt``).

    Raises:
        RuntimeError: If ``Encrypt`` has not been called yet, so no key
            exists.
    """
    if encryption_type is None:
        raise RuntimeError('Encrypt() must be called before Decrypt()')

    if isinstance(String, str):
        String = String.encode('ascii')

    return encryption_type.decrypt(String)



In [None]:
def Main_Compress(String, compressAlgo='bzip', encrypt=False, retainCase=False, outGraph=False):
    """Run the text -> (compress) -> (encrypt) -> Huffman -> DNA pipeline.

    Args:
        String: Input text.
        compressAlgo: ``'bzip'``, ``'gzip'`` or ``'lzma'`` to compress and
            base64-encode the text first; any other value (including
            ``'None'``) leaves the text as it is.
        encrypt: When equal to ``True``, the text is passed through
            ``Encrypt`` before Huffman coding.
        retainCase: When equal to ``False``, the text is lower-cased first.
        outGraph: Forwarded to ``Huffman_Compress``.

    Returns:
        The ``(DNA, Sense)`` tuple returned by ``toDNA``.
    """
    text = String
    # Explicit comparison against False/True is kept on purpose so that
    # non-bool arguments behave exactly as before.
    if retainCase == False:
        text = text.lower()

    compressors = (
        ('bzip', bzip_compression),
        ('gzip', gzip_compression),
        ('lzma', lzma_compression),
    )
    for algo_name, compress in compressors:
        if compressAlgo == algo_name:
            # Compress the UTF-8 bytes, then base64 them back into text so the
            # later stages keep working on a str.
            packed = data_base64(compress(bytes(text, 'utf-8')))
            text = str(packed, 'utf-8')
            break

    if encrypt == True:
        text = Encrypt(text)

    code_table = Huffman_Compress(text, outGraph)
    binary_encoding = toBinary(text, code_table)
    return toDNA(binary_encoding)

In [None]:
Test_String = """Lorem ipsum dolor sit amet, consectetur adipiscing elit. Morbi maximus nisi in nunc commodo, id malesuada odio vulputate. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Nam dapibus ac risus eu rutrum. Proin pretium eget nisl eu mattis. Nullam et tincidunt lorem, vitae vestibulum est. Nam luctus arcu a vehicula finibus. Sed cursus, est ut facilisis maximus, dolor augue dictum ipsum, quis fringilla nibh nibh a nisi. Curabitur lobortis elementum facilisis. Aliquam erat volutpat. Sed ultricies turpis sit amet dui mattis, ac aliquet purus efficitur. Nunc mi diam, interdum eu augue ut, tempus rhoncus ex. Etiam magna sapien, imperdiet et sem in, congue imperdiet mi. Sed venenatis magna purus, a faucibus neque tincidunt sed. Integer vel eleifend tellus. Lorem ipsum dolor sit amet, consectetur adipiscing elit. In hac habitasse platea dictumst. Quisque consectetur lorem vitae commodo consectetur. Mauris augue sem, convallis non ante sed, facilisis molestie nisi. Duis tempor sodales dignissim. Morbi aliquet orci non venenatis gravida. Ut eleifend eu purus eu euismod. Suspendisse interdum est eu dolor vulputate, eget rhoncus lacus placerat. Maecenas quis nulla vel tellus consequat pretium. Curabitur nibh nulla, aliquam at volutpat vitae, pretium at metus. Nunc elementum in neque eget rutrum. Etiam sit amet luctus magna. Proin pulvinar commodo libero, sed ultrices mauris mattis a. Vivamus tempus, libero non pretium laoreet, velit metus porta sapien, ac fermentum velit sapien et turpis. Sed tincidunt lacus sed magna porttitor tempor. Vestibulum in nibh at mauris tristique maximus. Ut commodo erat eu consequat luctus. Vivamus ligula libero, porttitor pharetra justo ut, sagittis luctus ante. Nulla rhoncus dictum vulputate. Cras euismod quam nec eros dapibus mollis. Nulla fermentum, nunc aliquam sollicitudin scelerisque, nulla justo elementum arcu, tempus tristique ante odio id magna. 
Curabitur consequat, quam eu ultrices molestie, sapien turpis viverra erat, vitae dapibus ipsum dui id enim. Sed id mauris magna. In interdum eu quam eget bibendum. Fusce sit amet urna ut mauris rutrum molestie ac in dui. Curabitur quis felis tortor. Suspendisse consectetur at diam sit amet pulvinar. Nunc eu elit eu dui euismod placerat. Fusce sodales tempor quam ac convallis. Proin sed ex libero. Donec mollis urna nec ipsum cursus, lobortis egestas risus eleifend. Integer viverra mollis nisi, vitae mollis diam pellentesque quis. Aenean sem quam, pellentesque sit amet porta nec, posuere et nunc. Duis tempor imperdiet massa, sit amet eleifend eros. Sed vulputate lectus non condimentum dapibus. Fusce scelerisque, odio sit amet eleifend varius, justo nisl mollis ipsum, id eleifend lectus justo a arcu. Aenean feugiat ex vitae libero sodales eleifend. Vestibulum nec ante sapien. Fusce eu ex tempus, imperdiet tortor nec, rhoncus dolor. Proin blandit consectetur dui et laoreet. Sed fermentum lectus sit amet facilisis ultricies. Aenean faucibus ex at tellus dignissim, ut bibendum elit pharetra. Vestibulum sit amet leo vitae risus iaculis consequat ac nec erat. Praesent ac sollicitudin lectus. Ut a fermentum nisl. Nunc in felis at tellus viverra convallis ac in dui. Pellentesque gravida, eros sed hendrerit auctor, libero urna sagittis eros, vitae imperdiet enim ex. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Morbi maximus nisi in nunc commodo, id malesuada odio vulputate. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Nam dapibus ac risus eu rutrum. Proin pretium eget nisl eu mattis. Nullam et tincidunt lorem, vitae vestibulum est. Nam luctus arcu a vehicula finibus. Sed cursus, est ut facilisis maximus, dolor augue dictum ipsum, quis fringilla nibh nibh a nisi. Curabitur lobortis elementum facilisis. Aliquam erat volutpat. Sed ultricies turpis sit amet dui mattis, ac aliquet purus efficitur. 
Nunc mi diam, interdum eu augue ut, tempus rhoncus ex. Etiam magna sapien, imperdiet et sem in, congue imperdiet mi. Sed venenatis magna purus, a faucibus neque tincidunt sed. Integer vel eleifend tellus. Lorem ipsum dolor sit amet, consectetur adipiscing elit. In hac habitasse platea dictumst. Quisque consectetur lorem vitae commodo consectetur. Mauris augue sem, convallis non ante sed, facilisis molestie nisi. Duis tempor sodales dignissim. Morbi aliquet orci non venenatis gravida. Ut eleifend eu purus eu euismod. Suspendisse interdum est eu dolor vulputate, eget rhoncus lacus placerat. Maecenas quis nulla vel tellus consequat pretium. Curabitur nibh nulla, aliquam at volutpat vitae, pretium at metus. Nunc elementum in neque eget rutrum. Etiam sit amet luctus magna. Proin pulvinar commodo libero, sed ultrices mauris mattis a. Vivamus tempus, libero non pretium laoreet, velit metus porta sapien, ac fermentum velit sapien et turpis. Sed tincidunt lacus sed magna porttitor tempor. Vestibulum in nibh at mauris tristique maximus. Ut commodo erat eu consequat luctus. Vivamus ligula libero, porttitor pharetra justo ut, sagittis luctus ante. Nulla rhoncus dictum vulputate. Cras euismod quam nec eros dapibus mollis. Nulla fermentum, nunc aliquam sollicitudin scelerisque, nulla justo elementum arcu, tempus tristique ante odio id magna. Curabitur consequat, quam eu ultrices molestie, sapien turpis viverra erat, vitae dapibus ipsum dui id enim. Sed id mauris magna. In interdum eu quam eget bibendum. Fusce sit amet urna ut mauris rutrum molestie ac in dui. Curabitur quis felis tortor. Suspendisse consectetur at diam sit amet pulvinar. Nunc eu elit eu dui euismod placerat. Fusce sodales tempor quam ac convallis. Proin sed ex libero. Donec mollis urna nec ipsum cursus, lobortis egestas risus eleifend. Integer viverra mollis nisi, vitae mollis diam pellentesque quis. Aenean sem quam, pellentesque sit amet porta nec, posuere et nunc. 
Duis tempor imperdiet massa, sit amet eleifend eros. Sed vulputate lectus non condimentum dapibus. Fusce scelerisque, odio sit amet eleifend varius, justo nisl mollis ipsum, id eleifend lectus justo a arcu. Aenean feugiat ex vitae libero sodales eleifend. Vestibulum nec ante sapien. Fusce eu ex tempus, imperdiet tortor nec, rhoncus dolor. Proin blandit consectetur dui et laoreet. Sed fermentum lectus sit amet facilisis ultricies. Aenean faucibus ex at tellus dignissim, ut bibendum elit pharetra. Vestibulum sit amet leo vitae risus iaculis consequat ac nec erat. Praesent ac sollicitudin lectus. Ut a fermentum nisl. Nunc in felis at tellus viverra convallis ac in dui. Pellentesque gravida, eros sed hendrerit auctor, libero urna sagittis eros, vitae imperdiet enim ex."""

# Demo run with the default options (bzip, no encryption, lower-cased input);
# prints the (DNA string, padding flag) tuple returned by Main_Compress.
print(Main_Compress(Test_String))

('CCCGTTGCACCGCACTTATTTCCTCGATACATTTTATAAGGGGGGAGAACGGCGTTCAACGCCGGTTAGTTTGGAGTACAAGGCCAACAACTAACGATAACGATATGAAGACGAACACGGCCAACAACAACAACAACAACAACAACAACAACAACAACAACAATAGCAACAACAAGGTCGCTCCCCAGCAACAACAATTTTACGGACCGCAACAACAAACAGGCGCCCCCCAACAACGCCGGGACTGACGGACAACAACTCCCAACGGGCGCGCCAGTGCCCCCCAGAATGCCAATTATGTATATTATATTGAACGTATTGAGCGGCTCTTCATAAATTTCAGTAGAGTCGCACGTGGCCTCAAAGCGGCCGCATGATGCGTCCCGAAGCACACTCTCTTGGCTCAGACCTTATGCACCGTGGTGCTGAGCTCACTACACAATTACCTATGCCCGTCTATAGGGTGTCCGTTACGGTCCATGGCCGTCGTATCCCGGCGCGTGGTAGCGCTCAGGATACACATATTGCAGTACCTTACGGTATACCTTTGAGATTACCCATGACACCGCGGGCTCAGCCAAGTCCATGTGGGGCGGCTCACGTTGTTCGTAGTGACCCAACTGGGATCGGTGACGGTCAACTCGCCGTCCCAACCCCAGTCTGCTTTGTCGTCCTAGGCGCTACATAACTCTAGCTACTCGCCAGGAGACGAGCATAGTGGTCCTCCCTATTGAGTGGTCTCTAGGCTGATAACTACCGGCGCCTTTACCCAGATGAATATCCATGTGCGCTGCAATTGCCGGAAATTCGGAAGGCGCAGCTCTGCCCAACAGCACCTTTCCTATAAGGCTAGTAGTCGCACAATCTGCAGGCTAGCTGGCTGAGCAATGCGCTCTTCCAGATCACTGGTAGCTTCCTGCTAGGGAACCGCCAGTACCGCGGGGATATGCTCGAAAAAGCTGATTTGACGGTTAGCCTGACAAGGGGATCTGGTATTC

In [None]:
# Baseline size: concatenate the binary form of every character's code point.
# NOTE(review): format(..., 'b') is not zero-padded, so characters contribute
# a variable number of bits — confirm this is the intended baseline.
res = ''.join(format(ord(i), 'b') for i in Test_String)
# toDNA emits one base per two bits, so half the bit count is the baseline
# length expressed in bases.
len_res = len(str(res))/2 
 
# Report the ratio for every combination of algorithm (x), encrypt flag (y)
# and retainCase flag (z).
for x in ['bzip','gzip','lzma','None']:
  for y in [True, False]:
    for z in [True, False]:
      (DNA_Codes, Sense) = Main_Compress(Test_String, x, y, z)
      # Baseline length divided by the length of the produced DNA string.
      l = len_res/len(DNA_Codes)
      print("Compress Algo: {} Encryption: {:5s} CaseRetain: {:5s} :: Compress Ratio = {:1.3f}".format(x,str(y),str(z),l))

Compress Algo: bzip Encryption: True  CaseRetain: True  :: Compress Ratio = 2.580
Compress Algo: bzip Encryption: True  CaseRetain: False :: Compress Ratio = 2.660
Compress Algo: bzip Encryption: False CaseRetain: True  :: Compress Ratio = 3.622
Compress Algo: bzip Encryption: False CaseRetain: False :: Compress Ratio = 3.735
Compress Algo: gzip Encryption: True  CaseRetain: True  :: Compress Ratio = 2.925
Compress Algo: gzip Encryption: True  CaseRetain: False :: Compress Ratio = 3.022
Compress Algo: gzip Encryption: False CaseRetain: True  :: Compress Ratio = 4.056
Compress Algo: gzip Encryption: False CaseRetain: False :: Compress Ratio = 4.174
Compress Algo: lzma Encryption: True  CaseRetain: True  :: Compress Ratio = 2.807
Compress Algo: lzma Encryption: True  CaseRetain: False :: Compress Ratio = 2.876
Compress Algo: lzma Encryption: False CaseRetain: True  :: Compress Ratio = 3.891
Compress Algo: lzma Encryption: False CaseRetain: False :: Compress Ratio = 3.967
Compress Algo: N