In [1]:
### 1.1.6. Fibonacci with a generator

In [9]:
from typing import Generator
def fib6(n: int) -> Generator[int, None, None]:
    yield 0
    if n > 0:
        last: int = 0
        next: int = 1
        for _ in range(1, n):
            last, next = next, last + next
            yield next                

In [13]:
%time
for i in fib6(50):
    print(i)

CPU times: user 4 µs, sys: 1e+03 ns, total: 5 µs
Wall time: 8.82 µs
0
1
2
3
5
8
13
21
34
55
89
144
233
377
610
987
1597
2584
4181
6765
10946
17711
28657
46368
75025
121393
196418
317811
514229
832040
1346269
2178309
3524578
5702887
9227465
14930352
24157817
39088169
63245986
102334155
165580141
267914296
433494437
701408733
1134903170
1836311903
2971215073
4807526976
7778742049
12586269025


### 1.2. Trivial compaction

In [16]:
import sys

In [22]:
x = 1100000

In [23]:
sys.getsizeof(x)

28

In [50]:
class CompressedGene:
    def __init__(self, gene:str) -> None:
        self._compress(gene)
    def _compress(self, gene:str) -> None:
        self.bit_string: int = 1
        for nucleotide in gene.upper():
            self.bit_string <<= 2
            if nucleotide == "A":
                self.bit_string |= 0b00
            elif nucleotide == "C":
                self.bit_string |= 0b01
            elif nucleotide == "G":
                self.bit_string |= 0b10
            elif nucleotide == "T":
                self.bit_string |= 0b11
            else:
                raise ValueError(f'Invalid Nucleotide: {nucleotide}')
    def decompress(self) -> str:
        gene: str = ''
        for i in range(self.bit_string.bit_length() - 1, 2):
            bits: int = self.bit_string >> i & 0b11
            if bits == 0b00:
                gene += "A"
            elif bits == 0b01:
                gene += "C"
            elif bits == 0b10:
                gene += "G"
            elif bits == 0b11:
                gene += "T"
            else:
                raise ValueError(f'Invalid bits: {bits}')
        return gene[::-1] 
            
                 

In [52]:
original_seq = "ACTACGACGCAGATAGACAGTAGACGATA" * 100
sys.getsizeof(original_seq)

2949

In [55]:
compressed: CompressedGene = CompressedGene(original_seq)
sys.getsizeof(compressed.bit_string)

800

In [56]:
sys.getsizeof(compressed.decompress())

49

In [48]:
type(0b101)

int