# Conditionals and Loops - Exercises IV

## 1. Searching for patterns

In [6]:
dna = 'GACTATACGAATGTGCTCGGTATAAGAATGAGCTATATATATGATGT'
p = 'TATAxxxATGxxxT'

for i in range(len(dna) - len(p) + 1):
    j = i + 7

    if dna[i:i+4] == 'TATA' and dna[j:j+3] == 'ATG' and dna[i+13] == 'T':
        print(i)

3
20
33


## 2. Codons in array

In [48]:
s = 'gggtgcgacgattcattgttttcggacaagtggataggcaaccactaccggtggattgtca'

l = [s[i:i+3] for i in range(0, len(s), 3)]

print(l)

['ggg', 'tgc', 'gac', 'gat', 'tca', 'ttg', 'ttt', 'tcg', 'gac', 'aag', 'tgg', 'ata', 'ggc', 'aac', 'cac', 'tac', 'cgg', 'tgg', 'att', 'gtc', 'a']


## 3. Logistic growth with while and for with range

$x_{n+1} = r \cdot x_n \cdot (1 - x_n)$

In [2]:
r = 3.73
x = 0.43

g = 0

print(f'{g:2}: {x:.2f}')

while g < 12:
    g += 1
    x = r * x * (1 - x)
    print(f'{g:2}: {x:.2f}')

 0: 0.43
 1: 0.91
 2: 0.29
 3: 0.77
 4: 0.66
 5: 0.84
 6: 0.50
 7: 0.93
 8: 0.23
 9: 0.67
10: 0.82
11: 0.54
12: 0.93


In [3]:
r = 3.73
x = 0.43

print(f'{0:2}: {x:.2f}')

for g in range(1, 13):
    x = r * x * (1 - x)
    print(f'{g:2}: {x:.2f}')

 0: 0.43
 1: 0.91
 2: 0.29
 3: 0.77
 4: 0.66
 5: 0.84
 6: 0.50
 7: 0.93
 8: 0.23
 9: 0.67
10: 0.82
11: 0.54
12: 0.93


## 4. Locating substrings - while loop

In [63]:
# Non-overlapping count
DNA_seq = 'CGGACACACAAAAAGAATGAAGGATTTTGAATCTTTATTGTGTGCGAGTAACTACGAGGAAGATTAAAGA'

print('DNA sequence:', DNA_seq)

ss = 'TT'
print('Substring:', ss)

print('str.count():', DNA_seq.count(ss))

count = 0
index = 0

ssLen = len(ss)

while index < len(DNA_seq):
    if ss == DNA_seq[index: index + ssLen]:
        count += 1
        index += ssLen
    else:
        index += 1

print('Our while count:', count)

DNA sequence: CGGACACACAAAAAGAATGAAGGATTTTGAATCTTTATTGTGTGCGAGTAACTACGAGGAAGATTAAAGA
Substring: TT
str.count(): 5
Our while count: 5


## 5. Locating substrings - for loop

In [74]:
# Non-overlapping count
DNA_seq = 'CGGACACACAAAAAGAATGAAGGATTTTGAATCTTTATTGTGTGCGAGTAACTACGAGGAAGATTAAAGA'

print('DNA sequence:', DNA_seq)

ss = 'TT'
print('Substring:', ss)

print('str.count():', DNA_seq.count(ss))

count = 0
offset = 0

ssLen = len(ss)

for index in range(len(DNA_seq) - ssLen + 1):
    if index >= offset and ss == DNA_seq[index: (t:=index + ssLen)]:
        offset = t
        count += 1

print('Our for count:', count)

DNA sequence: CGGACACACAAAAAGAATGAAGGATTTTGAATCTTTATTGTGTGCGAGTAACTACGAGGAAGATTAAAGA
Substring: TT
str.count(): 5
Our for count: 5


## 6. Timing

In [16]:
from time import process_time

for N in [10 ** i for i in range(4, 7)]:
    DNA_seq = 'CGGACACACAAAAAGAATGAAGGATTTTGAATCTTTATTGTGTGCGAGTAACTACGAGGAAGATTAAAGA' * N
    #print('DNA sequence:', DNA_seq)
     
    bp = 'T'
    #print('Base pair:', bp)
    
    t1 = process_time()
    #print('str.count():', DNA_seq.count(bp))
    sc = DNA_seq.count(bp)
    t1 = process_time() - t1
    
    count = 0
    index = 0
     
    t2 = process_time()
    while index < len(DNA_seq):
        if bp == DNA_seq[index]:
            count += 1
        index += 1
    t2 = process_time() - t2
     
    #print('Our while count:', count)

    print(f'N: {N}')
    print(f'str.count time:   {t1}')
    print(f'while count time: {t2}')
    
    if t1 > 0:
        print(f'Ratio:            {t2/t1}')
    
    print()

N: 10000
str.count time:   0.0
while count time: 0.140625

N: 100000
str.count time:   0.0
while count time: 1.25

N: 1000000
str.count time:   0.046875
while count time: 12.359375
Ratio:            263.6666666666667



## 7. All codons

In [18]:
b = ('A', 'C', 'G', 'T')

for i in b:
    for j in b:
        for k in b:
            print(f'{i}{j}{k}')

AAA
AAC
AAG
AAT
ACA
ACC
ACG
ACT
AGA
AGC
AGG
AGT
ATA
ATC
ATG
ATT
CAA
CAC
CAG
CAT
CCA
CCC
CCG
CCT
CGA
CGC
CGG
CGT
CTA
CTC
CTG
CTT
GAA
GAC
GAG
GAT
GCA
GCC
GCG
GCT
GGA
GGC
GGG
GGT
GTA
GTC
GTG
GTT
TAA
TAC
TAG
TAT
TCA
TCC
TCG
TCT
TGA
TGC
TGG
TGT
TTA
TTC
TTG
TTT


## 8. Locating cutting sites of restriction enzymes

In [20]:
DNAseq = "GGCGATGCTAGTCGCGTAGTCTAAGCTGTCGAGAATTCGGATGTCATGA"

restriction_enzymes = {"EcoRI" : "GAATTC", 
                        "AluI" : "AGCT", 
                        "NotI" : "GCGGCCGC", 
                        "TaqI" : "TCGA"
                      }

for k, v in restriction_enzymes.items():
    i = DNAseq.find(v)

    if i > -1:
        print(f'{k} :\t{v}\tIndex: {i}')

EcoRI :	GAATTC	Index: 32
AluI :	AGCT	Index: 23
TaqI :	TCGA	Index: 28


## 9. Generating random DNA sequences

In [85]:
bases = ('A', 'C', 'G', 'T')

from random import randint

l = 1001

DNA = []

for _ in range(l):
    DNA.append(bases[randint(0, 3)])

print(''.join(DNA))
print()
print(f'A: {DNA.count("A") / l:.2%}')
print(f'C: {DNA.count("C") / l:.2%}')
print(f'G: {DNA.count("G") / l:.2%}')
print(f'T: {DNA.count("T") / l:.2%}')

CCTGAACGCCCCTTAGAAATAGCGATCCCAGATGACAGAGTAACGGGAGCGACGTCTACAGTTCTTCTGGCCACAAGCCGTCCTCGCCTGGATTCTAGACGCACCAGGTTCTTCTCCTAGCCGGTCCAATCCTAACCAAGTTGGAGAGGGATGCGAGGCCTGCGAAGTCGTCCACTCGACCGGCTCAGAAGAGGGTATGGCTGGAGACCGTAAGCACAAATGGCGGACCTGCTAAAACGGCCAACTTCCTGACAACATGAAGAAGGAATCATACCTAAAGCTAAGTGAAAAACATTATGGTCATGTTGGACAATTAGTATACTTCAGTGCGGTAATAGTTTTTCGTCATGGACCATAAAGCACGCCTGGGCTTATACACAAGTTTCCCTAAAAGAATTAAATGACTTACTGTATCACTTAGCGCTAACCTGAGTTTCCCGCGTGGGTCCGGCATCGTGGATCCCGACATGCATAGCGTTGCTGCTGTGAAGACGTGCGCTAACCTATCCATGTAACTTGAACTTGGTTCCACACTTGGAAACTCCGGGCCGCTGTGCAACCTGTCCCGACATTAATTCATAGCGAGGCATTTCGGCGTATGTGCACACAAGGTCAGTTGCAGGGTCCGGGCTTAAGGGAGGTTATTAAGGAGCGCTACCGACTCGGGGTTGGGTGTGTCGGCGAAGTGTCACACATTCCTTTGCGTTGCAAGTAGGACGGTCGCCCTACAACTAACATATCTGCAAGCTGCAGGTCGCTACACTGCTAGACGACAACCGGAGGGGCAATGCATAAAAGAGCACATTAGACGGCTCGAGTACCGTGTTCCGATTTGTAGTCCCGTACCCTAAGGGTCCTGTACCTCTTCCTGGTCCCTTTGGACACGACGAAAGGACGTTCTACTCAGCCGCATTTACTACTGACACGGCAAGGGGTGATCACGCGATGCCCGCTCTAGCGCCGCAGTGGTGCGTTATGTATGAGCCCCCAGAATCCTGCT

## 10. Counting k-mers

In [1]:
DNA_seq = 'gggtgcgacgattcattgttttcggacaagtggataggcaaccactaccggtggattgtc'

from collections import Counter

k = 4

a = [DNA_seq[i - k: i] for i in range(k, len(DNA_seq) + 1)]

counts = Counter(a)

for k, v in counts.items():
    print(f'{k} :  {v}')

gggt :  1
ggtg :  2
gtgc :  1
tgcg :  1
gcga :  1
cgac :  1
gacg :  1
acga :  1
cgat :  1
gatt :  2
attc :  1
ttca :  1
tcat :  1
catt :  1
attg :  2
ttgt :  2
tgtt :  1
gttt :  1
tttt :  1
tttc :  1
ttcg :  1
tcgg :  1
cgga :  1
ggac :  1
gaca :  1
acaa :  1
caag :  1
aagt :  1
agtg :  1
gtgg :  2
tgga :  2
ggat :  2
gata :  1
atag :  1
tagg :  1
aggc :  1
ggca :  1
gcaa :  1
caac :  1
aacc :  1
acca :  1
ccac :  1
cact :  1
acta :  1
ctac :  1
tacc :  1
accg :  1
ccgg :  1
cggt :  1
tgtc :  1


## 11. Generating random human DNA sequences

In [5]:
from random import choices

dnaLen = 50

DNA = ''.join(choices('ACGT', weights=(29, 21, 21, 29), k=dnaLen))

print(DNA)

GATTTTAGAAGGTTGACTGATAGGGTCATCAAAAACAGCGATTCCTGGCA


## 12. Random mutations

In [23]:
DNAseq = "GGCGATGCTAGTCGCGTAGTCTAAGCTGTCGAGAATTCGGATGTCATGA"

m = ('A', 'C', 'G', 'T')

from random import randint, choice

mDNA = [*DNAseq]
dlen = len(mDNA) - 1

# Replace
c = choice(m)
i = randint(0, dlen)
print(f'{c} replaced {mDNA[i]} at location {i}')
mDNA[i] = c

# Insert
c = choice(m)
i = randint(0, dlen)
print(f'{c} inserted at location {i}')
mDNA.insert(i, c)

# Delete
i = randint(0, dlen)
c = mDNA.pop(i)
print(f'{c} deleted at location {i}')

print()

print(DNAseq)
print(''.join([str(i % 10) for i in range(dlen + 1)]))
print(''.join(mDNA))

A replaced A at location 22
C inserted at location 14
T deleted at location 42

GGCGATGCTAGTCGCGTAGTCTAAGCTGTCGAGAATTCGGATGTCATGA
0123456789012345678901234567890123456789012345678
GGCGATGCTAGTCGCCGTAGTCTAAGCTGTCGAGAATTCGGAGTCATGA
