# Python Tripos

## 1. Logistic growth model

In [9]:
# Parameters of the iteration p -> r * p * (1 - p).
p = 0.43  # starting value
r = 3.1   # multiplier applied at every step
y = 20    # number of steps after the starting value


def _iterate(start, rate, count):
    """Yield `count` successive values of x -> rate * x * (1 - x), starting at `start`."""
    x = start
    for _ in range(count):
        yield x
        x = rate * x * (1 - x)


# g holds the starting value followed by y iterates (y + 1 entries in total).
g = list(_iterate(p, r, y + 1))

# One line per step: step number right-aligned to width 2, value to 2 decimals.
for step, value in enumerate(g):
    print(f'{step:2} : {value:>.2f}')


 0 : 0.43
 1 : 0.76
 2 : 0.57
 3 : 0.76
 4 : 0.56
 5 : 0.76
 6 : 0.56
 7 : 0.76
 8 : 0.56
 9 : 0.76
10 : 0.56
11 : 0.76
12 : 0.56
13 : 0.76
14 : 0.56
15 : 0.76
16 : 0.56
17 : 0.76
18 : 0.56
19 : 0.76
20 : 0.56


## 2. FASTA to GenBank

In [None]:
# GenBank file format
'''
ORIGIN      
        1 gaagtagaag gcgtgggccg cctggtgaac cgaattgttg agtgaggaaa cagcgaaatg
       61 aaaaaagtaa atcattggat caacggcaaa aatgttgcag gtaacgacta cttcctgacc
//
'''

from os import path

# Path of the FASTA file to convert; left empty as a placeholder.
filename = ''

ROWLEN = 60  # bases per output row
COLLEN = 10  # bases per space-separated group within a row


def format_origin(seq, rowlen=ROWLEN, collen=COLLEN):
    """Return `seq` laid out as an ORIGIN block like the example above.

    The result starts with an 'ORIGIN' line, followed by one line per
    `rowlen` bases (1-based start position right-aligned to width 9, then
    the bases in groups of `collen` separated by single spaces), and ends
    with a '//' line. Every line, including the last, is newline-terminated.
    An empty `seq` yields just the 'ORIGIN' and '//' lines.
    """
    lines = ['ORIGIN']

    for start in range(0, len(seq), rowlen):
        row = seq[start:start + rowlen]
        groups = ' '.join(row[j:j + collen] for j in range(0, len(row), collen))
        lines.append(f'{start + 1:>9} {groups}')

    lines.append('//')
    return '\n'.join(lines) + '\n'


# Skip the conversion while `filename` is still the empty placeholder,
# so the cell does not fail on open('').
if filename:
    with open(filename, 'r') as f:
        # Concatenate every non-header line; FASTA headers start with '>'.
        seq = ''.join(line.strip() for line in f if not line.startswith('>'))

    # Output goes next to the input, with the extension replaced by .GenBank.
    with open(path.splitext(filename)[0] + '.GenBank', 'w') as f:
        f.write(format_origin(seq))

## 3. TATA-Pribnow box

In [None]:
seq = ''

boxlen = 6
boxes = ('TATAAA', 'TATAAT')

# Every boxlen-wide window of seq, in order; enumerate() below recovers the
# 0-based start offset of each window.
windows = (seq[k:k + boxlen] for k in range(len(seq) - boxlen + 1))

for offset, window in enumerate(windows):
    if window not in boxes:
        continue

    # Offset right-aligned to width 6, a tab, then the matched box.
    print(f'{offset:>6}\t{window}')

## 4. TATA boxes in Ebola virus genome

In [1]:
filename = 'EboBund-112 2012.fasta'

boxlen = 6
boxes = ('TATAAA', 'TATAAT')

# Join every line that contains no '>' (lines with '>' are treated as
# headers and dropped) into one continuous sequence string.
with open(filename, 'r') as fasta:
    pieces = [raw.strip() for raw in fasta if '>' not in raw]
seq = ''.join(pieces)

# Slide a boxlen-wide window over the sequence and report each window that
# is one of the boxes, together with its 0-based start offset.
windows = (seq[k:k + boxlen] for k in range(len(seq) - boxlen + 1))

for offset, window in enumerate(windows):
    if window not in boxes:
        continue

    print(f'{offset:>6}\t{window}')

   258	TATAAA
  1395	TATAAT
  8712	TATAAA
  9086	TATAAT
  9921	TATAAT
 11694	TATAAT
 12879	TATAAT
 13763	TATAAA
 14190	TATAAT


## 5. Palindromic restriction sites in Zika virus genome

### 5.1

In [10]:
seqf = "GCTAGTGTATGCATGAGCGTAGGCGATGTGGCGCCGAGCTGAGGTGATCACGTGATGTGCTAGTCG"

# seqr is the base-by-base complement of seqf (same orientation, not reversed).
complement = str.maketrans('ACGT', 'TGCA')
seqr = seqf.translate(complement)

sitelen = 6

print(seqf)
print(seqr)
print()

# A window is reported when the forward window equals the complement window
# read backwards, i.e. the site reads the same on both strands.
for pos in range(len(seqf) - sitelen + 1):
    stop = pos + sitelen
    fwd, comp = seqf[pos:stop], seqr[pos:stop]

    if fwd != comp[::-1]:
        continue

    print(f'{fwd} {comp} : {pos:>2}')

GCTAGTGTATGCATGAGCGTAGGCGATGTGGCGCCGAGCTGAGGTGATCACGTGATGTGCTAGTCG
CGATCACATACGTACTCGCATCCGCTACACCGCGGCTCGACTCCACTAGTGCACTACACGATCAGC

ATGCAT TACGTA :  8
GGCGCC CCGCGG : 29
TGATCA ACTAGT : 44
CACGTG GTGCAC : 48


### 5.2

In [9]:
filename = 'zika.fasta'

sitelen = 6

# Join every line that contains no '>' (lines with '>' are treated as
# headers and dropped) into one continuous forward-strand string.
with open(filename, 'r') as fasta:
    pieces = [raw.strip() for raw in fasta if '>' not in raw]
seqf = ''.join(pieces)

# seqr is the base-by-base complement of seqf (same orientation, not reversed).
complement = str.maketrans('ACGT', 'TGCA')
seqr = seqf.translate(complement)

# A window is reported when the forward window equals the complement window
# read backwards, i.e. the site reads the same on both strands.
for pos in range(len(seqf) - sitelen + 1):
    stop = pos + sitelen
    fwd, comp = seqf[pos:stop], seqr[pos:stop]

    if fwd != comp[::-1]:
        continue

    print(f'{fwd} {comp} : {pos:>5}')

TCCGGA AGGCCT :    22
GGATCC CCTAGG :    28
TCCGGA AGGCCT :    31
AGATCT TCTAGA :   257
CCATGG GGTACC :   358
ACATGT TGTACA :   397
ACATGT TGTACA :   493
TCTAGA AGATCT :   636
TTGCAA AACGTT :   678
TGATCA ACTAGT :   730
CCCGGG GGGCCC :   762
TCATGA AGTACT :   838
GTCGAC CAGCTG :   990
ATCGAT TAGCTA :  1058
GATATC CTATAG :  1061
ACATGT TGTACA :  1212
AAGCTT TTCGAA :  1409
AGGCCT TCCGGA :  1439
AGATCT TCTAGA :  1454
GTGCAC CACGTG :  1494
TCATGA AGTACT :  1511
GCATGC CGTACG :  1532
GAATTC CTTAAG :  1587
GAGCTC CTCGAG :  1669
TCTAGA AGATCT :  1673
TGGCCA ACCGGT :  1715
AAGCTT TTCGAA :  1746
GTGCAC CACGTG :  1778
CAGCTG GTCGAC :  1810
GGCGCC CCGCGG :  2106
ACCGGT TGGCCA :  2458
AGGCCT TCCGGA :  2512
GAGCTC CTCGAG :  2589
CCATGG GGTACC :  2705
GTCGAC CAGCTG :  2772
CTTAAG GAATTC :  2874
CAGCTG GTCGAC :  2926
GGGCCC CCCGGG :  3010
ACATGT TGTACA :  3033
TGGCCA ACCGGT :  3042
CCATGG GGTACC :  3168
CATATG GTATAC :  3438
TCATGA AGTACT :  3523
TCATGA AGTACT :  3553
TGGCCA ACCGGT :  3586
AAGCTT TTC