public
Description: repository for the code featured in the blog
Homepage: http://python.genedrift.org
Clone URL: git://github.com/nuin/beginning-python-for-bioinformatics.git
100755 41 lines (34 sloc) 1.095 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/env python
 
'''
A more elaborate script to generate random DNA sequences
'''
 
import random
import sys
 
def simulate_sequence(length):
    '''Return a random DNA sequence that is ``length`` nucleotides long.

    Every position is picked with ``random.choice`` from the four
    nucleotides A, C, G and T. A ``length`` of zero or less yields an
    empty string, because ``range(length)`` is then empty.
    '''
    #list with nucleotides
    dna = ['A', 'C', 'G', 'T']
    #joins all the picks in a single pass instead of growing a string
    #with +=, which can cost quadratic time on long sequences
    return ''.join(random.choice(dna) for _ in range(length))
 
#the three command-line arguments are mandatory: stop with a usage
#message instead of an IndexError traceback when any of them is missing
if len(sys.argv) < 4:
    sys.exit('usage: %s setsize minlength maxlength' % sys.argv[0])

#first parameter is the number of sequences to generate
setsize = int(sys.argv[1])
#minimum and maximum sequence lengths
minlength = int(sys.argv[2])
maxlength = int(sys.argv[3])

#initializes a list to store the sequence set
sequenceset = []
for i in range(setsize):
    #generate a random integer between min and max seq length
    #(random.randint includes both end points)
    rlength = random.randint(minlength, maxlength)
    #appending to the sequence set and calling simulated sequence
    #function
    sequenceset.append(simulate_sequence(rlength))

#printing output, one sequence per line; the parenthesized form of
#print behaves the same under Python 2 and Python 3
for sequence in sequenceset:
    print(sequence)