public
Description: repository for the code featured in the blog
Homepage: http://python.genedrift.org
Clone URL: git://github.com/nuin/beginning-python-for-bioinformatics.git
100755 56 lines (51 sloc) 2.106 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/env python
 
'''
script that counts the number of each nucleotide in a sequence using
user input and saving to a file.
'''
 
import sys
import re
 
 
fileentered = True #flag that determines if a filename has been entered
 
while fileentered == True:
    #ask the user to input a filename
    filename = raw_input('Please enter a file to check: ')
    #if a filename was entered, go ...
    if len(filename) >= 1:
        try:
            #open the file
            seqlist = open(filename, 'r').readlines()
            #sequence is read as a list, convert to string
            sequence = ''.join(seqlist)
            #remove carriage returns
            sequence = sequence.replace('\n', '')
            #counting
            totalA = sequence.count('A')
            totalC = sequence.count('C')
            totalG = sequence.count('G')
            totalT = sequence.count('T')
            #create a regex object with non-nucleotide letters to check for "errors"
            otherletter = re.compile('[BDEFHIJKLMNOPQRSUVXZ]+')
            #find possible non-nucleotides
            extra = re.findall(otherletter, sequence)
            #open an output filename to output counts
            output = open(filename + '.count', 'w')
            #writing the output
            output.write('Count report for file ' + filename + '\n')
            output.write('A = ' + str(totalA) + '\n')
            output.write('C = ' + str(totalC) + '\n')
            output.write('G = ' + str(totalG) + '\n')
            output.write('T = ' + str(totalT) + '\n')
            #if there are non-nucleotides in the sequence, report them
            if len(extra) > 0:
                output.write('Also were found ' + str(len(extra)) + ' errors\n')
                for i in extra:
                    output.write(i + ' ')
            else:
                output.write('No error found')
            print 'Result file saved on ' + filename + '.count'
        except:
            print 'File not found. Please try again.'
    else:
        #if no filename entered, exit
        fileentered = False
        sys.exit()