-
Notifications
You must be signed in to change notification settings - Fork 0
/
Count_region_specific_SNPs.py
56 lines (48 loc) · 1.84 KB
/
Count_region_specific_SNPs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/bin/env python2.7
# Written by - Arif Mohammad Tanmoy
# This script was used to analyze the O2-antigen synthesis region of Salmonella Paratyphi A in DOI: (TO-DO).
import os
from optparse import OptionParser
def main():
usage = "usage: %prog [options]"
parser = OptionParser(usage=usage)
parser.add_option("-s", "--snp", action="store", dest="snpall", help="All_SNP position file.", default=True)
parser.add_option("-r", "--reg", action="store", dest="region", help="Region, colon-separated.", default=True)
parser.add_option("-d", "--dir", action="store", dest="directory", help="Folder with SNP position files.", default=True)
parser.add_option("-e", "--ext", action="store", dest="file_extension", help="Extension of the position files.", default=True)
parser.add_option("-o", "--output", action="store", dest="output", help="output file", default="Genotype_specific_SNPs.list")
return parser.parse_args()
(options, args) = main()
## Open files
all_pos = open(options.snpall, 'r')
start,end = str(options.region).split(':')
folder = str(options.directory)
extension = str(options.file_extension)
output = open(options.output, 'w')
# Take all SNPs in the target region in a list
reg_loc = []
for line in open(options.snpall, 'r'):
loc = line.rstrip()
if int(loc) >= int(start) and int(loc) <= int(end):
reg_loc.append(loc)
#print reg_loc
# Generate a list of position filenames
alist = os.listdir(folder)
posfiles = []
for i in range(0, len(alist)):
if str(alist[i]).endswith(extension):
posfiles.append(folder+str(alist[i]))
#print len(posfiles)
# Now Let's count the positions
for i in range(int(start),int(end)):
n=0
for j in range(0, len(posfiles)):
for line in open(str(posfiles[j]), 'r'):
rec = line.rstrip()
if str(i) == rec:
n=n+1
else:
n=n+0
print str(i)+'\t'+str(n)
output.write(str(i)+'\t'+str(n)+'\n')
output.close()