forked from xyc0813/pysim
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pysim_main.py
138 lines (130 loc) · 7.3 KB
/
pysim_main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
from pysim1 import *
import random
import sys
from optparse import OptionParser
import getopt
import time
import os
import ConfigParser
def main():
usage = """%prog -c ini.config
Author: Yuchao Xia
Description: simulate germline and somatic SVs.
"""
parser = OptionParser(usage)
parser.add_option("-c", "--config", dest="config", help="config file",metavar="FILE")
(opts, args) = parser.parse_args()
if opts.config is None:
parser.print_help()
else:
cf = ConfigParser.ConfigParser()
cf.read(opts.config)
SV_config_file=cf.get("pysim_settings", "SV_config_file")
ref_fasta=cf.get("pysim_settings", "ref_fasta")
dbsnp=cf.get("pysim_settings", "dbsnp")
somatic=cf.get("pysim_settings", "somatic")
germline_num=cf.getint("pysim_settings", "germline_num")
somatic_num=cf.getint("pysim_settings", "somatic_num")
db=cf.get("pysim_settings", "db")
somatic_SNP_db=cf.get("pysim_settings", "somatic_SNP_db")
hyp_rate=cf.getfloat("pysim_settings",'hyp_rate')
up_down_stream=cf.getint("pysim_settings", "up_down_stream")
snp_rate=cf.getfloat("pysim_settings",'snp_rate')
indel_prob=cf.getfloat("pysim_settings",'indel_prob')
min_indel_length=cf.getint("pysim_settings", "min_indel_length")
max_indel_length=cf.getint("pysim_settings", "max_indel_length")
sub_clone=cf.getint("pysim_settings", 'sub_clone')
out_prex=cf.get("pysim_settings", "out_prex")
chrome_can=cf.get("pysim_settings", "chrome")
germ_ratio=cf.get("pysim_settings", "germ_ratio")
sub_sub_clone=cf.getint("pysim_settings",'sub_sub_clone')
len_list=read_config(SV_config_file)
ref=reference(ref_fasta)
ref_range=compute_range(ref)
snp_dic=read_dbsnp(dbsnp,chrome_can)
#snp_dic=read_vcf(dbsnp,chrome_can)
if somatic=='Y':
ref_result=generate_normal(ref,snp_dic,germline_num,'germline_SNP_pois.txt',hyp_rate)
ref_germline=ref_result[0]
ref_germline_new=ref_germline
snp_list=ref_result[1]
germline_dic={}
output(ref_germline_new,germline_dic,out_prex+'germline.fa')
for ttttt in snp_list:
print ['snp_list_germline',len(snp_list[ttttt])]
ref_somatic_result=generate_somatic(ref_germline,snp_dic,snp_list,somatic_num,'somatic_SNP_pois.txt',db,ref_range,hyp_rate)
ref_somatic=ref_somatic_result[0]
snp_list_somatic=ref_somatic_result[1]
for ttttt in snp_list_somatic:
print ['snp_list',len(snp_list_somatic[ttttt])]
dic=generate_pois(len_list,ref,ref_range)
germline_dic={}
if float(germ_ratio)>0:
for key in dic:
tmp=random.sample(dic[key],len(dic[key])/2)
tmp_sort=sorted(tmp,key=lambda d:d[0],reverse=True)
germline_dic[key]=tmp_sort
dic_remove={}
dic_tmp=[]
for key in dic:
for line in dic[key]:
if line not in germline_dic[key]:
dic_tmp.append(line)
else:
continue
dic_remove[key]=dic_tmp
dic_tmp=[]
outfile=open(out_prex+'SV_germline.txt','w')
ref_germline=generate_fasta(ref_germline,germline_dic,snp_rate,outfile,ref_range,up_down_stream,indel_prob,min_indel_length,max_indel_length)
outfile.close()
output(ref_germline_new,germline_dic,out_prex+'germline.fa')
#else:
# output(ref_germline_new,germline_dic,out_prex+'germline.fa')
outfile=open(out_prex+'SV_somatic.txt','w')
ref_somatic=generate_fasta(ref_somatic,dic,snp_rate,outfile,ref_range,up_down_stream,indel_prob,min_indel_length,max_indel_length)
outfile.close()
output(ref_somatic,dic,out_prex+'somatic.fa')
if sub_clone>=2:
for i in range(sub_clone-1):
print('run subclone '+str(sub_clone-1))
sub_len_list=random.sample(len_list,len(len_list))
sub_dic=generate_subclone_pois(sub_len_list,ref,ref_range,germline_dic)
new_sub_dic=add_dic(sub_dic,germline_dic)
ref_somatic_sub_result=generate_somatic(ref_germline_new,snp_dic,snp_list,somatic_num,out_prex+'somatic_SNP_pois_subclone_'+str(i+1)+'.txt',db,ref_range,hyp_rate)
ref_sub_somatic=ref_somatic_sub_result[0]
ref_sub_somatic_new=ref_sub_somatic
snp_list_sub_somatic=ref_somatic_sub_result[1]
snp_list_sub_somatic_new=snp_list_sub_somatic
outfile=open(out_prex+'SV_somatic_subclone_'+str(i+1)+'.txt','w')
ref_sub_somatic=generate_fasta(ref_sub_somatic,new_sub_dic,snp_rate,outfile,ref_range,up_down_stream,indel_prob,min_indel_length,max_indel_length)
outfile.close()
output(ref_sub_somatic,new_sub_dic,out_prex+'somatic_subclone_'+str(i+1)+'.fa')
if sub_sub_clone>0:
for j in range(sub_sub_clone):
sub_len_list=random.sample(len_list,len(len_list))
sub_dic=generate_subclone_pois(sub_len_list,ref,ref_range,germline_dic)
new_sub_dic_1=add_dic(sub_dic,new_sub_dic)
ref_somatic_sub_result=generate_somatic(ref_somatic_new,snp_dic,snp_list_sub_somatic_new,somatic_num,out_prex+'somatic_SNP_pois_sub_subclone_'+str(j+1)+'.txt',db,ref_range,hyp_rate)
ref_sub_somatic_new=ref_somatic_sub_result[0]
snp_list_sub_somatic_new=ref_somatic_sub_result[1]
outfile=open(out_prex+'SV_somatic_sub_subclone_'+str(i+1)+'.txt','w')
ref_sub_somatic=generate_fasta(ref_sub_somatic_new,new_sub_dic_1,snp_rate,outfile,ref_range,up_down_stream,indel_prob,min_indel_length,max_indel_length)
outfile.close()
output(ref_sub_somatic,new_sub_dic,out_prex+'somatic_sub_subclone_'+str(i+1)+'.fa')
else:
ref_result=generate_normal(ref,snp_dic,germline_num,'germline_SNP_pois.txt',hyp_rate)
ref_germline=ref_result[0]
snp_list=ref_result[1]
dic=generate_pois(len_list,ref,ref_range)
germline_dic=dic
outfile=open('SV_germline.txt','w')
ref_germline=generate_fasta(ref_germline,germline_dic,snp_rate,outfile,ref_range,up_down_stream,indel_prob,min_indel_length,max_indel_length)
outfile.close()
output(ref_germline,germline_dic,'germline.fa')
if __name__ == "__main__":
start = time.clock()
print('simulation begins at:'+str(start))
main()
end = time.clock()
print('simulation ends at:'+str(end))
print("The function run time is : %.03f seconds" %(end-start))