From be0a3a050c534bb2b51fb4b8b07ba3789a6dbf7a Mon Sep 17 00:00:00 2001
From: Josh Schraiber <jgschraiber@gmail.com>
Date: Fri, 29 Sep 2017 16:35:14 -0400
Subject: [PATCH] added contamination to simulations

---
 ancient_genotypes_simulation.py          | 13 +++++-
 ancient_relationships/coverage_table.txt | 51 ------------------------
 2 files changed, 11 insertions(+), 53 deletions(-)
 delete mode 100644 ancient_relationships/coverage_table.txt

diff --git a/ancient_genotypes_simulation.py b/ancient_genotypes_simulation.py
index 42e36a5..8338d9f 100644
--- a/ancient_genotypes_simulation.py
+++ b/ancient_genotypes_simulation.py
@@ -2,7 +2,7 @@
 import scipy.stats as st
 import numpy as np
 
-def ancient_sample_mix_multiple(num_modern=1000,anc_pop = 0, anc_num = 1, anc_time=200,mix_time=300,split_time=400,f=0.0,Ne0=10000,Ne1=10000,mu=1.25e-8,length=1000,num_rep=1000, error = None, coverage=False, seed = None):
+def ancient_sample_mix_multiple(num_modern=1000,anc_pop = 0, anc_num = 1, anc_time=200,mix_time=300,split_time=400,f=0.0,Ne0=10000,Ne1=10000,mu=1.25e-8,length=1000,num_rep=1000, error = None, contamination = None, coverage=False, seed = None):
 	if mix_time > split_time:
 		print "mixture occurs more anciently than population split!"
 		return None
@@ -11,6 +11,14 @@ def ancient_sample_mix_multiple(num_modern=1000,anc_pop = 0, anc_num = 1, anc_ti
 		return None
 	if error is None:
 		error = np.zeros(anc_num)
+	if contamination is None:
+		contamination = np.zeros(anc_num)
+	if len(error) != anc_num: 
+		print "Incorrect number of error parameters"
+		return None
+	if len(contamination) != anc_num:
+		print "Incorrect number of contamination parameters"
+		return None
 	samples = [msp.Sample(population=0,time=0)]*num_modern
 	samples.extend([msp.Sample(population=anc_pop,time=anc_time)]*(2*anc_num))
 	pop_config = [msp.PopulationConfiguration(initial_size=Ne0),msp.PopulationConfiguration(initial_size=Ne1)]
@@ -37,7 +45,8 @@ def ancient_sample_mix_multiple(num_modern=1000,anc_pop = 0, anc_num = 1, anc_ti
 				reads[-1].append([None,None])
 				if coverage:
 					num_reads = st.poisson.rvs(coverage)
-					p_der = cur_GT/2.*(1-error[i])+(1-cur_GT/2.)*error[i]
+					p_der = (1-contamination[i])*(cur_GT/2.*(1-error[i])+(1-cur_GT/2.)*error[i])
+					p_der += contamination[i]*(cur_freq*(1-error[i])+(1-cur_freq)*error[i])
 					derived_reads = st.binom.rvs(num_reads, p_der)
 					reads[-1][-1] = (num_reads-derived_reads,derived_reads)
 	return np.array(freq), GT, reads
diff --git a/ancient_relationships/coverage_table.txt b/ancient_relationships/coverage_table.txt
deleted file mode 100644
index dac46d3..0000000
--- a/ancient_relationships/coverage_table.txt
+++ /dev/null
@@ -1,51 +0,0 @@
-# coverage	num_ind	t1mean	t1sd	t2mean	t2sd
-0.5	1	0.02057	0.00660	1.16716	3.06794
-0.5	2	0.02088	0.00432	0.04897	0.01238
-0.5	3	0.02083	0.00359	0.04925	0.00820
-0.5	4	0.02139	0.00311	0.04930	0.00640
-0.5	5	0.02101	0.00306	0.04906	0.00505
-0.5	6	0.02083	0.00275	0.04940	0.00446
-0.5	7	0.02108	0.00236	0.04883	0.00403
-0.5	8	0.02084	0.00234	0.04957	0.00348
-0.5	9	0.02103	0.00206	0.04979	0.00300
-0.5	10	0.02077	0.00197	0.04961	0.00301
-1.0	1	0.02177	0.00533	0.52932	2.09196
-1.0	2	0.02139	0.00297	0.04942	0.00858
-1.0	3	0.02150	0.00309	0.04895	0.00639
-1.0	4	0.02118	0.00252	0.04933	0.00433
-1.0	5	0.02101	0.00238	0.04981	0.00348
-1.0	6	0.02104	0.00192	0.04957	0.00293
-1.0	7	0.02088	0.00176	0.04981	0.00256
-1.0	8	0.02104	0.00181	0.04940	0.00239
-1.0	9	0.02114	0.00181	0.05023	0.00942
-1.0	10	0.02081	0.00177	0.04986	0.00229
-2.0	1	0.02049	0.00494	0.05338	0.03693
-2.0	2	0.02134	0.00291	0.04856	0.00555
-2.0	3	0.02101	0.00207	0.04880	0.00475
-2.0	4	0.02103	0.00208	0.04946	0.00315
-2.0	5	0.02074	0.00173	0.04998	0.00271
-2.0	6	0.02109	0.00159	0.04962	0.00232
-2.0	7	0.02081	0.00159	0.04966	0.00242
-2.0	8	0.02091	0.00146	0.04978	0.00196
-2.0	9	0.02093	0.00147	0.04956	0.00188
-2.0	10	0.02095	0.00136	0.04955	0.00186
-4.0	1	0.02055	0.00419	0.04821	0.01086
-4.0	2	0.02099	0.00241	0.04875	0.00747
-4.0	3	0.02091	0.00197	0.04915	0.00433
-4.0	4	0.02097	0.00186	0.04956	0.00319
-4.0	5	0.02102	0.00145	0.04921	0.00315
-4.0	6	0.02089	0.00145	0.04960	0.00268
-4.0	7	0.02090	0.00156	0.04942	0.00296
-4.0	8	0.02089	0.00126	0.04956	0.00210
-4.0	9	0.02090	0.00129	0.04983	0.00171
-4.0	10	0.02073	0.00129	0.04968	0.00203
-8.0	1	0.02080	0.00326	0.04914	0.00818
-8.0	2	0.02091	0.00242	0.04857	0.00798
-8.0	3	0.02110	0.00162	0.04931	0.00417
-8.0	4	0.02076	0.00177	0.04892	0.00539
-8.0	5	0.02086	0.00139	0.04962	0.00228
-8.0	6	0.02079	0.00138	0.04934	0.00305
-8.0	7	0.02091	0.00144	0.04947	0.00289
-8.0	8	0.02089	0.00124	0.04972	0.00212
-8.0	9	0.02076	0.00126	0.04944	0.00326
-8.0	10	0.02084	0.00106	0.04955	0.00192