MesserLab · npb596 · Jun 13, 2024 · Jun 13, 2024 · Jun 24, 2024 · Jul 2, 2024
diff --git a/functions/calcPiTheta.txt b/functions/calcPiTheta.txt
@@ -0,0 +1,60 @@
+// Written by Nick Bailey, submitted on 2024-06-13
+// Currently affiliated with CNRS and University of Lyon 1
+// This code is provided in the public domain free of restriction, please credit SLiM-Extras (https://github.com/MesserLab/SLiM-Extras) and myself, Nick Bailey, if you use it
+
+// calculate population genetic statistic pi (designated PiTheta to differentiate from mathematical constant pi)
+// calculations originally in Nei and Li 1979 and Tajima 1983
+function (float)calcPiTheta(object<Genome> genomes, [No<Mutation> muts = NULL], [Ni$ start = NULL], [Ni$ end = NULL])
+// conduct various checks for proper usage copied straight from calcWattersonTheta()
+{ if (genomes.length() == 0)
+		stop("ERROR (calcPiTheta()): genomes must be non-empty.");
+	if (community.allSpecies.length() > 1)
+	{
+		species = unique(genomes.individual.subpopulation.species, preserveOrder=F);
+		if (species.length() != 1)
+			stop("ERROR (calcPiTheta()): genomes must all belong to the same species.");
+		if (!isNULL(muts))
+			if (!all(muts.mutationType.species == species))
+				stop("ERROR (calcPiTheta()): muts must all belong to the same species as genomes.");
+	}
+	else
+	{
+		species = community.allSpecies;
+	}
+
+	if (isNULL(muts))
+		muts = species.mutations;
+
+	// handle windowing
+	if (!isNULL(start) & !isNULL(end))
+	{
+		if (start > end)
+			stop("ERROR (calcPiTheta()): start must be less than or equal to end.");
+		mpos = muts.position;
+		muts = muts[(mpos >= start) & (mpos <= end)];
+		length = end - start + 1;
+	}
+	else if (!isNULL(start) | !isNULL(end))
+	{
+		stop("ERROR (calcPiTheta()): start and end must both be NULL or both be non-NULL.");
+	}
+
+	// narrow down to the mutations that are actually present in the genomes and aren't fixed
+	p = genomes.mutationFrequenciesInGenomes(muts);
+	muts = muts[(p != 0.0) & (p != 1.0)];
+
+	// do the calculation
+	// multiply vector of mutation frequencies by integer count of sequences to get vector of counts of variant sequences
+	varCount = genomes.mutationFrequenciesInGenomes(species.mutations) * size(genomes);
+	// total count of sequences subtracted by count of variant sequences equals count of invariant sequences
+	invarCount = size(genomes) - varCount;
+	// count of pairwise differences per site (assuming a biallelic site) is the product of counts of both alleles, this is then summed for all sites 
+	diffs = sum(varCount * invarCount);
+	// pi is the ratio of pairwise differences to number of possible combinations of the given sequences
+	// the latter is calculated by a standard formula defined in a function above (not by default in SLiM)
+	pi = sum(varCount * invarCount) / combinationTwo(genomes.size());
+	// pi is conventionally averaged per site and this is consistent with SLiM's calculation of Watterson's theta
+	// sequences start at 0, so sequence length is final position plus 1
+	avg_pi = pi / (species.chromosome.lastPosition + 1);
+	return avg_pi;
+}
diff --git a/functions/calcTajimaD.txt b/functions/calcTajimaD.txt
@@ -0,0 +1,65 @@
+// Written by Nick Bailey, submitted on 2024-06-13
+// Currently affiliated with CNRS and University of Lyon 1
+// This code is provided in the public domain free of restriction, please credit SLiM-Extras (https://github.com/MesserLab/SLiM-Extras) and myself, Nick Bailey, if you use it
+
+// calculate neutrality test based on the site frequency spectrum, Tajima's D
+// calculations originally in Tajima 1989
+function (float)calcTajimaD(object<Genome> genomes, [No<Mutation> muts = NULL], [Ni$ start = NULL], [Ni$ end = NULL])
+// conduct various checks for proper usage copied straight from calcWattersonTheta()
+{ if (genomes.length() == 0)
+		stop("ERROR (calcTajimaD()): genomes must be non-empty.");
+	if (community.allSpecies.length() > 1)
+	{
+		species = unique(genomes.individual.subpopulation.species, preserveOrder=F);
+		if (species.length() != 1)
+			stop("ERROR (calcTajimaD()): genomes must all belong to the same species.");
+		if (!isNULL(muts))
+			if (!all(muts.mutationType.species == species))
+				stop("ERROR (calcTajimaD()): muts must all belong to the same species as genomes.");
+	}
+	else
+	{
+		species = community.allSpecies;
+	}
+
+	if (isNULL(muts))
+		muts = species.mutations;
+
+	// handle windowing
+	if (!isNULL(start) & !isNULL(end))
+	{
+		if (start > end)
+			stop("ERROR (calcTajimaD()): start must be less than or equal to end.");
+		mpos = muts.position;
+		muts = muts[(mpos >= start) & (mpos <= end)];
+		length = end - start + 1;
+	}
+	else if (!isNULL(start) | !isNULL(end))
+	{
+		stop("ERROR (calcTajimaD()): start and end must both be NULL or both be non-NULL.");
+	}
+
+	// narrow down to the mutations that are actually present in the genomes and aren't fixed
+	p = genomes.mutationFrequenciesInGenomes(muts);
+	muts = muts[(p != 0.0) & (p != 1.0)];
+
+	// do the calculation
+	// Pi and Watterson's theta functions divide by sequence length so this must be undone in Tajima's D
+	// Sequence length is constant (i.e. no missing data or indels) so this can be applied equally over both metrics
+	diff = (calcPiTheta(genomes) - calcWattersonsTheta(genomes)) * (species.chromosome.lastPosition + 1);
+	// calculate standard deviation of covariance of pi and Watterson's theta
+	k = size(muts);
+	n = genomes.size();
+	a_1 = sum(1 / 1:(n - 1));
+	a_2 = sum(1 / 1:(n - 1 ^ 2));
+	b_1 = (n + 1) / (3 * (n - 1));
+	b_2 = 2 * (n ^ 2 + n + 3) / (9 * n * (n - 1));
+	c_1 = b_1 - 1 / a_1;
+	c_2 = b_2 - (n + 2) / (a_1 * n) + a_2 / a_1 ^ 2;
+	e_1 = c_1 / a_1;
+	e_2 = c_2 / (a_1 ^ 2 + a_2);
+	covar = e_1 * k + e_2 * k * (k - 1);
+	stdev = sqrt(covar);
+	tajima_d = diff / stdev;
+	return tajima_d;
+}
diff --git a/functions/combinationTwo.txt b/functions/combinationTwo.txt
@@ -0,0 +1,9 @@
+// Written by Nick Bailey, submitted on 2024-06-13
+// Currently affiliated with CNRS and University of Lyon 1
+// This code is provided in the public domain free of restriction, please credit SLiM-Extras (https://github.com/MesserLab/SLiM-Extras) and myself, Nick Bailey, if you use it
+
+// calculate simple combination equation with only two possible choices in a given selection
+function (integer)combinationTwo(integer x)
+{
+	return asInteger((x * (x - 1)) / 2);
+}