## Simulate non-cyanogenic evolution via drift alone

There is a finding that across a cline from less to more urban the frequency of a non-cyanogenic phenotype increases
The phenotype is genetically controlled by two loci, both of which have a segregating knock-out allele
If any individual is homozygous for either knockout they become non-cyanogenic

In this extremely simple simulation I create a 'population' which is represented by 2 lists of alleles (A/a and B/b)
To simulate evolution I randomly sample with replacement from the lists to create new list that represent the next generation.

By repeating this process with variable starting frequencies, population sizes and numbers of generations (functionally equivalent to steps in a strict stepping stone) we can look at the change in the frequency of cyanogenic and non-cyanogenic phenotypes

In [614]:
# Modules used throughout script
import random
from collections import OrderedDict
import csv
import time
from datetime import datetime
import os


# Randomly sample 'N' alleles from lists containing alleles for locus A (allels: A or a) and locus B (alleles: B or b). 
# Return list containing sampled alleles.
def sample_population_A(locus_A, N):
    new_locus_A = [random.choice(locus_A) for _ in range(N)]
    return new_locus_A

def sample_population_B(locus_B, N):
    new_locus_B = [random.choice(locus_B) for _ in range(N)]
    return new_locus_B

# From list containing alleles, calculate the frequency of 'A' or 'B' allele. 
def allele_freq(locus):
    p = sum(1*i.isupper() for i in locus)/float(len(locus))
    return p

# Join list containing simulation results (e.g. Population size, allele frequencies, phenotype frequency, etc.) with 
# list containing iteration number (i.e. sim)
def final_results(results,sim):
    return [a + b for a,b in zip(results,sim)]


# Given the frequencies of 'A' and 'B' alleles, return the frequency of the 'acyanogenic' phenotype (i.e. recessive
# at either the A locus, B locus, or both) 
def phenotype(pA, pB):
    qA = 1-pA
    qB = 1-pB
    mut= qA**2 + qB**2 - (qA**2 * qB**2)
    WT = 1-mut
    return mut # Frequency of acyanogenic phenotype

# Randomly samples N alleles from locus A and locus B, calculates the frequency of both alleles followed by the frequency
# of 'acyanogenesis' phenotype and adds this frequency to step_dict list. Repeats process "step" times. Note that locus_A
# and locus_B are defined in the for loop. Therefore with each generation (i.e. step), loci are sampled from those sampled
# in the previous generation. This is analogous to a stepping stone model. 
def cline(locus_A,locus_B, steps, N):
    for i in range(steps):
        #print i,
        locus_A, locus_B = (sample_population(locus_A,locus_B, N))
        pA, pB = allele_freq(locus_A), allele_freq(locus_B)
        #print pA, pB, phenotype(pA, pB)
        results.append([N,i,pA,pB,phenotype(pA,pB)])
    return results # results contains pop. size (N), step, (generation), allele frequencies and frequency of acyanogenic phenotype

        
# Using the functions defined above, 'simulate' performs 'sims' iterations of the cline function -- simulating the effects 
# of drift in a stepping stone model -- each time storing the results.
def simulate(pA, pB, steps, N, sims):
    qA = 1-pA # Frequency of 'a' allele
    qB = 1-pB
    # Make the two lists based on the allele frequency to represent the initial population
    locus_A = (['A'] * int(N*pA) ) + (['a'] * int(round(N*qA)) ) # [A,A,A,A,a,a,a,a,....]
    locus_B = (['B'] * int(N*pB) ) + (['b'] * int(round(N*qB)) ) 
    ####### sims simulations #####################
    # We will simulate 'steps' iterations of resampling this population to simulate drift
    # We will then repeat that simulation of 'steps' iterations 1000 times to get a mean
    ##############################################
    for i in range(sims):
        # reset the population for each iteration. I don't actually think this is necessary
        locus_A = (['A'] * int(N*pA) ) + (['a'] * int(round(N*qA)) ) 
        locus_B = (['B'] * int(N*pB) ) + (['b'] * int(round(N*qB)) ) 
        cline(locus_A,locus_B, steps, N) # appends results to globally defined 'results' list
        for x in range(steps):sim.append([i])


In [2]:
##############################
#### VARY POPULATION SIZE ####
##############################

# We will vary population size as follows:
# Population size (N): (Start, End, By) -- (10,100,10);(100,500,100);(500,1000,500);(1000,5000,1000);(5000,10000,5000)
# 'A' and 'B' alleles held constant at 0.5. 
# 'Steps' held constant at 50
# 'Sims' held constant at 10000
# All results exported as 'csv' files for import and analysis in R

start_time = time.time()
datestring = datetime.strftime(datetime.now(), '%Y%m%d')

N = 10 # Starting population size (i.e. sample 10 alleles)
pA = 0.5
pB = 0.5
steps = 50
sims = 10
results = [['N','step','pA','pB','Phen']] # Results stored here. Header added.
sim = [['sim']] # Iteration of cline function (i.e. 'sim') stored here. Will later be appended to 'results'
fixed = [['Up','Down']] # Proportion of simulations resulting in fixation stored here
for i in range(100):
    if N >=10 and N < 100: # When N between 10 and 100, run simulation, increment population size by 10
        simulate(pA,pB,steps,N,sims) 
        N += 10
    elif N >= 100 and N < 500:
        simulate(pA,pB,steps,N,sims)
        N += 100
    elif N >= 500 and N < 1000:
        simulate(pA,pB,steps,N,sims)
        N += 500
    elif N >= 1000 and N <= 5000:
        simulate(pA,pB,steps,N,sims)
        N += 1000
    elif N > 5000:
        # Once population size reaches 5000, generate final results dataset and write as 'csv' to datasets folder
        final = final_results(results,sim) # Store final results with 'sim' appended to the rest of 'results'
        os.chdir("/Users/jamessantangelo/Documents/Academia/Master's/SEC - Simulating evolutionary clines/Datasets/Nvary")
        with open(datestring+"_SEC_Drift_Nvary.results.csv", "wb") as f:
            writer = csv.writer(f)
            writer.writerows(final)
        break

# Change directory back to folder containing python code
os.chdir("/Users/jamessantangelo/Documents/Academia/Master's/SEC - Simulating evolutionary clines/Python code/Nvary")

print "My program took", time.time() - start_time, "seconds to run"

My program took 38609.6228969 seconds to run


In [613]:
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


What needs to be considered when adding spatial structure to these simulations?

1) Need a function that creates new populations with some probability that is set by us
2) This new function should occur within the 'cline' function such that every generation there is some probability that a new population is created. 
3) This should "snowball". In other words the number of populations created should increase with the total number of populations.
4) Ideally, newly created populations should start with some fraction of alleles (specified by us), randomly sampled from the population that created it. 

In [615]:
N = 10 # Starting population size (i.e. sample 10 alleles)
pA = 0.5
pB = 0.5
qA = 1 - pA
qB = 1 - pB
steps = 20
sims = 10
locus_A = (['A'] * int(N*pA) ) + (['a'] * int(round(N*qA)) ) 
locus_B = (['B'] * int(N*pB) ) + (['b'] * int(round(N*qB)) ) 

In [616]:
pops = OrderedDict({'1':[]})
alleles = OrderedDict({'1':{'A':[sample_population_A(locus_A,N)],'B':[sample_population_B(locus_B,N)]}})
pop_counter = 1
def create_population(p):
    prob_list = (['1'] * int(10*p) ) + (['0'] * int(round(10*(1-p))))
    create = [random.choice(prob_list) for _ in range(1)]
    if create[0] == '1':
        global pop_counter
        pop_counter += 1
        pops['{0}_{1}'.format(pop_counter,key)] = []
        alleles['{0}_{1}'.format(pop_counter,key)] = {'A':[],'B':[]}

In [529]:
def cline(locus_A,locus_B, steps, N, pA, pB):
        for i in range(steps):
            for Akey, Avalue in alleles.items():
                if Akey in pops.keys():
                    if 'A' in Avalue.keys():
                        Avalue['A'] = (sample_population_A(Avalue['A'], N))
                    if 'B' in Avalue.keys():
                        Avalue['B'] = (sample_population_B(Avalue['B'], N))
            for Pkey, Pvalue in pops.items():
                if Pkey in alleles.keys():
                    pA = allele_freq(alleles[Pkey].items()[0][1]) 
                    pB = allele_freq(alleles[Pkey].items()[1][1])
                    Pvalue.append([N, i, pA, pB, phenotype(pA, pB)])
        return pops

In [530]:
N = 10 # Starting population size (i.e. sample 10 alleles)
pA = 0.5
pB = 0.5
qA = 1 - pA
qB = 1 - pB
steps = 20
sims = 10
results = [['N','s','step','pA','pB','Phen']] # Results stored here. Header added.
sim = [['sim']] # Iteration of cline function (i.e. 'sim') stored here. Will later be appended to 'results'
locus_A = (['A'] * int(N*pA) ) + (['a'] * int(round(N*qA)) ) 
locus_B = (['B'] * int(N*pB) ) + (['b'] * int(round(N*qB)) ) 
cline(locus_A,locus_B,steps,N,pA,pB)

AttributeError: 'list' object has no attribute 'isupper'

In [436]:
for x in range(2):
    for key in pops.keys():
        create_population(1)
    

In [626]:
for x in range(10):
    for Akey, Avalue in alleles.items():
        if Akey in pops.keys():
            if 'A' in Avalue.keys():
                Avalue['A'] = (sample_population_A(Avalue['A'], N))
            if 'B' in Avalue.keys():
                Avalue['B'] = (sample_population_B(Avalue['B'], N))
        

In [205]:
N = 10
pA = 0.5
pB = 0.5
qA = 1 - pA
qB = 1 - pB
locus_A = (['A'] * int(N*pA) ) + (['a'] * int(round(N*qA)) ) 
locus_B = (['B'] * int(N*pB) ) + (['b'] * int(round(N*qB)) ) 
for key, value in pops.iteritems():
    locus_A, locus_B = (sample_population(locus_A,locus_B, N))
    pA, pB = allele_freq(locus_A), allele_freq(locus_B)
    value = pA, pB
    print key, value

1 (0.6, 0.5)
2 (0.5, 0.4)


In [624]:
for x in range(10):
    for Akey, value in alleles.items():
        if Akey in pops.keys():
            if 'A' in value.keys():
                value['A'] = (sample_population_A(locus_A, N))
            if 'B' in value.keys():
                value['B'] = (sample_population_B(locus_B, N))

In [627]:
alleles

OrderedDict([('1',
              {'A': ['a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a'],
               'B': ['b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b']})])

In [628]:
for Pkey, Pvalue in pops.items():
    if Pkey in alleles.keys():
        s1 = allele_freq(alleles[Pkey].items()[0][1]) 
        s2 = allele_freq(alleles[Pkey].items()[1][1])
        

In [629]:
s2

0.0

In [630]:
s1, s2

(0.0, 0.0)