In [2]:
import msprime, tskit
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

intro

In [20]:
sample_size = 20 #number of samples per deme

mig = 3.8866e-7
seqLength = 32e3 
recr = 1.84675e-8
Ne0 = 2.3241e6
Ne1 = 9.8922e5 
splitT = 4.8580e6
mu = 1.9e-9

population_configurations = [
    msprime.PopulationConfiguration(sample_size=sample_size, initial_size=Ne0),
    msprime.PopulationConfiguration(sample_size=sample_size, initial_size=Ne1),
    ]
    
#demographic events: specify in the order they occur backwards in time
demographic_events = [
    msprime.PopulationParametersChange(time=splitT, initial_size=Ne0, population_id=0),
    msprime.MassMigration(time=splitT, source=1, destination=0, proportion=1.0),
    ]

In [21]:
replicates = msprime.simulate(
        num_replicates = 1,
        length = seqLength, 
        recombination_rate = recr,
        population_configurations = population_configurations,
        demographic_events = demographic_events,
        migration_matrix = [[0,0],
                            [mig,0]],
        mutation_rate = mu)

In [None]:
for index, ts in enumerate(replicates):
    msprime.mutate(ts, rate=mu, keep=True)
    with open('sim{}.vcf'.format(str(index)), 'w') as vcf_file:
        ts.write_vcf(vcf_file, ploidy=2)
    ts.dump('sim{}.trees'.format(str(index)))

or run all this in in script that can be run on multiple cores
change parametes within sim_chuncks.py as desired

In [None]:
%run sim_chuncks.py

run tskit Fst scan and
analyse output using script as provided in the Fst_istogram notebook

checking the impact of recombination rate variation

hapmap is a dataframe with recombination rate estimates (cM/Mb) for our case study
We can draw a random sample of those values, run the msprime simulation again and compare this to the outcome we obtained without recombination rate variation

In [8]:
hapmap = pd.read_csv('hapmap.csv')
hapmap['relative length'] = hapmap['Mb']/hapmap['Mb'].sum()
hapmap['recombination'] = hapmap['cM/Mb']/100/(hapmap['Mb']*1e6)
hapmap.tail()

Unnamed: 0,Start,cM/Mb,cM,Mb,relative length,recombination
275,15279126,937.756243,52.836,0.056343,0.003353,0.000166437
276,15335469,178.868009,52.836,0.295391,0.017579,6.055296e-06
277,15630860,2493.313312,53.135,0.021311,0.001268,0.001169965
278,15652171,46.135385,53.135,1.151719,0.068539,4.005785e-07
279,16803890,0.0,0.0,0.0,0.0,


In [29]:
#simulate replicates with different recombination rates cfr. windows 
#specify a distribution to draw values from
#choice can be weighted by the length of each interval, not necessary when windowwise estimates are given

recomb = np.random.choice(hapmap['recombination'],size=20, p=hapmap['relative length'])

replicates = [msprime.simulate(
        length = seqLength, 
        recombination_rate = rate,
        population_configurations = population_configurations,
        demographic_events = demographic_events,
        migration_matrix = [[0,0],
                            [mig,0]],
        mutation_rate = mu) 
for rate in recomb]

In [13]:
#results for when half the chromosome is simulated in its entirety 

building a secondary contact model

In [None]:
splitT = 4.8580e6
secT = 2e3 #moment of secondary contact
proportion =0.1 #proportion of ancestral population mass migrating into the population that split of at time T

population_configurations = [
    msprime.PopulationConfiguration(sample_size=sample_size, initial_size=Ne0),
    msprime.PopulationConfiguration(sample_size=sample_size, initial_size=Ne1),
    ]
    
#demographic events: specify in the order they occur backwards in time
demographic_events = [
    msprime.PopulationParametersChange(time=splitT, initial_size=Ne0, population_id=0),
    msprime.MassMigration(time=splitT, source=1, destination=0, proportion=1.0),
    msprime.MassMigration(time=secT, source=1, destination=0, proportion=proportion)
    ]

replicates = msprime.simulate(
        num_replicates = 1,
        length = seqLength, 
        recombination_rate = recr,
        population_configurations = population_configurations,
        demographic_events = demographic_events,
        mutation_rate = mu)