In [1]:
import numpy as np
import itertools as it
import os

import collections
def recursively_default_dict():
    '''
    Return a defaultdict whose default factory is this same function.

    Missing keys are therefore filled with further nested defaultdicts,
    so values can be assigned at any depth (d['a']['b']['c'] = 1) without
    creating the intermediate levels first.
    '''
    nested_factory= recursively_default_dict
    return collections.defaultdict(nested_factory)

## Demography simulations

> Adapt demographic information from a written (tabular) format into a SLiM recipe.



## Files

Files used for this simulation.

**necessary input**

- Table of demographic information. Very sensitive to formatting. 
> [demos file](https://github.com/SantosJGND/SLiM/blob/master/demos_ABC/demos/PM2013_M4A.txt)
   
- template recipe, initialize() block.
> [template](https://github.com/SantosJGND/SLiM/blob/master/demos_ABC/Recipes/demos_mat/template_matVar.slim)

**final product**
- [Recipes/demos_mat/M4A0.slim](https://github.com/SantosJGND/SLiM/blob/master/demos_ABC/Recipes/demos_mat/M4A0.slim)


In [2]:

from tools.ABC_utilities import read_demofile, demos_to_SLiM


def demo_to_recipe(demo_file,template,batch= 'test',anc_r= '0',Nsamp= 5,sizes= 500, burnin= 5e4, recipe_dir= 'Recipes/demos_mat/'):
    '''
    Turn a demography table into SLiM recipes.

    The table at demo_file is parsed with read_demofile into (tree, demo_data),
    which is handed to demos_to_SLiM together with the template path, the batch
    label and the sampling settings (anc_r, Nsamp, sizes, burnin).

    Returns the (pops, files) pair produced by demos_to_SLiM.

    NOTE(review): recipe_dir is accepted but never forwarded to demos_to_SLiM
    here - confirm whether the output directory is meant to be configurable.
    '''
    # Format strings for the SLiM statements, passed through to demos_to_SLiM.
    slim_keys= {
        'size_key': '\t{}.setSubpopulationSize({});\n',
        'mig_key': '{}.setMigrationRates(c({}), c({}));\n',
        'create_key': 'sim.addSubpopSplit("{}", {}, {});\n',
    }

    tree, demo_data= read_demofile(demo_file)

    pops, files= demos_to_SLiM(
        batch, template, tree, demo_data,
        anc_r= anc_r, Nsamp= Nsamp, sizes= sizes, burnin= burnin,
        **slim_keys
    )

    return pops, files



# Run configuration: batch label (used as recipe file prefix in the output
# below) and the directory setting handed to demo_to_recipe.
batch= 'M4A'
recipe_dir= 'Recipes/demos_mat/'

# NOTE(review): the batch label is 'M4A' and the notes above link
# PM2013_M4A.txt, but this path points at PM2013_M3.txt - confirm which
# demography table is intended.
demo_file= 'demos/PM2013_M3.txt'
template= 'Recipes/demos_mat/template_matVar.slim'

# Sampling settings forwarded to demo_to_recipe.
anc_r= '0'
Nsamp= 5
sizes= 500
burnin= 5e4


# sizes and burnin are passed explicitly so that editing the values above
# actually takes effect (they were previously defined but never forwarded).
pops,files= demo_to_recipe(demo_file, template,batch= batch,anc_r= anc_r,Nsamp= Nsamp,
                           sizes= sizes, burnin= burnin, recipe_dir=recipe_dir)


print(files)

[10543  1234 71162]
['Recipes/demos_mat/M4A0.slim', 'Recipes/demos_mat/M4A1.slim', 'Recipes/demos_mat/M4A2.slim', 'Recipes/demos_mat/M4A3.slim', 'Recipes/demos_mat/M4A4.slim']


### Functions of interest.

### I. Sampling.

Currently using `sample_dist_beta`. This function uses the median and the confidence interval to determine the skew of the proposed distribution, and then uses the beta distribution to emulate this skew.

In [10]:
from scipy.stats import norm

def sample_dist(nsample,median,ll_cl,up_cl,assume='norm',func= '',func_args= [3],source= 0):
    '''
    Draw nsample values from a normal distribution fitted to a confidence interval.

    The mean is the midpoint of [ll_cl, up_cl]; the standard deviation is half
    the distance between that midpoint and up_cl.

    If func is truthy it is applied to every draw as func(x, *func_args) and a
    list is returned; otherwise the array produced by norm.rvs is returned.

    median, assume and source are accepted but not used by this function.
    '''
    centre= (ll_cl + up_cl) / 2
    spread= (up_cl - centre) / 2

    draws= norm.rvs(size= nsample, scale= spread, loc= centre)

    if not func:
        return draws

    return [func(value, *func_args) for value in draws]


from scipy.stats import beta



def sample_dist_beta(nsample,median,ll_cl,up_cl,blur= 8,assume='norm',func= '',func_args= [3],source= 0):
    '''
    Use beta distribution to add skew.

    The relative position of median inside [ll_cl, up_cl] (0 at ll_cl, 1 at
    up_cl) is mapped onto an angle between 0 and pi/2. The beta shape
    parameters are a= sin(angle) * blur and b= cos(angle) * blur, so a median
    near the lower bound pushes the mass towards the lower bound and
    vice versa; blur scales both shape parameters.

    Parameters:
        nsample: number of values to draw.
        median, ll_cl, up_cl: point estimate and interval bounds.
        blur: multiplier applied to both beta shape parameters.
        func, func_args: if func is truthy, every draw is replaced by
            func(x, *func_args) and a list is returned.
        source: if truthy, the raw beta draws on the unit interval are kept;
            otherwise they are rescaled to [ll_cl, up_cl].
        assume: accepted but not used by this function.

    A zero-width interval (ll_cl == up_cl) carries no skew information; it is
    treated as symmetric (relative position 0.5) instead of dividing by zero,
    so rescaled draws all equal ll_cl.

    NOTE(review): with a non-zero window, a median at or below ll_cl, or above
    up_cl, gives a non-positive shape parameter; scipy's beta is expected to
    reject that with a ValueError - confirm this is the desired behaviour.
    '''
    window= up_cl - ll_cl

    if window == 0:
        # degenerate interval: no spread, hence no skew to emulate.
        rate= 0.5
    else:
        rate= (median - ll_cl) / window

    quarter_turn= np.pi / 2

    a= np.sin(rate * quarter_turn) * blur
    b= np.cos(rate * quarter_turn) * blur

    draws= beta.rvs(a, b, size=nsample)

    if not source:
        # map the unit-interval draws back onto the confidence interval.
        draws= draws * window + ll_cl

    if func:
        draws= [func(x,*func_args) for x in draws]

    return draws


### II. Initialize and sampling blocks

The content of these functions determines how samples are extracted, as well as the initial blocks.

In [4]:
def ancestral_initialize(anc_name= 'p1',anc_size= 20000,return_list= True):
    '''
    Build the SLiM generation-1 block that adds the ancestral subpopulation.

    anc_name and anc_size are substituted into the sim.addSubpop call; the
    block also prints the first ancestral nucleotides of the chromosome.

    Returns the block as a single string when return_list is falsy. Otherwise
    returns a list of lines, each terminated by a newline; because the block
    itself ends in a newline, the last list element is a bare newline.
    '''
    body_template= '''
    sim.addSubpop("{}", {});
    c = sim.chromosome;
    catn("Ancestral: " + paste(c.ancestralNucleotides(format="char")[0:20],sep=""));
    catn();\n'''

    block= ''.join(["""1 {\n""", body_template.format(anc_name,str(anc_size)), """}\n"""])

    if not return_list:
        return block

    return [piece + '\n' for piece in block.split('\n')]


def sample_block(gen= 60000,pops= ['p1','p2'],sizes= [500,500]):
    """
    Build the SLiM late() block that samples individuals and writes a VCF.

    pops and sizes are each joined with commas and substituted into the
    Eidos vectors `pops` and `samples`; gen is the generation in which the
    block runs. The generated code writes to `vcf_file`, which is not
    defined by this block.

    Returns the block as a list of newline-terminated lines, the last
    being the closing brace.

    NOTE(review): in the generated loop the loop variable x is used as an
    index into both sim.subpopulations and samples - confirm that the
    entries of pops are valid indices for that purpose.
    """
    pop_csv= ','.join(pops)
    size_csv= ','.join(map(str, sizes))

    body_template= """
    g = c();
    pops= c({});
    samples= c({});
    for (x in pops) 
        g= c(g, sim.subpopulations[x].sampleIndividuals(samples[x]).genomes);

    g.outputVCF(vcf_file,simplifyNucleotides=T);
    """

    header= """{} late() """.format(gen) + """{\n"""
    block_text= header + body_template.format(pop_csv,size_csv)

    block_lines= [piece + '\n' for piece in block_text.split('\n')]
    block_lines.append("""}\n""")

    return block_lines
