In [1]:
import numpy as np
import itertools as it
import os

import collections
def recursively_default_dict():
    """Create an empty, arbitrarily nestable defaultdict.

    Any missing key is filled with another defaultdict built by this same
    function, so nested levels can be assigned without creating them first.

    Returns:
        collections.defaultdict: an empty mapping whose default factory is
        this function.
    """
    nested_factory = recursively_default_dict
    return collections.defaultdict(nested_factory)

from _plotly_future_ import v4_subplots
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import *

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
# Initialise plotly's offline notebook mode before the iplot calls in later cells.
# NOTE(review): connected=True presumably loads plotly.js from a CDN rather than
# embedding it in the notebook — confirm against the plotly.offline documentation.
init_notebook_mode(connected=True)
    

## Demographic Input


Liu _et al._ (2018) sequenced 81 rhesus macaques (RM, _Macaca mulatta_) from 17 locations in China. The sample encompasses 5 subspecies. The authors performed demographic inference on this data and published samples of parameter estimates for the resulting models (Supplementary Table S5).

**Context** Our aim is to model the evolution of rhesus subspecies using the software SLiM. For this we previously developed a pipeline that converts tables of model parameter estimates (branch length, branch size and split times) into SLiM recipes; see [demos ABC](https://github.com/SantosJGND/SLiM/tree/master/demos_ABC).

The purpose of this notebook is to process Table S5 into a format that can then be used by `demos_ABC` to produce SLiM recipes.

### I. Reading input

In [66]:
# Tab-separated table of parameter estimates: a header row followed by the sample rows.
param_filename= 'bootstrap100.result.txt'

# Split every row of the table into its tab-separated fields.
with open(param_filename,'r') as fp:
    lines= [row.strip().split('\t') for row in fp.readlines()]

# The header row holds the parameter names; every other row holds one set of estimates.
params_names= lines[0]
# Transposed so that params[i] holds all the samples of the i-th column.
params= np.array(lines[1:],dtype= float).T

# Map each parameter name to its array of samples.
params_dict= {
    params_names[col_idx]: column for col_idx, column in enumerate(params)
}


### II. Visualizing parameter samples.



In [62]:
# Show the parameter names read from the header row of the input table.
params_dict.keys()

dict_keys(['Ntc', 'Nli', 'Nbr', 'Nla', 'Nmu', 'NA1', 'NA2', 'NA3', 'NA4', 't1', 't2', 't3', 't4', 'Mmu2la', 'Mla2mu', 'Mli2la', 'Mla2li', 'Mli2tc', 'Mtc2li', 'Mmu2li', 'Mli2mu', 'Mla2tc', 'Mtc2la', 'Mli2br', 'Mbr2li', 'MaxEstLhood', 'MaxObsLhood'])

In [67]:
## plot the distribution of the maximum estimated likelihood across samples
import plotly.figure_factory as ff
select= 'MaxEstLhood'
# NOTE(review): nothing in this cell draws random numbers; the seed is kept only
# so that the global RNG state is the same as before.
np.random.seed(1)

# One histogram trace holding every sample of the selected column.
fig = [go.Histogram(
    x=params_dict[select]
)]

# Label both axes so the figure can be read on its own.
layout= go.Layout(
    title= select,
    xaxis= dict(title= select),
    yaxis= dict(title= 'count')
)

Figure= go.Figure(data= fig,layout=layout)

iplot(Figure)

In [74]:
## plot the distribution of the samples of one selected parameter
import plotly.figure_factory as ff
select= 'NA2'
# NOTE(review): nothing in this cell draws random numbers; the seed is kept only
# so that the global RNG state is the same as before.
np.random.seed(1)

# One histogram trace holding every sample of the selected column.
fig = [go.Histogram(
    x=params_dict[select]
)]

# Label both axes so the figure can be read on its own.
layout= go.Layout(
    title= select,
    xaxis= dict(title= select),
    yaxis= dict(title= 'count')
)

Figure= go.Figure(data= fig,layout=layout)

iplot(Figure)

### III. Tuple tree and converting parameter names

> Tree

`demos_ABC` requires the first line of the file to be a tuple describing the inferred demographic tree. The tree must be binary. Each internal node is a three-element tuple: the first two elements are the daughter nodes, the third is the forward position in time of the node (index starting at 1).


> Population names 

Population names in the demos table are integers. Double-digit integers have not yet been tested.

> Parameter names

Converted to fit required format for [demos ABC](https://github.com/SantosJGND/SLiM/tree/master/demos_ABC).

In [75]:
#### Tree tuple
# Nested tuple description of the inferred binary tree: every internal node is
# (daughter, daughter, forward position in time); leaves are population labels.
tree_tuple= '((((1,2,4),3,3),4,2),5,1)'


# Population codes used in the input column names -> labels used in the demos table.
names_dict= {
    'tc': '1',
    'li': '2',
    'br': '3',
    'la': '4',
    'mu': '5'
}

# Likelihood columns are not model parameters, so they get no row in the demos table.
# They are skipped by name: cutting the header at a fixed index also dropped the
# last migration column.
likelihood_names= ('MaxEstLhood', 'MaxObsLhood')


def convert_param_name(name, pop_codes, last_anc):
    """Translate one input column name into the demos naming scheme.

    Args:
        name: column name from the input header (e.g. 'Ntc', 'NA2', 't3', 'Mmu2la').
        pop_codes: mapping from population code to demos population label.
        last_anc: highest index found among the 'NA<i>' columns.

    Returns:
        The converted name, or None when `name` matches none of the known prefixes.

    Raises:
        KeyError: if a population code in `name` is missing from `pop_codes`.
        ValueError: if the suffix of an 'NA' name is not an integer.
    """
    if name.startswith('NA'):
        anc_idx= int(name[2:])
        # The highest-indexed NA column becomes N0; the others become NT<index - 1>.
        if anc_idx == last_anc:
            return 'N0'
        return 'NT' + str(anc_idx - 1)
    if name.startswith('N'):
        return 'N' + pop_codes[name[1:]]
    if name.startswith('t'):
        return 'T' + name[1:]
    if name.startswith('M'):
        # 'M<a>2<b>' -> 'M<label a>-<label b>'
        return 'M' + '-'.join(pop_codes[pop] for pop in name[1:].split('2'))
    return None


#### Name conversion
# Read the NA indices from the original header rather than from params_dict, so
# re-running this cell after params_dict has been renamed still works.
Ntimes= [int(x[2:]) for x in params_names if x[:2] == 'NA']
last_anc= max(Ntimes) if Ntimes else None

new_params= []
renamed_params= {}

# Pair every name with its own column of samples, so that a skipped column can
# never shift the columns that follow it.
for consign, samples in zip(params_names, params):
    if consign in likelihood_names:
        continue
    nname= convert_param_name(consign, names_dict, last_anc)
    if nname is None:
        continue
    new_params.append(nname)
    renamed_params[nname]= samples

params_dict= renamed_params



### III. Parameter stats

`demos_ABC` samples from the parameter posteriors using the median and the lower and upper confidence limits of each parameter.

Liu _et al._ (2018) provide 100 bootstrap estimates of each parameter. We take the 5th and 95th percentiles of each parameter sample as the confidence limits and compute the median using numpy.



In [80]:

from scipy import stats

# Percentiles of each parameter sample used as lower / upper confidence limits.
LOWER_PCT, UPPER_PCT= 5, 95

demos_dir= 'demos/'
demos_file=  demos_dir + 'rhesus_liu18.txt'

# The demos table starts with the tree tuple, followed by one row per parameter.
lines= [tree_tuple]

for param in sorted(params_dict.keys()):
    vals= params_dict[param]
    median= np.median(vals)

    # Percentiles instead of fixed positions in the sorted sample: the limits stay
    # symmetric and do not depend on the sample holding exactly 100 values.
    lcl,ucl= np.percentile(vals, [LOWER_PCT, UPPER_PCT])

    # Parameters whose name starts with 'T' or 'N' are written as truncated integers.
    if param[0] in ['T','N']:
        median,lcl,ucl= [int(x) for x in [median,lcl,ucl]]

    # Row layout: first letter of the name, rest of the name, median, lower, upper.
    nline= [param[0],param[1:],median,lcl,ucl]

    lines.append('\t'.join(str(x) for x in nline))


### IV. Write demos file

In [None]:

# Create the output directory first: open(..., 'w') does not create missing folders.
out_dir= os.path.dirname(demos_file)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)

# Write the collected rows, newline-separated, to the demos file.
with open(demos_file,'w') as fp:
    fp.write('\n'.join(lines))
