In [3]:
import scipy
import numpy as np
from sklearn.neighbors import KernelDensity
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.cluster import estimate_bandwidth
from sklearn.cluster import MeanShift, estimate_bandwidth

import pandas as pd

from scipy import stats
from scipy.stats import beta
from math import sin
from random import randint

import matplotlib.pyplot as plt
import itertools as it

import plotly
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
init_notebook_mode(connected=True)

import collections

def recursively_default_dict():
    """Create an arbitrarily deep auto-vivifying dictionary.

    The returned defaultdict uses this very function as its factory, so any
    missing key yields another dictionary of the same kind and chained
    lookups such as ``d[a][b][c]`` never raise KeyError.
    """
    nested = collections.defaultdict(recursively_default_dict)
    return nested



## Chapter III

#### References

- Griffiths, R. (2006). Coalescent lineage distributions. Advances in Applied Probability, 38(2), 405-429. 
doi:10.1239/aap/1151337077

- Hein, J., Schierup, M., & Wiuf, C. (2004). Gene genealogies, variation and evolution: a primer in coalescent theory. Oxford University Press, USA. (*)

(*) referenced as GGVE throughout.

### Age of MRCA of 2 sequences.

GGVE p. 91.

- **TODO:** this section is unfinished.

In [135]:

def tMRCA2s(t,k,theta):
    """Density, at time t, of the age of the MRCA of two sequences.

    Evaluates

        (1 + theta)**(k + 1) / k! * t**k * exp(-(1 + theta) * t)

    i.e. a gamma density with shape ``k + 1`` and rate ``1 + theta``.

    Parameters
    ----------
    t : time at which the density is evaluated.
    k : non-negative integer passed to ``factorial`` (the notebook plots it
        as the number of segregating sites S2).
    theta : mutation parameter.
    """
    rate = 1 + theta
    coef = (rate ** (k + 1)) / factorial(k)

    return coef * t**k * np.exp(-rate * t)

def t2MRCA_mean(k,theta):
    """Return (k + 1) / (theta + 1).

    This is the mean of a gamma density with shape ``k + 1`` and rate
    ``theta + 1``; the notebook draws it as a vertical marker on the
    MRCA density plot.
    """
    return (k + 1) / (theta + 1)



In [136]:
from math import factorial
from scipy.stats import gamma


# Parameters for the two-sequence MRCA example.
mut_rate= 9.5e-9
Theta= 2.04
# Solves Theta = 4 * Nt * mut_rate for Nt.
Nt= Theta / (mut_rate * 4)

k= 3

mean_t= t2MRCA_mean(k,Theta)


# Evaluation grid and the GGVE density on it.
X_plot= list(np.linspace(0,4,50))
Tdens= [tMRCA2s(x,k,Theta) for x in X_plot]

# tMRCA2s is a gamma density with shape k + 1 and *rate* (1 + Theta).
# scipy.stats.gamma is parameterised by scale = 1 / rate, so the reference
# curve must use scale = 1 / (Theta + 1) to coincide with Tdens.
gamma_compare= [gamma.pdf(x, a = k+1, scale = 1 / (Theta + 1)) for x in X_plot]


In [137]:

# Scatter trace of the GGVE density evaluated on the X_plot grid.
fig_MRCA= [
    go.Scatter(x= X_plot, y= Tdens, mode= 'markers', name= 'GGVE')
]

# Reference gamma trace. It is built here but not added to the figure below.
fig_gamm= [
    go.Scatter(x= X_plot, y= gamma_compare, mode= 'markers', name= 'Gamma')
]


# Layout: titles plus a vertical line at mean_t running from y = 0 to y = 1.
layout= go.Layout(
    title= 'density of MRCA T2 for S2= {}'.format(k),
    yaxis= {'title': 'P'},
    xaxis= {'title': 't, theta= {}'.format(Theta)},
    shapes= [
        dict(
            type= 'line',
            x0= mean_t,
            y0= 0,
            x1= mean_t,
            y1= 1,
            line= dict(color= 'rgb(30,144,255)', width= 3),
        )
    ],
)


Fig= go.Figure(data= fig_MRCA, layout= layout)
iplot(Fig)

### Probability of n to k ancestors.

GGVE p. 92.


In [138]:
from scipy.special import comb

import operator as op
from functools import reduce

def ncr(n, r):
    """Binomial coefficient C(n, r) as an exact integer.

    Parameters
    ----------
    n : int, number of items.
    r : int, number of items chosen.

    Returns
    -------
    int
        The number of r-element subsets of an n-element set; 0 when
        ``r < 0`` or ``r > n``.
    """
    # Outside 0..n there are no subsets; without this guard both products
    # below would be empty and the function would wrongly return 1.
    if r < 0 or r > n:
        return 0

    # C(n, r) == C(n, n - r); use the smaller index to shorten the products.
    r = min(r, n-r)
    numer = reduce(op.mul, range(n, n-r, -1), 1)
    denom = reduce(op.mul, range(1, r+1), 1)

    # numer is always an exact multiple of denom, so floor division keeps
    # the result an exact int instead of a rounded float.
    return numer // denom

###
from operator import mul    # or mul=lambda x,y:x*y
from fractions import Fraction

def nCk(n,k):
    """Binomial coefficient C(n, k), accumulated exactly with Fractions.

    Multiplies (n - i) / (i + 1) for i = 0..k-1 and converts the final
    product to int. Returns 1 when k is 0 and 0 when k exceeds a
    non-negative integer n (one of the factors is then zero).
    """
    acc = Fraction(1)
    for i in range(k):
        acc = acc * Fraction(n - i, i + 1)
    return int(acc)


#### Walsh 2001

def proBnk(t,n,k):
    """Probability that n lineages have exactly k ancestors at time t.

    Sums, for i = k..n, the terms

        (2i - 1) * (-1)**(i - k) * k_(i-1) * n_[i]
        ------------------------------------------ * exp(-C(i, 2) * t)
               k! * (i - k)! * n_(i)

    where a_(m) = a (a + 1) ... (a + m - 1) is a rising factorial and
    n_[i] = n (n - 1) ... (n - i + 1) a falling factorial.

    Returns 0 when k > n (the sum is empty).
    """
    total= 0

    for i in range(k, n + 1):

        # Falling and rising factorials of n, each with i factors.
        falling_n= reduce(mul, (n - step for step in range(i)))
        rising_n= reduce(mul, (n + step for step in range(i)))

        # Rising factorial of k with i - 1 factors. The i == 1 case is only
        # reached when k == 1, where using k itself gives the value 1.
        rising_k= k if i == 1 else reduce(mul, (k + step for step in range(i - 1)))

        sign= (-1)**(i - k)
        term= (2 * i - 1) * sign * rising_k * falling_n
        term= term / (factorial(k) * factorial(i - k) * rising_n)

        # Exponential decay at rate C(i, 2).
        term= term * np.exp(-1 * nCk(i,2) * t)

        total += term

    return total



def probKGT(t,n,j,theta):
    """Probability that n lineages have j ancestors at time t, with mutation.

    Evaluates, for j >= 1,

        sum_{k=j}^{n} rho_k(t) * (-1)**(k - j)
                      * (2k + theta - 1) * (j + theta)_(k-1) * n_[k]
                      / ( j! * (k - j)! * (n + theta)_(k) )

    with rho_k(t) = exp(-k (k + theta - 1) t / 2), a_(m) the rising
    factorial a (a + 1) ... (a + m - 1) (equal to 1 for m = 0) and n_[k] the
    falling factorial n (n - 1) ... (n - k + 1). With theta = 0 this reduces
    to the sum computed by ``proBnk``.

    Parameters
    ----------
    t : time at which the probability is evaluated.
    n : int, number of lineages at time 0.
    j : int >= 1, number of ancestral lineages at time t.
    theta : mutation parameter.

    Returns
    -------
    The probability as a float; 0 when j > n (empty sum).

    Raises
    ------
    ValueError
        If j < 1; the series above is not defined for j = 0.
    """
    if j < 1:
        raise ValueError("probKGT requires j >= 1, got {}".format(j))

    def rising(a, m):
        # Rising factorial a (a + 1) ... (a + m - 1); empty product is 1.
        out= 1
        for x in range(m):
            out= out * (a + x)
        return out

    pi_sum= 0

    for k in range(j,n + 1):

        # Falling factorial n_[k].
        nbi= 1
        for x in range(k):
            nbi= nbi * (n - x)

        npi= rising(n + theta, k)

        # (j + theta)_(k-1): for k == 1 this is the empty product 1, not
        # j + theta.
        kis= rising(j + theta, k - 1)

        pkt= np.exp((-0.5) * k*(k+theta -1) * t)

        up= (2 * k + theta - 1) * kis * nbi
        dn= factorial(j) * factorial(k - j) * npi

        # The coefficient up / dn multiplies the exponential term.
        pi_sum += pkt * (-1)**(k - j) * up / dn

    return pi_sum


In [139]:
from numpy import prod

# Sample size, the ancestor counts to plot, and the time grid.
n= 7
range_k= list(range(1,8))
X_plot= list(np.linspace(0,2.5,100))

# One line trace per ancestor count k: proBnk(t, n, k) over the time grid.
fig_k = []

for k in range_k:
    Tk= [proBnk(time_pt,n,k) for time_pt in X_plot]
    trace1= go.Scatter(x= X_plot, y= Tk, mode= 'lines', name= str(k))
    fig_k.append(trace1)


layout= go.Layout(
    title= 'Prob of k anc. in time for n= {}, k= {}'.format(n, ','.join(map(str, range_k))),
    yaxis= {'title': 'P'},
    xaxis= {'title': 'time'},
)


Fig= go.Figure(data= fig_k, layout= layout)
iplot(Fig)

### Probability of m ancestors at time t1, given n ancestors at time 0 and k ancestors at time t.

In [146]:
# n lineages at time 0, k ancestors at time tk.
n= 7
k= 4
tk= 1

# Normalising constant: probability of going from n to k ancestors by time tk.
Ptk= proBnk(tk,n,k)

# Intermediate ancestor counts m (k..n) and the time grid on [0, tk].
range_m= list(range(k,n + 1))
X_plot= list(np.linspace(0,tk,100))

fig_k = []

for m in range_m:
    # Conditional probability of m ancestors at time x:
    # P(n -> m in x) * P(m -> k in tk - x) / P(n -> k in tk).
    Tk= [proBnk(time_pt,n,m) * proBnk(tk - time_pt,m,k) / Ptk for time_pt in X_plot]
    trace1= go.Scatter(x= X_plot, y= Tk, mode= 'lines', name= str(m))
    fig_k.append(trace1)


layout= go.Layout(
    title= 'Prob of m anc. in time for n= {}, k= {} at times 0 and {}'.format(n, k, tk),
    yaxis= {'title': 'P'},
    xaxis= {'title': 'time'},
)


Fig= go.Figure(data= fig_k, layout= layout)
iplot(Fig)