In [2]:
import sys
import argparse
import numpy as np
from scipy.stats import chi2_contingency
from itertools import product
import itertools as it

import allel
import pandas as pd

from _plotly_future_ import v4_subplots
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import *

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
init_notebook_mode(connected=True)
    
from datetime import datetime
import tempfile
import os
import gzip
import subprocess
import time

import collections
def recursively_default_dict():
    """Build a defaultdict whose missing keys are created as further such defaultdicts."""
    nested = collections.defaultdict(recursively_default_dict)
    return nested

import matplotlib
matplotlib.use('Agg')

import matplotlib.pyplot as plt

In [3]:
from scipy.stats import invgamma
from scipy.stats import beta

In [4]:
from sklearn.metrics import pairwise_distances

## GGVE p. 62 eq. 2.38
def PI_Taj(dataM):
    """
    Average pairwise difference between the rows of dataM (GGVE eq. 2.38).

    Row-to-row Manhattan distances are computed with pairwise_distances,
    the lower triangle of that matrix (diagonal included) is summed, and the
    total is multiplied by 2 / n / (n - 1), n being the number of rows.
    """
    n_rows = dataM.shape[0]
    dist_matrix = pairwise_distances(dataM, metric='manhattan')

    lower_rows, lower_cols = np.tril_indices(n_rows)
    total_diff = sum(dist_matrix[lower_rows, lower_cols])

    return (2 / n_rows / (n_rows - 1)) * total_diff

## GGVE p. 62 eq. 2.40
def Watt_est(dataM):
    """
    Watterson's estimator of theta (GGVE eq. 2.40): S / a_n.

    Parameters
    ----------
    dataM : 2-D array-like, one row per sampled sequence, one column per site.

    Returns
    -------
    float
        S / a_n, where S is the number of segregating columns (columns whose
        entries are not all identical) and a_n = sum_{j=1}^{n-1} 1/j for n rows.

    Raises
    ------
    ZeroDivisionError
        When dataM has a single row (a_n is then 0).
    """
    dataM = np.asarray(dataM)
    n = dataM.shape[0]

    # A site is segregating only if at least one row differs from the first.
    # The previous filter (column sum >= 0) was true for every column, so S
    # was always the total number of columns.
    segregating = np.any(dataM != dataM[0], axis=0)
    Sn = int(np.count_nonzero(segregating))

    An = sum(1 / j for j in range(1, n))

    return Sn / An

## GGVE p. 62 eq. 2.42
def TajD(dataM):
    """
    Tajima's D (GGVE eq. 2.42).

    Parameters
    ----------
    dataM : 2-D array-like, one row per sampled sequence, one column per site.

    Returns
    -------
    float
        (pi - S / a1) / sqrt(e1 * S + e2 * S * (S - 1)), where pi comes from
        PI_Taj and S is the number of segregating columns. Returns nan when
        no column segregates, since D is undefined there.
    """
    dataM = np.asarray(dataM)
    n = dataM.shape[0]

    Pit = PI_Taj(dataM)

    # Segregating sites: columns in which at least one row differs from the
    # first. The previous filter (column sum >= 0) counted every column.
    Sn = int(np.count_nonzero(np.any(dataM != dataM[0], axis=0)))
    if Sn == 0:
        return np.nan

    An = sum(1 / j for j in range(1, n))
    Bn = sum(1 / j**2 for j in range(1, n))

    # Watterson's theta, computed from the same S used in the variance term.
    Watt = Sn / An

    E1 = (n + 1) / (3 * An * (n - 1)) - 1 / An**2

    # b2 = 2 (n^2 + n + 3) / (9 n (n - 1)). The previous expression
    # `/ 9*n / (n-1)` multiplied by n instead of dividing by it.
    E2 = 2 * (n**2 + n + 3) / (9 * n * (n - 1))
    E2 = E2 - (n + 2) / (n * An) + Bn / An**2
    E2 = E2 / (An**2 + Bn)

    denom = np.sqrt(E1 * Sn + E2 * Sn * (Sn - 1))

    return (Pit - Watt) / denom

In [135]:
def set_SSD(set1,set2):
    '''
    Sum of squared differences for every (vector of set1, vector of set2) pair.

    Returns a flat list ordered set1-major: all pairings of the first set1
    vector with set2 come first, then those of the second, and so on.
    '''
    ssd = []
    for reference in set1:
        ssd.extend(np.sum((other - reference) ** 2) for other in set2)
    return ssd



In [172]:
# Display the list of reference spectra; in this export it is assigned in a cell that appears further down (In [263]).
expected_freq_counts

[array([0.22226147, 0.11113074, 0.07408716, 0.05556537, 0.04445229,
        0.03704358, 0.03175164, 0.02778268, 0.02469572, 0.02222615,
        0.02020559, 0.01852179, 0.01709704, 0.01587582, 0.01481743,
        0.01389134, 0.0130742 , 0.01234786, 0.01169797, 0.01111307,
        0.01058388, 0.01010279, 0.00966354, 0.00926089, 0.00889046,
        0.00854852, 0.00823191, 0.00793791, 0.00766419, 0.00740872,
        0.00716972, 0.00694567, 0.0067352 , 0.0065371 , 0.00635033,
        0.00617393, 0.00600707, 0.00584899, 0.00569901, 0.00555654,
        0.00542101, 0.00529194, 0.00516887, 0.0050514 , 0.00493914,
        0.00483177, 0.00472897, 0.00463045, 0.00453595, 0.00444523])]

In [263]:
# ---- simulation grid settings ----
N_replicates = 5      # replicate data sets per (a, b) grid point
L = 120               # values drawn from the beta distribution per replicate
Sizes = 50            # draws per site; also the number of histogram bins
labels = np.repeat(np.arange(N_replicates), Sizes)

data = list()
range_a = [0.01, 4]
range_b = [0.01, 6]

Nsteps = 80

# ---- reference spectrum: weights proportional to Theta / k, k = 1..Nsamp ----
mu = 1e-8
Ne = 20000
Theta = 4 * Ne * mu
Nsamp = Sizes

sample_counts = range(1, Nsamp + 1)
freq_exp = np.array([Theta / k for k in sample_counts])
freq_exp = freq_exp / np.sum(freq_exp)
#expected_freq_counts= [freq_exp]
freqs_possible = [k / (2 * Nsamp) for k in sample_counts]

# Ten random draws of `Sizes` frequencies each, weighted by freq_exp.
expected_freq_clade = [
    np.random.choice(freqs_possible, size=Sizes, p=freq_exp) for _ in range(10)
]

# Turn every draw into a normalised histogram over [0, 1].
expected_freq_counts = []
for freqs in expected_freq_clade:
    bin_count, bin_middle = np.histogram(freqs, bins=Sizes, range=[0, 1])
    # histogram edges -> bin midpoints
    bin_middle = [(upper + lower) / 2 for lower, upper in zip(bin_middle[:-1], bin_middle[1:])]
    bin_count = np.array(bin_count / sum(bin_count))
    expected_freq_counts.append(bin_count)


def freq_exp_func(x):
    """
    Mean squared distance between x and the vectors in expected_freq_counts.

    set_SSD yields the sum of squared differences between x and each
    reference vector; the mean of those values is returned.
    """
    return np.mean(set_SSD([x], expected_freq_counts))


#######

# (a, b) of every grid point, appended in the order the grid is visited.
avector= []
bvector= []


# One entry per statistic: the function that computes it, plus the mean and
# standard deviation across replicates, one value appended per grid point.
data_dicts= {
    "Pi":{
        'mean': [],
       'std': [],
        'func': PI_Taj
   },
    "Wat": {
        'mean': [],
       'std': [],
        'func': Watt_est
   },
    "Taj": {
        'mean': [],
       'std': [],
        "func": TajD
   },
    'exp': {
        'mean': [],
       'std': [],
        "func": freq_exp_func
    }
}


# Sweep an Nsteps x Nsteps grid of beta shape parameters.
for a in  np.linspace(range_a[0],range_a[1],Nsteps):
    for b in np.linspace(range_b[0],range_b[1],Nsteps):
        
        # Per-grid-point containers: simulated matrices and histograms of the draws.
        data= []
        freq_bins= []
        for k in range(N_replicates):

            # L values drawn from Beta(a, b), capped at 1.
            probs= beta.rvs(a, b, size=L)
            probs[(probs > 1)]= 1
            
            # For each of the L values draw `Sizes` entries from {1, 0} with
            # P(1) = 1 - probs[x] and P(0) = probs[x].
            # NOTE(review): confirm that coding 1 with probability 1 - probs[x]
            # (rather than probs[x]) is intended.
            Haps= [np.random.choice([1,0],p= [1-probs[x],probs[x]],size= (1,Sizes))[0] for x in range(L)] 
            
            # Transpose so rows are the `Sizes` draws and columns the L sites.
            Haps= np.array(Haps).T
            
            # Histogram of the drawn values over [0, 1], normalised to sum to 1.
            bin_count,bin_middle= np.histogram(probs,bins= Sizes,range= [0,1])
            bin_count= bin_count / sum(bin_count)
            
            freq_bins.append(bin_count)
            data.append(Haps)
        
        
        # Evaluate every statistic on each replicate and store mean and std.
        for dicto in data_dicts.keys():
            # 'exp' is computed on the histograms; the others on the 0/1 matrices.
            opal= list(data)
            if dicto== 'exp':
                opal = list(freq_bins)
                
            func_here= data_dicts[dicto]['func']
            
            stats= [func_here(x) for x in opal]
            data_dicts[dicto]['mean'].append(np.mean(stats))
            data_dicts[dicto]['std'].append(np.std(stats))
        
        avector.append(a)
        bvector.append(b)



In [266]:
# Statistic shown as marker colour over the (a, b) grid.
Stat = 'exp'
mean_stat = data_dicts[Stat]['mean']

# Indices of grid points whose mean Tajima's D lies within 0.005 of zero.
taj_means = data_dicts['Taj']['mean']
which_0 = [idx for idx in range(len(avector)) if abs(taj_means[idx]) <= 0.005]

grid_marker = dict(
    color=np.array(mean_stat),
    colorbar={'title': "Colorbar"},
    colorscale='Inferno',
    cmax=0.15,
    cmin=0.002,
    line=dict(width=0),
    size=7,
    symbol='circle',
    opacity=1,
)
grid_trace = go.Scatter(x=avector, y=bvector, mode='markers', marker=grid_marker)

############

# Grid points selected above, drawn in red on top of the grid.
fig_line = go.Scatter(
    x=[avector[idx] for idx in which_0],
    y=[bvector[idx] for idx in which_0],
    mode='markers',
    marker=dict(color="red"),
    name='neutral',
)

############
# Straight line b = rate * a, restricted to values below the largest b.
lina = np.linspace(range_a[0], range_a[1], Nsteps)
rate = 1 / 3.9

b_ceiling = max(bvector)
approx_points = [(a_val, rate * a_val) for a_val in lina if rate * a_val < b_ceiling]

fig_approx = [go.Scatter(
    x=[point[0] for point in approx_points],
    y=[point[1] for point in approx_points],
    mode='markers',
    marker=dict(color="black"),
    name='neutral',
)]

# fig_approx is built but not added to the figure.
fig = [grid_trace, fig_line]
#fig.extend(fig_approx)

########################
########################

layout = go.Layout(
    title='{} - Beta params a, b. n: {}, m: {}'.format(Stat, L, Sizes),
    xaxis={'title': 'parameter a'},
    yaxis={'title': 'parameter b'},
    height=700,
    width=700,
)

Figure = go.Figure(data=fig, layout=layout)

iplot(Figure)

############
#############

# Scatter of the mean distance-to-neutral statistic (x) against the a / b
# ratio of each grid point (y).
# A Tajima's D vs. distance trace used to be assigned to `fig` here and was
# overwritten on the next statement without being plotted; it is removed.
# The axis titles previously described that discarded trace and are corrected
# to match the data that is actually drawn.
fig= [go.Scatter(
    y= np.array(avector) / np.array(bvector),
    x= data_dicts['exp']['mean'],
    mode= 'markers'
)]


layout= go.Layout(
    title= '{} - a / b. n: {}, m: {}'.format(Stat, L, Sizes),
    xaxis= dict(
        title= 'Norm Euc dist to Neutral'
    ),
    yaxis= dict(
        title= 'a / b'
    ),
    height= 700,
    width= 700
)

Figure= go.Figure(data= fig,layout= layout)

iplot(Figure)


In [274]:

# Figure 1: log of the across-replicate standard deviation of the selected
# statistic, shown as marker colour over the (a, b) grid.
std_marker = dict(
    color=np.log(data_dicts[Stat]['std']),
    colorbar={'title': "Colorbar"},
    colorscale='Viridis',
    line=dict(width=0),
    size=5,
    symbol='circle',
    opacity=1,
)
fig = [go.Scatter(x=avector, y=bvector, mode='markers', marker=std_marker)]

layout = go.Layout(
    title='{} variance - Beta params a, b. n: {}, m: {}'.format(Stat, L, Sizes),
    xaxis={'title': 'parameter a'},
    yaxis={'title': 'parameter b'},
    height=700,
    width=700,
)

Figure = go.Figure(data=fig, layout=layout)

iplot(Figure)

# Figure 2: mean of the selected statistic (x) against the mean of 'Pi' (y).
# NOTE(review): the y axis is titled 'dist to e' although it plots the 'Pi'
# means, and the title mentions a / b; confirm the intended labels.
fig = [go.Scatter(x=mean_stat, y=data_dicts['Pi']['mean'], mode='markers')]

layout = go.Layout(
    title='{} - a / b. n: {}, m: {}'.format(Stat, L, Sizes),
    xaxis={'title': Stat},
    yaxis={'title': 'dist to e'},
    height=700,
    width=700,
)

Figure = go.Figure(data=fig, layout=layout)

iplot(Figure)

In [273]:

# Draw 1000 values from Beta(a, b) with b = a / rate and show their histogram.
rate = 0.07

a = 0.4
b = a * (1/rate)
r = beta.rvs(a, b, size=1000)
#

hist_trace = go.Histogram(
    x=r,
    nbinsx=Sizes,
    histnorm='',
    name='control',
    opacity=0.75,
)

layout = go.Layout(
    title='frequency distribution. a= {}; b= {}'.format(a,b),
    xaxis={'range': [-.02,1.02], 'title': 'Value'},
    yaxis={'title': 'Count'},
    bargap=0.2,
    bargroupgap=0.1,
)

fig = go.Figure(data=[hist_trace], layout=layout)
iplot(fig, filename='styled histogram')

In [248]:
# Normalised histogram of 1000 Beta(a, b) draws ...
r = beta.rvs(a, b, size=1000)
bin_count, bin_middle = np.histogram(r, bins=Sizes, range=[0, 1])
bin_count = bin_count / sum(bin_count)

# ... compared with the reference spectra summed per bin and renormalised.
pooled = np.array(expected_freq_counts).sum(axis=0)
t = [pooled / pooled.sum()]
set_SSD([bin_count], t)

[0.008390000000000002]

[array([2.44, 1.84, 0.86, 0.84, 0.54, 0.44, 0.28, 0.24, 0.18, 0.18, 0.24,
        0.22, 0.3 , 0.16, 0.26, 0.16, 0.18, 0.1 , 0.06, 0.1 , 0.06, 0.08,
        0.06, 0.1 , 0.08, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  ])]

In [237]:
# Display the current value of bin_count.
bin_count

array([0.24 , 0.105, 0.077, 0.072, 0.036, 0.047, 0.035, 0.043, 0.033,
       0.036, 0.019, 0.024, 0.02 , 0.021, 0.019, 0.019, 0.021, 0.016,
       0.008, 0.016, 0.011, 0.006, 0.005, 0.012, 0.006, 0.005, 0.006,
       0.005, 0.007, 0.007, 0.008, 0.003, 0.003, 0.   , 0.   , 0.   ,
       0.001, 0.002, 0.001, 0.001, 0.002, 0.001, 0.001, 0.   , 0.   ,
       0.   , 0.   , 0.   , 0.   , 0.   ])

In [243]:
# Per-bin sum over all stored reference spectra.
np.sum(np.array(expected_freq_counts),axis= 0)

array([2.44, 1.84, 0.86, 0.84, 0.54, 0.44, 0.28, 0.24, 0.18, 0.18, 0.24,
       0.22, 0.3 , 0.16, 0.26, 0.16, 0.18, 0.1 , 0.06, 0.1 , 0.06, 0.08,
       0.06, 0.1 , 0.08, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  ])

In [69]:
Iter = 100
rate = 4.2

# End points (a, b) of a segment on the line b = rate * a.
coord1 = [0.01, .01 * rate]
coord2 = [80, 80 * rate]
coords = np.array([coord1, coord2])

vector2 = coords[1] - coords[0]

# Iter evenly spaced points from coord1 to coord2.
coords_line = []
for angle in np.linspace(0, 10, Iter):
    new_guy = coords[0] + (angle / 10) * vector2
    coords_line.append(new_guy)

coords_line = np.array(coords_line)
mesh = np.linspace(0, 1, 50)

Nbins = 50
coords_topo, coords_x, coords_y = [], [], []

# For each point: 200 Beta(a, b) draws, binned over [0, 1] and normalised.
# coords_x holds the point index, coords_y the mesh, coords_topo the bin heights.
for angle, coord in enumerate(coords_line):
    a, b = coord
    coords_y.extend(mesh)
    coords_x.extend([angle] * len(mesh))

    primer = beta.rvs(a, b, size=200)
    hist = np.histogram(primer, density=False, bins=Nbins, range=[0, 1])[0]
    hist = hist / np.sum(hist)
    coords_topo.extend(hist)
    


In [70]:
# One marker per (point index, mesh value) pair, coloured by the normalised
# histogram height stored in coords_topo.
fig= [go.Scatter(
    x= coords_x,
    y= coords_y,
    mode= 'markers',
    marker= {
        'color': coords_topo,
        "colorbar": dict(
            title="Colorbar"
        ),
        'colorscale': 'Inferno',
            'line': {'width': 0},
            'size': 10,
            'symbol': 'circle',
        "opacity": 1
    }
)]


# The axes were titled 'parameter a' / 'parameter b', but x holds the row
# index into coords_line and y the [0, 1] mesh over which the beta draws are
# binned; the titles now describe the plotted quantities.
layout= go.Layout(
    xaxis= dict(
        title= 'index of (a, b) pair in coords_line'
    ),
    yaxis= dict(
        title= 'beta sample value (bin position)'
    ),
    height= 700,
    width= 700
)

Figure= go.Figure(data= fig,layout= layout)

iplot(Figure)

In [44]:
# Keep grid points whose mean statistic falls inside range_here ...
range_here = [-0.056, -0.052]
stat_lo, stat_hi = range_here
coords_line = [
    [avector[w], bvector[w]]
    for w in range(len(avector))
    if stat_lo <= mean_stat[w] <= stat_hi
]
# ... and, among those, only the ones with b / a > 1.
coords_line = [pair for pair in coords_line if pair[1] / pair[0] > 1]

coords_line = np.array(coords_line)
mesh = np.linspace(0, 1, 50)

Nbins = 50
coords_topo, coords_x, coords_y = [], [], []

# For each retained point: 200 Beta(a, b) draws, binned over [0, 1] and normalised.
for angle in range(coords_line.shape[0]):
    a, b = coord = coords_line[angle]
    coords_y.extend(mesh)
    coords_x.extend([angle] * len(mesh))

    primer = beta.rvs(a, b, size=200)
    hist = np.histogram(primer, density=False, bins=Nbins, range=[0, 1])[0]
    hist = hist / np.sum(hist)
    coords_topo.extend(hist)

In [45]:
# One marker per (point index, mesh value) pair, coloured by the normalised
# histogram height stored in coords_topo.
fig= [go.Scatter(
    x= coords_x,
    y= coords_y,
    mode= 'markers',
    marker= {
        'color': coords_topo,
        "colorbar": dict(
            title="Colorbar"
        ),
        'colorscale': 'Inferno',
            'line': {'width': 0},
            'size': 10,
            'symbol': 'circle',
        "opacity": 1
    }
)]


# The axes were titled 'parameter a' / 'parameter b', but x holds the row
# index into coords_line and y the [0, 1] mesh over which the beta draws are
# binned; the titles now describe the plotted quantities.
layout= go.Layout(
    xaxis= dict(
        title= 'index of (a, b) pair in coords_line'
    ),
    yaxis= dict(
        title= 'beta sample value (bin position)'
    ),
    height= 700,
    width= 700
)

Figure= go.Figure(data= fig,layout= layout)

iplot(Figure)

In [46]:
# Scatter of the (a, b) pairs stored in coords_line: column 0 on x, column 1 on y.
line_trace = go.Scatter(
    x=coords_line[:, 0],
    y=coords_line[:, 1],
    mode="markers",
)
fig = [line_trace]

layout = go.Layout(
    xaxis={'title': 'parameter a'},
    yaxis={'title': 'parameter b'},
    height=700,
    width=700,
)

Figure = go.Figure(data=fig, layout=layout)

iplot(Figure)