In [35]:
import helperfuncs as hp
import numpy as np
import sib_ldsc_z as ld
from scipy.optimize import minimize
from scipy.special import comb
from scipy.misc import derivative
import scipy.stats
from importlib import reload
import matplotlib.pyplot as plt
reload(ld)

<module 'sib_ldsc_z' from 'C:\\Users\\Hariharan\\Documents\\git_repos\\SNIPar\\ldsc_reg\\inferz\\sib_ldsc_z.py'>

# Defining the PDF and the Log Likelihoods

The likelihood for a SNP $i$ is:

$$
l_i = -\frac{d}{2} log (2 \pi) - \frac{1}{2} log ( |I + r_i S_i^{-1/2} V S_i^{-1/2}| ) - \frac{1}{2} z_i^T (I + r_i S_i^{-1/2} V S_i^{-1/2}) ^{-1} z_i
$$

And its derivative:

$$
\frac{dl}{dV} = r_i S^{-1/2} \Sigma_i^{-1} (\Sigma - z_i z_i^T) \Sigma_i^{-1} S^{-1/2}
$$

In [36]:
np.random.seed(123)

N = int(100)
S_size = int(N/2)
S = np.array([np.array([[.5, 0], [0, .8]]),
    np.array([[0.5, 0], [0, 0.8]])] * S_size )
V = np.identity(2) * 0.5
f = np.random.uniform(0, 1, N)

# N = 100
# S = np.array([0.5/N] * N).reshape((N, 1, 1))
# V = np.atleast_2d(0.5)

In [37]:
model = ld.sibreg(S = S/N)
model.simdata(V/N, N, simr = True)

No value for U given. Generating a vector of ones (all SNPs weighted equally)
No value for r given. Generating a vector of ones for r
Simulated LD scores!
Effect Vectors Simulated!


In [38]:
Vin = hp.extract_upper_triangle(V)
model.neg_logll_grad(Vin)

[[-394.66726514]], [[0.5 0. ]
 [0.  0.5]]


(array([[39466.72651388]]),
 array([64351.41045795, 18182.89182106, 36840.34313903]))

In [39]:
# Testing derivatives
aderiv = model._grad_ll_v(V, model.z[0, :], model.S[0], 
                 model.u[0], model.r[0])

nderiv = model._num_grad_V(V, model.z[0, :], model.S[0], 
                 model.u[0], model.r[0])

np.allclose(aderiv, nderiv)

True

In [40]:
# solving
output, result = model.solve() #), gradfunc = model._num_grad_V
print(result)

No initial guess provided.
Making 'optimal' matrix
[[-502.24268658]], [[1.e-06 0.e+00]
 [0.e+00 1.e-06]]
[[-401.77993781]], [[ 0.93088136 -0.01510679]
 [-0.01510679  0.36501271]]
[[-401.29608597]], [[ 0.91171741 -0.01571632]
 [-0.01571632  0.37569287]]
[[-399.5422059]], [[ 0.83506162 -0.01815443]
 [-0.01815443  0.41841354]]
[[-407.18772417]], [[ 0.16148577 -0.04484653]
 [-0.04484653  0.63514297]]
[[-394.65275503]], [[ 0.45201688 -0.03333352]
 [-0.03333352  0.54166182]]
[[-394.46126334]], [[ 0.44785053 -0.01771491]
 [-0.01771491  0.50859799]]
[[-394.35579667]], [[ 0.43704956 -0.02222751]
 [-0.02222751  0.43658576]]
[[-394.33730669]], [[ 0.44112949 -0.01850019]
 [-0.01850019  0.45792035]]
[[-394.3368775]], [[ 0.4401742  -0.01897853]
 [-0.01897853  0.45533736]]
[[-394.33688591]], [[ 0.4408756  -0.01904175]
 [-0.01904175  0.45487892]]
[[-394.33687142]], [[ 0.44054113 -0.0190116 ]
 [-0.0190116   0.45509753]]
[[-394.33687065]], [[ 0.4404327  -0.01901491]
 [-0.01901491  0.45511072]]
      fun

In [49]:
output

array([[1.19018594e+09, 7.12526602e+10],
       [7.12526602e+10, 1.00000000e-03]])

In [77]:
modll = model._log_ll(V, model.z[0, :], model.S[0], 
                 model.u[0], model.r[0])

In [74]:
V_norm = V/N
S_inv_root = hp.calc_inv_root(model.S[0])
dist = scipy.stats.multivariate_normal(mean = None,
                                      cov = np.eye(V.shape[0]) + model.r[0] * S_inv_root @ V_norm @ S_inv_root)

nlogll = dist.logpdf(model.z[0, :])
print(nlogll)

-3.3483077628580906


In [78]:
modll

array([[-7.33193113]])

In [114]:
def scipy_logll(V, z, S, u, r):
    '''
    Returns log likelihood as given
    by scipy
    '''
    
    V_norm = V
    S_inv_root = hp.calc_inv_root(S)
    
    covmat = np.eye(V.shape[0]) + r * S_inv_root @ V_norm @ S_inv_root
    
    min_eig = np.min(np.real(np.linalg.eigvals(covmat)))
    if min_eig < 0:
        covmat -= 10*min_eig * np.eye(*covmat.shape)
    
    dist = scipy.stats.multivariate_normal(mean = None,
                                          cov = covmat)

    nlogll = dist.logpdf(z)
    
    return nlogll

def scipy_logll_deriv(V, z, S, u, r):
    '''
    Calculates the numerical derivative of the scipy logll
    '''
    
    g = np.zeros(V.shape)
    for i in range(0,V.shape[0]):
        for j in range(0,V.shape[1]):
            dV = np.zeros((V.shape))
            dV[i,j] = 10 ** (-6)
            V_upper = V+dV
            V_lower = V-dV
            print(V_upper)
            print(scipy_logll(V_upper, z, S, u, r))
            
            
            g[i,j] = (scipy_logll(V_upper, z, S, u, r) - \
                      scipy_logll(V_lower, z, S, u, r)) / (2 * 10 ** (-6))
    return g

    

In [115]:
model.solve(logllfunc = scipy_logll, gradfunc=scipy_logll_deriv)

No initial guess provided.
Making 'optimal' matrix
[[2.e-06 0.e+00]
 [0.e+00 1.e-06]]
[-4.05960453 -1.85519717]


ValueError: setting an array element with a sequence.

# Playing with Log Likelihoods

In [25]:
Si = S[0]
zi = model.z[0].reshape((2, 1))
ri = model.r[0]
zi

array([[-2.85980996],
       [-1.82461152]])

In [47]:
# log likelihood
S_inv_root = hp.calc_inv_root(Si)
Sigma = np.identity(Si.shape[0]) + ri*np.dot(S_inv_root.dot(V),S_inv_root)
logdet = np.linalg.slogdet(Sigma)
Sigma_inv = np.linalg.inv(Sigma)
zi = zi.reshape(V.shape[0],1)
d = V.shape[0]

L = - (d/2) * np.log(2 * np.pi) \
    - (1/2) * logdet[0]*logdet[1] \
    - (1/2) * np.dot(zi.T,Sigma_inv.dot(zi))

In [None]:
# derivative
S_inv_root = hp.calc_inv_root(S)
Sigma = np.identity(S.shape[0])+r*np.dot(S_inv_root.dot(V),S_inv_root)
Sigma_inv = np.linalg.inv(Sigma)
z = z.reshape(z.shape[0],1)
SSigma_inv = S_inv_root.dot(Sigma_inv)
g = r * SSigma_inv.dot(np.dot(Sigma-z.dot(z.T),SSigma_inv.T))