In [61]:
import helperfuncs as hp
import numpy as np
import sib_ldsc_z as ld
from scipy.optimize import minimize
from scipy.special import comb
from scipy.misc import derivative
import scipy.stats
from importlib import reload
import matplotlib.pyplot as plt
reload(ld)

<module 'sib_ldsc_z' from 'C:\\Users\\Hariharan\\Documents\\git_repos\\SNIPar\\ldsc_reg\\inferz\\sib_ldsc_z.py'>

# Defining the PDF and the Log Likelihoods

The likelihood for a SNP $i$ is:

$$
l_i = -\frac{d}{2} log (2 \pi) - \frac{1}{2} log ( |I + r_i S_i^{-1/2} V S_i^{-1/2}| ) - \frac{1}{2} z_i^T (I + r_i S_i^{-1/2} V S_i^{-1/2}) ^{-1} z_i
$$

And its derivative:

$$
\frac{dl}{dV} = r_i S^{-1/2} \Sigma_i^{-1} (\Sigma - z_i z_i^T) \Sigma_i^{-1} S^{-1/2}
$$

In [62]:
np.random.seed(123)

N = int(100)
S_size = int(N/2)
S = np.array([np.array([[.5, 0], [0, .8]]),
    np.array([[0.5, 0], [0, 0.8]])] * S_size )
V = np.identity(2) * 0.5
f = np.random.uniform(0, 1, N)

# N = 100
# S = np.array([0.5/N] * N).reshape((N, 1, 1))
# V = np.atleast_2d(0.5)

In [63]:
model = ld.sibreg(S = S, f = f)
model.simdata(V, N, simr = True)

No value for U given. Generating a vector of ones (all SNPs weighted equally)
No value for r given. Generating a vector of ones for r
Simulated LD scores!
Effect Vectors Simulated!


In [64]:
# playing with allele freq
logll_nonscale = 0
logll_scale = 0
for idx in range(N):
    fi = f[idx]
    normalizer = 2 * fi  * (1 - fi) if fi is not None else 1.0
    zi = np.sqrt(normalizer) * model.z[idx].reshape(2, 1)
    Si = normalizer * model.S[idx]

    logll_nonscale += model._log_ll(V, model.z[idx], model.S[idx], model.u[idx], model.r[idx])
    logll_scale += model._log_ll(V, zi, Si, model.u[idx], model.r[idx])

print(f"Uncaled Logll = {logll_nonscale}, Scaled Logll = {logll_scale}")

Uncaled Logll = [[-394.66726514]], Scaled Logll = [[-408.34509465]]


In [65]:
Vin = hp.extract_upper_triangle(V)
model.neg_logll_grad(Vin)

[[-433.46485554]], [[0.5 0. ]
 [0.  0.5]]


(array([[433.46485554]]), array([50.97545377,  1.92058409, 45.79079357]))

In [66]:
# Testing derivatives
aderiv = model._grad_ll_v(V, model.z[0, :], model.S[0], 
                 model.u[0], model.r[0])

nderiv = model._num_grad_V(V, model.z[0, :], model.S[0], 
                 model.u[0], model.r[0])

np.allclose(aderiv, nderiv)

True

In [67]:
# solving
output, result = model.solve() #, gradfunc = model._num_grad_V)
print(result)

No initial guess provided.
Making 'optimal' matrix
[[-502.23579807]], [[1.e-06 0.e+00]
 [0.e+00 1.e-06]]
[[-446.14859974]], [[0.86300061 0.25593791]
 [0.25593791 0.43557817]]
[[-445.64966499]], [[0.85700304 0.25637862]
 [0.25637862 0.4290796 ]]
[[-443.68421158]], [[0.8330128  0.25814148]
 [0.25814148 0.40308535]]
[[-437.03605276]], [[0.73705182 0.26519292]
 [0.26519292 0.29910834]]
[[-446.88594948]], [[0.32429906 0.1604261 ]
 [0.1604261  0.05594281]]
[[-430.65185474]], [[0.53228756 0.2132187 ]
 [0.2132187  0.17847533]]
[[-416.89333565]], [[0.38200962 0.10702202]
 [0.10702202 0.19114086]]
[[-1920.36942684]], [[ 1.00000000e-06 -1.48600104e-01]
 [-1.48600104e-01  1.94358568e-01]]
[[-407.98300637]], [[0.25339643 0.02096014]
 [0.02096014 0.19222418]]
[[34.28307066]], [[ 1.00000000e-06 -6.07526214e-01]
 [-6.07526214e-01  1.75788424e-01]]
[[-407.11453915]], [[ 0.21304053 -0.07913295]
 [-0.07913295  0.18960662]]
[[-613.29898568]], [[1.00000000e-06 1.03134651e-01]
 [1.03134651e-01 2.05989810e-0

In [7]:
output

array([[0.56661609, 0.01371669],
       [0.01371669, 0.54802061]])

In [8]:
modll = model._log_ll(V, model.z[0, :], model.S[0], 
                 model.u[0], model.r[0], 
                 model.f)

In [9]:
V_norm = V/N
S_inv_root = hp.calc_inv_root(model.S[0])
dist = scipy.stats.multivariate_normal(mean = None,
                                      cov = np.eye(V.shape[0]) + model.r[0] * S_inv_root @ V_norm @ S_inv_root)

nlogll = dist.logpdf(model.z[0, :])
print(nlogll)

-3.54456779428641


In [10]:
modll

array([[-7.74199121]])