In [18]:
import numpy as np
import sib_ldsc_z as ld
from scipy.optimize import minimize
from scipy.special import comb
from scipy.misc import derivative
import scipy.stats
from importlib import reload
import matplotlib.pyplot as plt
import seaborn as sns
import glob
# Re-import sib_ldsc_z so edits made to the module on disk take effect
# without restarting the kernel.
reload(ld)

<module 'sib_ldsc_z' from '/disk/homedirs/nber/harij/gitrepos/SNIPar/ldsc_reg/inferz/sib_ldsc_z.py'>

# Defining the PDF and the Log Likelihoods

The log-likelihood for SNP $i$ is:

$$
l_i = -\frac{d}{2} \log (2 \pi) - \frac{1}{2} \log \left| I + r_i S_i^{-1/2} V S_i^{-1/2} \right| - \frac{1}{2} z_i^T \left( I + r_i S_i^{-1/2} V S_i^{-1/2} \right)^{-1} z_i
$$

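And its derivative with respect to $V$, writing $\Sigma_i = I + r_i S_i^{-1/2} V S_i^{-1/2}$:

$$
\frac{\partial l_i}{\partial V} = -\frac{1}{2} r_i S_i^{-1/2} \Sigma_i^{-1} (\Sigma_i - z_i z_i^T) \Sigma_i^{-1} S_i^{-1/2}
$$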

In [19]:
# Seed NumPy's global RNG first so the simulation below is repeatable
# (presumably simdata draws from it -- confirm in sib_ldsc_z).
np.random.seed(123)

# Active configuration: N SNPs, each with the same 2x2 matrix S_i, and a
# 2x2 matrix V with 0.5 on the diagonal and 0.25 off the diagonal.
N = int(1e4)
S_single = np.array([[1e-4, -5 * 1e-5],
                     [-5 * 1e-5, 1e-4]])
S = np.tile(S_single, (N, 1, 1))  # shape (N, 2, 2): one copy of S_single per SNP
V = np.array([[0.5, 0.25],
              [0.25, 0.5]])

# Alternative configurations (disabled).
#
# Small diagonal case:
# N = int(100)
# S_size = int(N/2)
# S = np.array([np.array([[.5, 0], [0, .8]]),
#     np.array([[0.5, 0], [0, 0.8]])] * S_size )/N
# V = np.identity(2) * 0.5
#
# Three-dimensional case.
# NOTE(review): this S is not symmetric ([0][2] is -4e-5 but [2][0] is -5e-5);
# fix before enabling.
# N = int(1e4)
# S = np.array([[[1e-4, -5 * 1e-5, -4 * 1e-5], [-5 * 1e-5, 1e-4, -4 * 1e-5], [-5 * 1e-5, -4 * 1e-5, 1e-4]]] * N)
# V = np.array([[0.5, 0.25, 0.25], [0.25, 0.5, 0.25], [0.25, 0.25, 0.5]])

# Build the model from S and simulate data with V scaled by 1/N;
# simld=True asks simdata to simulate the LD scores as well.
model = ld.sibreg(S=S)
model.simdata(V / N, N, simld=True)

No value for U given. Generating a vector of ones (all SNPs weighted equally)
No value for LD Scores given. Generating a vector of ones for l
No value for effective number of loci is given. Using total number of loci instead
Simulated LD scores!


In [20]:
# Sanity check: the analytic gradient must agree with a numerical gradient,
# both evaluated at the true V using the first simulated SNP.
z0, S0, l0 = model.z[0, :], model.S[0], model.l[0]

aderiv = ld._grad_ll_v(ld.Vmat2V(V, N), z0, S0, l0, N)   # analytic
nderiv = ld._num_grad_V(ld.Vmat2V(V, N), z0, S0, l0, N)  # numerical

np.allclose(aderiv, nderiv)

True

In [21]:
# Fit the model to the simulated data. No starting values are passed, so the
# solver reports that it makes a method-of-moments initial guess.
# `output` receives the estimates; `result` is the optimizer report printed here.
output, result = model.solve()
print(result)

No initial guess provided.
Making Method of Moments Guess
      fun: 37189.05234751942
 hess_inv: <3x3 LbfgsInvHessProduct with dtype=float64>
      jac: array([-0.00462407, -0.00017175,  0.00281469])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 12
      nit: 9
   status: 0
  success: True
        x: array([0.51801914, 0.49265043, 0.53215488])


In [22]:
# Display the fitted values returned by model.solve(): 'v1', 'v2', 'r' and
# 'invH' (presumably the inverse Hessian at the optimum -- confirm in sib_ldsc_z).
output

{'v1': 0.5180191426965135,
 'v2': 0.492650425895693,
 'r': 0.5321548816445629,
 'invH': array([[ 4.41156965e-04, -1.59973356e-07, -1.45739446e-04],
        [-1.59973315e-07,  4.13858136e-04, -1.46218624e-04],
        [-1.45739446e-04, -1.46218624e-04,  9.02756653e-04]])}

In [46]:
# Ground-truth values used in the simulation, for comparison with the fit:
# the two diagonal entries of V and the correlation implied by its off-diagonal.
true_v1 = V[0, 0]
true_v2 = V[1, 1]
true_r = V[0, 1] / np.sqrt(true_v1 * true_v2)

print(
    "Real Parameters:",
    f"v1 = {true_v1}",
    f"v2 = {true_v2}",
    f"r = {true_r}",
    sep="\n",
)

Real Parameters:
v1 = 0.5
v2 = 0.5
r = 0.5


In [24]:
# Jackknife over consecutive blocks of SNPs: N // 100 SNPs per block, i.e. 100 blocks.
# NOTE(review): the returned array presumably holds standard errors for
# (v1, v2, r) -- confirm in sib_ldsc_z.
model.jackknife_se(blocksize=N // 100)

Loop Number: 1
Current Block: 0 to 100
Loop Number: 2
Current Block: 100 to 200
Loop Number: 3
Current Block: 200 to 300
Loop Number: 4
Current Block: 300 to 400
Loop Number: 5
Current Block: 400 to 500
Loop Number: 6
Current Block: 500 to 600
Loop Number: 7
Current Block: 600 to 700
Loop Number: 8
Current Block: 700 to 800
Loop Number: 9
Current Block: 800 to 900
Loop Number: 10
Current Block: 900 to 1000
Loop Number: 11
Current Block: 1000 to 1100
Loop Number: 12
Current Block: 1100 to 1200
Loop Number: 13
Current Block: 1200 to 1300
Loop Number: 14
Current Block: 1300 to 1400
Loop Number: 15
Current Block: 1400 to 1500
Loop Number: 16
Current Block: 1500 to 1600
Loop Number: 17
Current Block: 1600 to 1700
Loop Number: 18
Current Block: 1700 to 1800
Loop Number: 19
Current Block: 1800 to 1900
Loop Number: 20
Current Block: 1900 to 2000
Loop Number: 21
Current Block: 2000 to 2100
Loop Number: 22
Current Block: 2100 to 2200
Loop Number: 23
Current Block: 2200 to 2300
Loop Number: 24
Cu

array([0.01235804, 0.01156955, 0.01808044])