In [1]:
import sib_ldsc_cython as ld
import sys
sys.path.append("..")
import sib_ldsc_z as ldbase

In [2]:
import numpy as np
import glob
import pandas as pd
from scipy.optimize import minimize
import scipy.optimize
from scipy.special import comb
from scipy.misc import derivative
import scipy.stats

In [3]:
# Re-import the `ld` module object in the running kernel.
# NOTE(review): the recorded output shows `ld` is a compiled extension (.so).
# importlib.reload is documented as generally not useful for dynamically
# loaded modules, so a rebuilt binary is presumably not picked up here —
# restart the kernel after recompiling and confirm.
from importlib import reload
reload(ld)

<module 'sib_ldsc_cython' from '/disk/homedirs/nber/harij/gitrepos/SNIPar/ldsc_reg/cython/sib_ldsc_cython.cpython-37m-x86_64-linux-gnu.so'>

In [4]:
# 2x2 symmetric toy matrices, passed as the V and S inputs of the
# single-SNP log-likelihood spot checks further down.
V = np.array([
    [0.5, 0.25],
    [0.25, 0.5],
])
S = np.array([
    [0.4, 0.2],
    [0.2, 0.4],
])

In [5]:
# Register the %%cython cell magic so a cell can be compiled with Cython.
%load_ext Cython

In [41]:
%%cython

import numpy as np
cimport numpy as np
import sib_ldsc_cython as ld 

# Use the explicit fixed-width float64 for both the runtime dtype and the
# compile-time element type.  `np.float` was only an alias of the builtin
# `float`; it was deprecated in NumPy 1.20 and removed in 1.24, so
# `DTYPE = np.float` raises AttributeError on current NumPy.  float64 is the
# same C double the old alias resolved to, so typed buffers are unchanged.
DTYPE = np.float64
ctypedef np.float64_t DTYPE_t

cpdef DTYPE_t log_ll(np.ndarray[DTYPE_t, ndim=1] V,
                     np.ndarray[DTYPE_t, ndim=1] z,
                     np.ndarray[DTYPE_t, ndim=2] S,
                     double l,
                     int N):
    r"""
    Return the Gaussian log likelihood of a single SNP i:

    .. math::
        l_i = -\frac{d}{2} \log(2 \pi) - \frac{1}{2} \log(|\Sigma|) -
                \frac{1}{2} z_i^T \Sigma^{-1} z_i

    with ``Sigma = Snew + l * Vnew``, where ``Vnew, Snew`` are the two values
    returned by ``ld.standardize_mat(ld.V2Vmat(V, N), S, N)``.

    Inputs:
    V = 1-D float array, expanded to a 2-D matrix by ``ld.V2Vmat(V, N)``
    z = 1-D float array of length d (d = number of rows of that matrix)
    S = dxd numpy matrix
    l = scalar multiplying the standardized V matrix
    N = integer forwarded to ``ld.V2Vmat`` and ``ld.standardize_mat``

    Outputs:
    logll = scalar

    Raises:
    ValueError if the length of ``z`` does not match the dimension of Sigma.
    """
    cdef np.ndarray[DTYPE_t, ndim=2] Vmat = ld.V2Vmat(V, N)
    cdef Py_ssize_t d = Vmat.shape[0]
    cdef double logabsdet, quad

    Vnew, Snew = ld.standardize_mat(Vmat, S, N)
    Sigma = Snew + l * Vnew

    # slogdet returns (sign, log|det|).  The formula needs log|Sigma|, which
    # is the second element alone; multiplying by the sign only agrees with it
    # when the sign is +1 (always the case for a positive-definite Sigma).
    logabsdet = np.linalg.slogdet(Sigma)[1]

    # |det(Sigma)| > 1e-6, tested on the log scale so Sigma is not factorized
    # a second time just to obtain the determinant.
    if logabsdet > np.log(1e-6):
        Sigma_inv = np.linalg.inv(Sigma)
    else:
        # (Near-)singular Sigma: fall back to the pseudo-inverse.
        Sigma_inv = np.linalg.pinv(Sigma)

    # Quadratic form z^T Sigma^{-1} z; with 1-D z the product is a scalar.
    quad = z @ Sigma_inv @ z

    return - (d / 2.0) * np.log(2 * np.pi) \
           - 0.5 * logabsdet \
           - 0.5 * quad


In [42]:
# Time a single call of the notebook-compiled `log_ll`.
# NOTE(review): `log_ll` declares S as a 2-D buffer, while the simulation cell
# (execution count 12, earlier than this cell's 42) rebinds S to a 3-D array.
# This cell therefore relies on V and S being the 2x2 toy matrices again —
# verify with Restart & Run All.
%time log_ll(ldbase.Vmat2V(V, 100), np.array([0.1, 0.1]), S, 0.5, 100)

CPU times: user 1e+03 µs, sys: 0 ns, total: 1e+03 µs
Wall time: 1.15 ms


-1.7007026968501218

In [24]:
# Time the same call expression against `ldbase._log_ll` for comparison.
# NOTE(review): the recorded value (-2.18...) differs from the -1.70...
# recorded for the notebook-compiled `log_ll`.  The execution counts differ
# (24 vs 42), so V and S may have held different values in the two runs —
# re-run both from a fresh kernel before comparing results or timings.
%time ldbase._log_ll(ldbase.Vmat2V(V, 100), np.array([0.1, 0.1]), S, 0.5, 100)

CPU times: user 3.79 s, sys: 101 ms, total: 3.89 s
Wall time: 3.89 s


-2.18364641006772

In [7]:
# Spot-check the single-SNP log likelihood from the compiled `ld` module.
# NOTE(review): the third argument here is V, whereas the timed calls pass S
# in that position — confirm this is intended.
ld._log_ll(np.array([0.5, 0.5, 0.5]), np.array([0.1, 0.1]), V, 0.5, 100)

-1.700702696850121

In [8]:
# Same arguments as the `ld._log_ll` spot check, evaluated with `ldbase`.
# NOTE(review): the recorded values differ in the third decimal (-1.7057 here
# vs -1.7007 from `ld`), so the two implementations do not agree on this input.
ldbase._log_ll(np.array([0.5, 0.5, 0.5]), np.array([0.1, 0.1]), V, 0.5, 100)

-1.705657070865306

In [9]:
# Evaluate the compiled gradient `ld._grad_ll_v` on the same toy inputs.
# NOTE(review): the recorded output is an all-zero vector; confirm the
# gradient is really zero at this point rather than being lost in the
# compiled implementation.
ld._grad_ll_v(np.array([0.5, 0.5, 0.5]), np.array([0.1, 0.1]), V, 0.5, 100)

array([ 0.,  0., -0.])

In [12]:
# Simulate data for N SNPs with a fixed seed.
# Note: this rebinds the notebook-level names S (now one 2x2 matrix per SNP,
# shape (N, 2, 2)) and V.
np.random.seed(123)

N = 10_000
S = np.array([
    [[1e-4, -5 * 1e-5],
     [-5 * 1e-5, 1e-4]]
    for _ in range(N)
])
V = np.array([[0.5, 0.25],
              [0.25, 0.5]])

# Build the model around the per-SNP S and let it simulate data from V / N.
model = ldbase.sibreg(S=S)
model.simdata(V / N, N, simld=True)

No value for U given. Generating a vector of ones (all SNPs weighted equally)
No value for LD Scores given. Generating a vector of ones for l
No value for effective number of loci is given. Using total number of loci instead
Simulated LD scores!


In [13]:
# Minimize the objective from the compiled `ld` module with L-BFGS-B.
# jac=True: `ld.neg_logll_grad` is expected to return (value, gradient).
# No `options` are passed, so SciPy's default tolerances apply.
result = minimize(
    fun=ld.neg_logll_grad,
    x0=np.array([0.3, 0.3, 0.3]),
    args=(model.z, model.S, model.l, model.u, model.M),
    method='L-BFGS-B',
    jac=True,
    # first two parameters bounded below by 1e-6, third kept in [-1, 1]
    bounds=[(1e-6, None), (1e-6, None), (-1, 1)],
)

In [14]:
# Display the optimizer result obtained with `ld.neg_logll_grad`.
# NOTE(review): the recorded output has nit: 0, nfev: 1, an all-zero jac and
# x equal to the start point, i.e. no optimization step was taken — check the
# gradient returned by `ld.neg_logll_grad`.
result

      fun: 18378.77066409414
 hess_inv: <3x3 LbfgsInvHessProduct with dtype=float64>
      jac: array([-0., -0., -0.])
  message: b'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL'
     nfev: 1
      nit: 0
   status: 0
  success: True
        x: array([0.3, 0.3, 0.3])

In [13]:
# Minimize the objective from `ldbase` with L-BFGS-B for comparison.
# jac=True: `ldbase.neg_logll_grad` is expected to return (value, gradient).
# No `options` are passed, so SciPy's default tolerances apply.
# Note: this run starts from 0.5 for every parameter.
resultbase = minimize(
    fun=ldbase.neg_logll_grad,
    x0=np.array([0.5, 0.5, 0.5]),
    args=(model.z, model.S, model.l, model.u, model.M),
    method='L-BFGS-B',
    jac=True,
    # first two parameters bounded below by 1e-6, third kept in [-1, 1]
    bounds=[(1e-6, None), (1e-6, None), (-1, 1)],
)

  log_ll += (1/ui) * _log_ll(V, zi, Si, li, M)
  log_ll += (1/ui) * _log_ll(V, zi, Si, li, M)


In [14]:
# Display the optimizer result obtained with `ldbase.neg_logll_grad`.
resultbase

      fun: 37189.05234751699
 hess_inv: <3x3 LbfgsInvHessProduct with dtype=float64>
      jac: array([ 0.00287811,  0.00023375, -0.00084613])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 8
      nit: 6
   status: 0
  success: True
        x: array([0.5180204 , 0.49265064, 0.53215352])