In [54]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("whitegrid")
from scipy.special import factorial
from scipy.optimize import minimize, fmin_l_bfgs_b

This is conducting some simulations from section 7 of ```sib_gwas.pdf```

# The Data Generating Process

$$
\hat{\theta}_i | \theta_i \sim \mathcal{N}(\theta_i, S_i)
$$

$$
\theta_i \sim \mathcal{N}(O, V)
$$

We want to infer $V$.

# The likelihood function

The contribution of SNP $i$ to the log likelihood is:

$$
l_i = -\frac{d}{2} log(2 \pi) - \frac{1}{2} log |S_i + V| - \frac{1}{2}tr(\hat{\theta}_i \hat{\theta}_i^T(S_i + V)^{-1})
$$

The gradient:

$$
\frac{dl_i}{dV} = -\frac{1}{2}(S_i + V)^{-1} + \frac{1}{2} (S_i + V)^{-1} \hat{\theta}_i \hat{\theta}_i^T (S_i + V)^{-1}
$$

For now we simply sum across all SNPs to get the log-likelihood and the gradient.

The first few functions are to allow us to switch between a "flat" array into a matrix and back. This is so that we can make parameter restrictions more easily. 

In [55]:
def extract_upper_triangle(x):
    
    # Extracts the upper 
    
    n, m = x.shape
    assert n == m
    
    upper_triangle = x[np.triu_indices(n)]
    
    return upper_triangle

In [56]:
def return_to_symmetric(triangle_vec, final_size):
    
    # Given a vector of the upper triangular matrix,
    # get back the symmetric matrix
    
    X = np.zeros((final_size,final_size))
    X[np.triu_indices(X.shape[0], k = 0)] = triangle_vec
    X = X + X.T - np.diag(np.diag(X))
    
    return X

```simdata``` simulates a vector of $\theta$ given $V, S$, and $N$

In [59]:
def simdata(V, S, N):
    
    # Simulated data (theta hats) as per section 7.1
    # V = varcov matrix of true effects
    # S = array of variance covariance matrices (each one
    # for a given snp)
    # N = Number of obs/SNPs to generate
    # Make sure S has as man
    
    θhat_vec = []
    
    # make sure they are np arrays
    for i in range(N):
        
        Si = S[i]
        
        V = np.array(V)
        Si = np.array(Si)

        # get shape of V
        d = V.shape[0]
        zeromat = np.zeros(d)

        # generate true effect vector
        θ = np.random.multivariate_normal(zeromat, V)

        sim = np.random.multivariate_normal(θ, Si)
        
        # Append to vector of effects
        θhat_vec.append(sim)
    
    θhat_vec = np.array(θhat_vec)
    return θhat_vec

In [60]:
N = 100
S = np.array([np.array([[5, 0], [0, 5]]),
    np.array([[2, 0], [0, 2]])] * 50 )# 50 = N/2
V = np.identity(2) * 10.0


θhat_vec = simdata(V, S, N)

The log likelihood function actually returns the negative log likelihood so that we can minimize it.

In [61]:
def logll(V, θ, S):
    
    # calculate negative log likelihood
    
    # Unflatten V into a matrix
    d = S[0].shape[0]
    V = return_to_symmetric(V, d)
    N = len(S)
    log_ll = 0
    
    for i in range(N):
           
        Si = S[i]
        θi = θ[i, :]
        d, ddash = Si.shape
        assert d == ddash # Each S has to be a square matrix
        
        log_ll += -(d/2) * np.log(2 * np.pi)
        log_ll += -(1/2) * np.log(np.linalg.det(Si + V))
        log_ll += -(1/2) * np.trace(np.outer(θi, θi) @ np.linalg.inv(Si + V))
        
    return -log_ll    

In [62]:
Vvec = np.array([10., 0, 10.])
logll(Vvec, θhat_vec, S)

536.1436769422421

In [63]:
def grad_logll(V, θ, S):
    # the gradient of the log
    # likelihood function
    # Unflatten V into a matrix
    d = S[0].shape[0]
    V = return_to_symmetric(V, d)
    N = len(S)
    Gvec = np.zeros((d, d))
    for i in range(N):
        Si = S[i]
        θi = θ[i, :]
        SV_inv = np.linalg.inv(Si + V)
        G = -(1 / 2) * SV_inv
        G += (1 / 2) * np.dot(SV_inv,np.dot(np.outer(θi, θi),SV_inv))
        Gvec += G

    Gvec = extract_upper_triangle(Gvec)
    return -Gvec

In [64]:
grad_logll(Vvec, θhat_vec, S)

array([ 0.84555143,  0.64202002, -0.21064045])

```neg_logll_grad``` is an additional function which calucaltes the log likelihood and the gradient together. This is so that we can use ```fmin_l_bfgs_b``` much better

In [65]:
def neg_logll_grad(V, θ, S):
    
    # returns negative log likelihood and negative
    # of the gradient
  
    # Unflatten V into a matrix
    d = S[0].shape[0]
    V = return_to_symmetric(V, d)
    Gvec = np.zeros((d, d))
    
    N = len(S)
    log_ll = 0
    
    for i in range(N):
        
    
        Si = S[i]
        θi = θ[i, :]
        d, ddash = Si.shape
        assert d == ddash # Each S has to be a square matrix
  
        # calculate log likelihood
        log_ll += -(d/2) * np.log(2 * np.pi)
        log_ll += -(1/2) * np.log(np.linalg.det(Si + V))
        log_ll += -(1/2) * np.trace(np.outer(θi, θi) @ np.linalg.inv(Si + V))
        
        
        # calculate gradient
        SV_inv = np.linalg.inv(Si + V)
        G = -(1 / 2) * SV_inv
        G += (1 / 2) * np.dot(SV_inv,np.dot(np.outer(θi, θi),SV_inv))
        
        Gvec += G

    Gvec = extract_upper_triangle(Gvec)
    
    return -log_ll, -Gvec    
    
    

In [66]:
neg_logll_grad(Vvec, θhat_vec, S)

(536.1436769422421, array([ 0.84555143,  0.64202002, -0.21064045]))

In [70]:
# Lets do some optimizing!

def solve(negloglik, est_init, 
          negloglike_args, bounds):
    
    # == Solves our MLE problem == #
    
    # convert matrix to a single dimension vector
    est_init_array = extract_upper_triangle(est_init)
    
    # 

    result = fmin_l_bfgs_b(
        negloglik, 
        est_init_array,
        fprime = None,
        args = negloglike_args,
        bounds = bounds
    )
    
    return result

In [71]:
solve(neg_logll_grad, np.array([[10., 0.],
                                [0., 10.]]),
     (θhat_vec, S), [(1e-5, None),
                    (None, None),
                    (1e-5, None)])

(array([ 7.13293055, -2.26934301, 10.82120616]),
 532.8231165666621,
 {'grad': array([-4.47869097e-05, -2.34670997e-05, -4.95931317e-05]),
  'task': b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH',
  'funcalls': 9,
  'nit': 7,
  'warnflag': 0})