In [1]:
import numpy as np 
import matplotlib.pyplot as plt
from scipy.linalg import toeplitz
from utils import sample_PLN
import scipy.linalg as SLA
from fastPLN import fastPLN

device :  cpu


In [2]:
%load_ext autoreload

%autoreload 2

$$
\begin{aligned}
W_{i} & \sim \mathcal{N}\left(0, I_{q}\right), \text { iid, } \quad i=1, \ldots, n \\
Z_{i} &=\beta \mathbf{x}_{i}+\mathbf{C} W_{i}, \quad i \in 1, \ldots, n \\
Y_{i j} \mid Z_{i j} & \sim \mathcal{P}\left(\exp \left(o_{i j}+Z_{i j}\right)\right)
\end{aligned}
$$

We want to use the EM algorithm, that is : 

$$
\theta^{h+1}=\underset{\theta}{\arg \max} \mathbb{E}_{\theta^{h}}\left[\log p_{\theta}(Y, Z) \mid Y\right]
$$

But we can't compute the term in the argmax. We thus choose to approximate this with Monte Carlo method. 

sizes : 

$ Y : (n,p)$ 

$O : (n,p)$ 

$C :  (p,q)$ 

covariates ($x$) : $(n,d)$

$\beta : (d,p)$

In [32]:
def build_block_Sigma(p,k): 
    '''
    build a matrix per block of size (p,p). There will be k+1 blocks of size p//k.
    The first k ones will be the same size. The last one will be smaller (size (0,0) if k%p = 0)
    '''
    
    np.random.seed(0)
    alea = np.random.randn(k+1)**2+1# will multiply each block by some random quantities 
    Sigma = np.zeros((p,p))
    block_size,last_block_size = p//k, p%k
    for i in range(k): 
        Sigma[i*block_size : (i+1)*block_size ,i*block_size : (i+1)*block_size] = alea[i]*0.95**np.arange(block_size)
    if last_block_size >0 :
        Sigma[-last_block_size:,-last_block_size:] = alea[k]*toeplitz(0.98**np.arange(last_block_size))
    return Sigma+0.1*toeplitz(0.95**np.arange(p))


def C_from_Sigma(Sigma,q): 
    w,v = SLA.eigh(Sigma)
    C_reduct = v[:,-q:]@np.diag(np.sqrt(w[-q:]))
    return C_reduct

In [33]:
d = 4 # nb of cavariates
n = 200; p = 20
q = 10

In [34]:
#torch.manual_seed(0)
true_Sigma = build_block_Sigma(p,8)
true_C = C_from_Sigma(true_Sigma, q)
true_beta = np.random.randn(d, p)

covariates = np.random.randn(n,d)
O =  1+np.zeros((n,p))
sample_model = sample_PLN()
Y_sampled, Z_sampled  = sample_model.sample(true_Sigma,true_beta, O, covariates)

In [281]:
class MC_PLNPCA(): 
    
    def __init__(self,q): 
        self.q = q
        pass
    
    def init_data(self, Y,O,covariates): 
        #np.random.seed(0)
        self.Y = Y 
        self.covariates = covariates 
        self.O = O 
        self.n = Y.shape[0] 
        self.p = Y.shape[1]
        self.d = self.covariates.shape[1]
        noise = np.random.randn(self.p) 
        self.Sigma =  (np.diag(noise**2)+ 1e-1)
        self.C = C_from_Sigma(self.Sigma,self.q)
        self.beta = np.random.randn(self.d,self.p)
        
        self.beta = true_beta
        self.C = true_C
        
    def compute_single_log_like(self, i, acc):
        N_iter = int(1/acc)
        E = 0 
        for _ in range(N_iter): 
            W = np.random.randn(q)
            E -= 1/2*SLA.norm(W)**2
            E -= np.sum(np.exp(self.O[i,:]+self.beta.T@self.covariates[i,:]+ self.C@W)) 
            E+= np.sum((self.O[i,:]+self.beta.T@self.covariates[i,:]+ self.C@W)*self.Y[i,:])
            
        E/= N_iter
        return E
    
    def batch_log_like(self,acc): 
        batch_E = 0
        for i in range(10): 
            batch_E += self.compute_single_log_like(i,acc) 
        return batch_E
    
    def single_grad_beta_log_like(self,i, acc): 
        N_iter = int(1/acc)
        grad = 0
        for _ in range(N_iter): 
            W = np.random.randn(q)
            grad += self.covariates[i,:].T.reshape(-1,1)@(np.exp(self.O[i,:]+ self.covariates[i,:]@self.beta+self.C@W)).reshape(1,-1)
            grad += self.covariates[i,:].T.reshape(-1,1)@(self.Y[i,:].reshape(1,-1))
        return grad/N_iter
    
    def batch_grad_beta(self, acc): 
        batch_grad = 0
        for i in range(10): 
            batch_grad += self.single_grad_beta_log_like(i,acc) 
        return batch_grad
        

In [284]:
model = MC_PLNPCA(q)
model.init_data(Y_sampled, O, covariates)
SLA.norm(model.batch_grad_beta(0.001))
#SLA.norm(model.single_grad_beta_log_like(10, 0.001))
#model.compute_batch_log_like(0.001)

71942.72567186001

In [204]:
model = MC_PLNPCA(q)
model.init_data(Y_sampled, O, covariates)
model.compute_batch_log_like(0.001)

+ : -107.47812466084537
+ : -14.162344032883682
+ : 31557.749059227102
+ : 9368.355286172467
+ : 33057.72813548736
+ : -10815.753845064106
+ : 26965.51386043473
+ : -120.0552367168073
+ : 2045.650944045968
+ : -103.53783526969535


91834.00989962327

In [11]:
model = fastPLN()
model.fit(Y_sampled, O, covariates, 100)

device :  cpu


In [17]:
np.mean((model.Sigma.detach().numpy()-true_Sigma)**2)

0.0705000298215757