In [197]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import interp1d

import gpflow
from gpflow.utilities import print_summary, positive

import tensorflow as tf
from tensorflow import math as tfm
from tensorflow_probability import bijectors as tfb
from tensorflow_probability import distributions as tfd
from tensorflow_probability import mcmc

from load_puma_data import load_barenco_puma
import math
import random

# π as a float64 TensorFlow constant.
PI = tf.constant(math.pi, dtype='float64')
# Apply matplotlib's 'ggplot' style sheet to subsequent figures.
plt.style.use('ggplot')
# IPython magic: render matplotlib figures inline in the notebook.
%matplotlib inline

In [390]:
# Load the dataset through the project-local loader.
# NOTE(review): the loader is not visible from this notebook; the only thing
# established here is that Y and Y_var are later split into "all rows but the
# last" and "the last row" — confirm the meaning of each returned object
# against load_puma_data.
df, genes, genes_se, Y, Y_var = load_barenco_puma()


N_m = 7               # Number of observations
def calc_N_p(N_p, num_disc=8):
    '''Return the size of a discretised grid that keeps t as a subset of τ.

    Starting from N_p points, (N_p - 1) further points are added once per
    discretisation level, i.e. N_p + num_disc * (N_p - 1) for a non-negative
    integer num_disc.

    Args:
        N_p: number of observation time points.
        num_disc: number of points added inside each of the (N_p - 1) gaps;
            values <= 0 leave the count unchanged.

    Returns:
        The total number of discretised time points.
    '''
    # Iterative rather than recursive, so that a fine discretisation
    # (num_disc above the interpreter's recursion limit) does not raise
    # RecursionError. The additions happen in the same order as before.
    total = N_p
    while num_disc > 0:
        total = N_p - 1 + total
        num_disc -= 1
    return total
num_disc = 10
N_p = calc_N_p(N_m, num_disc)  # Number of time discretisations
t = np.arange(N_m)*2           # Observation times
# Span τ over exactly the observed range instead of a hard-coded 0..12, so that
# t remains a subset of τ (t[n] == τ[n*(num_disc+1)]) if N_m is changed.
τ = np.linspace(t[0], t[-1], N_p)    # Discretised observation times
num_genes = 5
I = 1 # Number of TFs

# Split the loaded values and variances: every row but the last goes to
# m / σ2, the last row goes to f / σ2_f (kept 2-D by np.atleast_2d).
# NOTE(review): presumably the first rows are the target genes and the last row
# is the transcription factor — confirm against the loader.
m = np.float32(Y[:-1])
f = np.float32(np.atleast_2d(Y[-1]))
σ2 = np.float32(Y_var[:-1])
σ2_f = np.float32(np.atleast_2d(Y_var[-1]))

## Metropolis-Hastings Custom MCMC Algorithm

In [458]:
# Initial parameter values and proposal scales
w_j0 = 1    # Interaction bias (TODO)
fbar_i = np.ones((N_p,), dtype=np.float32)  # current fbar; exponentiated inside the likelihoods
v_i = 1 # Fix this to 1 if translation model is not used (pg.8)
l2_i = 0.05  # kernel term: K = v_i * exp(-Δτ² / (2 * l2_i)) in fbar_prior_params
δbar_i = np.float32(1)  # current δbar; exponentiated inside m_likelihood
h_δ = 1  # scale of the Normal proposal for δbar
# Diagonal scales of the MultivariateNormalDiag proposals for fbar and kbar.
h_f = tf.ones([N_p], dtype=tf.float32)
h_k = tf.ones([4], dtype=tf.float32)
# Not read by the sampler cell, which uses a literal 0.1 for the w proposal.
h_w = tf.ones([num_genes], dtype=tf.float32)
# loc and scale passed to the Normal used by kbar_prior and δbar_prior.
a = tf.constant(-0.5, dtype=tf.float32)
b2 = tf.constant(2., dtype=tf.float32)
# One row per gene; the four columns are a_j, b_j, d_j, s_j (exponentiated
# inside m_likelihood).
kbar_i = np.ones((num_genes, 4), dtype=np.float32)
w_j = np.full((num_genes, I), 0.5)  # float64, one row per gene and one column per TF
σm2 = np.ones((num_genes,), dtype=np.float32)  # added to σ2 in the scale of the mRNA likelihood

In [459]:
# Render floating-point array elements with 30 digits after the decimal point.
np.set_printoptions(formatter={'float': "{0:0.30f}".format})

# Priors
def w_j_prior(w_j): # At the moment this is the same as w_j0 (see pg.8)
    '''Prior density of an interaction weight under Normal(0, 2).

    The distribution is the same one used by `w_j0_prior`.

    Args:
        w_j: interaction weight(s) at which the density is evaluated.

    Returns:
        The density, converted with np.float64.
    '''
    # Evaluate at the argument. The module-level bias `w_j0` was used here
    # before, which made the prior constant in w_j so that it always cancelled
    # in the Metropolis-Hastings ratio.
    # Cast first: the proposals are float64 while Normal(0, 2) is float32.
    weight = tf.cast(w_j, 'float32')
    return np.float64(tfd.Normal(0, 2).prob(weight))
def w_j0_prior(w_j0):
    '''Evaluate the Normal(0, 2) prior density at the interaction bias `w_j0`.'''
    bias_prior = tfd.Normal(0, 2)
    return bias_prior.prob(w_j0)
    
def kbar_prior(kbar):
    '''Prior density of `kbar`: the product of the element-wise Normal(a, b2) densities.

    `a` and `b2` are the module-level loc and scale constants.
    '''
    elementwise = tfd.Normal(a, b2).prob(kbar)
    return np.prod(elementwise)

def fbar_prior_params():
    '''Return the mean vector and covariance matrix of the prior over fbar.

    The covariance is v_i * exp(-(τ[i] - τ[j])**2 / (2 * l2_i)) over the
    module-level grid τ; the mean is a zero vector of length N_p.

    Returns:
        (mean, cov) as NumPy arrays of shape (N_p,) and (N_p, N_p).
    '''
    # The reshape keeps the requirement that τ holds exactly N_p points.
    grid = np.reshape(τ, [N_p])
    # Pairwise differences τ[i] - τ[j] via broadcasting.
    diffs = grid[:, None] - grid[None, :]

    cov = v_i * np.exp(-np.square(diffs)/(2*l2_i))
    mean = np.zeros(N_p)
    return mean, cov

def fbar_prior(fbar):
    '''Density of `fbar` under the multivariate normal built from fbar_prior_params().

    The mean and covariance are cast to float32 before the distribution is
    constructed; the density is converted with np.float64.
    '''
    mean, cov = (tf.cast(param, 'float32') for param in fbar_prior_params())
    gp_prior = tfd.MultivariateNormalFullCovariance(mean, cov)
    return np.float64(gp_prior.prob(fbar))
# Evaluates the prior at the initial fbar; the value is not assigned.
fbar_prior(fbar_i)


def δbar_prior(δ):
    '''Density of `δ` under Normal(a, b2), converted with np.float64.

    `a` and `b2` are the module-level loc and scale constants.
    '''
    density = tfd.Normal(a, b2).prob(δ)
    return np.float64(density)

# Likelihood
def G(n, k, p_i, d=None, w=None):
    '''Per-gene integrand at grid index k for observation index n.

    Computes
        exp(-d * (t[n] - τ[k])) / (1 + exp(-w_j0 - Σ_i w[:, i] * log(p_i[k])))

    Args:
        n: index into the observation times `t`.
        k: index into the discretised times `τ` and into `p_i`.
        p_i: sequence indexed by k; its logarithm enters the denominator.
        d: per-gene decay rates. Defaults to exp(kbar_i[:, 2]), the column
            that m_likelihood unpacks as d_j.
        w: interaction weights of shape (num_genes, I). Defaults to the
            module-level `w_j`.

    Returns:
        An array with one value per gene.
    '''
    # TODO add binary vector x_j for prior network knowledge (pg.3)
    # `d` and `w` used to be read as globals, but neither is assigned anywhere
    # in the visible notebook, so a fresh kernel raised NameError. They are
    # now optional arguments that fall back to the current parameter state.
    if d is None:
        d = np.exp(kbar_i[:, 2])
    if w is None:
        w = w_j
    numerator = np.exp(-d*(t[n]-τ[k]))
    denominator = (1+np.exp(-w_j0-sum([w[:,i]*np.log(p_i[k]) for i in range(I)])))
    return numerator/denominator

    
def _transcription_integrand(n, k, p_i, d_j, w):
    '''Per-gene integrand at grid index k for observation index n.

    Evaluates the same expression as G(n, k, p_i), but takes the decay rates
    `d_j` and interaction weights `w` as arguments instead of reading globals.
    '''
    activation = sum(w[:, i]*np.log(p_i[k]) for i in range(I))
    return np.exp(-d_j*(t[n]-τ[k])) / (1 + np.exp(-w_j0 - activation))


def m_likelihood(δbar, fbar, kbar, for_gene_only=False):
    '''Likelihood of the observed mRNA levels `m` for the given parameters.

    Args:
        δbar: log-space δ (exponentiated here).
        fbar: log-space f on the τ grid (exponentiated here).
        kbar: log-space kinetic parameters. Either a (num_genes, 4) array with
            columns a_j, b_j, d_j, s_j, or, together with `for_gene_only=j`, a
            length-4 vector for gene j alone (the remaining rows are then
            taken from the module-level `kbar_i`).
        for_gene_only: False to multiply the densities of all genes, otherwise
            the index j of the single gene to evaluate.

    Returns:
        The product of the Normal densities of the predicted mRNA levels.

    Raises:
        ValueError: if `kbar` is 1-D and `for_gene_only` is False.
    '''
    # Take relevant parameters out of log-space
    δ = np.exp(δbar)
    f_i = np.exp(fbar)
    # Use the `kbar` argument: the global `kbar_i` was read here before, so a
    # proposed kbar never changed the likelihood.
    kbar = np.asarray(kbar, dtype='float32')
    if kbar.ndim == 1:
        if for_gene_only is False:
            raise ValueError('a 1-D kbar requires for_gene_only to name its gene')
        kbar_all = np.array(kbar_i, dtype='float32')  # copy; kbar_i is left untouched
        kbar_all[for_gene_only] = kbar
        kbar = kbar_all
    a_j, b_j, d_j, s_j = (np.exp(kbar[:, col]) for col in range(4))

    # Define p_i vector
    # NOTE(review): np.trapz is called without dx/x, so unit spacing is used
    # rather than the τ spacing, and the range stops before k == n — confirm.
    p_i = np.zeros(N_p)
    for n in range(N_p):
        p_i[n] = np.trapz([f_i[k] * np.exp(-δ*(τ[n]-τ[k])) for k in range(n)])

    # Calculate m_pred
    # n*(num_disc+1) is the τ index paired with the n-th observation (the same
    # indexing as f_likelihood); the 10 used to be hard-coded here.
    grid_step = num_disc + 1
    m_pred = np.zeros((num_genes, N_m), dtype='float32')
    for n in range(N_m):
        ys = np.array([_transcription_integrand(n, k, p_i, d_j, w_j)
                       for k in range(n*grid_step)])
        if ys.shape[0] == 0:
            # No grid points before the first observation: empty integral.
            ys = np.zeros((0, num_genes))
        integrals = np.trapz(ys, axis=0)
        m_pred[:, n] = b_j/d_j + (a_j-b_j/d_j) * np.exp(-d_j*t[n]) + s_j*integrals

    # `is False` (not truthiness) so that gene index 0 selects a single gene.
    gene_indices = range(num_genes) if for_gene_only is False else [for_gene_only]
    lik = 1
    for j in gene_indices:
        # NOTE(review): tfd.Normal takes a standard deviation as its second
        # argument, while σ2 and σm2 look like variances by name — confirm
        # whether a square root is intended.
        prob = tfd.Normal(m[j], σ2[j]+σm2[j]).prob(m_pred[j])
        lik *= tf.reduce_prod(prob)
    return lik

def f_likelihood(fbar, i=0): 
    '''TODO this should be for the i-th TF

    Likelihood of the observed `f[i]` given log-space `fbar`: exp(fbar) is
    sampled at every (num_disc + 1)-th grid point, N_m points in total, and
    scored under Normal(f[i], σ2_f[i]).
    '''
    exp_f = np.exp(fbar)
    # τ indices paired with the observations; a separate loop name avoids
    # shadowing the parameter `i`.
    obs_idx = [n*num_disc+n for n in range(N_m)]
    f_at_obs = np.float32(np.atleast_2d([exp_f[idx] for idx in obs_idx]))
    density = tfd.Normal(f[i], σ2_f[i]).prob(f_at_obs[i])
    return tf.reduce_prod(density)


In [None]:
print('----- Metropolis Begins -----')

# Trace of accepted proposals for δ and, per gene, for k and w.
# NOTE(review): values are appended only when a proposal is accepted, so a
# rejected iteration leaves no entry — confirm this is the intended trace.
params = {'δ': list(), 
          'k': [list() for _ in range(num_genes)], 
          'w': [list() for _ in range(num_genes)]
         }
T = 100  # number of sampler iterations


# Replaces the h_f set with the other parameters.
h_f =0.4*tf.ones(N_p, dtype='float32')

for iteration_number in range(T):
    # Report progress on every 10th iteration (the test used to be inverted and
    # the f-string below was missing its closing quote).
    if iteration_number % 10 == 0:
        print(f'{100*iteration_number/T:.2f}% complete')
    # Untransformed tf mRNA vectors F
    for i in range(I):
        # Gibbs step
        # NOTE(review): z_i is not used further down in this cell.
        z_i = tfd.MultivariateNormalDiag(fbar_i, h_f).sample()
        # MH
        # The prior mean/covariance get their own names: unpacking into `m, K`
        # here overwrote the module-level observations `m` that m_likelihood
        # reads.
        fbar_mean, fbar_cov = fbar_prior_params()
        fbar_mean = tf.cast(fbar_mean, 'float32')
        fbar_cov = tf.cast(fbar_cov, 'float32')

        # Draw from the prior, then perturb with the diagonal proposal scale.
        fstar = tfd.MultivariateNormalFullCovariance(fbar_mean, fbar_cov).sample()
        fstar = tfd.MultivariateNormalDiag(fstar, h_f).sample()
        g = lambda fbar: m_likelihood(δbar_i, fbar, kbar_i) * f_likelihood(fbar)
        acceptance = g(fstar)/g(fbar_i)
        if random.random() < min(1, acceptance):
            fbar_i = fstar

    # Log of translation ODE degradation rates
    for i in range(I):
        # Proposal distribution
        Q = tfd.Normal(δbar_i, h_δ)
        δstar = Q.sample() # δstar is in log-space, i.e. δstar = δbar*
        g = lambda δbar: m_likelihood(δbar, fbar_i, kbar_i) * δbar_prior(δbar)
        acceptance = min(1, g(δstar)/g(δbar_i))
        if random.random() < acceptance:
            δbar_i = δstar
            params['δ'].append(δstar)
            
    # Log of transcription ODE kinetic params
    for j in range(num_genes):
        Q = tfd.MultivariateNormalDiag(kbar_i[j], h_k)
        kstar = Q.sample()
        g = lambda kbar: m_likelihood(δbar_i, fbar_i, kbar, for_gene_only=j) * kbar_prior(kbar)
        acceptance = min(1, g(kstar)/g(kbar_i[j]))
        if random.random() < acceptance:
            kbar_i[j] = kstar
            params['k'][j].append(kstar)

    # Interaction weights and biases
    for j in range(num_genes):
        Q = tfd.Normal(w_j[j], 0.1)#h_w[j])
        wstar = Q.sample()[0]
        # NOTE(review): m_likelihood is not passed the proposed weight, so the
        # likelihood factor is the same in the numerator and the denominator
        # of the ratio below — only the prior term can differ.
        g = lambda w: m_likelihood(δbar_i, fbar_i, kbar_i, for_gene_only=j) * w_j_prior(w)
        acceptance = min(1, g(wstar)/g(w_j[j][0]))
        if random.random() < acceptance:
            w_j[j] = wstar
            params['w'][j].append(wstar)

    # Noise variances
    
    # Length scales and variances of GP kernels

----- Metropolis Begins -----


