In [None]:
import pandas as pd
import numpy as np
import os
import scipy.stats as stats
from utils import load_data, val_loglhood, loglhood

# Gibbs Sampling

In [None]:
def gibbs_sampling(iters, data_path, K, p, q, n_rows=None, debug=False):
    """
    Draw `iters` samples of the parameters (A, CovU) by resampling the
    flat parameter vector theta one coordinate at a time.

    On every sweep each entry of theta is redrawn from `q`, and redrawn
    again for as long as `val_loglhood` returns -inf for the resulting
    theta.

    iters: number of sweeps; one sample is stored per sweep.
    data_path: path where data is saved.
    K: number of plants (n_plants in load_data function).
    p: past time to be considered.
    q: sample distribution for parameters; must provide `rvs()`.
    n_rows: number of rows of the data to load (forwarded to load_data).
    debug: print extra information; also forwarded to init_parameters
        and val_loglhood.

    Returns a list with one `[A, CovU]` pair per sweep, where A has
    shape (K, K*p) and CovU has shape (K, K). Every stored array owns
    its data, so later sweeps do not modify earlier samples.
    """

    print('Loading data...')
    Y0, X = load_data(data_path, K, p, resample_rule='10T', n_rows=n_rows)
    if debug:
        print('Y0 shape: {}'.format(Y0.shape))
        print('X shape: {}'.format(X.shape))

    # Theta is the vector of all parameters that will be sampled.
    # A and CovU are flattened into this single 1-D vector: the first
    # p*K**2 entries hold A, the remaining K**2 entries hold CovU.
    theta = init_parameters(K, p, q, Y0, X, debug)
    if debug:
        print('Parameters intialized!')

    n_coef = p * K**2
    samples = []
    for i in range(iters):
        print('Iteration {}'.format(i))

        # Loop over all parameters; redraw each one, given all others,
        # until the log-likelihood of theta is not -inf.
        for j in range(theta.shape[0]):
            while True:
                theta[j] = q.rvs()
                lk = val_loglhood(theta, Y0, X, debug)
                if lk != -np.inf:
                    break

        # reshape/swapaxes only return views into theta, and theta is
        # overwritten in place on the next sweep. Copy A so that each
        # stored sample keeps the values it had when it was recorded.
        A = np.reshape(theta[:n_coef], (K*p, K)).swapaxes(0, 1).copy()

        # The CovU slice is treated as a factor and squared; np.dot
        # allocates a new array, so no copy is needed here.
        # NOTE(review): val_loglhood above receives the un-squared
        # slice -- confirm that is the intended parameterization.
        CovU = np.reshape(theta[n_coef:], (K, K)).swapaxes(0, 1)
        CovU = np.dot(CovU.T, CovU)
        samples.append([A, CovU])
    print('Finished!')
    return samples
        
    
def init_parameters(K, p, q, Y0, X, debug=False):
    """
    Build an initial parameter vector theta that the likelihood accepts.

    A candidate theta of length K**2 * (p + 1) is drawn entry by entry
    from `q`. Its last K**2 entries (the CovU part) are replaced by the
    flattened product of that K x K block with its own transpose, which
    makes it positive semidefinite. Candidates are redrawn until
    `val_loglhood` returns something other than -inf, and that theta is
    returned.

    K: number of plants.
    p: past time to be considered.
    q: sample distribution for parameters; must provide `rvs()`.
    Y0, X: data passed through unchanged to val_loglhood.
    debug: print progress; also forwarded to val_loglhood.
    """
    if debug:
        print('Initializing parameters...')

    n_cov = K ** 2
    n_params = n_cov * (p + 1)

    while True:
        # One independent draw per parameter.
        theta = np.array([q.rvs() for _ in range(n_params)], dtype=float)

        # Force CovU to be positive semidefinite: F . F^T for the raw
        # K x K block F, written back in row-major order.
        factor = np.reshape(theta[-n_cov:], (K, K))
        theta[-n_cov:] = np.dot(factor, factor.T).ravel()

        lk = val_loglhood(theta, Y0, X, debug)
        if debug:
            print('LK = {}'.format(lk))
        if lk == -np.inf:
            # Rejected by the likelihood: draw a fresh candidate.
            continue
        return theta
        

# Test

In [None]:
# Configuration for a short smoke run of the sampler.
# NOTE: DATA_PATH is machine-specific; point it at your local copy.
DATA_PATH = '/home/chrisams/Documents/datasets/data_TAIM/processed/'
n_rows = 10000  # Number of rows of the data to load

# Model size: K plants, p past time steps.
K, p = 3, 1

# Sampler settings: number of sweeps and verbose output.
iters, debug = 2, True

# Standard normal distribution used to draw every parameter.
q = stats.norm(loc=0, scale=1)

In [None]:
# Run the sampler with the configuration defined above.
samples = gibbs_sampling(
    iters=iters,
    data_path=DATA_PATH,
    K=K,
    p=p,
    q=q,
    n_rows=n_rows,
    debug=debug,
)

In [None]:
# Show the collected [A, CovU] pairs as the notebook cell output.
samples