In [1]:
import pandas as pd
import numpy as np
import os
import scipy.stats as stats
from utils import load_data, val_loglhood, loglhood

# Gibbs Sampling

In [13]:
def gibbs_sampling(iters, data_path, K, p, q, n_rows=None, debug=False):
    """
    Draw `iters` parameter samples by redrawing one coordinate at a time.

    Each sweep visits every entry of the flat parameter vector `theta`,
    replaces it with a draw from `q`, and keeps redrawing that entry until
    `val_loglhood` returns something other than -inf for the updated vector.
    After the sweep, `theta` is unpacked into the pair `[A, CovU]`.

    iters: number of sweeps; one `[A, CovU]` sample is stored per sweep.
    data_path: path where data is saved.
    K: number of plants (n_plants in load_data function).
    p: time interval to consider. The first p*K**2 entries of `theta` form A,
       the remaining K**2 entries form CovU.
    q: sample distribution for parameters; only `q.rvs()` is used.
    n_rows: forwarded unchanged to load_data.
    debug: when True, print progress messages.

    Returns a list of length `iters`; element i is `[A, CovU]` with A of
    shape (K, K*p) and CovU of shape (K, K). Each array is an independent
    copy, so later sweeps do not alter samples that were already stored.
    """
    if debug:
        print('Loading data...')
    Y0, X = load_data(data_path, K, p, resample_rule='10T', n_rows=n_rows)
    if debug:
        print('Y0 shape: {}'.format(Y0.shape))
        print('X shape: {}'.format(X.shape))
    theta = init_parameters(K, p, q, Y0, X, debug)
    if debug:
        print('Parameters intialized!')

    n_coef = p * K ** 2  # number of leading entries of theta that belong to A
    samples = []
    for i in range(iters):
        if debug:
            print('Iteration {}'.format(i))
        for j in range(theta.shape[0]):
            # Redraw coordinate j until the likelihood is not -inf.
            while True:
                theta[j] = q.rvs()
                lk = val_loglhood(theta, Y0, X, False)
                if lk != -np.inf:
                    break
        # reshape() and swapaxes() return views into `theta`, which is
        # overwritten in place on the next sweep. Copy the arrays so each
        # stored sample keeps the values it had when it was drawn.
        A = np.reshape(theta[:n_coef], (K * p, K)).swapaxes(0, 1).copy()
        CovU = np.reshape(theta[n_coef:], (K, K)).swapaxes(0, 1).copy()
        samples.append([A, CovU])
    if debug:
        print('Finished!')
    return samples
        
    
def init_parameters(K, p, q, Y0, X, debug=False):
    """
    Draw an initial flat parameter vector whose log-likelihood is not -inf.

    A vector of K**2 * (p + 1) entries is filled with independent draws from
    `q`. Its last K**2 entries are then replaced by M @ M.T, where M is those
    entries reshaped to (K, K). The vector is returned as soon as
    `val_loglhood` gives a value different from -inf; otherwise a completely
    new vector is drawn.

    K, p: sizes that determine the length of the vector (see above).
    q: distribution object; only `q.rvs()` is used.
    Y0, X: data, passed through unchanged to `val_loglhood`.
    debug: when True, print a start message and the log-likelihood of every
           attempt. Nothing is printed when False.

    Returns the accepted 1-D numpy array.
    """
    if debug:
        print('Initializing parameters...')
    n_cov = K ** 2  # number of trailing entries that hold CovU
    while True:
        theta = np.zeros(K ** 2 * (p + 1))
        for i in range(theta.shape[0]):
            theta[i] = q.rvs()

        # Force CovU to be positive semidefinite: a matrix times its own
        # transpose always is. The product is symmetric, so flattening it
        # row-wise or column-wise gives the same entries.
        covu = np.reshape(theta[-n_cov:], (K, K)).T
        covu = np.dot(covu.T, covu)
        theta[-n_cov:] = np.reshape(covu, n_cov)

        lk = val_loglhood(theta, Y0, X, False)
        if debug:
            print('LK = {}'.format(lk))
        if lk != -np.inf:
            return theta
        

In [14]:
# Directory with the processed plant data. Set the TAIM_DATA_PATH environment
# variable to point elsewhere instead of editing this machine-specific default.
DATA_PATH = os.environ.get(
    'TAIM_DATA_PATH',
    '/home/chrisams/Documents/datasets/data_TAIM/processed/',
)
# Fixed seed so a fresh kernel reproduces the same draws: q.rvs() uses NumPy's
# global random state when no random_state is given.
SEED = 0
np.random.seed(SEED)
q = stats.norm(loc=0, scale=1)  # distribution every parameter is drawn from
K = 3  # number of plants
p = 1
iters = 2  # number of sampling sweeps
debug = True
n_rows = 10000 # Number of rows of the data to load

In [15]:
# Run the sampler with the settings defined in the configuration cell.
samples = gibbs_sampling(
    iters=iters,
    data_path=DATA_PATH,
    K=K,
    p=p,
    q=q,
    n_rows=n_rows,
    debug=debug,
)

Loading data...
File "plant_1" loaded!
Original shape: (398284, 1)
nans: 0
Resample data shape: (398660,)
Final shape: (10000,)
File "plant_3" loaded!
Original shape: (398289, 1)
nans: 0
Resample data shape: (398660,)
Final shape: (10000,)
File "plant_6" loaded!
Original shape: (398382, 1)
nans: 0
Resample data shape: (398660,)
Final shape: (10000,)
Y0 shape: (3, 9999)
X shape: (3, 9999)
Initializing parameters...
LK = -inf
LK = -inf
LK = -inf
LK = -inf
LK = -inf
LK = -inf
LK = -257371.3736437603
Parameters intialized!
Iteration 0
Iteration 1
Finished!


In [9]:
# NOTE(review): the output recorded below carries execution count 9, which is
# earlier than the gibbs_sampling definition (In [13]) and its call (In [15]).
# It is a single 2 x 18 array, whereas the current function returns a list of
# [A, CovU] pairs - presumably stale; re-run this cell to refresh it.
samples

array([[ 1.56149264, -0.63873975,  1.03785602,  1.52149532,  0.55808924,
        -0.61124928, -0.54369528,  0.6056211 , -0.94443797,  1.61410949,
        -1.28113447, -0.70382613,  0.46911877,  1.09661939,  0.11317673,
        -0.85330189,  0.06409345,  1.04866525],
       [ 1.56149264, -0.63873975,  1.03785602,  1.52149532,  0.55808924,
        -0.61124928, -0.54369528,  0.6056211 , -0.94443797,  1.61410949,
        -1.28113447, -0.70382613,  0.46911877,  1.09661939,  0.11317673,
        -0.85330189,  0.06409345,  1.04866525]])