In [1]:
import pandas as pd
import numpy as np
import os
import time
import scipy.stats as stats
from utils import load_data, val_loglhood, loglhood, jump_dst, reconstruct_coefs

# Gibbs Sampling

In [2]:
def gibbs_sampling(iters, data_path, K, p, q, mh_iters=1, n_rows=None, debug=False, method='normal'):
    """
    Gibbs sampler in which every coordinate of the parameter vector is
    updated with a short Metropolis-Hastings chain.

    iters: number of full sweeps over the parameter vector.
    data_path: path where data is saved (forwarded to load_data).
    K: number of plants (n_plants in load_data function).
    p: past time to be considered.
    q: distribution object; forwarded to init_parameters (initial draws)
       and to metropolis_hastings (jumping distribution).
    mh_iters: number of Metropolis-Hastings steps per coordinate update.
    n_rows: forwarded to load_data.
    debug: when True, print the shapes of the loaded data and forward the
           flag to the helpers.
    method: 'normal' or 'personalized'; forwarded to the helpers and used
            to decide how theta is turned back into (A, CovU).

    Returns a list with one [A, CovU] pair per sweep.
    """
    print('Loading data...')
    Y0, X = load_data(data_path, K, p, resample_rule='10T', n_rows=n_rows)
    if debug:
        print('Y0 shape: {}'.format(Y0.shape))
        print('X shape: {}'.format(X.shape))
    # theta is the vector of all parameters that will be sampled:
    # A and CovU reshaped into a single 1-D vector.
    theta = init_parameters(K, p, q, Y0, X, debug=debug, method=method)
    if debug:
        print('Parameters intialized!')
    n_coefs = p * K ** 2  # number of leading entries of theta that belong to A
    samples = []
    for i in range(iters):
        start_it = time.time()
        print('Iteration {}'.format(i))
        # Loop over all parameters and, for each parameter theta[j],
        # do a MH sampling over the distribution of theta[j] given theta[-j].
        for j in range(theta.shape[0]):
            start = time.time()
            mh_samples = metropolis_hastings(theta, j, q, mh_iters, Y0, X, K, debug, method=method)
            end = time.time()
            print('Time for sampling theta[{}]: {}'.format(j, end - start))
            # The new value of theta[j] is drawn uniformly from the MH chain
            # (which includes the chain's starting value).
            theta[j] = np.random.choice(mh_samples)
        if p == 1 and method == 'personalized':
            # NOTE(review): assumes reconstruct_coefs returns arrays that do not
            # share memory with theta - confirm in utils.
            A, CovU = reconstruct_coefs(theta, K)
        else:
            # reshape/swapaxes on a slice only produce a *view* of theta, and
            # theta keeps being overwritten in place by later sweeps. Copy it,
            # otherwise every stored A would end up showing the final state.
            A = np.reshape(theta[:n_coefs], (K * p, K)).swapaxes(0, 1).copy()
            CovU = np.reshape(theta[n_coefs:], (K, K)).swapaxes(0, 1)
            # np.dot allocates a new array, so CovU never aliases theta.
            CovU = np.dot(CovU.T, CovU)
        samples.append([A, CovU])
        end_it = time.time()
        print('Time for iteration {}: {}'.format(i, end_it - start_it))
    print('Finished!')
    return samples
        
    
def init_parameters(K, p, q, Y0, X, method='normal', debug=False):
    """
    Draw a starting parameter vector.

    Candidates are drawn entry by entry from q.rvs() until val_loglhood
    reports a log-likelihood different from -inf. The last K**2 entries of
    each candidate are replaced by C.T @ C (C being those entries reshaped
    to K x K) so that the covariance block is positive semidefinite.

    When p == 1 and method == 'personalized' the accepted candidate is
    re-encoded as [eigvecs(A), eigvecs(CovU), eigvals(A), eigvals(CovU)]
    and is only returned if every eigenvalue of A is real; otherwise a new
    candidate is drawn.
    """
    if debug:
        print('Initializing parameters...')

    n_cov = K ** 2
    n_params = n_cov * (p + 1)
    eigen_layout = (p == 1 and method == 'personalized')

    while True:
        # One independent draw per entry of theta.
        theta = np.fromiter((q.rvs() for _ in range(n_params)), dtype=float, count=n_params)

        # Force CovU to be positive semidefinite.
        factor = theta[-n_cov:].reshape(K, K).T
        covu = factor.T @ factor
        theta[-n_cov:] = covu.ravel()

        lk = val_loglhood(theta, Y0, X, debug, method=method, init_params=True)
        if debug:
            print('LK = {}'.format(lk))
        if lk == -np.inf:
            # Candidate rejected by the likelihood: draw again.
            continue

        print('lk init: {}'.format(lk))
        if not eigen_layout:
            return theta

        A = theta[:p * n_cov].reshape(K * p, K).T
        print(A)
        print(covu)
        eig_valuesA, eig_vecA = np.linalg.eig(A)
        eig_valuesB, eig_vecB = np.linalg.eig(covu)
        theta = np.concatenate((eig_vecA.reshape(-1), eig_vecB.reshape(-1),
                                eig_valuesA, eig_valuesB))
        print(theta)
        if np.all(np.isreal(eig_valuesA)):
            return theta
        

# Metropolis Hastings

In [3]:
# Metropolis Hastings

def metropolis_hastings(theta, j, q, iters, Y0, X, K, debug, method='normal'):
    """
    Run a Metropolis-Hastings chain on coordinate j of theta.

    theta: theta vector with all parameters; entry j is overwritten while
           the chain runs.
    j: theta index of the parameter currently being sampled.
    q: jumping distribution (its rvs and pdf are called with loc/scale).
    iters: number of chain steps.
    Y0, X, debug: forwarded unchanged to val_loglhood.
    K: forwarded to jump_dst when method == 'personalized'.
    method: 'normal' proposes with q.rvs centred on the last chain value;
            'personalized' delegates the proposal to jump_dst.

    Returns a NumPy array with iters + 1 values: the starting theta[j]
    followed by the chain state after each step.
    """
    user_std = 1
    propose_with_q = (method == 'normal')
    propose_with_jump = (method == 'personalized')

    chain = [theta[j]]  # start sample.
    lk_current = val_loglhood(theta, Y0, X, debug, method=method)
    print('init lk: {}'.format(lk_current))

    for _ in range(iters):
        # Keep proposing until the proposal has a log-likelihood other than -inf.
        lk_proposal = -np.inf
        while lk_proposal == -np.inf:
            if propose_with_q:
                x_new = q.rvs(loc=chain[-1], scale=1)
                theta[j] = x_new
            elif propose_with_jump:
                # theta is rebound to whatever jump_dst returns.
                # NOTE(review): whether that is the same array object depends
                # on jump_dst - confirm in utils.
                theta, q_eval_new, q_eval_old = jump_dst(theta, j, user_std, K)
            lk_proposal = val_loglhood(theta, Y0, X, debug, method=method)

        # Log acceptance ratio, capped at 0 (i.e. probability capped at 1).
        if propose_with_q:
            backward = q.pdf(chain[-1], loc=x_new)
            forward = q.pdf(x_new, loc=chain[-1])
            logalpha = min([lk_proposal - lk_current + np.log(backward / forward), 0])
        elif propose_with_jump:
            logalpha = min([lk_proposal - lk_current + np.log(q_eval_old / q_eval_new), 0])

        accept_prob = np.exp(logalpha)
        if stats.uniform.rvs() < accept_prob:
            # Accepted: the proposal becomes the current state.
            chain.append(theta[j])
            lk_current = lk_proposal
        else:
            # Rejected: repeat the previous state and restore theta[j].
            previous = chain[-1]
            chain.append(previous)
            theta[j] = previous
    return np.array(chain)

# Test

In [4]:
# Run configuration for the sampler test.
# The data location can be overridden with the TAIM_DATA_PATH environment
# variable so the notebook is not tied to one machine's directory layout;
# the previous hard-coded location remains the default.
DATA_PATH = os.environ.get('TAIM_DATA_PATH',
                           '/home/chrisams/Documents/datasets/data_TAIM/processed/')
q = stats.norm            # distribution handed to gibbs_sampling as `q`
K = 3                     # number of plants
p = 1                     # past time steps considered
iters = 2                 # Gibbs sweeps
debug = False
mh_iters = 10             # Metropolis-Hastings steps per parameter update
n_rows = 10000 # Number of rows of the data to load
method = 'personalized'

In [5]:
# Launch the sampler with the settings from the configuration cell.
samples = gibbs_sampling(
    iters,
    DATA_PATH,
    K,
    p,
    q,
    mh_iters=mh_iters,
    n_rows=n_rows,
    debug=debug,
    method=method,
)

Loading data...
lk init: -591159514.4846909
[[ 0.30026513  0.39074505  0.13886574]
 [ 1.36693547 -0.79227451  1.0587391 ]
 [-0.22176719 -0.33006159 -0.16244046]]
[[ 0.26155615 -0.68579614  0.25476496]
 [-0.68579614  3.08059612 -0.08844742]
 [ 0.25476496 -0.08844742  0.52334906]]
[-7.54527810e-01 -3.19290168e-01 -5.04691976e-01 -3.98443365e-01
  9.00585468e-01  2.70788841e-01  5.21469720e-01  2.94956951e-01
  8.19731305e-01 -2.27914912e-01  8.94621265e-01  3.84327444e-01
  9.72242573e-01  1.87651778e-01  1.39754030e-01 -5.29071993e-02
 -4.05511531e-01  9.12557520e-01  4.10632828e-01 -9.30147710e-01
 -1.34934949e-01  3.24617483e+00  2.22742182e-03  6.17099080e-01]
Iteration 0
init lk: -1312579.3533358953
Time for sampling theta[0]: 6.108119964599609
init lk: -571736.7083982422
Time for sampling theta[1]: 6.0409159660339355
init lk: -571736.7083982422
Time for sampling theta[2]: 6.058082342147827
init lk: -205752.80885278282
Time for sampling theta[3]: 6.10756254196167
init lk: -199227.81

KeyboardInterrupt: 

tiempo normal: 108.39511632919312

tiempo personalizado :

In [None]:
# Display the list of [A, CovU] pairs returned by gibbs_sampling.
samples

In [None]:
# Scratch setup for trying jump_dst / val_loglhood on a hand-made parameter vector.
# The data location can be overridden with the TAIM_DATA_PATH environment
# variable; the previous hard-coded location remains the default.
DATA_PATH = os.environ.get('TAIM_DATA_PATH',
                           '/home/chrisams/Documents/datasets/data_TAIM/processed/')
K = 3
# Length K*K*2 + K*2: two K x K eigenvector blocks followed by two length-K
# eigenvalue blocks, the layout init_parameters builds for p == 1 and
# method == 'personalized'.
theta_old = np.ones(K*K*2+K*2)
j = 0
user_std = 1
n_rows = 10000
debug = False

In [None]:
# Load the data with p fixed to 1 (third positional argument of load_data).
Y0, X = load_data(DATA_PATH, K, 1, resample_rule='10T', n_rows=n_rows)
# Propose a jump on coordinate j of theta_old; jump_dst returns the proposed
# vector plus two proposal-density evaluations.
theta_new, q_eval_new, q_eval_old = jump_dst(theta_old, j, user_std, K)
# NOTE(review): `method` is not passed here, so val_loglhood uses its default,
# while theta_old has the length of the eigen-decomposition layout used for
# method='personalized' - confirm this is the intended call.
lk_new = val_loglhood(theta_new, Y0, X, debug)

In [None]:
# Show the shapes of the loaded arrays, one per line.
print(Y0.shape, X.shape, sep='\n')

In [None]:
# NOTE(review): `a` is not assigned in any earlier cell of this notebook; the only
# assignment visible further down binds a NumPy array, which has no `.pdf` method.
# Presumably `a` was a scipy.stats distribution from a removed cell - confirm or
# delete this cell, since it fails on a fresh kernel.
a.pdf(1, loc=2, scale=2)

In [None]:
# NOTE(review): `b` is not assigned in any earlier cell of this notebook; the only
# assignment visible further down binds a NumPy array, which has no `.pdf` method.
# Presumably `b` was a scipy.stats distribution from a removed cell - confirm or
# delete this cell, since it fails on a fresh kernel.
b.pdf(1, loc=2, scale=2)

In [None]:
# Two small 3 x 3 example matrices filled with consecutive integers (row by row).
A = np.arange(1, 10).reshape(3, 3)
print(A)
U = np.arange(7, 16).reshape(3, 3)
print(U)

In [None]:
# Flatten both matrices column by column and stack them into one vector.
Av = A.flatten(order='F')
print(Av)
Uv = U.flatten(order='F')
print(Uv)
theta = np.concatenate((Av, Uv))
print(theta)

In [None]:
# Undo the column-wise flattening: the first pv*Kv**2 entries give A,
# the remaining ones give CovU.
Kv, pv = 3, 1
A = theta[:pv * Kv ** 2].reshape(Kv * pv, Kv).T
CovU = theta[pv * Kv ** 2:].reshape(Kv, Kv).T

In [None]:
# Show both recovered matrices, one after the other.
print(A, CovU, sep='\n')

In [None]:
# Two symmetric 3 x 3 test matrices; b equals a except for the (0, 2)/(2, 0) corners.
K = 3
a = np.array([[1, 2, 3], [2, 1, 4], [3, 4, 5]])
b = a.copy()
b[0, 2] = b[2, 0] = 16
print(a, b, sep='\n')

In [None]:
# Eigen-decompose both matrices and pack the result as
# [eigvecs(a), eigvecs(b), eigvals(a), eigvals(b)].
(eig_valuesA, eig_vecA), (eig_valuesB, eig_vecB) = np.linalg.eig(a), np.linalg.eig(b)
theta = np.concatenate([
    eig_vecA.ravel(),
    eig_vecB.ravel(),
    eig_valuesA,
    eig_valuesB,
])

In [None]:
# Slice theta back into its eigenvector / eigenvalue blocks.
samp_vecA = theta[:K * K].reshape(K, K)
samp_vecU = theta[K * K:2 * K * K].reshape(K, K)
samp_valA = np.diag(theta[2 * K * K:2 * K * K + K])
samp_valU = np.diag(theta[2 * K * K + K:2 * K * K + 2 * K])

# Rebuild each matrix as V diag(w) V^-1.
A = np.matmul(np.matmul(samp_vecA, samp_valA), np.linalg.inv(samp_vecA))
U = np.matmul(np.matmul(samp_vecU, samp_valU), np.linalg.inv(samp_vecU))

In [None]:
# Show both reconstructed matrices, one after the other.
print(A, U, sep='\n')