In [11]:
import sys
# Extend the module search path, presumably so the `src.*` imports used by this
# notebook resolve — TODO confirm.
# NOTE(review): this is an absolute, machine-specific path; the notebook will not
# import `src` on another machine unless this directory exists there.
sys.path.append('/Users/leah/Columbia/courses/19summer/microbialdynamics')

In [12]:
import os
import pickle
import numpy as np
from scipy.special import logsumexp

import matplotlib.pyplot as plt
import seaborn as sns

import scipy

from src.utils.data_loader import load_data
from src.utils.data_interpolation import interpolate_data
from src.utils.available_data import DATA_DIR_DICT, PERCENTAGE_DATA_DICT, COUNT_DATA_DICT

In [41]:
# x is log additive ratio
def x_to_p(x):
    """
    Convert additive log-ratio coordinates into proportions.

    x: (T, Dx) array of log-ratios taken against an implicit last component
    return: p, (T, Dx+1) array; every row is non-negative and sums to 1
    """
    n_rows, n_cols = x.shape

    # The reference component has log-ratio 0, so it is appended as a zero column.
    reference = np.zeros((n_rows, 1))
    logits = np.hstack((x, reference))  # (T, Dx + 1)

    # Softmax over the last axis, normalised in log space for numerical stability.
    log_norm = logsumexp(logits, axis=-1, keepdims=True)  # (T, 1)
    p = np.exp(logits - log_norm)  # (T, Dx + 1)

    assert p.shape == (n_rows, n_cols + 1)
    return p

In [42]:
def compute_mse(yhat, ytrue):
    """
    Per-sequence error statistics, returned un-normalised so that several
    sequences can be pooled afterwards.

    yhat: (T, Dy) predictions
    ytrue: (T, Dy) targets
    return: time: T, the number of rows of yhat
            mse: () sum (not mean) of squared errors over all entries
            mean: (Dy, ) column means of ytrue
            var: (Dy, ) per-column sum of squared deviations of ytrue from mean
    """
    n_steps = yhat.shape[0]

    residual = yhat - ytrue
    sq_err_total = np.sum(residual ** 2)

    col_mean = np.mean(ytrue, axis=0)  # (Dy,)
    centered = ytrue - col_mean
    col_sq_dev = np.sum(centered ** 2, axis=0)  # (Dy,)

    return n_steps, sq_err_total, col_mean, col_sq_dev
    

In [53]:
def compute_Rsq(mse_and_stuff, Dy):
    """
    Pool per-sequence statistics into a single R^2.

    mse_and_stuff: iterable of (time, mse, mean, var) tuples, where
                   time: number of samples in the sequence
                   mse:  () sum of squared errors of the sequence
                   mean: (Dy, ) per-column mean of the targets
                   var:  (Dy, ) per-column sum of squared deviations from mean
    Dy: number of target columns
    return: R_square, array of shape (1, ):
            1 - (total squared error) / (total sum of squares of the targets).
            The ratio is undefined (nan/inf, with a numpy warning) when the
            pooled sum of squares is zero, e.g. for an empty iterable.

    Sequences may have different lengths: the pooled mean and sum of squares
    are updated with the actual number of samples seen so far.
    """
    combined_MSE = np.zeros((1, ))             # combined squared error across all batches
    combined_y_means = np.zeros((Dy, ))        # pooled per-column mean across all batches
    combined_y_vars = np.zeros((Dy, ))         # pooled per-column sum of squared deviations

    n_seen = 0  # total number of samples already pooled
    for time, mse, mean, var in mse_and_stuff:
        if time == 0:
            # An empty sequence carries no information (and its mean is undefined).
            continue

        combined_MSE += mse

        n1 = n_seen
        n2 = time

        combined_y_means_new = (n1 * combined_y_means + n2 * mean) / (n1 + n2)

        # Pairwise merge of sums of squares: each group's own scatter plus the
        # scatter of each group mean around the new pooled mean.
        combined_y_vars = (
            combined_y_vars
            + var
            + n1 * (combined_y_means - combined_y_means_new) ** 2
            + n2 * (mean - combined_y_means_new) ** 2
        )

        combined_y_means = combined_y_means_new
        n_seen += time

    total_sum_of_squares = np.sum(combined_y_vars, axis=0)
    R_square = 1 - combined_MSE / total_sum_of_squares

    return R_square
    

In [54]:
def compute_0step_Rsq(datatype, Dx,
                      general_data_dir="/Users/leah/Columbia/courses/19summer/microbialdynamics/",
                      eps=1e-6):
    """
    Compute 0-step R^2 between the training hidden states (mapped to
    proportions with x_to_p) and the training observations of one dataset.

    datatype: key into DATA_DIR_DICT; if it is also in COUNT_DATA_DICT the
              observations are normalised row-wise to proportions first
    Dx: hidden dimension handed to load_data; Dy is taken to be Dx + 1
    general_data_dir: root directory that DATA_DIR_DICT[datatype] is joined to.
              The default is the original machine-specific path, kept only for
              backward compatibility; pass the local project root elsewhere.
    eps: pseudo-count added to every proportion before taking logs
    return: (percentage_Rsq, logp_Rsq, a_Rsq), each as returned by compute_Rsq,
            computed on proportions, log-proportions and centred
            log-proportions respectively
    """
    data_dir = os.path.join(general_data_dir, DATA_DIR_DICT[datatype])
    Dy = Dx + 1

    # load_data returns eight items; only the training hidden states and the
    # training observations are needed here.
    hidden_train, _, obs_train, _, _, _, _, _ = load_data(data_dir, Dx, False, training_sample_idx=None)

    # Drop the first column of every observation array
    # (presumably a time/index column — TODO confirm against load_data).
    ytrue = [obs[:, 1:] for obs in obs_train]
    if datatype in COUNT_DATA_DICT:
        # counts -> proportions, row by row; each y is (T, Dy)
        ytrue = [y / np.sum(y, axis=-1, keepdims=True) for y in ytrue]

    def _pooled_Rsq(yhat_list, ytrue_list):
        # R^2 pooled over all sequences for one representation of the data.
        return compute_Rsq(list(map(compute_mse, yhat_list, ytrue_list)), Dy)

    def _smoothed_log(p):
        # Add eps to each of the Dy components, renormalise so the row sums to
        # one again, then take the log. The renormalisation belongs inside the
        # log; dividing the log itself by (1 + Dy * eps) only rescales it.
        return np.log((p + eps) / (1 + Dy * eps))

    # percentage Rsq
    percentages = [x_to_p(x) for x in hidden_train]
    percentage_Rsq = _pooled_Rsq(percentages, ytrue)

    # log percentage Rsq
    log_percentages = [_smoothed_log(p) for p in percentages]
    log_ytrue = [_smoothed_log(y) for y in ytrue]
    logp_Rsq = _pooled_Rsq(log_percentages, log_ytrue)

    # aitchison distance Rsq: centre every row of log-proportions on its own mean
    a_hat = [log_p - np.mean(log_p, axis=-1, keepdims=True) for log_p in log_percentages]
    a_true = [log_y - np.mean(log_y, axis=-1, keepdims=True) for log_y in log_ytrue]
    a_Rsq = _pooled_Rsq(a_hat, a_true)

    return percentage_Rsq, logp_Rsq, a_Rsq
    

In [55]:
# Smoke test: print the three 0-step R^2 values for every
# clv_percentage_Dx_{Dx}_scale_{scale} dataset key, Dx = 1..10, scale in {1, 4}.
for Dx, scale in [(d, s) for d in range(1, 11) for s in [1, 4]]:
    datadir = "clv_percentage_Dx_{}_scale_{}".format(Dx, scale)
    print(datadir)
    print(compute_0step_Rsq(datadir, Dx))

clv_percentage_Dx_1_scale_1
(array([0.99983612]), array([0.99875555]), array([0.99917602]))
clv_percentage_Dx_1_scale_4
(array([0.99988045]), array([0.99914898]), array([0.99945849]))
clv_percentage_Dx_2_scale_1
(array([0.99987877]), array([0.99801643]), array([0.99844716]))
clv_percentage_Dx_2_scale_4
(array([0.99989443]), array([0.99802962]), array([0.99846378]))
clv_percentage_Dx_3_scale_1
(array([0.99982129]), array([0.99795202]), array([0.99822008]))
clv_percentage_Dx_3_scale_4
(array([0.99984437]), array([0.99729484]), array([0.99764174]))
clv_percentage_Dx_4_scale_1
(array([0.99977171]), array([0.99742618]), array([0.99766459]))
clv_percentage_Dx_4_scale_4
(array([0.99980869]), array([0.99736649]), array([0.99754418]))
clv_percentage_Dx_5_scale_1
(array([0.99981361]), array([0.99671276]), array([0.9968332]))
clv_percentage_Dx_5_scale_4
(array([0.99984744]), array([0.99546355]), array([0.99534022]))
clv_percentage_Dx_6_scale_1
(array([0.99975625]), array([0.99646927]), array([0.9

In [56]:
# Smoke test: print the three 0-step R^2 values for every
# clv_count_Dx_{Dx}_scale_{scale} dataset key, Dx = 1..10, scale in {1, 4}.
for Dx, scale in [(d, s) for d in range(1, 11) for s in [1, 4]]:
    datadir = "clv_count_Dx_{}_scale_{}".format(Dx, scale)
    print(datadir)
    print(compute_0step_Rsq(datadir, Dx))

clv_count_Dx_1_scale_1
(array([0.99983612]), array([0.99875555]), array([0.99917602]))
clv_count_Dx_1_scale_4
(array([0.99988045]), array([0.99914898]), array([0.99945849]))
clv_count_Dx_2_scale_1
(array([0.99987877]), array([0.99801643]), array([0.99844716]))
clv_count_Dx_2_scale_4
(array([0.99989443]), array([0.99802962]), array([0.99846378]))
clv_count_Dx_3_scale_1
(array([0.99982129]), array([0.99795202]), array([0.99822008]))
clv_count_Dx_3_scale_4
(array([0.99984437]), array([0.99729484]), array([0.99764174]))
clv_count_Dx_4_scale_1
(array([0.99977171]), array([0.99742618]), array([0.99766459]))
clv_count_Dx_4_scale_4
(array([0.99980869]), array([0.99736649]), array([0.99754418]))
clv_count_Dx_5_scale_1
(array([0.99981361]), array([0.99671276]), array([0.9968332]))
clv_count_Dx_5_scale_4
(array([0.99984744]), array([0.99546355]), array([0.99534022]))
clv_count_Dx_6_scale_1
(array([0.99975625]), array([0.99646927]), array([0.99650997]))
clv_count_Dx_6_scale_4
(array([0.99973919]),

In [58]:
import tensorflow as tf

In [59]:
# Scratch check: build an all-zero tensor of shape (2, 2, 1) and display it.
tf.zeros((2,2,1))

<tf.Tensor 'zeros:0' shape=(2, 2, 1) dtype=float32>

In [63]:
# Scratch experiment on list/array aliasing: a list holding two small integer arrays.
t1 = [np.array([1,2]), np.array([2,3])]

In [64]:
# Second list for the aliasing experiment; starts empty.
t2 = []

In [65]:
# Store a reference to each array of t1 in t2, then rebind the slot in t1.
for i in range(len(t1)):
    t2.append(t1[i])
    # `t1[i] + 1` allocates a new array, so the object already stored in t2
    # is not modified; an in-place `t1[i] += 1` would have changed both.
    t1[i] = t1[i] + 1

In [66]:
# Display t2: it still holds the original, un-incremented arrays.
t2

[array([1, 2]), array([2, 3])]

In [67]:
# Display t1: every slot now refers to a new array equal to the original plus one.
t1

[array([2, 3]), array([3, 4])]