In [1]:
import pickle
import numpy as np
import pandas as pd

In [52]:
# Number of taxa in the dataset; it selects the input pickle and is embedded
# in both output file names.
n_taxa = 11

In [53]:
# Read the pickled dataset for the configured number of taxa.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only point this at files from a trusted source.
taxa_pickle_path = "oral_{}_taxa.p".format(n_taxa)
with open(taxa_pickle_path, "rb") as fh:
    data = pickle.load(fh)

In [54]:
data.keys()

dict_keys(['theta', 'Ytrain', 'Ytest', 'Vtrain', 'Vtest'])

In [55]:
data['theta'].shape

(10, 11)

In [56]:
ytable = data['Ytrain']

In [57]:
# For every entry of `ytable`, build a (n_rows, 1) float column holding that
# entry's position in the list, so each stacked row can be tagged with its id.
id_list = [np.full((len(rows), 1), float(pos)) for pos, rows in enumerate(ytable)]

In [58]:
# Stack the per-entry id columns row-wise into one (total_rows, 1) array.
id_array = np.vstack(id_list)

In [59]:
id_array.shape

(635, 1)

In [60]:
# Stack every entry of `ytable` row-wise into a single 2-D array.
out = np.concatenate(ytable, axis=0)

In [61]:
out.shape

(635, 12)

In [62]:
# Prepend the id column to the stacked observations. The right-hand side is
# rebuilt from `ytable` instead of reading `out`, so re-running this cell
# cannot prepend a second id column to an already-prefixed table.
out = np.concatenate([id_array, np.concatenate(ytable, axis=0)], axis=1)

In [63]:
out.shape

(635, 13)

In [64]:
# Transpose so each column of `out` becomes a row of the frame (one frame
# column per stacked observation row).
df = pd.DataFrame(out.T)

In [65]:
df.shape

(13, 635)

In [66]:
# Column count of the first `ytable` entry minus one; the remaining columns
# are the ones labelled "OTU i" in the row index built in the next cell.
n_otus = ytable[0].shape[1] - 1

In [67]:
# Row labels for the transposed table: the id row, the "time" row, then one
# label per OTU column.
new_index = ["id", "time", *("OTU {}".format(otu) for otu in range(n_otus))]

In [68]:
new_index

['id',
 'time',
 'OTU 0',
 'OTU 1',
 'OTU 2',
 'OTU 3',
 'OTU 4',
 'OTU 5',
 'OTU 6',
 'OTU 7',
 'OTU 8',
 'OTU 9',
 'OTU 10']

In [69]:
len(new_index)

13

In [70]:
df.shape

(13, 635)

In [71]:
# Replace the default integer row labels with the variable names.
df.index = new_index

In [72]:
# Write the labelled table to disk: row labels go out as the first CSV column
# and no header row is emitted.
otu_table_path = "oral_{}_otu_table.csv".format(n_taxa)
df.to_csv(otu_table_path, header=False, index=True)

# Compute the basis matrix `psi` from `theta`

In [73]:
theta = data['theta']

In [74]:
def get_n_plus_and_n_minus(theta, w):
    """Sum the weights of the +1 and -1 entries of each row of ``theta``.

    Parameters
    ----------
    theta : array_like, shape (Dm1, D)
        Sign matrix; only entries equal to 1 or -1 contribute.
    w : array_like, shape (D,)
        One weight per column of ``theta``.

    Returns
    -------
    n_plus, n_minus : numpy.ndarray, shape (Dm1,), dtype float
        ``n_plus[i]`` is the sum of ``w`` over the columns where
        ``theta[i] == 1``; ``n_minus[i]`` is the same for ``theta[i] == -1``.

    Raises
    ------
    ValueError
        If ``theta`` is not two-dimensional.
    """
    theta = np.asarray(theta)
    w = np.asarray(w)
    if theta.ndim != 2:
        raise ValueError(
            "theta must be a 2-D array, got shape {}".format(theta.shape)
        )
    # The builtin ``float`` replaces the ``np.float`` alias, which was removed
    # in NumPy 1.24 and now raises AttributeError.
    n_plus = np.array([np.sum(w[row == 1]) for row in theta], dtype=float)
    n_minus = np.array([np.sum(w[row == -1]) for row in theta], dtype=float)
    return n_plus, n_minus

In [75]:
def get_psi(theta, n_plus, n_minus):
    """Build the matrix ``psi`` of scaled contrasts from the sign matrix ``theta``.

    For row ``i`` let ``s = sqrt(n_plus[i] * n_minus[i] / (n_plus[i] + n_minus[i]))``.
    Entries where ``theta[i, j] == 1`` become ``s / n_plus[i]``, entries where
    ``theta[i, j] == -1`` become ``-s / n_minus[i]``, and all other entries
    stay 0.
    NOTE(review): this looks like the balance-contrast formula for a
    sequential binary partition -- confirm against the intended reference.

    Parameters
    ----------
    theta : array_like, shape (m, n)
        Sign matrix with entries in {1, -1, other}.
    n_plus, n_minus : indexable, length m
        Per-row totals, e.g. as returned by ``get_n_plus_and_n_minus``.

    Returns
    -------
    numpy.ndarray, shape (m, n), dtype float
        The matrix ``psi``.

    Raises
    ------
    ValueError
        If ``theta`` is not two-dimensional.
    """
    theta = np.asarray(theta)
    n_rows, _ = theta.shape  # raises ValueError unless theta is 2-D
    # The builtin ``float`` replaces the ``np.float`` alias, which was removed
    # in NumPy 1.24 and now raises AttributeError.
    psi = np.zeros_like(theta, dtype=float)
    for i in range(n_rows):
        positive = theta[i] == 1
        negative = theta[i] == -1
        if not (positive.any() or negative.any()):
            # Nothing to fill; also avoids evaluating 0/0 for an all-zero row.
            continue
        # The scale depends only on the row, so compute it once per row
        # instead of once per element.
        scale = np.sqrt(n_plus[i] * n_minus[i] / (n_plus[i] + n_minus[i]))
        if positive.any():
            psi[i, positive] = 1 / n_plus[i] * scale
        if negative.any():
            psi[i, negative] = -1 / n_minus[i] * scale
    return psi

In [76]:
theta.shape

(10, 11)

In [77]:
# Uniform weights: one weight of 1.0 per column of `theta`.
w = np.ones(theta.shape[1])

In [78]:
n_plus, n_minus = get_n_plus_and_n_minus(theta, w)

In [79]:
psi = get_psi(theta, n_plus, n_minus)

In [80]:
psi.shape

(10, 11)

In [82]:
# Save the transposed basis (one output row per column of `theta`) as
# comma-separated text.
basis_path = "oral_otu_{}_basis.csv".format(n_taxa)
np.savetxt(basis_path, np.transpose(psi), delimiter=",")