In [1]:
import h5py
import matplotlib.pyplot as plt
import numpy as np

from neurobiases import (TriangularModel,
                         EMSolver,
                         TCSolver,
                         solver_utils,
                         plot)
from sklearn.model_selection import check_cv

%matplotlib inline

In [2]:
# Result files to inspect, one entry per HDF5 file
files = [
    'em_oracle.h5',
    'tc_oracle.h5',
]

In [3]:
# Open every result file read-only; the handles are left open because the
# cells below read from them
results = [
    h5py.File(path, 'r')
    for path in files
]

In [4]:
# List the top-level entries stored in each H5 file
for path, handle in zip(files, results):
    # One print call, newline-separated: file name, entry names, divider
    print(path, list(handle), '---', sep='\n')

em_oracle.h5
['B_est', 'B_true', 'L_est', 'L_true', 'Psi_est', 'Psi_true', 'a_est', 'a_true', 'b_est', 'b_true', 'bics', 'coupling_lambdas', 'coupling_locs', 'coupling_rngs', 'dataset_rngs', 'mlls', 'shape_key', 'tuning_lambdas', 'tuning_locs', 'tuning_rngs']
---
tc_oracle.h5
['B_true', 'L_true', 'Psi', 'a_est', 'a_true', 'b_est', 'b_true', 'bics', 'coupling_lambdas', 'coupling_locs', 'coupling_rngs', 'dataset_rngs', 'mses', 'shape_key', 'tuning_lambdas', 'tuning_locs', 'tuning_rngs']
---


In [5]:
# Show the 'shape_key' labels stored in each file
# (presumably these name the leading axes of the stored arrays -- confirm)
for idx, path in enumerate(files):
    print(path)
    print(results[idx]['shape_key'][:])
    print('---')

em_oracle.h5
['tuning_loc' 'coupling_loc' 'model_idx' 'dataset_idx' 'split_idx']
---
tc_oracle.h5
['tuning_loc' 'coupling_loc' 'model_idx' 'dataset_idx' 'split_idx']
---


In [6]:
# Report the seeds saved as attributes on each file
seed_attrs = ('coupling_rng', 'tuning_rng', 'dataset_rng', 'fitter_rng')
for path, handle in zip(files, results):
    print(path)
    for attr in seed_attrs:
        print(handle.attrs[attr])
    print('---')

em_oracle.h5
1234
2345
3456
4567
---
tc_oracle.h5
1234
2345
3456
4567
---


In [8]:
# Print the seed arrays stored as datasets (the actual seeds used for
# generating parameters/data)
for path, handle in zip(files, results):
    print(path)
    for key in ('coupling_rngs', 'tuning_rngs', 'dataset_rngs'):
        print(handle[key][:])
    print('---')

em_oracle.h5
[4206279609 4194893554]
[ 478333422 2172486199]
[3823197773 1990415756]
---
tc_oracle.h5
[4206279609 4194893554]
[ 478333422 2172486199]
[3823197773 1990415756]
---


In [9]:
# Print the shape key next to the shapes of the stored true parameters
for path, handle in zip(files, results):
    print(path)
    print(handle['shape_key'][:])
    for key in ('a_true', 'b_true'):
        print(handle[key].shape)
    print('---')

em_oracle.h5
['tuning_loc' 'coupling_loc' 'model_idx' 'dataset_idx' 'split_idx']
(2, 2, 2, 2, 2, 10)
(2, 2, 2, 2, 2, 10)
---
tc_oracle.h5
['tuning_loc' 'coupling_loc' 'model_idx' 'dataset_idx' 'split_idx']
(2, 2, 2, 2, 2, 10)
(2, 2, 2, 2, 2, 10)
---


In [10]:
# Rebuild the generative model for one stored configuration and print its
# regenerated `a` / `b` parameters next to the `a_true` / `b_true` saved in
# each file, so the two can be compared by eye.
# Position along the shape_key axes:
# (tuning_loc, coupling_loc, model_idx, dataset_idx, split_idx)
entry = (0, 1, 1, 0, 1)
for path, handle in zip(files, results):
    print(path)
    attrs = handle.attrs
    # The hard-coded indices below are meant to line up with `entry`
    # (presumably the *_rngs index is model_idx -- confirm)
    model_kwargs = dict(
        model='linear',
        parameter_design='direct_response',
        M=attrs['M'],
        N=attrs['N'],
        K=attrs['K'],
        corr_cluster=attrs['corr_cluster'],
        corr_back=attrs['corr_back'],
        coupling_distribution=attrs['coupling_distribution'],
        coupling_sparsity=attrs['coupling_sparsity'],
        coupling_loc=handle['coupling_locs'][1],
        coupling_scale=attrs['coupling_scale'],
        coupling_rng=handle['coupling_rngs'][1],
        tuning_distribution=attrs['tuning_distribution'],
        tuning_loc=handle['tuning_locs'][0],
        tuning_scale=attrs['tuning_scale'],
        tuning_sparsity=attrs['tuning_sparsity'],
        tuning_rng=handle['tuning_rngs'][1],
    )
    tm = TriangularModel(**model_kwargs)
    # Regenerated value first, stored value second
    print(tm.a.ravel())
    print(handle['a_true'][entry])
    print(tm.b.ravel())
    print(handle['b_true'][entry])
    print('---')


em_oracle.h5
[0.80838106 0.81751534 0.         0.33651402 0.96797874 1.10600455
 0.         0.         0.         0.        ]
[0.80838106 0.81751534 0.         0.33651402 0.96797874 1.10600455
 0.         0.         0.         0.        ]
[ 0.          0.          0.03748457  0.01899363 -0.46485895  0.28747416
 -0.11043689  0.          0.          0.        ]
[ 0.          0.          0.03748457  0.01899363 -0.46485895  0.28747416
 -0.11043689  0.          0.          0.        ]
---
tc_oracle.h5
[0.80838106 0.81751534 0.         0.33651402 0.96797874 1.10600455
 0.         0.         0.         0.        ]
[0.80838106 0.81751534 0.         0.33651402 0.96797874 1.10600455
 0.         0.         0.         0.        ]
[ 0.          0.          0.03748457  0.01899363 -0.46485895  0.28747416
 -0.11043689  0.          0.          0.        ]
[ 0.          0.          0.03748457  0.01899363 -0.46485895  0.28747416
 -0.11043689  0.          0.          0.        ]
---


In [None]:
# Regenerate the data for one stored configuration, refit it with the solver
# matching each file (EM for 'em' files, TC for 'tc' files), and print the
# refit `a` / `b` next to the saved `a_est` / `b_est` for comparison.
# Position along the shape_key axes:
# (tuning_loc, coupling_loc, model_idx, dataset_idx, split_idx)
entry = (0, 1, 1, 0, 1)
for file, result in zip(files, results):
    print(file)
    attrs = result.attrs
    # The hard-coded indices below are meant to line up with `entry`
    # (presumably the *_rngs index is model_idx -- confirm)
    model_kwargs = dict(
        model='linear',
        parameter_design='direct_response',
        M=attrs['M'],
        N=attrs['N'],
        K=attrs['K'],
        corr_cluster=attrs['corr_cluster'],
        corr_back=attrs['corr_back'],
        coupling_distribution=attrs['coupling_distribution'],
        coupling_sparsity=attrs['coupling_sparsity'],
        coupling_loc=result['coupling_locs'][1],
        coupling_scale=attrs['coupling_scale'],
        coupling_rng=result['coupling_rngs'][1],
        tuning_distribution=attrs['tuning_distribution'],
        tuning_loc=result['tuning_locs'][0],
        tuning_scale=attrs['tuning_scale'],
        tuning_sparsity=attrs['tuning_sparsity'],
        tuning_rng=result['tuning_rngs'][1],
    )
    tm = TriangularModel(**model_kwargs)
    X, Y, y = tm.generate_samples(n_samples=attrs['D'], rng=result['dataset_rngs'][0])

    # Two-way split; take the second (index 1) train/test pair
    cv = check_cv(2)
    train_idx, test_idx = list(cv.split(X))[1]
    X_train, Y_train, y_train = X[train_idx], Y[train_idx], y[train_idx]
    X_test, Y_test, y_test = X[test_idx], Y[test_idx], y[test_idx]

    # 'em' is checked first, exactly as in an if/elif chain; files matching
    # neither tag are skipped without fitting or printing
    use_em = 'em' in file
    if not (use_em or 'tc' in file):
        continue

    # Oracle selection: masks come from the nonzero pattern of the true parameters
    a_mask = result['a_true'][entry] != 0
    b_mask = result['b_true'][entry] != 0

    if use_em:
        # NOTE(review): K is fixed to 1 here while the generative model uses
        # attrs['K'] -- confirm this matches how the stored fits were produced
        solver = EMSolver(
            X=X_train,
            Y=Y_train,
            y=y_train,
            K=1,
            a_mask=a_mask,
            b_mask=b_mask,
            B_mask=result['B_true'][entry] != 0,
            c_coupling=0,
            c_tuning=0,
            tol=attrs['tol'],
            max_iter=attrs['max_iter'],
            initialization='fits',
            solver='ow_lbfgs',
            rng=attrs['fitter_rng']).fit_em(refit=False)
    else:
        solver = TCSolver(
            X=X_train,
            Y=Y_train,
            y=y_train,
            a_mask=a_mask,
            b_mask=b_mask,
            tol=attrs['tol'],
            max_iter=attrs['max_iter'],
            initialization='zeros',
            solver='cd').fit_ols()

    # Refit value first, stored estimate second
    print(solver.a.ravel())
    print(result['a_est'][entry])
    print(solver.b.ravel())
    print(result['b_est'][entry])


em_oracle.h5
