In [8]:
import os
import pathlib
import tables as pyt 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.cm as cm
import scipy.io as sio

from numpy import inf
from matplotlib.ticker import FormatStrFormatter
from sklearn import decomposition 
from module_tools import get_file_paths

%matplotlib inline  
#%matplotlib widget

### Inputs & Paths

In [9]:
# Simulation data and target trial folders.
# The data root is resolved from the current user's home directory instead of a
# hard-coded '/Users/<name>/...' prefix, so the notebook is not tied to one account.
# The trailing '' keeps a trailing path separator on the directory.
all_trials_data_path = os.path.join(os.path.expanduser('~'), 'Research_Data', 'Simulation_Data_Py', '')
trial_folders = ['Trial_0004-py']

# Path to save the hdf5 file to (relative to the notebook's working directory)
output_path = os.path.join('outputs', 'simulation_outputs')
hdf5_outputpath = os.path.join(output_path, 'simdata_3_models.h5')

# Create the output directory (and any missing parents) before the HDF5 file is opened
pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)

### Data Manipulation Functions

In [10]:
def calc_prob2d(probvec, dimensions, dimensions_to_reduce=(2,3)):
    """Marginalise a flattened probability vector onto a lower-dimensional grid.

    Parameters
    ----------
    probvec : array_like
        Probability values; must contain exactly ``prod(dimensions)`` elements.
        Any input shape is accepted because the data is reshaped.
    dimensions : sequence of int or 1-D integer array
        Size of each axis of the full state space.
    dimensions_to_reduce : int or sequence of int, optional
        Axes summed out of the full array. Defaults to ``(2, 3)``, which leaves
        the first two axes of a four-axis system.

    Returns
    -------
    numpy.ndarray
        The full array summed over ``dimensions_to_reduce``.

    Raises
    ------
    ValueError
        If ``probvec`` cannot be reshaped to ``dimensions``.
    """
    # Plain Python ints: np.sum rejects a list/ndarray `axis`, and `dimensions`
    # may arrive as a small-integer NumPy array.
    shape = tuple(int(size) for size in np.ravel(dimensions))
    axes = tuple(int(axis) for axis in np.ravel(dimensions_to_reduce))

    # Unfold the flat vector into the full N-d state space (column-major order).
    prob_full_d = np.asarray(probvec).reshape(shape, order='F')
    # Sum out the requested axes, leaving the marginal distribution.
    prob_2d = np.sum(prob_full_d, axis=axes)
    return prob_2d

### Creating HDF5 Database

In [11]:
# Creating hdf5 file.
# mode='w' starts from an empty file, so any existing file at hdf5_outputpath is overwritten.
h5file = pyt.open_file(hdf5_outputpath, title='Simulation Trial Data', mode='w')

In [12]:
# Build one HDF5 group per trial holding: the parameter table, the model name and
# state-space dimensions (as group attributes), the ProbVec entropies, the
# vectorised 2-D marginal distributions, and the entropies of those marginals.
for trial in trial_folders:
    # Initializing trial group
    current_group = h5file.create_group('/', trial)
    
    # Assigning Paths
    trial_path = os.path.join(all_trials_data_path, trial)
    trial_paths, parameters_df, simulation_files = get_file_paths.generate_trial_paths(trial_path)
    # One output row per simulation file actually read below. Sizing from the last
    # index label of parameters_df either overflows the buffer or leaves
    # uninitialised np.empty rows whenever that label differs from the file count.
    total_sets = len(simulation_files)
    prob_vec_path = trial_paths['probvec']
    rate_matrix_path = trial_paths['ratematrix']
    system_probvec_entropies_path = trial_paths['system_entropies']
    model_name_path = os.path.join(trial_path, 'model_name.txt')
    
    # Loading and saving parameters, model name, dimensions, entropy and prob2D
    # `key` is passed by keyword; pandas is making every to_hdf argument after the
    # path keyword-only. Note this writes through the file path while `h5file` is open.
    parameters_df.to_hdf(hdf5_outputpath, key='/'+trial+'/paramValues')
    # NOTE(review): this title is set on the trial group itself, not on paramValues.
    current_group._v_title = 'Parameter Values saved with pd.to_hdf'
    
    with open(model_name_path) as file:
        model_name = file.read()
    current_group._v_attrs.model_name = model_name

    # Paths are built by plain string concatenation, so the trial_paths entries are
    # assumed to end with a path separator -- TODO confirm against get_file_paths.
    dimensions = sio.loadmat(rate_matrix_path + simulation_files[0])['Dimensions'][0]
    current_group._v_attrs.dimensions = dimensions
    
    # Column 1 of the text file is stored; NaN/inf entries are replaced by finite numbers.
    system_probvec_entropies = np.nan_to_num(np.loadtxt(system_probvec_entropies_path)[:,1])
    h5file.create_array(current_group, 'system_probvec_entropies', system_probvec_entropies, "Shannon's Entropy of ProbVec")
    
    # Number of cells in the 2-D marginal (product of the first two dimensions).
    phenotype_count = dimensions[0:2].prod()
    prob_2d_vector_array = np.empty((total_sets, phenotype_count))
    for i, input_file in enumerate(simulation_files):
        prob_vec = sio.loadmat(prob_vec_path + input_file)['ProbVec']
        prob_2D = calc_prob2d(prob_vec, dimensions)
        # Flatten the 2-D marginal (row-major) into one row of the output array.
        prob_2D_vector = prob_2D.reshape(phenotype_count)
        prob_2d_vector_array[i] = prob_2D_vector
    h5file.create_array(current_group, 'prob_2d_vector_array', prob_2d_vector_array, "Prob2D Vectorized Array")
    
    # Shannon entropy of each marginal, H = -sum(p * log(p)), with 0*log(0) := 0.
    # Without that convention a single exactly-zero probability makes the whole
    # row's entropy NaN. NaN inputs are deliberately still propagated.
    prob_2d_vectors = np.abs(prob_2d_vector_array)
    with np.errstate(divide='ignore', invalid='ignore'):
        plogp = np.where(prob_2d_vectors == 0, 0.0, prob_2d_vectors * np.log(prob_2d_vectors))
    system_prob2d_entropies = -np.sum(plogp, axis=1)
    h5file.create_array(current_group, 'system_prob2d_entropies', system_prob2d_entropies, "Shannon's Entropy of Prob2D")

In [13]:
# Close the write handle so all trial data is on disk before the file is reopened for checking.
h5file.close()

### Checking HDF5 file contents

In [14]:
# Reopen read-only: the cells below only inspect the file, so opening it in
# append mode would needlessly allow accidental modification of the saved data.
h5file = pyt.open_file(hdf5_outputpath, mode='r')

In [15]:
# Sanity check: for every trial, show the stored attributes, the ProbVec
# entropies, and the shape of the vectorised 2-D probability array.
for trial in trial_folders:
    trial_group = h5file.root[trial]

    # Group-level metadata written when the file was created
    print(trial_group._v_attrs.model_name)
    print(trial_group._v_attrs.dimensions)

    # Stored arrays, pulled into memory as NumPy arrays
    sys_ent = np.asarray(trial_group.system_probvec_entropies)
    display(sys_ent)

    prob_2d_vector_array_tmp = np.asarray(trial_group.prob_2d_vector_array)
    # Shape followed by one blank line separating trials
    print(prob_2d_vector_array_tmp.shape, end='\n\n')

MISAEx_N20
[21 21  3  3]


array([0.11549412, 0.11356327, 0.11349749, ..., 0.54894044, 0.27122737,
       0.93615986])

(1296, 441)

MISAInc_N20
[21 21  4  4]


array([0.11549438, 0.11356328, 0.11349749, ..., 0.54900639, 0.27087202,
       0.93608807])

(1296, 441)

MISAEx_Act_N20
[21 21  3  3]


array([4.55034259, 5.14851221, 5.09010057, ..., 3.33513239, 3.32482612,
       4.18954842])

(1296, 441)



In [16]:
# Print every group in the file followed by the nodes it directly contains,
# then the summary representation of the whole file.
for group in h5file.walk_groups():
    print(group)
    # Iterating a group yields its child nodes
    for val in group:
        print(val)
    print()

print(h5file)

/ (RootGroup) 'Simulation Trial Data'
/Trial_0020 (Group) 'Parameter Values saved with pd.to_hdf'
/Trial_0021 (Group) 'Parameter Values saved with pd.to_hdf'
/Trial_0022 (Group) 'Parameter Values saved with pd.to_hdf'

/Trial_0020 (Group) 'Parameter Values saved with pd.to_hdf'
/Trial_0020/paramValues (Group) ''
/Trial_0020/prob_2d_vector_array (Array(1296, 441)) 'Prob2D Vectorized Array'
/Trial_0020/system_prob2d_entropies (Array(1296,)) "Shannon's Entropy of Prob2D"
/Trial_0020/system_probvec_entropies (Array(1296,)) "Shannon's Entropy of ProbVec"

/Trial_0021 (Group) 'Parameter Values saved with pd.to_hdf'
/Trial_0021/paramValues (Group) ''
/Trial_0021/prob_2d_vector_array (Array(1296, 441)) 'Prob2D Vectorized Array'
/Trial_0021/system_prob2d_entropies (Array(1296,)) "Shannon's Entropy of Prob2D"
/Trial_0021/system_probvec_entropies (Array(1296,)) "Shannon's Entropy of ProbVec"

/Trial_0022 (Group) 'Parameter Values saved with pd.to_hdf'
/Trial_0022/paramValues (Group) ''
/Trial_002

In [17]:
# Inspection finished; release the file handle.
h5file.close()