In [1]:
import os
import pathlib
import tables as pyt 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.cm as cm
import scipy.io as sio

from numpy import inf
from matplotlib.ticker import FormatStrFormatter
from sklearn import decomposition 
from module_tools import get_file_paths
from module_tools import system_entropy # calc_system_entropy

%matplotlib inline  
#%matplotlib widget

### Inputs & Paths

In [2]:
# Simulation data and target trial folders.
# The original location is kept as the default so existing runs are unchanged;
# set the SIM_TRIAL_PATH environment variable to point the notebook at a trial
# stored elsewhere without editing this cell.
default_trial_path = os.path.join('/', 'Users', 'camerongallivan', 'Research_Data', 'Simulation_Data_Py', 'Trial_0001-py')
trial_path = os.environ.get('SIM_TRIAL_PATH') or default_trial_path

# Folder and file name of the hdf5 file written by this notebook
output_path = os.path.join(trial_path, 'Analysis', 'PCA')
hdf5_outputpath = os.path.join(output_path, 'simdata.h5')

# Create the output folder (and any missing parents); a no-op when it already exists
pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)

### Data Manipulation Functions

In [3]:
def calc_prob2d(probvec, dimensions, dimensions_to_reduce=(2,3)):
    """Marginalise a flattened probability vector down to fewer dimensions.

    Parameters
    ----------
    probvec : numpy.ndarray
        Probability values whose total number of elements equals the product
        of ``dimensions``.
    dimensions : sequence of int
        Shape of the full system; the vector is unfolded into this shape
        using column-major (Fortran) ordering.
    dimensions_to_reduce : int or tuple of int, optional
        Axes that are summed out. With the default ``(2, 3)`` and a
        four-dimensional system, only the first two axes remain.

    Returns
    -------
    numpy.ndarray
        The probabilities summed over ``dimensions_to_reduce``.
    """
    # Unfold the flat vector into one axis per system dimension
    full_distribution = probvec.reshape(dimensions, order='F')
    # Sum out the unwanted axes, leaving the marginal distribution
    return full_distribution.sum(axis=dimensions_to_reduce)

### Creating HDF5 Database

In [4]:
# Builds the hdf5 database for the trial: loads every probability vector,
# reduces it to two dimensions, computes Shannon entropies and stores the
# results together with the parameter table.

def _rowwise_shannon_entropy(prob_rows):
    """Return the Shannon entropy (natural log) of every row of ``prob_rows``.

    Entries that are not strictly positive (zeros, negative values, NaN) are
    treated as contributing nothing, following the convention 0 * log(0) = 0.
    This keeps one empty state from turning the entropy of the whole row into
    NaN.
    """
    prob_rows = np.asarray(prob_rows, dtype=float)
    # Masked entries are replaced by 1 so that their term is 1 * log(1) = 0
    safe_probs = np.where(prob_rows > 0, prob_rows, 1.0)
    return -np.sum(safe_probs * np.log(safe_probs), axis=1)


data_name = 'Simulation_Data'

# Assigning Paths
trial_paths, parameters_df, simulation_files = get_file_paths.generate_trial_paths(trial_path)
# Size the arrays by the number of files actually loaded below, so that no
# row of the np.empty buffers is left unfilled and no index overruns them.
total_sets = len(simulation_files)
# NOTE(review): file names are appended by plain string concatenation, so these
# folder paths presumably end with a path separator - confirm in get_file_paths.
prob_vec_path = trial_paths['probvec']
rate_matrix_path = trial_paths['ratematrix']

# System dimensions are read from the first rate-matrix file
dimensions = sio.loadmat(rate_matrix_path + simulation_files[0])['Dimensions'][0]

phenotype_count = dimensions[0:2].prod()
microstate_count = dimensions.prod()

probvec_array = np.empty((total_sets, microstate_count))
prob_2d_vector_array = np.empty((total_sets, phenotype_count))

# One row per simulation file: the full probability vector and its 2D reduction
for i, input_file in enumerate(simulation_files):
    prob_vec = sio.loadmat(prob_vec_path + input_file)['ProbVec']
    probvec_array[i] = prob_vec[0]
    prob_2d = calc_prob2d(prob_vec, dimensions)
    prob_2d_vector_array[i] = prob_2d.reshape(phenotype_count)

system_probvec_entropies = _rowwise_shannon_entropy(probvec_array)

# Magnitudes are used for the reduced distribution, as before
prob_2d_vectors = np.abs(prob_2d_vector_array)
system_prob2d_entropies = _rowwise_shannon_entropy(prob_2d_vectors)

# Creating hdf5 file. All inputs are loaded before the file is opened in 'w'
# mode, so a failure while reading data does not leave a truncated file behind,
# and the context manager closes the handle even if a write fails.
with pyt.open_file(hdf5_outputpath, title='Simulation Trial Data', mode='w') as h5file:
    results_group = h5file.create_group('/', data_name)
    # NOTE(review): this title describes the parameter table although the group
    # also holds the arrays below; kept unchanged for existing readers.
    results_group._v_title = 'Parameter Values saved with pd.to_hdf'
    results_group._v_attrs.dimensions = dimensions

    h5file.create_array(results_group, 'prob_2d_vector_array', prob_2d_vector_array, "Prob2D Vectorized Array")
    h5file.create_array(results_group, 'system_probvec_entropies', system_probvec_entropies, "Shannon's Entropy of ProbVec")
    h5file.create_array(results_group, 'system_prob2d_entropies', system_prob2d_entropies, "Shannon's Entropy of Prob2D")

# Saving parameters with pandas once the PyTables handle is closed, so the file
# is never open through two handles at once. mode='a' appends to the file
# written above; key is passed by keyword as newer pandas versions require.
parameters_df.to_hdf(hdf5_outputpath, key='/' + data_name + '/paramValues', mode='a')



### Checking HDF5 file contents

In [5]:
# Read-only mode: the following cells only list the file's contents, so the
# file cannot be created or modified by accident while inspecting it.
h5file = pyt.open_file(hdf5_outputpath, mode='r')

In [6]:
# Show the file's string form: its path, title, last modification and object tree
print(str(h5file))

/Users/camerongallivan/Research_Data/Simulation_Data_Py/Trial_0001-py/Analysis/PCA/simdata.h5 (File) 'Simulation Trial Data'
Last modif.: 'Sat Mar  9 13:52:10 2019'
Object Tree: 
/ (RootGroup) 'Simulation Trial Data'
/Simulation_Data (Group) 'Parameter Values saved with pd.to_hdf'
/Simulation_Data/prob_2d_vector_array (Array(4096, 441)) 'Prob2D Vectorized Array'
/Simulation_Data/system_prob2d_entropies (Array(4096,)) "Shannon's Entropy of Prob2D"
/Simulation_Data/system_probvec_entropies (Array(4096,)) "Shannon's Entropy of ProbVec"
/Simulation_Data/paramValues (Group) ''
/Simulation_Data/paramValues/axis0 (Array(10,)) ''
/Simulation_Data/paramValues/axis1 (Array(4096,)) ''
/Simulation_Data/paramValues/block0_items (Array(8,)) ''
/Simulation_Data/paramValues/block0_values (Array(4096, 8)) ''
/Simulation_Data/paramValues/block1_items (Array(2,)) ''
/Simulation_Data/paramValues/block1_values (Array(4096, 2)) ''



In [7]:
# List every group in the file followed by its direct children,
# with a blank line separating one group from the next.
for current_group in h5file.walk_groups():
    print(current_group)
    for child_node in current_group:
        print(child_node)
    print()

/ (RootGroup) 'Simulation Trial Data'
/Simulation_Data (Group) 'Parameter Values saved with pd.to_hdf'

/Simulation_Data (Group) 'Parameter Values saved with pd.to_hdf'
/Simulation_Data/paramValues (Group) ''
/Simulation_Data/prob_2d_vector_array (Array(4096, 441)) 'Prob2D Vectorized Array'
/Simulation_Data/system_prob2d_entropies (Array(4096,)) "Shannon's Entropy of Prob2D"
/Simulation_Data/system_probvec_entropies (Array(4096,)) "Shannon's Entropy of ProbVec"

/Simulation_Data/paramValues (Group) ''
/Simulation_Data/paramValues/axis0 (Array(10,)) ''
/Simulation_Data/paramValues/axis1 (Array(4096,)) ''
/Simulation_Data/paramValues/block0_items (Array(8,)) ''
/Simulation_Data/paramValues/block0_values (Array(4096, 8)) ''
/Simulation_Data/paramValues/block1_items (Array(2,)) ''
/Simulation_Data/paramValues/block1_values (Array(4096, 2)) ''



In [8]:
# Close the handle opened above for inspection so the file is not left open in the kernel
h5file.close()