In [1]:
import pandas as pd
import numpy as np
import h5py
from pprint import pprint

In [2]:
# Walk the first levels of the HDF5 file and print what each node contains.
# NOTE: every h5py object bound below belongs to `hdf`; the `with` block
# closes the file on exit, so they are only meant to be inspected here.
with h5py.File("recidivism_gf.h5", 'r') as hdf:
    # Names of the nodes directly under the file root.
    base_items = list(hdf.keys())
    print("root:", base_items, sep="\n")

    # The 'table' node stored under 'meta'.
    meta = hdf.get('meta').get('table')
    print("\nroot > meta:", meta, sep="\n")

    # Children of 'method_parameters'.
    method_parameters = hdf.get('method_parameters')
    method_parameters_items = list(method_parameters.keys())
    print("\nroot > method_parameters:", method_parameters_items, sep="\n")

    # Children of 'task_parameters'.
    task_parameters = hdf.get('task_parameters')
    task_parameters_items = list(task_parameters.keys())
    print("\nroot > task_parameters:", task_parameters_items, sep="\n")

    # Children of 'task_parameters/_i_table'.
    task_parameters_i_table = task_parameters.get('_i_table')
    task_parameters_i_table_items = list(task_parameters_i_table.keys())
    print("\nroot > task_parameters > _i_table:", task_parameters_i_table_items, sep="\n")

    # Only the Python type of 'task_parameters/table' is reported.
    task_parameters_table = task_parameters.get('table')
    print("\nroot > task_parameters > table:", type(task_parameters_table), sep="\n")

root:
['meta', 'method_parameters', 'results', 'task_parameters']

root > meta:
<HDF5 dataset "table": shape (1,), type "|V24">

root > method_parameters:
['GroupFairnessSRL', 'Naive', 'OffsetTree']

root > task_parameters:
['_i_table', 'table']

root > task_parameters > _i_table:
['index']

root > task_parameters > table:
<class 'h5py._hl.dataset.Dataset'>


In [9]:
from astropy.table import Table
import tables


def _read_results_block(results_group, block_index):
    """Return one ``block<N>`` of the ``results`` group as a labelled DataFrame.

    Reads the ``block<N>_items`` node for the column labels and the
    ``block<N>_values`` node for the data, going through ``astropy.table.Table``
    for the conversion.

    Parameters
    ----------
    results_group : PyTables group
        Group that exposes ``block<N>_items`` / ``block<N>_values`` children.
    block_index : int
        The ``N`` in the node names.

    Returns
    -------
    pandas.DataFrame
        The block's values, with columns named after the decoded item labels.
    """
    items_node = getattr(results_group, 'block{}_items'.format(block_index))
    values_node = getattr(results_group, 'block{}_values'.format(block_index))

    # The labels are read back as the first (only) row of a table; astype(str)
    # turns the stored values into plain Python strings.
    column_names = Table(items_node.read()).to_pandas().values.astype(str).tolist()[0]

    values_table = Table(values_node.read())
    if values_table.colnames == ['col0'] and values_table['col0'].ndim > 1:
        # The whole block arrived as one multidimensional column, which
        # Table.to_pandas() does not convert; build the frame from the
        # underlying array instead (this was the hand-written block 2 case).
        frame = pd.DataFrame(values_table['col0'].data)
    else:
        frame = values_table.to_pandas()

    frame.columns = column_names
    return frame


# NOTE: the handle is deliberately left open because the task-parameter cell
# reads from `filehdf5` as well. Call filehdf5.close() once it is no longer needed.
filehdf5 = tables.open_file('recidivism_gf_50trials.h5')

tab0, tab1, tab2, tab3 = (
    _read_results_block(filehdf5.root.results, block_index)
    for block_index in range(4)
)

# Show every column and every row of the frames displayed from here on
# (these are global pandas display options).
pd.options.display.max_columns = None
pd.options.display.max_rows = None

temp = pd.concat([tab0, tab1, tab2, tab3], axis=1)
# Bracket access: attribute access (temp.name) only works as long as no
# DataFrame attribute of the same name exists.
temp[temp['name'] == 'GroupFairnessSRL']

Unnamed: 0,is_seldonian,accept,predicted_accept,candidate_bqf_0_mean,candidate_co_0_mean,return_candidate,safety_bqf_0_mean,safety_co_0_mean,return_safety,train_bqf_0_mean,train_co_0_mean,return_train,test_bqf_0_mean,test_co_0_mean,return_test,co_0_safety_thresh,co_0_psafety_thresh,train_time,name,tid,pid,seed
0,True,False,False,0.0,-0.1,0.720415,0.0,-0.1,0.276032,0.0,-0.1,0.446949,0.0,-0.1,0.38126,1.4,1.4,0.477909,GroupFairnessSRL,0,0,39513308
5,True,False,False,0.0,-0.1,0.455356,0.0,-0.1,0.570435,0.0,-0.1,0.524403,0.0,-0.1,0.423208,0.275,0.65,0.802815,GroupFairnessSRL,3,0,39513308
8,True,True,False,0.0,-0.1,0.329365,0.0,-0.1,0.576717,0.0,-0.1,0.478541,0.0,-0.1,0.471211,-0.03023256,0.005769,1.644543,GroupFairnessSRL,6,0,39513308
18,True,False,False,0.0,-0.1,0.679784,0.0,-0.1,0.496983,0.0,-0.1,0.561501,0.0,-0.1,0.473468,0.9,1.4,0.456046,GroupFairnessSRL,1,0,39513308
19,True,False,False,0.555556,0.455556,0.698754,0.183333,0.08333333,0.066646,0.047619,-0.052381,0.314974,0.069444,-0.030556,0.32783,0.8859287,1.291108,0.803272,GroupFairnessSRL,2,0,39513308
21,True,False,False,0.0,-0.1,0.32963,0.0,-0.1,0.35464,0.0,-0.1,0.344838,0.0,-0.1,0.552979,0.05789474,0.314286,1.167617,GroupFairnessSRL,4,0,39513308
30,True,False,False,0.0,-0.1,0.475171,0.0,-0.1,0.422597,0.0,-0.1,0.443627,0.0,-0.1,0.517717,0.007142857,0.114286,1.417835,GroupFairnessSRL,5,0,39513308
31,True,True,True,0.012346,-0.087654,0.78421,0.038095,-0.06190476,0.77665,0.026882,-0.073118,0.779674,0.019305,-0.080695,0.689514,-0.01900908,-0.033333,97.705735,GroupFairnessSRL,7,0,39513308
32,True,True,True,0.0,-0.1,0.780602,0.0,-0.1,0.601101,0.0,-0.1,0.672621,0.0,-0.1,0.76322,-0.08,-0.061788,125.797294,GroupFairnessSRL,8,0,39513308
36,True,True,True,0.0,-0.1,0.758567,0.0,-0.1,0.737699,0.0,-0.1,0.746026,0.0,-0.1,0.770705,-0.08672566,-0.078029,624.330545,GroupFairnessSRL,9,0,39513308


In [4]:
# Read the task-parameter table and show its four value blocks side by side.
# Relies on `filehdf5` and `Table` having been defined by the results cell.
task_table = Table(filehdf5.root.task_parameters.table.read())

# One DataFrame per 'values_block_<N>' column, in block order.
block_keys = ('values_block_0', 'values_block_1', 'values_block_2', 'values_block_3')
block0, block1, block2, block3 = (
    pd.DataFrame(task_table[key].data) for key in block_keys
)

# Column-wise concatenation; each block keeps its own 0..k integer labels,
# so labels repeat across blocks in the combined frame.
pd.concat([block0, block1, block2, block3], axis=1)

Unnamed: 0,0,1,2,3,4,0.1,0.2,1.1,2.1,0.3,1.2,2.2,3.1
0,-1.0,1.0,0.4,0.01,0.4,4,False,False,True,b'recidivism_gf',b'violent',b'Caucasian',b'African-American'
1,-1.0,1.0,0.4,0.012743,0.4,4,False,False,True,b'recidivism_gf',b'violent',b'Caucasian',b'African-American'
2,-1.0,1.0,0.4,0.020691,0.4,4,False,False,True,b'recidivism_gf',b'violent',b'Caucasian',b'African-American'
3,-1.0,1.0,0.4,0.033598,0.4,4,False,False,True,b'recidivism_gf',b'violent',b'Caucasian',b'African-American'
4,-1.0,1.0,0.4,0.054556,0.4,4,False,False,True,b'recidivism_gf',b'violent',b'Caucasian',b'African-American'
5,-1.0,1.0,0.4,0.088587,0.4,4,False,False,True,b'recidivism_gf',b'violent',b'Caucasian',b'African-American'
6,-1.0,1.0,0.4,0.143845,0.4,4,False,False,True,b'recidivism_gf',b'violent',b'Caucasian',b'African-American'
7,-1.0,1.0,0.4,0.233572,0.4,4,False,False,True,b'recidivism_gf',b'violent',b'Caucasian',b'African-American'
8,-1.0,1.0,0.4,0.379269,0.4,4,False,False,True,b'recidivism_gf',b'violent',b'Caucasian',b'African-American'
9,-1.0,1.0,0.4,0.615848,0.4,4,False,False,True,b'recidivism_gf',b'violent',b'Caucasian',b'African-American'
