In [1]:
# load matlab file Diablito-12122024-006_formatted.mat # use h5py
import h5py
import numpy as np
from os.path import join
mat_root = r"S:\Data-Ephys-MAT"
mat_file = join(mat_root, "Diablito-12122024-006_formatted.mat")
data = h5py.File(mat_file, "r")
print(data.keys())

<KeysViewHDF5 ['#refs#', 'Trials', 'lfps', 'meta', 'rasters']>


In [2]:
def hdf5_dataset_to_string(dset):
    # Read all data into a NumPy array
    arr = dset[()]  # This should be a 2D array of shape (N, 1)
    # Flatten to a 1D array of 16-bit code points
    code_points = arr.flatten()
    # Convert code points directly to characters and join
    # If these are standard Unicode code points, this works directly.
    # If the data is UTF-16, using chr() will usually still work correctly for BMP characters.
    # For more complex cases (e.g., surrogate pairs), consider decoding from bytes as UTF-16:
    # byte_data = code_points.tobytes()
    # return byte_data.decode('utf-16-le')
    return ''.join(chr(cp) for cp in code_points)


def hdf5_string_array_to_string_array(dataset, ref_object):
    obj_shape = ref_object.shape
    result = np.empty(obj_shape, dtype=object)
    # Iterate through all dimensions using nested loops
    for idx in np.ndindex(obj_shape):
        # Get the reference at this index
        ref = ref_object[idx]
        # Convert the reference to a string using the helper function
        result[idx] = hdf5_dataset_to_string(dataset[ref])
    return result

In [63]:
from easydict import EasyDict as edict
def h5_to_dict(h5_obj, dataset, no_attrs=False):
    """
    Recursively convert an HDF5 file or group into a nested dictionary.
    """
    result = edict()
    
    # Extract attributes
    if len(h5_obj.attrs) > 0:
        attrs = {key: h5_obj.attrs[key] for key in h5_obj.attrs}
        if not no_attrs:
            result['__attrs__'] = attrs
    
    # If this is a group, recurse into its members
    if isinstance(h5_obj, h5py.Group):
        for key in h5_obj.keys():
            item = h5_obj[key]
            result[key] = h5_to_dict(item, dataset, no_attrs=no_attrs)
    
    # If this is a dataset, read its data
    elif isinstance(h5_obj, h5py.Dataset):
        data = h5_obj[()]
        # Initialize data_py to None so it's always defined
        data_py = None
        try:
            if 'MATLAB_class' in h5_obj.attrs:
                if h5_obj.attrs['MATLAB_class'] == b'cell':
                    # cell array => object array
                    data_py = np.empty(data.shape, dtype=object)
                    for idx in np.ndindex(data.shape):
                        ref = data[idx]
                        if isinstance(ref, h5py.Reference):
                            data_py[idx] = h5_to_dict(dataset[ref], dataset, no_attrs=no_attrs)
                        else:
                            data_py[idx] = ref
                elif h5_obj.attrs['MATLAB_class'] == b'struct':
                    # TODO maybe to better handle structs?
                    data_py = h5_to_dict(h5_obj, dataset)
                elif h5_obj.attrs['MATLAB_class'] == b'double':
                    data_py = data.astype(np.float64)
                elif h5_obj.attrs['MATLAB_class'] == b'int64':
                    data_py = data.astype(np.int64)
                elif h5_obj.attrs['MATLAB_class'] == b'uint64':
                    data_py = data.astype(np.uint64)
                elif h5_obj.attrs['MATLAB_class'] == b'logical':
                    data_py = data.astype(np.bool_)
                elif h5_obj.attrs['MATLAB_class'] == b'char':
                    try:
                        if isinstance(data, np.ndarray):
                            # Load data fully
                            arr = data[()]
                            # Handle empty char array case
                            if 'MATLAB_empty' in h5_obj.attrs and h5_obj.attrs['MATLAB_empty']:
                                data_py = ''
                            # Handle single character case
                            elif arr.size == 1:
                                # Extract the scalar value and convert to char
                                val = arr.item()  # Convert array of shape (1,1) to a Python scalar
                                data_py = chr(int(val))
                            else:
                                # Normal case: flatten and convert each code point
                                arr_flat = arr.flatten()
                                # Option 1: Direct chr conversion
                                data_py = ''.join(chr(int(cp)) for cp in arr_flat)
                                # Option 2 (if direct chr gives incorrect output, try UTF-16 decoding):
                                # byte_data = arr.astype('<u2').tobytes()
                                # data_py = byte_data.decode('utf-16-le')
                        else:
                            # data is not an ndarray, fallback
                            data_py = str(data)
                    except (IndexError, TypeError, ValueError) as e:
                        print(f"Warning: Failed to convert char data: {e}  {h5_obj} {attrs}")
                        data_py = str(data)  # Fallback to string representation
                else:
                    data_py = data  # Default case - use raw data
                    print(f"Warning: Unknown MATLAB class: {h5_obj.attrs['MATLAB_class']} for {h5_obj.name}")
            else:
                # cell array => object array
                data_py = np.empty(data.shape, dtype=object)
                for idx in np.ndindex(data.shape):
                    ref = data[idx]
                    if isinstance(ref, h5py.Reference):
                        data_py[idx] = h5_to_dict(dataset[ref], dataset, no_attrs=no_attrs)
                    else:
                        data_py[idx] = ref
                # No MATLAB class specified - use raw data
                print(f"Warning: No MATLAB class specified for {h5_obj} at path {h5_obj.name}")
        except Exception as e:
            raise ValueError(f"Failed loading path {h5_obj} {h5_obj.name} {attrs}: {str(e)}")
            
        if data_py is None:
            raise ValueError(f"Failed to convert data for {h5_obj} {h5_obj.name}")
            
        result['__data__'] = data_py
    
    return result


def h5_to_dict_simplify(h5_obj, dataset, ):
    """
    Recursively convert an HDF5 file or group into a nested dictionary.
    """
    # if any(h5_obj.name.startswith(path) for path in exluding_paths):
    #     if verbose:
    #         print(f"Excluding {h5_obj.name}")
    #     return None
    # if verbose:
    #     print(f"Loading {h5_obj.name}")
    result = None
    attrs = edict()    # Initialize attrs to empty dict
    
    # Extract attributes
    if len(h5_obj.attrs) > 0:
        attrs = {key: h5_obj.attrs[key] for key in h5_obj.attrs}
    
    # If this is a group, recurse into its members
    if isinstance(h5_obj, h5py.Group):
        result = edict()
        for key in h5_obj.keys():
            item = h5_obj[key]
            result[key] = h5_to_dict_simplify(item, dataset, )
    
    # If this is a dataset, read its data
    elif isinstance(h5_obj, h5py.Dataset):
        data = h5_obj[()]
        data_py = None  # Initialize data_py
        try:
            if 'MATLAB_class' in h5_obj.attrs:
                # if MATLAB_class is set, use it to convert the data
                if h5_obj.attrs['MATLAB_class'] == b'cell':
                    # cell array => object array
                    data_py = np.empty(data.shape, dtype=object)
                    for idx in np.ndindex(data.shape):
                        ref = data[idx]
                        if isinstance(ref, h5py.Reference):
                            data_py[idx] = h5_to_dict_simplify(dataset[ref], dataset, )
                        else:
                            data_py[idx] = ref
                    result = data_py
                elif h5_obj.attrs['MATLAB_class'] == b'struct':
                    # TODO maybe to better handle structs?
                    result = h5_to_dict_simplify(h5_obj, dataset,)
                elif h5_obj.attrs['MATLAB_class'] == b'double':
                    result = data.astype(np.float64)
                elif h5_obj.attrs['MATLAB_class'] == b'single':
                    result = data.astype(np.float32)
                elif h5_obj.attrs['MATLAB_class'] == b'int64':
                    result = data.astype(np.int64)
                elif h5_obj.attrs['MATLAB_class'] == b'int32':
                    result = data.astype(np.int32)
                elif h5_obj.attrs['MATLAB_class'] == b'uint64':
                    result = data.astype(np.uint64)
                elif h5_obj.attrs['MATLAB_class'] == b'logical':
                    result = data.astype(np.bool_)
                elif h5_obj.attrs['MATLAB_class'] == b'char':
                    try:
                        if isinstance(data, np.ndarray):
                            # Load data fully
                            arr = data[()]
                            # Handle empty char array case
                            if 'MATLAB_empty' in h5_obj.attrs and h5_obj.attrs['MATLAB_empty']:
                                data_py = ''
                            # Handle single character case
                            elif arr.size == 1:
                                # Extract the scalar value and convert to char
                                val = arr.item()  # Convert array of shape (1,1) to a Python scalar
                                data_py = chr(int(val))
                            else:
                                # Normal case: flatten and convert each code point
                                arr_flat = arr.flatten()
                                # Option 1: Direct chr conversion
                                data_py = ''.join(chr(int(cp)) for cp in arr_flat)
                                # Option 2 (if direct chr gives incorrect output, try UTF-16 decoding):
                                # byte_data = arr.astype('<u2').tobytes()
                                # data_py = byte_data.decode('utf-16-le')
                        else:
                            # data is not an ndarray, fallback
                            data_py = str(data)
                    except (IndexError, TypeError, ValueError) as e:
                        print(f"Warning: Failed to convert char data: {e}  {h5_obj} {attrs}")
                        data_py = str(data)  # Fallback to string representation
                    result = data_py
                else:
                    result = data  # Default case - use raw data
                    print(f"Warning: Existing but unknown MATLAB class: {h5_obj.attrs['MATLAB_class']} for {h5_obj.name}")
            else:
                if h5_obj.dtype == np.float64:
                    result = data.astype(np.float32)
                elif h5_obj.dtype == np.int64:
                    result = data.astype(np.int32)
                elif h5_obj.dtype == np.uint64:
                    result = data.astype(np.uint32)
                elif h5_obj.dtype == np.bool_:
                    result = data.astype(np.bool_)
                else:
                    # No MATLAB class specified - default to cell array => object array 
                    result = np.empty(data.shape, dtype=object)
                    for idx in np.ndindex(data.shape):
                        ref = data[idx]
                        if isinstance(ref, h5py.Reference):
                            result[idx] = h5_to_dict_simplify(dataset[ref], dataset, )
                        else:
                            result[idx] = ref
                    print(f"Warning: No MATLAB class specified for {h5_obj} at path {h5_obj.name} {h5_obj.dtype}")
        except Exception as e:
            raise ValueError(f"Failed loading path {h5_obj} {h5_obj.name} {attrs}: {str(e)}")
            
        if result is None:
            raise ValueError(f"Failed to convert data for {h5_obj} {h5_obj.name}")
            
    return result

In [19]:
data['Trials/imageName']

<HDF5 dataset "imageName": shape (1, 1386), type "|O">

In [34]:
# Iterate through the HDF5 dataset and print info about each object
def print_hdf5_info(obj, indent=''):
    if isinstance(obj, h5py.Group):
        print(f"{indent}Group: {obj.name}, {len(obj.items())} members")
        for key, value in obj.items():
            if obj.name.startswith("/#refs#"):
                continue
            else:
                print_hdf5_info(value, indent + '  ')
    elif isinstance(obj, h5py.Dataset):
        print(f"{indent}Dataset: {obj.name}, shape {obj.shape}, type {obj.dtype}")

print("HDF5 File Structure:")
print_hdf5_info(data)

HDF5 File Structure:
Group: /, 5 members
  Group: /#refs#, 100003 members
  Group: /Trials, 18 members
    Group: /Trials/B, 18 members
      Dataset: /Trials/B/AbsoluteTrialStartTime, shape (516, 1), type object
      Dataset: /Trials/B/AnalogData, shape (516, 1), type object
      Dataset: /Trials/B/BehavioralCodes, shape (516, 1), type object
      Dataset: /Trials/B/Block, shape (516, 1), type object
      Dataset: /Trials/B/BlockCount, shape (516, 1), type object
      Dataset: /Trials/B/Condition, shape (516, 1), type object
      Dataset: /Trials/B/CycleRate, shape (516, 1), type object
      Dataset: /Trials/B/ObjectStatusRecord, shape (516, 1), type object
      Dataset: /Trials/B/ReactionTime, shape (516, 1), type object
      Dataset: /Trials/B/RewardRecord, shape (516, 1), type object
      Dataset: /Trials/B/TaskObject, shape (516, 1), type object
      Dataset: /Trials/B/Trial, shape (516, 1), type object
      Dataset: /Trials/B/TrialDateTime, shape (516, 1), type object

In [None]:
list(data['Trials'].keys())


['B',
 'MLConfig',
 'TrialRecord',
 'XY',
 'block',
 'event03',
 'event10',
 'eventMarkers',
 'eyePupil',
 'eyeXY',
 'imageInTrial',
 'imageName',
 'imageOFFtime',
 'imageONtime',
 'trialNum',
 'trialStart',
 'width',
 'words']

In [71]:
list(Trial_dict['TrialRecord']["CurrentConditionStimulusInfo"]["Info"])

[array([{'Attribute': array([['pic'],
               ['C:\\Users\\Poncelab-ML2a\\Documents\\monkeylogic\\selectivity_basic\\images_to_encode\\noise_eig20_lin-0.16.jpg'],
               [array([[0.]])],
               [array([[0.]])],
               [array([[152.]])],
               [array([[152.]])]], dtype=object), 'Label': 'PIC: noise_eig20_lin-0.16.jpg [152 x 152]'}                            ],
       dtype=object),
 array([{'Attribute': array([['pic'],
               ['C:\\Users\\Poncelab-ML2a\\Documents\\monkeylogic\\selectivity_basic\\images_to_encode\\noise_eig2_lin-0.16.jpg'],
               [array([[0.]])],
               [array([[0.]])],
               [array([[152.]])],
               [array([[152.]])]], dtype=object), 'Label': 'PIC: noise_eig2_lin-0.16.jpg [152 x 152]'}                            ],
       dtype=object),
 array([{'Attribute': array([['pic'],
               ['C:\\Users\\Poncelab-ML2a\\Documents\\monkeylogic\\selectivity_basic\\images_to_encode\\class_eig9_l

In [None]:
# fast selective loading
meta_dict = h5_to_dict_simplify(data['meta'], data)
# turn rasters into a numpy array, reduce precision to 32-bit float
rasters = np.array(data['rasters']).astype(np.float32)
lfps = np.array(data['lfps']).astype(np.float32)
Trial_dict = edict()
for key in data['Trials'].keys():
    if key in ["B", "MLConfig"]:
        continue
    print(f"Loading {key}", end="\t")
    Trial_dict[key] = h5_to_dict_simplify(data['Trials'][key], data)
    print("done")
# TrialRecord_dict = h5_to_dict_simplify(data['Trials/TrialRecord'], data)

import pickle as pkl
pkl.dump({"Trials": Trial_dict, "meta": meta_dict, "rasters": rasters, }, open("Trial_dict.pkl", "wb"))

done
Loading XY	done
Loading block	done
Loading event03	done
Loading event10	done
Loading eventMarkers	done
Loading eyePupil	done
Loading eyeXY	done
Loading imageInTrial	done
Loading imageName	done
Loading imageOFFtime	done
Loading imageONtime	done
Loading trialNum	done
done
Loading width	done
Loading words	done


### Mass loading and reformatting

In [75]:
import pandas as pd
import pickle as pkl
import time
mat_root = r"S:\Data-Ephys-MAT"
pkl_root = r"S:\Data-Ephys-PKL"
exp_record_pathdict = {"Alfa": r"S:\Exp_Record_Alfa.xlsx", 
                       "Beto": r"S:\ExpSpecTable_Augment.xlsx",
                       "Caos": r"S:\Exp_Record_Caos.xlsx",
                       "Diablito": r"S:\Exp_Record_Diablito.xlsx"}
for Animal, path in exp_record_pathdict.items():
    exp_record = pd.read_excel(path)
    exp_record.to_csv(path.replace(".xlsx", ".csv"), index=False)

In [77]:
ExpRecord_CD = pd.concat([pd.read_excel(exp_record_pathdict[Animal]) for Animal in ("Caos", "Diablito")])
ExpRecord_CD.query("")


In [81]:
ExpRecord_CD.Exp_collection[ExpRecord_CD.Exp_collection.str.contains('BigGAN_FC6|BigGAN_Hessian', na=False)]

0                   NaN
1                   NaN
2                   NaN
3                   NaN
4                   NaN
5                   NaN
6                   NaN
7                   NaN
8                   NaN
9                   NaN
10                  NaN
11           BigGAN_FC6
12       BigGAN_Hessian
13           BigGAN_FC6
14           BigGAN_FC6
15       BigGAN_Hessian
16                  NaN
17           BigGAN_FC6
18       BigGAN_Hessian
19    selectivity_movie
20    selectivity_movie
21           BigGAN_FC6
22       BigGAN_Hessian
23                  NaN
24           BigGAN_FC6
25           BigGAN_FC6
26       BigGAN_Hessian
27                  NaN
28           BigGAN_FC6
29       BigGAN_Hessian
30                  NaN
31           BigGAN_FC6
32       BigGAN_Hessian
0                   NaN
1            BigGAN_FC6
2        BigGAN_Hessian
3                   NaN
4     NatImgSelectivity
5                   NaN
6            BigGAN_FC6
7        BigGAN_Hessian
8            Big

In [85]:
exp_mask = ExpRecord_CD.Exp_collection.str.contains('BigGAN_FC6|BigGAN_Hessian', na=False) & ~ ExpRecord_CD.Expi.isna()
ExpRecord_CD.loc[exp_mask, :]

Unnamed: 0,ChanQual,Exp_collection,Expi,WebCam,comments,ephysFN,expControlFN,pref_chan,pref_unit,stim_size,stimuli
11,,BigGAN_FC6,1.0,,002 generate biggan\n 91 (0 0) 2 2 cmaes SU 1/...,Caos-12022024-002,241202_143902_Caos_generate_BigGAN,91.0,,,N:\Stimuli\Evolutions\2024\2024-12-02-Ev-Caos-...
12,,BigGAN_Hessian,1.0,,"Manifold with biggan\n 003 at 309 PM, manifold...",Caos-12022024-003,241202_150857_Caos_selectivity_basic,91.0,,,N:\Stimuli\Evolutions\2024\2024-12-02-Ev-Caos-...
14,,BigGAN_FC6,2.0,,"Let's try chan 94, with hash\n 005 biggan\n 94...",Caos-12022024-005,241202_153847_Caos_generate_BigGAN,94.0,,,N:\Stimuli\Evolutions\2024\2024-12-02-Ev-Caos-...
15,,BigGAN_Hessian,2.0,,"006 manifold\n alas, it's all dogs\n 006 at 3...",Caos-12022024-006,241202_155547_Caos_selectivity_basic,94.0,,,N:\Stimuli\Evolutions\2024\2024-12-02-Ev-Caos-...
17,,BigGAN_FC6,3.0,,002 biggan\nstarted at 218 PM\n71 (0 0) 2 2 CM...,Caos-12042024-002,241204_141711_Caos_generate_BigGAN,71.0,,2.0,N:\Stimuli\Evolutions\2024\2024-12-04-Ev-Caos-...
18,,BigGAN_Hessian,3.0,,"003 at 239 PM\nmanifold\n(0,0) 2\nhas had arou...",Caos-12042024-003,241204_143902_Caos_selectivity_basic,71.0,,2.0,N:\Stimuli\Evolutions\2024\2024-12-04-Ev-Caos-...
21,,BigGAN_FC6,4.0,,Will try another biggan\nch 72 ( 0 0) 2 1 CMAE...,Caos-12042024-006,241204_151045_Caos_generate_BigGAN,72.0,,,N:\Stimuli\Evolutions\2024\2024-12-04-Ev-Caos-...
22,,BigGAN_Hessian,4.0,,Running manifold\n007 at 3:30 PM\nch 72 ( 0 0)...,Caos-12042024-007,241204_152944_Caos_selectivity_basic,72.0,,,N:\Stimuli\Evolutions\2024\2024-12-04-Ev-Caos-...
25,,BigGAN_FC6,5.0,,Trying biggan again with SU in 92\n92 (0 0) 2 ...,Caos-12092024-003,241209_142223_Caos_generate_BigGAN,92.0,,2.0,N:\Stimuli\Evolutions\2024\2024-12-09-Ev-Caos-...
26,,BigGAN_Hessian,5.0,,Manifold selectivity basic started at 2.48PM -...,Caos-12092024-004,241209_144750_Caos_selectivity_basic,92.0,,2.0,N:\Stimuli\Evolutions\2024\2024-12-09-Ev-Caos-...


In [86]:
import pickle as pkl
import time
from easydict import EasyDict as edict
mat_root = r"S:\Data-Ephys-MAT"
pkl_root = r"S:\Data-Ephys-PKL"
# ephys_name = "Diablito-12122024-006"
T0 = time.time()
for ephys_name in ExpRecord_CD.loc[exp_mask, :].ephysFN.values:
    t0 = time.time()
    mat_file = join(mat_root, f"{ephys_name}_formatted.mat")
    data = h5py.File(mat_file, "r")
    print(f"Loading {ephys_name} mat file")
    t1 = time.time()
    print(f"Time taken: {t1 - t0:.2f} seconds for loading mat file")
    # fast selective loading
    meta_dict = h5_to_dict_simplify(data['meta'], data)
    # turn rasters into a numpy array, reduce precision to 32-bit float
    rasters = np.array(data['rasters']).astype(np.float32)
    # lfps = np.array(data['lfps']).astype(np.float32)
    Trial_dict = edict()
    for key in data['Trials'].keys():
        if key in ["B", "MLConfig"]:
            continue
        print(f"Loading {key}", end="\t")
        Trial_dict[key] = h5_to_dict_simplify(data['Trials'][key], data)
        print("done")
    # TrialRecord_dict = h5_to_dict_simplify(data['Trials/TrialRecord'], data)
    t2 = time.time()
    print(f"Time taken: {t2 - t1:.2f} seconds for loading trials")

    pkl.dump({"Trials": Trial_dict, "meta": meta_dict, "rasters": rasters, }, open(join(pkl_root, f"{ephys_name}.pkl"), "wb"))
    t3 = time.time()
    print(f"Time taken: {t3 - t2:.2f} seconds for dumping pkl file [Total: {t3 - T0:.2f} seconds]")


Loading Caos-12022024-002 mat file
Time taken: 0.01 seconds for loading mat file
done
Loading XY	done
Loading block	done
Loading event03	done
Loading event10	done
Loading eventMarkers	done
Loading eyePupil	done
Loading eyeXY	done
Loading imageInTrial	done
Loading imageName	done
Loading imageOFFtime	done
Loading imageONtime	done
Loading trialNum	done
Loading trialStart	done
Loading width	done
Loading words	done
Time taken: 19.34 seconds for loading trials
Time taken: 2.10 seconds for dumping pkl file [Total: 21.45 seconds]
Loading Caos-12022024-003 mat file
Time taken: 0.26 seconds for loading mat file
done
Loading XY	done
Loading block	done
Loading event03	done
Loading event10	done
Loading eventMarkers	done
Loading eyePupil	done
Loading eyeXY	done
Loading imageInTrial	done
Loading imageName	done
Loading imageOFFtime	done
Loading imageONtime	done
Loading trialNum	done
Loading trialStart	done
Loading width	done
Loading words	done
Time taken: 5.22 seconds for loading trials
Time taken: 0.

In [25]:
TrialRecord_dict = h5_to_dict_simplify(data['Trials/TrialRecord'], data)



In [28]:
TrialRecord_dict = h5_to_dict_simplify(data['Trials/TrialRecord'], data, exluding_paths=["/Trials/TrialRecord/CurrentConditionStimulusInfo",])



In [30]:
TrialRecord_dict["CurrentConditionStimulusInfo"]

{'Angle': array([[array([[0.]])],
        [array([[0.]])],
        [array([[0.]])],
        [array([[0.]])]], dtype=object),
 'ID': array([[array([[23075.]])],
        [array([[23084.]])],
        [array([[23043.]])],
        [array([[23129.]])]], dtype=object),
 'Info': array([[{'Attribute': array([['pic'],
                ['C:\\Users\\Poncelab-ML2a\\Documents\\monkeylogic\\selectivity_basic\\images_to_encode\\noise_eig20_lin-0.16.jpg'],
                [array([[0.]])],
                [array([[0.]])],
                [array([[152.]])],
                [array([[152.]])]], dtype=object), 'Label': 'PIC: noise_eig20_lin-0.16.jpg [152 x 152]'}                            ],
        [{'Attribute': array([['pic'],
                ['C:\\Users\\Poncelab-ML2a\\Documents\\monkeylogic\\selectivity_basic\\images_to_encode\\noise_eig2_lin-0.16.jpg'],
                [array([[0.]])],
                [array([[0.]])],
                [array([[152.]])],
                [array([[152.]])]], dtype=object)

In [60]:
block_dict = h5_to_dict_simplify(data['Trials/B'], data) # this loading takes very long time. 



KeyboardInterrupt: 

In [55]:
# fast selective loading
meta_dict = h5_to_dict_simplify(data['meta'], data)
TrialRecord_dict = h5_to_dict_simplify(data['Trials/TrialRecord'], data)
# turn rasters into a numpy array, reduce precision to 32-bit float
rasters = np.array(data['rasters']).astype(np.float32)
lfps = np.array(data['lfps']).astype(np.float32)
# get the image names
image_names_arr = hdf5_string_array_to_string_array(data, data['Trials/imageName'])
image_names_arr = image_names_arr.squeeze(axis=0) # remove the extra dimension, (Trials, )

Loading /meta
Loading /meta/baselineWindowLength
Loading /meta/ephysFN
Loading /meta/equipment
Loading /meta/expControl
Loading /meta/expControlFN
Loading /meta/imList
Loading /meta/pathBHV
Loading /meta/pathIMList
Loading /meta/pathMat
Loading /meta/pathRAW
Loading /meta/rasterWindow
Loading /meta/sdf
Loading /meta/spikeID
Loading /meta/unitID
Loading /meta/wvfms
Loading /Trials/TrialRecord
Loading /Trials/TrialRecord/BehaviorSummary
Loading /Trials/TrialRecord/BlockChange
Loading /Trials/TrialRecord/BlockCount
Loading /Trials/TrialRecord/BlockOrder
Loading /Trials/TrialRecord/BlocksPlayed
Loading /Trials/TrialRecord/BlocksSelected
Loading /Trials/TrialRecord/ConditionsPlayed
Loading /Trials/TrialRecord/ConditionsThisBlock
Loading /Trials/TrialRecord/CurrentBlock
Loading /Trials/TrialRecord/CurrentBlockCount
Loading /Trials/TrialRecord/CurrentCondition
Loading /Trials/TrialRecord/CurrentConditionInfo
Loading /Trials/TrialRecord/CurrentConditionStimulusInfo
Loading /Trials/TrialRecord/

KeyboardInterrupt: 

In [None]:
rasters

In [6]:
meta_dict

{'baselineWindowLength': array([[90.]]),
 'ephysFN': 'Diablito-12122024-006',
 'equipment': 'OMNIPLEX',
 'expControl': 'ML',
 'expControlFN': '241212_123905_Diablito_selectivity_basic',
 'imList': 'imlist_taliaObjects01_BKBKG_255.txt',
 'pathBHV': 'N:\\Data-Behavior (BHV2)',
 'pathIMList': 's:\\Data-imList',
 'pathMat': 's:\\Data-Ephys-MAT',
 'pathRAW': 's:\\Data-Ephys-Raw',
 'rasterWindow': array([[  0.],
        [200.]]),
 'sdf': 'sdf',
 'spikeID': array([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  8.,  9., 10., 11., 12.,
         13., 14., 15., 16., 17., 18., 19., 20., 21., 21., 22., 23., 23.,
         24., 25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 34., 35.,
         36., 37., 38., 39., 40., 41., 42., 42., 43., 44., 45., 46., 47.,
         48., 49., 50., 51., 52., 53., 54., 55., 56., 57., 58., 59., 60.,
         61., 62., 63., 64., 64.]]),
 'unitID': array([[1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 2., 1., 1., 2., 1., 1.,

In [190]:
meta_dict = h5_to_dict(data['meta'], data)

In [182]:
meta_dict['ephysFN']

{'__attrs__': {'H5PATH': b'/meta',
  'MATLAB_class': b'char',
  'MATLAB_int_decode': 2},
 '__data__': 'Diablito-12122024-006'}

In [191]:
mat_formatted_dict = h5_to_dict_simplify(data, data)



KeyboardInterrupt: 

In [192]:
import mat73
pyfile = mat73.loadmat(mat_file)



KeyboardInterrupt: 

In [193]:
h5_to_dict_simplify(data['Trials'], data)



In [183]:
meta_dict

{'__attrs__': {'MATLAB_class': b'struct',
  'MATLAB_fields': array([array([b'b', b'a', b's', b'e', b'l', b'i', b'n', b'e', b'W', b'i', b'n',
                b'd', b'o', b'w', b'L', b'e', b'n', b'g', b't', b'h'], dtype='|S1'),
         array([b'e', b'p', b'h', b'y', b's', b'F', b'N'], dtype='|S1'),
         array([b'e', b'q', b'u', b'i', b'p', b'm', b'e', b'n', b't'], dtype='|S1'),
         array([b'e', b'x', b'p', b'C', b'o', b'n', b't', b'r', b'o', b'l'],
               dtype='|S1')                                                 ,
         array([b'e', b'x', b'p', b'C', b'o', b'n', b't', b'r', b'o', b'l', b'F',
                b'N'], dtype='|S1')                                              ,
         array([b'i', b'm', b'L', b'i', b's', b't'], dtype='|S1'),
         array([b'r', b'a', b's', b't', b'e', b'r', b'W', b'i', b'n', b'd', b'o',
                b'w'], dtype='|S1')                                              ,
         array([b's', b'd', b'f'], dtype='|S1'),
         array(

In [162]:
data[data["/Trials/MLConfig/IO/SignalType"][0,0]]

<HDF5 dataset "q3z": shape (16, 1), type "<u2">

In [184]:
Trials_dict = h5_to_dict(data['Trials/MLConfig'], data, no_attrs=True)



In [185]:
Trials_dict["Webcam"]

{'ID': {'__data__': array([[{'__data__': ''}],
         [{'__data__': ''}],
         [{'__data__': ''}],
         [{'__data__': ''}]], dtype=object)},
 'Property': {'__data__': array([[{'__data__': array([0., 0.])}],
         [{'__data__': array([0., 0.])}],
         [{'__data__': array([0., 0.])}],
         [{'__data__': array([0., 0.])}]], dtype=object)}}

In [167]:
hdf5_string_array_to_string_array(data, data['Trials/MLConfig/USBJoystick/IP_address'])

array([['127.0.0.1'],
       ['127.0.0.1']], dtype=object)

{'__attrs__': {'H5PATH': b'/meta', 'MATLAB_class': b'double'},
 '__data__': array([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  8.,  9., 10., 11., 12.,
         13., 14., 15., 16., 17., 18., 19., 20., 21., 21., 22., 23., 23.,
         24., 25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 34., 35.,
         36., 37., 38., 39., 40., 41., 42., 42., 43., 44., 45., 46., 47.,
         48., 49., 50., 51., 52., 53., 54., 55., 56., 57., 58., 59., 60.,
         61., 62., 63., 64., 64.]])}

In [64]:
meta_dict['spikeID']

{'__attrs__': {'H5PATH': b'/meta', 'MATLAB_class': b'double'},
 '__data__': array([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  8.,  9., 10., 11., 12.,
         13., 14., 15., 16., 17., 18., 19., 20., 21., 21., 22., 23., 23.,
         24., 25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 34., 35.,
         36., 37., 38., 39., 40., 41., 42., 42., 43., 44., 45., 46., 47.,
         48., 49., 50., 51., 52., 53., 54., 55., 56., 57., 58., 59., 60.,
         61., 62., 63., 64., 64.]])}

In [67]:
meta_dict['wvfms']

{'__attrs__': {'H5PATH': b'/meta', 'MATLAB_class': b'double'},
 '__data__': array([[ -5.51425108, -13.51046059,  -4.55936045, ...,  -5.3247664 ,
          -4.86026285,   3.55355909],
        [ -8.27572297, -18.5380348 ,  -7.42121222, ..., -13.50964508,
         -12.82060795,   2.25201566],
        [-11.26839384, -24.45973388, -10.56054046, ..., -23.80016318,
         -22.84673538,   2.99832988],
        ...,
        [  2.04367176,   5.91021494,   2.53590072, ...,  18.45292072,
          20.63538841,  48.72201106],
        [  2.2952105 ,   6.50186285,   2.9956941 , ...,  18.27829905,
          20.02248744,  41.34381479],
        [  2.54905971,   6.87357216,   3.33897034, ...,  17.54515609,
          18.17254957,  34.17962451]])}

In [106]:
data['Trials/MLConfig/EyeTracerShape'][0,0]

<HDF5 object reference>

In [120]:
isinstance(data['Trials/MLConfig/EyeTransform'][0,0], h5py.Reference)
data[data['Trials/MLConfig/EyeTransform'][0,0]]

<HDF5 group "/#refs#/#a/A4z" (1 members)>

In [121]:
grp = data[data['Trials/MLConfig/EyeTransform'][0,0]]  # Access the group

# Now you can inspect its contents, e.g.:
print(list(grp.keys()))    # Lists its members

['offset']


In [122]:
data[data['Trials/MLConfig/EyeTransform'][0,0]]['offset']

<HDF5 dataset "offset": shape (2, 1), type "<f8">

In [112]:
hdf5_string_array_to_string_array(data, data['Trials/MLConfig/EyeTransform'])

TypeError: Accessing a group is done with bytes or str,  not <class 'tuple'>

In [103]:
hdf5_string_array_to_string_array(data, data['Trials/MLConfig/EyeTracerShape'])

array([['Circle'],
       ['Circle']], dtype=object)

In [100]:
list(data['Trials/MLConfig/EyeTracerShape'].attrs.keys())

['H5PATH', 'MATLAB_class']

In [52]:
# turn rasters into a numpy array, reduce precision to 32-bit float
rasters = np.array(data['rasters']).astype(np.float32)
# get the image names
image_names_ref = data['Trials/imageName']
image_names_arr = hdf5_string_array_to_string_array(data, image_names_ref)
image_names_arr = image_names_arr.squeeze(axis=0) # remove the extra dimension, (Trials, )
assert image_names_arr.shape[0] == rasters.shape[0]

In [4]:
print(data["Trials"].keys())

<KeysViewHDF5 ['B', 'MLConfig', 'TrialRecord', 'XY', 'block', 'event03', 'event10', 'eventMarkers', 'eyePupil', 'eyeXY', 'imageInTrial', 'imageName', 'imageOFFtime', 'imageONtime', 'trialNum', 'trialStart', 'width', 'words']>


In [13]:
# Get the image names dataset
# Get the actual string values from the HDF5 dataset
image_names = data['Trials']['imageName']
image_names

<HDF5 dataset "imageName": shape (1, 1386), type "|O">

In [24]:
data[image_names[0,0]]

<HDF5 dataset "Uob": shape (18, 1), type "<u2">

In [25]:
image_names[0,0]

<HDF5 object reference>

In [45]:
# test the functions single 
hdf5_dataset_to_string(data[image_names[0,0]])

'noise_eig2_lin0.24'

In [36]:
image_names.dtype == np.dtype('O')

True