# In [2]:
import scipy.io as spio
import inspect
import dis

class MatlabStruct(dict):
    """
    Dictionary whose keys can also be read, set and deleted as attributes.

    Looking up a missing string key may create an empty nested
    ``MatlabStruct`` so that chained assignments such as ``s.a.b = 1`` work.
    Whether a key is created depends on the instruction that follows the
    lookup in the calling code (see ``_is_allowed``).  Keys that start with
    an underscore, and keys that are not strings, are never auto-created.
    """

    def __getattr__(self, attr):
        # Access the dictionary keys for unknown attributes.
        try:
            return self[attr]
        except KeyError:
            msg = "'MatlabStruct' object has no attribute %s" % attr
            raise AttributeError(msg)

    def __getitem__(self, attr):
        """
        Get a dict value; create a MatlabStruct if requesting a submember.
        Do not create a key if the attribute starts with an underscore.

        Raises ``KeyError`` when the key is missing and is not auto-created.
        """
        # Non-string keys have no ``startswith``; treat them as ordinary
        # dict keys so a missing one raises KeyError, not AttributeError.
        if attr in self or not isinstance(attr, str) or attr.startswith('_'):
            return dict.__getitem__(self, attr)

        # Inspect the code that asked for the key: the direct caller, and
        # the caller's caller (the relevant one when we were reached through
        # ``__getattr__``).
        frame = inspect.currentframe()
        caller = frame.f_back if frame is not None else None
        outer = caller.f_back if caller is not None else None
        try:
            if ((outer is not None and self._is_allowed(outer))
                    or (caller is not None and self._is_allowed(caller))):
                dict.__setitem__(self, attr, MatlabStruct())
        finally:
            # Drop the frame references so they cannot keep the frames
            # (and their locals) alive through a reference cycle.
            del frame, caller, outer
        return dict.__getitem__(self, attr)

    def _is_allowed(self, frame):
        """
        Check for an allowed op code in the calling frame.

        Returns True when the instruction that follows the one currently
        executing in ``frame`` is ``STORE_ATTR`` or ``LOAD_CONST``, and
        False otherwise (including when there is no following instruction).
        """
        allowed = ('STORE_ATTR', 'LOAD_CONST')
        last = frame.f_lasti
        # Walk the decoded instructions instead of indexing the raw bytecode
        # at a fixed byte offset: instruction widths differ between Python
        # versions, so a hard-coded offset lands on an argument byte.
        for instruction in dis.get_instructions(frame.f_code):
            if instruction.offset > last:
                return instruction.opname in allowed
        return False

    # Attribute assignment and deletion act directly on the dict entries.
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

    @property
    def __dict__(self):
        """Allow for code completion in a REPL"""
        return self.copy()


def get_matlab_value(val):
    """
    Convert a value read from a Matlab file into plain Python objects.

    Adapted from the oct2py project, see
    https://pythonhosted.org/oct2py/conversions.html

    Lists are converted element by element and non-array values are
    returned untouched.  Arrays are handled by the first matching rule:
    objects with a ``classname`` become an instance of an ad-hoc class,
    arrays with named fields become a ``MatlabStruct``, object arrays become
    a list, one-element arrays become a scalar, and empty arrays become
    ``''`` (string dtypes) or ``[]``.  Any other array is returned as is.
    """
    import numpy as np

    # Lists: recurse into every element.
    if isinstance(val, list):
        return list(map(get_matlab_value, val))

    # Leaf objects need no conversion.
    if not isinstance(val, np.ndarray):
        return val

    def convert_field(field):
        # Squeeze a single field down and convert whatever it holds.
        return get_matlab_value(val[field].squeeze().tolist())

    # User defined classes: build a throwaway type carrying the fields.
    if hasattr(val, 'classname'):
        members = {}
        for field in val.dtype.names:
            members[field] = convert_field(field)
        return type(val.classname, (object,), members)()

    # Struct data: one entry per named field.
    if val.dtype.names:
        struct = MatlabStruct()
        for field in val.dtype.names:
            struct[field] = convert_field(field)
        return struct

    # Cells: always hand a list to the recursive call.
    if val.dtype.kind == 'O':
        items = val.squeeze().tolist()
        return get_matlab_value(items if isinstance(items, list) else [items])

    # Singleton values collapse to a Python scalar.
    if val.size == 1:
        return val.item()

    # Empty values collapse to an empty string or an empty list.
    if val.size == 0:
        return '' if val.dtype.kind in 'US' else []

    return val


def load_matlab(filename):
    """
    Load a Matlab file and convert each entry with ``get_matlab_value``.

    Returns a ``(data, error)`` pair.  On success ``data`` is a dict with
    one converted value per key returned by ``scipy.io.loadmat`` and
    ``error`` is None.  If anything raises, ``data`` is None and ``error``
    is the exception message as a string.
    """
    try:
        raw = spio.loadmat(filename, struct_as_record=True)
        converted = {
            name: get_matlab_value(content) for name, content in raw.items()
        }
    except Exception as error:
        return None, str(error)
    return converted, None
            
# load_matlab returns a (data, error) pair; exactly one of them is None.
struct = load_matlab("/Data_DSB_SSB")
if struct[1] is not None:
    # Report the real load failure instead of failing on the next line with
    # an unrelated TypeError from indexing None.
    raise RuntimeError("Could not load '/Data_DSB_SSB': %s" % struct[1])
data = struct[0]['Data']

# In [3]:
import numpy as np

# For each split, turn the 'prob' and 'sequence' entries of its
# 'pssm_nr2011' record into arrays and save them.  Train is processed
# first, so ``prob`` and ``sequence`` hold the Test arrays afterwards.
for split, prob_path, sequence_path in (
        ('Train',
         '/pssm/Train/pssm_nr2011/prob',
         '/pssm/Train/pssm_nr2011/sequence'),
        ('Test',
         '/pssm/Test/pssm_nr2011/tprob',
         '/pssm/Test/pssm_nr2011/tsequence'),
):
    pssm = data[split]['pssm_nr2011']
    prob = np.array(pssm['prob'])
    sequence = np.array(pssm['sequence'])
    np.save(prob_path, prob)
    np.save(sequence_path, sequence)