In [1]:
#Systems I/O
import os
from pathos.pools import ProcessPool
import glob
import mat73
from scipy.io import loadmat as lm
import re
import h5py

#data/stats
import numpy as np
from sklearn import svm

from CrpStats import map_label
#viz
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Frequency-band names; load_pdc pairs them, in this order, with the PDC matrices.
BANDS = [
    'delta',
    'theta',
    'alpha',
    'beta',
    'gamma_low',
    'gamma_high',
]
# Directory holding the source .mat file; the converted .hdf5 file is written here too.
DIR = '/mnt/ernie_main/000_Data/SEEG/SEEG_EyesClosed_RestingState/results/Graham_81pats/PDC_RestingState/'


In [3]:
# Path of the MATLAB results file inside DIR.
pdc_f = os.path.join(DIR,"PDC_pats_6Band_pycomp.mat")
# `lm` is scipy.io.loadmat (aliased in the imports cell).
pdc_struct = lm(pdc_f, chars_as_strings=True)
# 'pats' holds the per-patient structs; pats[0] is iterated over in the conversion loop.
pats = pdc_struct['pats']

## Notes on Matlab struct 
- Load the struct and use the 'pats' key to pull out the 1x81 structs, which come wrapped in a tuple.
- Pull the struct out of the tuple to get 81 structs with 7 fields per struct.
- Index 0 again to get the row of 7 structs.




In [4]:
# Reference note kept as a bare string (the notebook echoes it as this cell's output).
# It records an example index chain into the loaded struct and the struct's column layout.
# NOTE(review): `tst` is not defined in the visible cells - presumably an earlier name
# for the loaded 'pats' array; confirm before relying on the example.
'''
Index to selects col in struct
                          |
Index to select patients  |
                       |  |  
                       |  |  
                       |  |  
epat_26_alpha = tst[0][0][2][0][0][2]
Columns in Struct
0 - subID
1 - labels
2 - "long" -> 6 x N x N PDC matrix (N= number of bipoles)
3 - pat_ID_clean - NOTE: use this one instead of labels! 
4 - SOZ, labels for each bipole -> 0 NIZ, 1 SOZ, 2 PZ, 3 NIZ
5 - AVG_SOZ  - ??
6 - long_Z - ?
'''

'\nIndex to selects col in struct\n                          |\nIndex to select patients  |\n                       |  |  \n                       |  |  \n                       |  |  \nepat_26_alpha = tst[0][0][2][0][0][2]\nColumns in Struct\n0 - subID\n1 - labels\n2 - "long" -> 6 x N x N PDC matrix (N= number of bipoles)\n3 - pat_ID_clean - NOTE: use this one instead of labels! \n4 - SOZ, labels for each bipole -> 0 NIZ, 1 SOZ, 2 PZ, 3 NIZ\n5 - AVG_SOZ  - ??\n6 - long_Z - ?\n'

In [5]:
# Variable-length string dtype; save_to_h5 uses it for the 'tissue_label' and 'bipole_labels' datasets.
DTYPE = h5py.special_dtype(vlen=str)
def format_soz(soz_labels):
    """Convert every raw SOZ label with ``map_label`` and return the results as a list.

    Args:
        soz_labels: iterable of raw labels; each element is passed to
            ``map_label`` (imported from CrpStats) unchanged.

    Returns:
        list: one converted label per input element, in input order.
    """
    return list(map(map_label, soz_labels))

def format_bipole(bipole):
    """Insert a hyphen between the two contacts of a bipole name.

    Surrounding whitespace is stripped, then the name is split right after the
    first digit that is immediately followed by a letter,
    e.g. ``"LA1LA2"`` -> ``"LA1-LA2"``.

    Args:
        bipole (str): concatenated bipole name.

    Returns:
        str: the stripped name with ``"-"`` inserted at the split point.

    Raises:
        AssertionError: if no digit-followed-by-letter boundary is found.
    """
    p = "[0-9][a-zA-Z]"
    bipole = bipole.strip()
    bip_match = re.search(p, bipole)
    assert bip_match is not None, f"Bipole {bipole}, is not captures by regex pattern {p}"

    # The match starts on the digit; the split goes right after it.
    cut = bip_match.start() + 1
    return f"{bipole[:cut]}-{bipole[cut:]}"

def format_bipoles(char_list):
    """Apply ``format_bipole`` to every entry and return the results as a list.

    Args:
        char_list: iterable of bipole names.

    Returns:
        list: hyphenated bipole names, in input order.
    """
    return list(map(format_bipole, char_list))

def load_pdc(pdc_struct):
    """Map each band name in ``BANDS`` to the matching entry of a patient's PDC field.

    Args:
        pdc_struct: nested container whose ``[0][0]`` element iterates over the
            per-band matrices (assumed to be in ``BANDS`` order - TODO confirm
            against the MATLAB export).

    Returns:
        dict: ``{band_name: matrix}``; pairing stops at the shorter of
        ``BANDS`` and the iterated entries.
    """
    band_mats = pdc_struct[0][0]
    return {band: mat for band, mat in zip(BANDS, band_mats)}
def save_to_h5(h5f, subj_id, tissue_labels, contact_labels, pdc_mats):
    """Write one subject's labels and PDC matrices into an HDF5 file.

    The file is opened in append mode and every group/dataset is obtained with
    ``require_*``, so existing objects are reused instead of re-created.

    Layout written under ``/<subj_id>/``:
        tissue_label   - 1-D string dataset holding ``tissue_labels``
        bipole_labels  - 1-D string dataset holding ``contact_labels``
        pdc/<key>      - one float dataset per item of ``pdc_mats``

    Args:
        h5f: path of the HDF5 file to create or append to.
        subj_id: subject identifier, used as the name of the top-level group.
        tissue_labels: sequence of label strings, one per contact.
        contact_labels: sequence of bipole-name strings, one per contact.
        pdc_mats (dict): maps dataset name to an array exposing ``.shape``.
    """
    subj_id = f"/{subj_id}/"
    with h5py.File(h5f, 'a') as f:
        grp = f.require_group(subj_id)

        # Shapes are given as tuples so they are in the same form as
        # Dataset.shape when an already-existing dataset is checked on re-run.
        dset = grp.require_dataset('tissue_label', (len(tissue_labels),), DTYPE)
        dset[:] = tissue_labels
        dset.attrs['description'] = "Labels for contacts as SOZ, NIZ, etc, derived from SOZ label in OG matlab struct \
            \nNOTE: str will be saved as ascii. \
            \nFor utf-8 (the normal python format), use .decode() per entry)\n"

        dset = grp.require_dataset('bipole_labels', (len(contact_labels),), DTYPE)
        dset[:] = contact_labels
        dset.attrs['description'] = "Contact labels indicating where they were implanted \
            \nNOTE: str will be saved as ascii. \
            \nFor utf-8 (the normal python format), use .decode() per entry)\n"

        # HDF5 object paths always use '/', so the sub-group is created relative
        # to the subject group instead of building a path with os.path.join.
        pdc_group = grp.require_group('pdc')

        for k, v in pdc_mats.items():
            dset = pdc_group.require_dataset(k, v.shape, float)
            dset[:] = v

    

In [6]:
# NOTE(review): pat_dict is not used anywhere in the visible cells.
pat_dict = dict()
# Output HDF5 file, placed in DIR.
h5f = os.path.join(DIR, 'PDC_pats_6Band.hdf5')
# Convert each patient struct and append it to the HDF5 file.
# The enumerate index `i` is not used inside the loop body.
for i, subj_struct in enumerate(pats[0]):
    subj_id = subj_struct[3][0]  # field 3, first element -> HDF5 group name
    soz_labels = format_soz(subj_struct[4])  # field 4 -> labels converted by map_label
    bip_labels = format_bipoles(subj_struct[1])  # field 1 -> hyphenated bipole names
    pdc_mats = load_pdc(subj_struct[2])  # field 2 -> {band name: matrix}
    save_to_h5(h5f, subj_id, soz_labels, bip_labels,pdc_mats)


In [7]:
# Read one subject ('/Spat46') back from the HDF5 file to inspect what was written.
with h5py.File( h5f, 'r') as f:
    spat46 = f['/Spat46']
    soz = spat46['tissue_label']
    print(soz.attrs['description'])
    soz = soz[()]  # pull the dataset contents out while the file is still open
    bipole = spat46['bipole_labels']
    print(bipole.attrs['description'])
    bipole = bipole[()]  # same: replace the dataset handle with its contents

    # alpha-band matrix; reused by the later cells as `pdc`
    pdc = spat46['pdc/alpha'][()]



Labels for contacts as SOZ, NIZ, etc, derived from SOZ label in OG matlab struct             
NOTE: str will be saved as ascii.             
For utf-8 (the normal python format), use .decode() per entry)

Contact labels indicating where they were implanted             
NOTE: str will be saved as ascii.             
For utf-8 (the normal python format), use .decode() per entry)



In [47]:
def norm_adj_matrix(mat, by='col'):
    """Compute NaN-aware mean and standard deviation per column or per row.

    Despite the name, the matrix itself is not normalised here; the function
    returns the statistics, shaped so that they broadcast against ``mat``
    (e.g. ``(mat - mu) / std``).

    Args:
        mat (np.array): 2-D matrix of directed connections. It does not need
            to be square.
        by (str, optional): ``'col'`` summarises each column (one value per
            column), ``'row'`` summarises each row. Defaults to ``'col'``.

    NOTE: column-wise statistics use ``axis=0`` because summarising a column
    collapses the row dimension; ``numpy.nanmean`` takes the axis to collapse.

    Returns:
        tuple: ``(mu, std)``. Both have shape ``(1, n_cols)`` for ``by='col'``
        and ``(n_rows, 1)`` for ``by='row'``.

    Raises:
        AssertionError: if ``by`` is neither ``'col'`` nor ``'row'``.
    """
    assert by== 'col' or by =='row', "select proper dimension to summarize, only supports 2D"
    axis = 0 if by == 'col' else 1

    # keepdims leaves the collapsed axis in place with length 1, which yields the
    # broadcast-ready shape directly and also works for non-square input.
    mu = np.nanmean(mat, axis=axis, keepdims=True)
    std = np.nanstd(mat, axis=axis, keepdims=True)
    return mu, std

# Row-wise statistics of `pdc` (the alpha-band matrix read from the HDF5 file);
# with by='row' both results come back with shape (n, 1).
mu, std = norm_adj_matrix(pdc,by='row')

In [48]:
# Subtract each row's mean: mu is (n, 1), so it broadcasts across the columns of pdc.
sub_tst = np.subtract(pdc,mu)

In [50]:
# Manual spot check: a hard-coded value minus the mean of row 12.
# NOTE(review): the constant is presumably pdc[12, 1] (see the trailing '#12,1'),
# to be compared by eye against sub_tst[12, 1] - confirm, or replace with an assertion.
0.0249084995 - mu[12,0] #12,1

-0.016279565419575766

(110, 110)