In [8]:
#Systems I/O
import os
from collections import defaultdict
from pathos.pools import ProcessPool
import glob
import mat73
from scipy.io import loadmat as lm
import re
import h5py

#data/stats
import numpy as np
import pandas as pd
from sklearn import svm

#viz
import seaborn as sns
import matplotlib.pyplot as plt

#specialty
from utils import *
from CrpStats import *

In [2]:
# Frequency-band names.
# NOTE(review): presumably in the same order as the 6 bands of the "long" PDC array — confirm against the MATLAB export.
BANDS = ['delta', 'theta','alpha', 'beta', 'gamma_low','gamma_high']
# Directory holding the resting-state PDC result files (absolute path; adjust per machine).
DIR = '/mnt/ernie_main/000_Data/SEEG/SEEG_EyesClosed_RestingState/results/Graham_81pats/PDC_RestingState/'


In [3]:
# Load the MATLAB export of the PDC results (`lm` is scipy.io.loadmat).
pdc_f = os.path.join(DIR,"PDC_pats_6Band_pycomp.mat")
pdc_struct = lm(pdc_f, chars_as_strings=True)
# 'pats' is the per-patient struct array; the conversion cell iterates over pats[0].
pats = pdc_struct['pats']

## Notes on the MATLAB struct
- Load the struct and use the 'pats' key to pull out the 1x81 array of structs (wrapped in a tuple)
- Pull the array out of the tuple to get 81 structs with 7 fields per struct
- Index 0 again to get the row of 7 fields




In [4]:
# Reference notes on indexing the struct array loaded from the .mat file.
# Kept as a bare string so the cell simply echoes it.
# NOTE(review): `tst` in the example line is not defined by any earlier cell shown here,
# and the SOZ code list names NIZ twice (0 and 3) — confirm the intended label for 3.
'''
Index to selects col in struct
                          |
Index to select patients  |
                       |  |  
                       |  |  
                       |  |  
epat_26_alpha = tst[0][0][2][0][0][2]
Columns in Struct
0 - subID
1 - labels
2 - "long" -> 6 x N x N PDC matrix (N= number of bipoles)
3 - pat_ID_clean - NOTE: use this one instead of labels! 
4 - SOZ, labels for each bipole -> 0 NIZ, 1 SOZ, 2 PZ, 3 NIZ
5 - AVG_SOZ  - ??
6 - long_Z - ?
'''

'\nIndex to selects col in struct\n                          |\nIndex to select patients  |\n                       |  |  \n                       |  |  \n                       |  |  \nepat_26_alpha = tst[0][0][2][0][0][2]\nColumns in Struct\n0 - subID\n1 - labels\n2 - "long" -> 6 x N x N PDC matrix (N= number of bipoles)\n3 - pat_ID_clean - NOTE: use this one instead of labels! \n4 - SOZ, labels for each bipole -> 0 NIZ, 1 SOZ, 2 PZ, 3 NIZ\n5 - AVG_SOZ  - ??\n6 - long_Z - ?\n'

In [34]:
# Convert each patient's struct from the .mat file into the HDF5 file `h5f`.
# format_soz, format_bipoles, load_pdc and save_pdc_to_h5 are not defined in this notebook
# (presumably provided by the star imports — confirm).
pat_dict = dict()  # NOTE(review): not used by any cell shown here
h5f = os.path.join(DIR, 'PDC_pats_6Band.hdf5')
for i, subj_struct in enumerate(pats[0]):  # pats[0]: the row of per-patient structs
    # Field indices follow the "Columns in Struct" notes in the earlier cell.
    subj_id = subj_struct[3][0]  # field 3: pat_ID_clean
    soz_labels = format_soz(subj_struct[4])  # field 4: SOZ code per bipole
    bip_labels = format_bipoles(subj_struct[1])  # field 1: labels
    pdc_mats = load_pdc(subj_struct[2])  # field 2: "long" PDC array
    save_pdc_to_h5(h5f, subj_id, soz_labels, bip_labels,pdc_mats)


In [85]:
def load_pdc_h5(h5f):
    """Load every patient's labels and PDC matrices from an HDF5 file.

    Each top-level group in the file is treated as one patient and is expected
    to hold a 'tissue_label' dataset, a 'bipole_labels' dataset and a 'pdc'
    group whose members are read as the per-band matrices.

    Args:
        h5f: Path to the HDF5 file (anything ``h5py.File`` accepts).

    Returns:
        dict: ``{patient_key: {'labels_soz': list, 'bipole_labels': list,
        'pdc': {band: array}}}``. A plain dict is returned so that looking up
        an unknown patient raises ``KeyError`` instead of silently inserting
        an empty entry.
    """
    def _as_text(value):
        # h5py hands back bytes or str for string datasets depending on its
        # version and the stored dtype; accept both.
        return value.decode() if isinstance(value, bytes) else value

    pdc_dict = {}
    with h5py.File(h5f, 'r') as f:
        for k, pat in f.items():
            pdc_dict[k] = {
                'labels_soz': [_as_text(l) for l in pat['tissue_label'][()]],
                'bipole_labels': [_as_text(bip) for bip in pat['bipole_labels'][()]],
                # [()] reads each band's dataset fully into memory
                'pdc': {band: v[()] for band, v in pat['pdc'].items()},
            }
    return pdc_dict
# Read the converted HDF5 file back into memory, keyed by patient.
adj_dicts = load_pdc_h5(h5f)


In [86]:
# Spot-check one patient's per-band PDC matrices.
adj_dicts['Spat46']['pdc']

{'alpha': array([[       nan, 0.08121775, 0.0419141 , ..., 0.02757294, 0.0127103 ,
         0.02362049],
        [0.31402523,        nan, 0.14849229, ..., 0.02355176, 0.02071194,
         0.01919556],
        [0.13611629, 0.18367411,        nan, ..., 0.03895171, 0.02042827,
         0.03889821],
        ...,
        [0.0235339 , 0.01576947, 0.01412634, ...,        nan, 0.65840044,
         0.16857772],
        [0.02170797, 0.01619505, 0.01357687, ..., 0.32668203,        nan,
         0.27478232],
        [0.02955   , 0.01942208, 0.01558826, ..., 0.2817237 , 0.82951735,
                nan]]),
 'beta': array([[       nan, 0.14682119, 0.02225201, ..., 0.01783484, 0.00804556,
         0.01274602],
        [0.31681712,        nan, 0.15778989, ..., 0.01471994, 0.01374899,
         0.01627823],
        [0.13377487, 0.26485376,        nan, ..., 0.02522372, 0.01417333,
         0.02442025],
        ...,
        [0.01488578, 0.01131484, 0.00877774, ...,        nan, 0.59365538,
         0.149436

In [94]:
def adj_dict_to_df(adj_mat, labels, conn_col):
    """Flatten an adjacency matrix into a two-column edge table.

    Args:
        adj_mat: Array whose ``flatten()`` gives one value per edge.
        labels: Per-node labels, passed to ``flatten_labels`` to build the
            matching pairwise label for every edge.
        conn_col: Name of the column that receives the edge values.

    Returns:
        pd.DataFrame with columns ``[conn_col, 'label']``.
    """
    edge_table = pd.DataFrame(columns=[conn_col, 'label'])
    # Values first, then labels; both are flattened in the same row-major order.
    edge_values = adj_mat.flatten()
    edge_table[conn_col] = edge_values
    pair_labels = flatten_labels(labels)
    edge_table['label'] = pair_labels
    return edge_table

def flatten_labels(labels):
    """Return every ordered pair of labels as a flat array of joined strings.

    Entry ``i * len(labels) + j`` of the result is ``labels[i]`` immediately
    followed by ``labels[j]`` (no separator).
    """
    names = np.char.array(labels)
    # Column vector + row vector broadcasts to the full pairwise grid.
    pair_grid = names[:, np.newaxis] + names
    return pair_grid.flatten()  # row-major order

def construct_subj_df(adj_dicts:dict, norm_style='bidirectional', band='alpha')->pd.DataFrame:
    """Assemble one node-level table across all subjects for a single PDC band.

    For each subject the chosen band's adjacency matrix is scored with
    ``get_scored_df`` and the result is tagged with the subject key and the
    per-node SOZ labels.

    Args:
        adj_dicts (dict): Mapping subject -> dict with keys 'bipole_labels',
            'labels_soz' and 'pdc' (itself a mapping band -> adjacency matrix).
        norm_style (str, optional): Forwarded to ``get_scored_df``.
            Defaults to 'bidirectional'.
        band (str, optional): Key into each subject's 'pdc' mapping.
            Defaults to 'alpha'.

    Returns:
        pd.DataFrame: The per-subject frames stacked with ``pd.concat``. Each
        subject keeps its own index, so index values repeat across subjects.

    Raises:
        ValueError: From ``pd.concat`` when ``adj_dicts`` is empty.
    """
    conn_dfs = []
    for subj, vals in adj_dicts.items():
        bipoles = vals['bipole_labels']
        mat = vals['pdc'][band]
        df = get_scored_df(mat, bipoles, norm_style)
        df['subj'] = subj
        # One SOZ label per node; must line up with the bipole labels.
        df['label'] = vals['labels_soz']
        conn_dfs.append(df)
    return pd.concat(conn_dfs)

def get_scored_df(adj_mat, node_labels, norm_style='bidirectional'):
    """Summarise an adjacency matrix as one row of weights per node.

    Args:
        adj_mat: Square adjacency matrix, passed to ``score_adj_matrix``.
        node_labels: One label per node; becomes the 'node_labels' column.
        norm_style: Only 'bidirectional' adds weight columns; any other value
            returns the frame with just 'node_labels'.

    Returns:
        pd.DataFrame with 'node_labels' and, for 'bidirectional',
        'in_weight' and 'out_weight'.
    """
    scored = pd.DataFrame()
    scored['node_labels'] = node_labels
    if norm_style != 'bidirectional':
        return scored

    # score_adj_matrix is defined outside this notebook; presumably it
    # normalises along the axis named by `by` — confirm.
    by_row = score_adj_matrix(adj_mat, by='row')
    scored['in_weight'] = np.nanmean(by_row, axis=0)  # NaN-aware mean down each column
    by_col = score_adj_matrix(adj_mat, by='col')
    scored['out_weight'] = np.nanmean(by_col, axis=1)  # NaN-aware mean across each row
    #TODO: double check
    return scored



# Per-node PDC scores for every patient, using the defaults (band='alpha', norm_style='bidirectional').
pdc_df = construct_subj_df(adj_dicts)

In [95]:
# Echo the per-node PDC table.
pdc_df

Unnamed: 0,node_labels,in_weight,out_weight,subj,label
0,LAC1-LAC2,0.011204,0.067724,Epat02,IZ
1,LAC2-LAC3,0.117305,0.114727,Epat02,IZ
2,LAC3-LAC4,0.060941,0.003989,Epat02,IZ
3,LAC4-LAC5,0.181197,0.114048,Epat02,IZ
4,LAC5-LAC6,-0.278438,0.224232,Epat02,NIZ
...,...,...,...,...,...
71,LTP1-LTP2,-0.281157,-0.121697,pat33,NIZ
72,LTP3-LTP4,-0.593999,0.596561,pat33,NIZ
73,LTP7-LTP8,-0.559977,0.342413,pat33,NIZ
74,LTP8-LTP9,-0.126116,0.229394,pat33,NIZ


In [96]:
# Load crp_trials.csv into spes_df.
# NOTE(review): absolute path; consider a configurable data directory.
out_dir = "/mnt/ernie_main/Ghassan/ephys/data/"
spes_df = pd.read_csv(os.path.join(out_dir, "crp_trials.csv"))


In [104]:
# Derive stimulation/response columns by splitting on '-'; these add columns to spes_df in place.
# stim_sesh: first two tokens -> stim_reg, remaining tokens -> resp_reg.
spes_df['stim_reg'] = spes_df.stim_sesh.apply(lambda x: "-".join(x.split("-")[0:2]))
spes_df['resp_reg'] = spes_df.stim_sesh.apply(lambda x: "-".join(x.split("-")[2:]))
# stim_rel: first token -> stim_label, second token -> resp_label.
spes_df['stim_label'] = spes_df.stim_rel.apply(lambda x : x.split('-')[0])
spes_df['resp_label'] = spes_df.stim_rel.apply(lambda x : x.split('-')[1])

In [146]:
def edgedf_to_adj(edge_df:pd.DataFrame, start_node:str, end_node:str, val_col:str)->tuple:
    """Convert an edge list into a dense adjacency matrix.

    Args:
        edge_df: One row per edge.
        start_node: Column holding the source node of each edge (matrix row).
        end_node: Column holding the target node of each edge (matrix column).
        val_col: Column holding the edge value.

    Returns:
        tuple: ``(node_list, adj_mat)`` where ``node_list`` is the sorted list
        of all nodes appearing in either column and ``adj_mat[i, j]`` is the
        value of the edge ``node_list[i] -> node_list[j]``. Entries with no
        edge are NaN.
    """
    edge_df = edge_df.sort_values(by =[start_node])
    node_list = sorted(set(edge_df[start_node]) | set(edge_df[end_node]))
    # Look each node's position up once instead of scanning the list per edge.
    position = {node: idx for idx, node in enumerate(node_list)}
    n = len(node_list)
    adj_mat = np.zeros((n,n))
    # If an edge appears more than once, the last row in sorted order wins.
    for src, dst, edge_val in zip(edge_df[start_node], edge_df[end_node], edge_df[val_col]):
        adj_mat[position[src], position[dst]] = edge_val
    # NOTE(review): this also turns edges whose stored value is exactly 0 into NaN.
    adj_mat[adj_mat ==0] = np.nan
    return node_list, adj_mat
tst = spes_df[spes_df.subj=='Epat26']  # NOTE(review): not used by any cell shown here

# Build one explained-variance adjacency matrix per subject.
# NOTE(review): iterating a set gives no guaranteed subject order, so row order downstream may vary between runs.
spes_dicts = defaultdict(lambda:dict())
for subj in set( spes_df.subj):
    # rows = stim_reg (start node), columns = resp_reg (end node)
    bip_labels, ev_mat = edgedf_to_adj(spes_df[spes_df.subj==subj], 'stim_reg','resp_reg','explained_variance')
    spes_dicts[subj]['bipole_labels'] = bip_labels
    spes_dicts[subj]['ev_matrix'] = ev_mat



In [147]:
def construct_spes_df(adj_dicts:dict,  mat_key, norm_style='bidirectional')->pd.DataFrame:
    """Assemble one node-level table across all subjects from adjacency matrices.

    For each subject the matrix stored under ``mat_key`` is scored with
    ``get_scored_df`` and the result is tagged with the subject key.

    Args:
        adj_dicts (dict): Mapping subject -> dict with keys 'bipole_labels'
            and ``mat_key``.
        mat_key: Key under which each subject's adjacency matrix is stored.
        norm_style (str, optional): Forwarded to ``get_scored_df``.
            Defaults to 'bidirectional'.

    Returns:
        pd.DataFrame: The per-subject frames stacked with ``pd.concat``.

    Raises:
        ValueError: From ``pd.concat`` when ``adj_dicts`` is empty.
    """
    conn_dfs = []
    for subj, vals in adj_dicts.items():
        bipoles = vals['bipole_labels']
        mat = vals[mat_key]
        df = get_scored_df(mat, bipoles, norm_style)
        df['subj'] = subj
        conn_dfs.append(df)
    return pd.concat(conn_dfs)

In [159]:
# NOTE(review): in the cells shown here, spes_node_df is first assigned in the cell after this one,
# so this cell raises NameError on a fresh top-to-bottom run.
spes_node_df.columns

Index(['node_labels', 'in_weight', 'out_weight', 'subj'], dtype='object')

In [160]:
# Score each subject's explained-variance matrix per node, then rename the generic weight
# columns so they stay distinct from pdc_df's in_weight/out_weight when the tables are merged.
spes_node_df = construct_spes_df(spes_dicts,'ev_matrix')
spes_node_df = spes_node_df.rename(columns={'in_weight':'in_ev','out_weight':'out_ev'},)

  mu = np.nanmean(adj_mat, axis=dim)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  df['out_weight'] = np.nanmean(inward_norm, 1)
  mu = np.nanmean(adj_mat, axis=dim)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  df['out_weight'] = np.nanmean(inward_norm, 1)
  mu = np.nanmean(adj_mat, axis=dim)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  df['out_weight'] = np.nanmean(inward_norm, 1)
  mu = np.nanmean(adj_mat, axis=dim)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  df['out_weight'] = np.nanmean(inward_norm, 1)
  mu = np.nanmean(adj_mat, axis=dim)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  df['out_weight'] = np.nanmean(inward_norm, 1)
  mu = np.nanmean(adj_mat, axis=dim)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  df['out_weight'] = np.nanmean(inward_norm, 1)
  mu = np.nanmean(adj_mat, axis=dim)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  df['out_weight'] 

In [161]:
# Join PDC and SPES node scores on bipole label and subject. merge defaults to an inner join,
# so only nodes present in both tables are kept.
pdc_df.merge(spes_node_df, on=['node_labels','subj'])

Unnamed: 0,node_labels,in_weight,out_weight,subj,label,in_ev,out_ev
0,LTP1-LTP2,0.134494,-0.425316,Epat26,SOZ,0.312501,-0.413934
1,LTP2-LTP3,-0.121020,-0.077281,Epat26,SOZ,0.800890,
2,LTP4-LTP5,-0.422090,0.179612,Epat26,NIZ,0.152063,
3,LTP5-LTP6,-0.513446,0.421867,Epat26,NIZ,0.048854,0.537831
4,LTP6-LTP7,-0.237330,0.081386,Epat26,NIZ,0.465222,
...,...,...,...,...,...,...,...
2096,TPE3-TPE4,0.069670,-0.073670,Spat53,NIZ,-0.486200,
2097,TPE4-TPE5,0.039783,-0.080117,Spat53,NIZ,-0.249810,
2098,TPE5-TPE6,0.021195,-0.025264,Spat53,NIZ,-0.376631,-0.108330
2099,TPE6-TPE7,-0.293114,0.049377,Spat53,NIZ,-0.222400,
