In [1]:
import warnings
warnings.filterwarnings('ignore')

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import os.path

from os import getcwd
import numpy as np
import pandas as pd
import hdf5storage
import h5py
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.io import savemat, loadmat
import math
from scipy.fftpack import fft, ifft
from scipy.stats import zscore, f_oneway
#import samplerate
import multiprocessing as mp
from scipy.signal import resample
import random
import pyreadr
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import r2_score
import statsmodels.api as sm
import scipy
import matplotlib.patches as mpatches
import statsmodels.stats.multitest as multitest

import mne
from mne.preprocessing import ICA
from mne.time_frequency import tfr_morlet

import textgrids


In [2]:
def get_info(field_num):
    field = []
    for item in range(trial_num):
        field.append(group_data['DATA'][0][item][field_num][0])
    return pd.DataFrame(field)

def load_time_series(subject, session, unit_id):
    #subj_mat = hdf5storage.loadmat(f"Z:\DBS\\{subject}\Preprocessed Data\SpikeSorting\\{subject}_ft_raw_spike.mat")
    subj_mat = hdf5storage.loadmat(f"/Volumes/Nexus/DBS/{subject}/Preprocessed Data/SpikeSorting/{subject}_ft_raw_spike.mat")
    time_data = subj_mat['IFR'][0]['time'][0][int(session-1)][0]
    neural_data = subj_mat['IFR'][0]['trial'][0][int(session-1)][int(unit_id-1)]
    
    time_data = time_data[~np.isnan(neural_data)]
    neural_data = neural_data[~np.isnan(neural_data)]
    
    return time_data, neural_data

def get_df(time_point, neural_data, time_data, after=3.5, before=1.5, fs=1000):
    dfs = []
    time_point = time_point[time_point<time_data.max()-2][time_point>time_data.min()+2]
    for item in time_point:
        for i2 in range(len(neural_data)):
            if time_data[i2] > item:
                dfs.append(neural_data[i2-int(before*fs):i2+int(after*fs)])
                break
    return np.array(dfs)

# function to convert to superscript
def get_super(x):
    normal = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-=()"
    super_s = "ᴬᴮᶜᴰᴱᶠᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾQᴿˢᵀᵁⱽᵂˣʸᶻᵃᵇᶜᵈᵉᶠᵍʰᶦʲᵏˡᵐⁿᵒᵖ۹ʳˢᵗᵘᵛʷˣʸᶻ⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾"
    res = x.maketrans(''.join(normal), ''.join(super_s))
    return x.translate(res)

In [3]:
def get_avg_spike(time_table, neural_data, time_data, fs=1000):
    time_point = time_table['starts']
    before = time_table['before_time']
    after = time_table['duration']
    dfs = []

    for item in range(len(neural_data)):
        if time_data[item] > time_point:
            dfs.append(neural_data[item-int(before*fs):item+int(after*fs)])
            break
    return np.mean(np.array(dfs))

In [4]:

#group_data = hdf5storage.loadmat(r"Z:\Commits\DBS_speech\Dev\U01SpikeAnalysis\data\group_data\group_data_narrow_09082021.mat")
group_data = hdf5storage.loadmat(r"../../../../Macintosh HD/Users/yanming/Desktop/stn_auditory/group_data_narrow_09082021.mat")

trial_num = len(group_data['DATA'][0])

SubjectID = get_info(0)
SubjectID.columns = ['SubjectID']

session = get_info(1)
session.columns = ['session']

electrode = get_info(2)
electrode.columns = ['electrode']

unit_id = get_info(3)
unit_id.columns = ['unit_id']

unit = get_info(4)
unit.columns = ['unit']

RecType = get_info(5)
RecType.columns = ['RecType']

grade = get_info(6)
grade.columns = ['grade']

info = pd.concat([SubjectID, session, electrode, unit_id, unit, RecType, grade], axis=1)
info.head(2)
info.shape

Unnamed: 0,SubjectID,session,electrode,unit_id,unit,RecType,grade
0,DBS3001,1,micro_m,6,4,MU,B
1,DBS3001,2,micro_c,7,1,MU,C


(229, 7)

In [5]:
def get_produced_freq():
    produced_freq = pd.read_csv('all_freq.csv', index_col='id')
    d = {'ah':0, 'oo':1, 'ee':2}
    produced_freq['stim_factor'] = produced_freq.stim.map(d)
    produced_freq['spl'] = 20*np.log10(produced_freq['rms_audio_p'])
    produced_freq = produced_freq.dropna().reset_index(drop=1)

    produced_freq['before_time'] = np.nan
    for item in range(1, produced_freq.shape[0]):
        if produced_freq.syl_id[item] == 2:
            if produced_freq.syl_id[item-1] == 1:
                produced_freq.before_time[item] = produced_freq.starts[item] - produced_freq.ends[item-1]
        elif produced_freq.syl_id[item] == 3:
            if produced_freq.syl_id[item-1] == 2:
                produced_freq.before_time[item] = produced_freq.starts[item] - produced_freq.ends[item-1]

    for item in range(1, produced_freq.shape[0]):
        if produced_freq.syl_id[item] == 1:
            if produced_freq.syl_id[item+1] == 2:
                if produced_freq.syl_id[item+2] == 3:
                    produced_freq.before_time[item] = (produced_freq.before_time[item+1] + 
                                                       produced_freq.before_time[item+2])/2

    produced_freq.before_time[0] = (produced_freq.before_time[1] + produced_freq.before_time[2])/2
    produced_freq = produced_freq.dropna().reset_index(drop=1)
    return produced_freq

produced_df = get_produced_freq()
produced_df.shape
produced_df.head(2)

(41210, 26)

Unnamed: 0,starts,ends,duration,cons_onset,trial_id,session_id,syl_id,channel,fundamental_freq,type,...,subject,freq2,discont,contrun_id,t,t2,t3,stim_factor,spl,before_time
0,36145.009713,36145.184457,0.174745,36145.009713,1,1,1,audio_p,142.196732,vowel,...,DBS3001,20219.91053,1,1,-195.420094,38189.013071,-7462901.0,0,-18.178208,0.404081
1,36145.494905,36145.643055,0.14815,36145.494905,1,1,2,audio_p,140.861581,vowel,...,DBS3001,19841.984898,0,1,-194.934901,37999.615755,-7407451.0,1,-21.894791,0.310448


In [6]:
def get_subj_table(produced_freq, number_elec, info, timing_subj, neural_subj):
    SUBJ = info.iloc[number_elec, 0]
    SESS = info.iloc[number_elec, 1]
    subj_table = produced_freq[produced_freq.subject == SUBJ].reset_index(drop=1)
    subj_table = subj_table[subj_table.session_id == SESS].reset_index(drop=1)
    subj_table = subj_table[subj_table.starts > timing_subj.min()+3.5].reset_index(drop=1)
    subj_table = subj_table[subj_table.starts < timing_subj.max()-3.5].reset_index(drop=1)

        
    subj_table['avg_spike'] = np.nan
    for trial in range(subj_table.shape[0]):
        if subj_table.ends[trial] < timing_subj.max() - 3.5:
            if subj_table.starts[trial] > timing_subj.min() + 3.5:
                subj_table.avg_spike[trial] = get_avg_spike(subj_table.iloc[trial], 
                                                            neural_subj, timing_subj)
    subj_table = subj_table.dropna().reset_index(drop=1)
    
    subj_table['t_z'] = (subj_table['t'] - subj_table['t'].mean())/subj_table['t'].std(ddof=0)
    stim_vol_mean = subj_table['stim_volume'].mean()
    subj_table['stim_vol_z'] = (subj_table['stim_volume'] - 
                                   stim_vol_mean)/subj_table['stim_volume'].std(ddof=0)
    if subj_table['stim_volume'].unique().shape[0]==1:
        subj_table['stim_vol_z'] = 0
        

    subj_table['syl_id_z'] = (subj_table['syl_id'] - 
                              subj_table['syl_id'].mean())/subj_table['syl_id'].std(ddof=0)
    subj_table['spl_z'] = (subj_table['spl'] - 
                           subj_table['spl'].mean())/subj_table['spl'].std(ddof=0)
    stim_fac_mean = subj_table['stim_factor'].mean()
    subj_table['stim_z'] = (subj_table['stim_factor'] - 
                                   stim_fac_mean)/subj_table['stim_factor'].std(ddof=0)
    fund_freq_mean = subj_table['fundamental_freq'].mean()
    subj_table['pitch_z'] = (subj_table['fundamental_freq'] - 
                             fund_freq_mean)/subj_table['fundamental_freq'].std(ddof=0)
    subj_table['pitch_spl_z'] = subj_table['pitch_z']*subj_table['spl_z']

    return subj_table

In [7]:
def get_subj_df_cue(subject, sess, subj_table):
    #cue_df = pd.read_csv(f'Z:\DBS\\{subject}\Preprocessed Data\Sync\\annot\\{subject}_stimulus_syllable.txt', sep='	', index_col='id')
    cue_df = pd.read_csv(f'/Volumes/Nexus/DBS/{subject}/Preprocessed Data/Sync/annot/{subject}_stimulus_syllable.txt', sep='	', index_col='id')
    cue_df = cue_df[cue_df['session_id']==sess].reset_index(drop=1)
    subj_table_cue = pd.merge(subj_table, cue_df, on=['session_id', 'trial_id', 'syl_id'], how="inner")
    return subj_table_cue

def find_rest_time(subject_df):
    resting_time = subject_df[subject_df['syl_id'] == 1]['starts_y'] - 1
    return resting_time

def nansem(a, axis=1):
    return np.nanstd(a, axis=0)/np.sqrt(a.shape[axis])

def find_full_3_trial(subj_table, timing_subj):
    good_trial = []
    subj_table = subj_table.dropna()
    subj_table = subj_table[subj_table['starts_y'] > timing_subj.min() + 3.5]
    subj_table = subj_table[subj_table['ends_x'] < timing_subj.max() - 3.5]
    for item in subj_table['trial_id'].unique():
        if subj_table[subj_table['trial_id'] == item].shape[0] == 3:
            good_trial.append(item)
    return subj_table[subj_table['trial_id'].isin(good_trial)]

In [10]:
def get_grid(subject, session):
    
    grid = textgrids.TextGrid(f'textgrid/{subject}_Session{session}_Tr1.TextGrid')
    grid_text = []
    grid_xmin = []
    grid_xmax = []

    for item in range(len(grid['Ambient'])):
        if grid['Ambient'][item].text != "":
            grid_text.append(grid['Ambient'][item].text)
            grid_xmin.append(grid['Ambient'][item].xmin)
            grid_xmax.append(grid['Ambient'][item].xmax)

    grid_ = pd.DataFrame({"text":grid_text, 'xmin':grid_xmin, "xmax":grid_xmax})
    
    return grid_

In [79]:
SUBJECT = "DBS3008"

In [80]:
file_path = f'../../../DBS/{SUBJECT}/Preprocessed Data/FieldTrip'
subj_mat = hdf5storage.loadmat(f"{file_path}/{SUBJECT}_ft_raw_filt_trial_denoised.mat")['D'][0]

In [81]:
chanlabel = []
for item in range(subj_mat['label'].shape[0]):
    chanlabel.append(subj_mat['label'][item][0][0][0])
chanlabel = np.array(chanlabel)

fs = subj_mat['fsample'][0][0]
nChans = len(subj_mat['label'])
nTrials = len(subj_mat['trial'][0])

ecog_chan = []
for item in range(nChans):
    if chanlabel[item][:4] == "ecog":
        ecog_chan.append(item)
ecog_chan = np.array(ecog_chan)

dbs_chan = []
for item in range(nChans):
    if chanlabel[item][:4] == "dbs_":
        dbs_chan.append(item)
dbs_chan = np.array(dbs_chan)

macro_chan = []
for item in range(nChans):
    if chanlabel[item][:5] == "macro":
        macro_chan.append(item)
macro_chan = np.array(macro_chan)

micro_chan = []
for item in range(nChans):
    if chanlabel[item][:5] == "micro":
        micro_chan.append(item)
micro_chan = np.array(micro_chan)

time_mat = subj_mat['time'][0]
trial_mat = subj_mat['trial'][0]


In [82]:
time_mat.shape
trial_mat.shape

(358,)

(358,)

In [83]:
len(ecog_chan)
len(dbs_chan)
len(macro_chan)
len(micro_chan)

63

4

3

3

In [24]:
xvals = np.linspace(-1.5, 3.5, 5000)

In [27]:
subject_table = produced_df[produced_df.subject == SUBJECT].reset_index(drop=1)
#subject_table = find_full_3_trial(subject_table).reset_index(drop=1)
subject_table = subject_table[subject_table.syl_id == 1].reset_index(drop=1)

cue_df = pd.read_csv(f'/Volumes/Nexus/DBS/{SUBJECT}/Preprocessed Data/Sync/annot/{SUBJECT}_stimulus_syllable.txt', sep='	', index_col='id')
subj_df_cue = pd.merge(subject_table, cue_df, on=['session_id', 'trial_id', 'syl_id'], how="inner")


In [98]:
session = 2

if os.path.isfile(f'textgrid/{SUBJECT}_Session{session}_Tr1.TextGrid') == 0:
    print('None exist')
else:
    grid_info = get_grid(SUBJECT, session)
    annot_path = f'/Volumes/Nexus/DBS/{SUBJECT}/Preprocessed Data/Sync/annot/'
    sync_df = pd.read_csv(annot_path + f'{SUBJECT}_sync.txt', sep='	', index_col='id')
    sync_df = sync_df[sync_df['sync_channel']=="*Tr1"].reset_index(drop=1)
    Fs = sync_df[sync_df['session_id']==session].reset_index(drop=1).Fs[0]
    s1 = sync_df[sync_df['session_id']==session].reset_index(drop=1).s1[0]
    t2 = sync_df[sync_df['session_id']==session].reset_index(drop=1).t2[0]
    t1 = sync_df[sync_df['session_id']==session].reset_index(drop=1).t1[0]
    s2 = sync_df[sync_df['session_id']==session].reset_index(drop=1).s2[0]
    grid_info['onset'] = (grid_info['xmin']*Fs - s1)*(t2-t1)/(s2-s1) + t1

None exist


In [14]:
grid_info

Unnamed: 0,text,xmin,xmax,onset
0,Mark,2.857601,3.073926,41271.547058
1,Mark,4.149942,5.154828,41272.839384
2,Mark,286.449051,287.322404,41555.13529
3,Mark,287.629337,288.961114,41556.315563
4,Mark,309.836943,312.099924,41578.522917
5,Mark,312.984307,314.805096,41581.670245
6,Mark,320.245093,320.921386,41588.930949
7,Mark,321.6416,323.246794,41590.32744
8,Mark,555.760309,557.616973,41824.443493
9,Mark,558.444132,561.445832,41827.127285


In [72]:
time_min
time_max

51635.985

51649.54000000001

In [78]:
dbs_mat = np.zeros((grid_info.shape[0], len(dbs_chan), 5000))
macro_mat = np.zeros((grid_info.shape[0], len(macro_chan), 5000))
micro_mat = np.zeros((grid_info.shape[0], len(micro_chan), 5000))


for item in range(grid_info.shape[0]):
    time_point = grid_info.iloc[item].onset
    #print(time_point)
    for i2 in range(nTrials):
        time_min = time_mat[i2][0].min()
        time_max = time_mat[i2][0].max()
        if (time_point < time_max - 3.5) and (time_point > time_min + 1.5):
            print(i2)
            for i3 in range(len(time_mat[i2][0])):
                if time_mat[i2][0][i3] > time_point:
                    print(i3)
                    if ~np.all(trial_mat[i2][dbs_chan][:, i3-1500:i3+3500]==0):
                        dbs_mat[item] = trial_mat[i2][dbs_chan][:, i3-1500:i3+3500]
                    if ~np.all(trial_mat[i2][macro_chan][:, i3-1500:i3+3500]==0):
                        macro_mat[item] = trial_mat[i2][macro_chan][:, i3-1500:i3+3500]
                    if ~np.all(trial_mat[i2][micro_chan][:, i3-1500:i3+3500]==0):

                        micro_mat[item] = trial_mat[i2][micro_chan][:, i3-1500:i3+3500]
                    break
                    break
                        #continue

165
3230
165
4410


In [66]:
trial_mat[i2][dbs_chan][:, i3-1500:i3+3500]

array([[ 65.64952163,  73.42103829,  79.68918887, ..., -45.92242528,
        -39.37871312, -31.45082948],
       [ 51.09862668,  58.48469291,  70.94963158, ..., -18.95763987,
        -12.06352393,  -3.98739555],
       [ 62.90149585,  79.38581323,  96.08489121, ..., -20.1413889 ,
        -18.18445165,  -9.20533485],
       ...,
       [ 88.9694509 ,  90.45078291, 101.67546966, ..., -21.28158733,
        -24.01422666, -23.92931269],
       [ 89.98270726,  94.70252917, 105.91514141, ..., -29.01144357,
        -28.24769846, -24.51960436],
       [ 91.63623818,  97.76136396, 106.74509403, ..., -25.34300986,
        -24.12731093, -20.16842462]])

In [37]:
trial_mat.shape
grid_info.shape[0]

(345,)

12

In [64]:
dbs_mat.shape
macro_mat.shape

(12, 16, 5000)

(12, 3, 5000)

In [65]:
dbs_mat

array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0.

In [74]:
~np.all(trial_mat[i2][dbs_chan][:, i3-1500:i3+3500]==0)

True

In [47]:
macro_mat[2].shape

(3, 5000)

In [49]:
dbs_mat[2].shape

(16, 5000)

In [55]:
test = macro_mat[~np.all(macro_mat == 0, axis=(0, 1))]
test

IndexError: boolean index did not match indexed array along dimension 0; dimension is 12 but corresponding boolean dimension is 5000

In [60]:
test2 = ~np.all(macro_mat == 0, axis=(1))
test2.shape

(12, 5000)

In [61]:
macro_mat.shape

(12, 3, 5000)

In [95]:
grid_info.onset

0     41271.547058
1     41272.839384
2     41555.135290
3     41556.315563
4     41578.522917
5     41581.670245
6     41588.930949
7     41590.327440
8     41824.443493
9     41827.127285
10    41830.808334
11    41833.685147
Name: onset, dtype: float64

In [93]:
chanlabel = []
for item in range(subj_mat['label'].shape[0]):
    chanlabel.append(subj_mat['label'][item][0][0][0])
chanlabel = np.array(chanlabel)

fs = subj_mat['fsample'][0][0]
nChans = len(subj_mat['label'])
nTrials = len(subj_mat['trial'][0])

dbs_chan = []
for item in range(nChans):
    if chanlabel[item][:4] == "dbs_":
        dbs_chan.append(item)

time_mat = subj_mat['time'][0]
trial_mat = subj_mat['trial'][0]

subject_table = produced_df[produced_df.subject == SUBJECT].reset_index(drop=1)
#subject_table = find_full_3_trial2(subject_table).reset_index(drop=1)
subject_table = subject_table[subject_table.syl_id == 1].reset_index(drop=1)

#cue_df = pd.read_csv(f'Z:\DBS\\{SUBJECT}\Preprocessed Data\Sync\\annot\\{SUBJECT}_stimulus_syllable.txt', sep='	', index_col='id')
cue_df = pd.read_csv(f'/Volumes/Nexus/DBS/{SUBJECT}/Preprocessed Data/Sync/annot/{SUBJECT}_stimulus_syllable.txt', sep='	', index_col='id')
subj_df_cue = pd.merge(subject_table, cue_df, on=['session_id', 'trial_id', 'syl_id'], how="inner")

ecog_mat = np.zeros((grid_info.shape[0], 5000))

elec=3

for item in range(grid_info.shape[0]):
    time_point = grid_info.iloc[item].onset
    for i2 in range(nTrials):
        time_min = time_mat[i2][0].min()
        time_max = time_mat[i2][0].max()
        if (time_point < time_max - 3.5) and (time_point > time_min + 1.5):
            for i3 in range(len(time_mat[i2][0])):
                if time_mat[i2][0][i3] > time_point:
                    if len(dbs_chan)> 0:
                        good_trial = trial_mat[i2][dbs_chan][elec]
                        ecog_mat[item] = good_trial[i3-1500:i3+3500]


                    break
                    break
                    
elec_mat = ecog_mat[~np.all(ecog_mat == 0, axis=1)]
elec_mat.shape

(0, 5000)

(0, 5000)

In [None]:
dbs_mat = np.zeros((grid_info.shape[0], len(dbs_chan), 5000))
macro_mat = np.zeros((grid_info.shape[0], len(macro_chan), 5000))
micro_mat = np.zeros((grid_info.shape[0], len(micro_chan), 5000))


for item in range(grid_info.shape[0]):
    time_point = grid_info.iloc[item].onset
    #print(time_point)
    for i2 in range(nTrials):
        time_min = time_mat[i2][0].min()
        time_max = time_mat[i2][0].max()
        if (time_point < time_max - 3.5) and (time_point > time_min + 1.5):
            print(i2)
            for i3 in range(len(time_mat[i2][0])):
                if time_mat[i2][0][i3] > time_point:
                    print(i3)
                    if ~np.all(trial_mat[i2][dbs_chan][:, i3-1500:i3+3500]==0):
                        dbs_mat[item] = trial_mat[i2][dbs_chan][:, i3-1500:i3+3500]
                    if ~np.all(trial_mat[i2][macro_chan][:, i3-1500:i3+3500]==0):
                        macro_mat[item] = trial_mat[i2][macro_chan][:, i3-1500:i3+3500]
                    if ~np.all(trial_mat[i2][micro_chan][:, i3-1500:i3+3500]==0):

                        micro_mat[item] = trial_mat[i2][micro_chan][:, i3-1500:i3+3500]
                    break
                    break
                        #continue