In [1]:
# reading in mat files 
import os, glob
import h5py 

# handling data
import pandas as pd
import numpy as np

In [2]:
# Root folder of the decoding results (absolute, machine-specific location)
PATH = '/Users/Serena/Desktop/CRPS_decoding_paper/EEG Data/Results/'

# Time windows identified for the analysis, each given as [start, end] in ms
windows = {
    'early': [28, 56],
    'mid': [84, 124],
    'midlate': [180, 320],
    'late': [364, 500],
}

# Timepoints are stored in seconds in time.csv; scale them to ms
time = 1000 * np.genfromtxt(PATH + 'time.csv', delimiter=',')

In [3]:
# Open .mat files and extract the classifier output (i.e. the diagonal or the full TGM)
def read_output(file, output_type = 'diag'):
    """Load the classifier output stored in one .mat (HDF5) results file.

    Parameters
    ----------
    file : str
        Location of the file, relative to the module-level ``PATH``.
    output_type : str, default 'diag'
        'tgm' returns the whole ``BDM/ClassOverTime`` table; any other value
        returns only its main diagonal.

    Returns
    -------
    pandas.DataFrame or numpy.ndarray
        The ``BDM/ClassOverTime`` table as a DataFrame for 'tgm', otherwise a
        1-D array holding the diagonal of that table.
    """
    # The table is needed in both cases, so read it once and close the file
    with h5py.File(PATH + file, 'r') as mat:
        scores = pd.DataFrame(list(mat['BDM']['ClassOverTime']))

    if output_type == 'tgm':
        return scores

    # keep only the diagonal values
    # (optional smoothing of the timeseries is left disabled:
    #  pattern = savgol_filter(pattern, filt[0], filt[1]) # window size 9, polynomial order 5)
    return np.array(np.diag(scores))

def build_dataframes(newpaths, dfnames, n_subjs = 13):
    """Build one decoding-performance dataframe per results folder.

    For every folder in ``newpaths`` the ``*.mat`` files are read with
    ``read_output`` and stored column-wise in a dataframe indexed by the
    module-level ``time``. Each finished dataframe is published as a global
    variable named after the matching entry of ``dfnames``.

    Parameters
    ----------
    newpaths : list of str
        Folders (relative to ``PATH``) that contain the ``*.mat`` files.
    dfnames : list of str
        Global variable name to use for each folder, in the same order.
    n_subjs : int, default 13
        Number of subject columns to allocate. Columns without a matching
        file stay NaN; more files than columns raises ``IndexError``.

    Notes
    -----
    * Files are processed in sorted name order so that column ``n`` always
      holds the same file; ``glob`` on its own gives no ordering guarantee.
    * 'm' and 's' are computed from the subject columns only, i.e. before any
      summary column is added to the dataframe.
    * The working directory is changed to each processed folder (side effect
      kept for backward compatibility).
    """
    for i in range(len(newpaths)):
        name = dfnames[i]
        frame = pd.DataFrame(np.nan, index=time, columns=range(n_subjs))

        os.chdir(PATH + newpaths[i])

        # sorted(): keep the file -> column assignment stable across machines
        for n, file in enumerate(sorted(glob.glob("*.mat"))):
            x = read_output(newpaths[i] + file)
            # a length mismatch with `time` raises here
            frame.iloc[:, n] = np.asarray(x)

        # Compute both statistics before adding them as columns, otherwise the
        # mean column would be counted as an extra subject in the SD.
        subj_mean = frame.mean(numeric_only = True, axis=1)
        subj_sd = frame.std(numeric_only = True, axis=1)

        # add a mean and SD column (that will be plotted later)
        frame['m'] = subj_mean
        # NOTE(review): the SD is divided by a hard-coded 4, kept as in the
        # original code; confirm which scaling is intended.
        frame['s'] = subj_sd/4
        frame['sign'] = np.nan

        globals()[name] = frame

In [4]:
# Read in group results and create a dataframe for each condition-side-group
# (dataframe name -> results folder, relative to PATH)
_group_sources = {
    'a_aff':   'allvsall/CRPS active/affected_active/ALL_NOSELECTION/',
    'p_aff':   'allvsall/CRPS passive/affected_passive/ALL_NOSELECTION/',
    'a_unaff': 'allvsall/CRPS active/unaffected_active/ALL_NOSELECTION/',
    'p_unaff': 'allvsall/CRPS passive/unaffected_passive/ALL_NOSELECTION/',
    'a_left':  'allvsall/HV active/left_active/ALL_NOSELECTION/',
    'p_left':  'allvsall/HV passive/left_passive/ALL_NOSELECTION/',
    'a_right': 'allvsall/HV active/right_active/ALL_NOSELECTION/',
    'p_right': 'allvsall/HV passive/right_passive/ALL_NOSELECTION/',
}
group_dfnames = list(_group_sources.keys())
group_paths = list(_group_sources.values())

build_dataframes(group_paths, group_dfnames)


In [5]:
# Read in CORRECT ONLY group results and create a dataframe for each condition-side-group
# Each entry: (dataframe name, results folder relative to PATH, number of subject columns)
_noerr_sources = [
    ('aff_cor',   'NoErr/affected/ALL_NOSELECTION/',   12),  # 1 file lost (lack of training examples)
    ('unaff_cor', 'NoErr/unaffected/ALL_NOSELECTION/', 12),  # 1 file lost (lack of training examples)
    ('left_cor',  'NoErr/left/ALL_NOSELECTION/',       13),  # no files lost
    ('right_cor', 'NoErr/right/ALL_NOSELECTION/',      13),  # no files lost
    ('CRPS_cor',  'NoErr/CRPS/ALL_NOSELECTION/',       21),  # 5 total lost (P14, P20 and left P18)
    ('HV_cor',    'NoErr/HV/ALL_NOSELECTION/',         26),  # no files lost
]
noerr_dfnames = [entry[0] for entry in _noerr_sources]
noerr_paths = [entry[1] for entry in _noerr_sources]

for _name, _folder, _n_cols in _noerr_sources:
    build_dataframes([_folder], [_name], _n_cols)


In [6]:
# Participant IDs attached (by position) to the rows of the summary tables
Pid = [
    "P04", "P07", "P08", "P10", "P12", "P13", "P14",
    "P15", "P16", "P17", "P18", "P19", "P20",
]
Hid = [
    "H07", "H08", "H10", "H11", "H14", "H16", "H17",
    "H19", "H20", "H21", "H22", "H23", "H25",
]

# Pair every summary-table name with the dataframe it is computed from.
# Note that the 'temp_a_*' tables are fed by the correct-only (*_cor) dataframes.
_window_inputs = [
    ("temp_a_left", left_cor),
    ("temp_a_right", right_cor),
    ("temp_p_left", p_left),
    ("temp_p_right", p_right),
    ("temp_a_aff", aff_cor),
    ("temp_a_unaff", unaff_cor),
    ("temp_p_aff", p_aff),
    ("temp_p_unaff", p_unaff),
]
dfs = [frame for _, frame in _window_inputs]
temp_names = [label for label, _ in _window_inputs]

In [7]:
# Build one summary table per dataframe: participant info plus the mean, peak
# and peak latency of the decoding accuracy within each time window.
for i, df in enumerate(dfs):
    # first four dataframes are HVs, the rest are CRPS
    is_hv = i < 4

    # one row per participant ID; the scalar labels below are broadcast to all rows
    temp = pd.DataFrame({'id': Hid if is_hv else Pid})
    temp['diagnosis'] = 'HV' if is_hv else 'CRPS'
    # dataframes in even positions (i % 2 == 0) are 'Affected/non-dominat'
    temp['side'] = 'Affected/non-dominat' if i % 2 == 0 else 'Unaffected/dominant'
    temp['condition'] = 'Active' if i in [0, 1, 4, 5] else 'Passive'

    # Keep the subject columns only: the summary columns ('m', 's', 'sign')
    # are not participants, and the all-NaN 'sign' column makes idxmax warn
    # (or raise) in recent pandas versions.
    subjects = df.drop(columns = ['m', 's', 'sign'], errors = 'ignore')

    # NOTE(review): results are matched to IDs purely by column position. For
    # dataframes built with fewer than 13 subject columns the missing value is
    # always assigned to the last ID, which is only correct if that is the
    # participant whose file was lost -- please confirm.
    for period in windows:
        # only select the timepoints in the window
        wind = subjects.loc[windows[period][0]:windows[period][1]]
        temp[period + ' mean'] = wind.mean(axis = 0) # calculate subjects' mean accuracy for period
        temp[period + ' peak'] = wind.max(axis = 0) # calculate subjects' peak accuracy for period
        # all-NaN columns have no peak; dropping them leaves NaN after alignment
        temp[period + ' peak latency'] = wind.dropna(axis = 1, how = 'all').idxmax(axis = 0) # calculate subjects' peak latency for period

    # save the current dataset
    globals()[temp_names[i]] = temp

In [10]:
# Finally stack all the created datasets together and save the table to a .csv file
_tables = [
    temp_a_left, temp_a_right, temp_p_left, temp_p_right,
    temp_a_aff, temp_a_unaff, temp_p_aff, temp_p_unaff,
]
final_dataset = pd.concat(_tables, ignore_index=True)

_csv_path = PATH + "ALLvsALLnoerr.csv"
final_dataset.to_csv(_csv_path)

In [9]:
# Show the combined table
final_dataset

Unnamed: 0,id,diagnosis,side,condition,early mean,early peak,early peak latency,mid mean,mid peak,mid peak latency,midlate mean,midlate peak,midlate peak latency,late mean,late peak,late peak latency
0,H07,HV,Affected/non-dominat,Active,0.261161,0.284325,40.0,0.254455,0.341032,104.0,0.264448,0.309960,256.0,0.274659,0.338968,364.0
1,H08,HV,Affected/non-dominat,Active,0.260218,0.353571,56.0,0.257327,0.300397,116.0,0.286023,0.402976,208.0,0.321012,0.414563,500.0
2,H10,HV,Affected/non-dominat,Active,0.302133,0.348373,36.0,0.210675,0.303373,108.0,0.210556,0.266151,312.0,0.223989,0.293571,488.0
3,H11,HV,Affected/non-dominat,Active,0.271369,0.340952,36.0,0.170390,0.221429,84.0,0.215847,0.266667,212.0,0.215143,0.298095,488.0
4,H14,HV,Affected/non-dominat,Active,0.239271,0.282500,44.0,0.262551,0.288611,100.0,0.226628,0.271389,208.0,0.291905,0.382500,484.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,P16,CRPS,Unaffected/dominant,Passive,0.231179,0.257714,36.0,0.214528,0.249810,116.0,0.224902,0.271429,216.0,0.193293,0.257714,368.0
100,P17,CRPS,Unaffected/dominant,Passive,0.281176,0.338333,48.0,0.313755,0.355476,108.0,0.238244,0.272262,184.0,0.203262,0.240714,364.0
101,P18,CRPS,Unaffected/dominant,Passive,0.200500,0.248667,56.0,0.239455,0.268667,104.0,0.189722,0.250000,192.0,0.184190,0.202667,412.0
102,P19,CRPS,Unaffected/dominant,Passive,0.311868,0.338333,36.0,0.277626,0.342722,92.0,0.222154,0.264833,292.0,0.198359,0.216444,452.0
