In [9]:
import numpy as np
import scipy.io as sio
from scipy.signal import hilbert, coherence, welch
from scipy import signal
import pandas as pd
import os

In [10]:
##############################################################
#Filter the signal
def filtersignal(s, f, fs=200, order=4):
    """Band-pass filter a signal with a Butterworth filter applied forward and backward.

    Parameters
    ----------
    s : array_like
        Signal to filter (filtered along its last axis by ``signal.filtfilt``).
    f : sequence of two numbers
        ``(lowcut, highcut)`` band edges in Hz.
    fs : float, optional
        Sampling frequency in Hz. Defaults to 200, the value that was
        previously hard-coded.
    order : int, optional
        Butterworth filter order passed to ``signal.butter``. Defaults to 4.

    Returns
    -------
    numpy.ndarray
        The filtered signal, same shape as ``s``.
    """
    lowcut, highcut = f[0], f[1]
    nyquist = fs / 2

    # butter() expects the band edges normalised by the Nyquist frequency.
    b, a = signal.butter(order, [lowcut / nyquist, highcut / nyquist], btype='band')

    # filtfilt runs the filter in both directions, so the result has no phase lag.
    return signal.filtfilt(b, a, s)

##############################################################
#Phase Lag Index
def phase_lag_index(signal1, signal2):
    """Compute the Phase Lag Index (PLI) between two signals in five frequency bands.

    For each band in ``(4,8), (8,12), (12,24), (24,35), (35,45)`` Hz, both
    input signals are band-pass filtered with ``filtersignal``, their
    instantaneous phases are taken from the Hilbert analytic signal, and
    the PLI is computed as ``|mean(sign(sin(phase1 - phase2)))|``.

    Parameters
    ----------
    signal1, signal2 : array_like
        The two signals to compare; they must have the same length.

    Returns
    -------
    list of float
        One PLI value per band, in the band order listed above.
    """
    freq_bands = [(4, 8), (8, 12), (12, 24), (24, 35), (35, 45)]
    pli = []
    for band in freq_bands:
        # Always filter the ORIGINAL inputs. Reassigning signal1/signal2 here
        # would feed each band the output of the previous band's filter.
        band1 = filtersignal(signal1, band)
        band2 = filtersignal(signal2, band)

        # Instantaneous phase from the analytic signal.
        phase1 = np.angle(hilbert(band1))
        phase2 = np.angle(hilbert(band2))

        # np.angle returns values in (-pi, pi], so the raw difference lies in
        # (-2*pi, 2*pi). Taking sin() first makes the sign independent of
        # that wrap-around.
        phase_diff = phase1 - phase2
        pli.append(np.abs(np.mean(np.sign(np.sin(phase_diff)))))

    return pli

##################################################################
#Phase Locking Value
def phase_locking_value(signal1, signal2):
    """Compute the Phase Locking Value (PLV) between two signals in five frequency bands.

    For each band in ``(4,8), (8,12), (12,24), (24,35), (35,45)`` Hz, both
    input signals are band-pass filtered with ``filtersignal``, their
    instantaneous phases are taken from the Hilbert analytic signal, and
    the PLV is computed as ``|mean(exp(1j * (phase1 - phase2)))|``.

    Parameters
    ----------
    signal1, signal2 : array_like
        The two signals to compare; they must have the same length.

    Returns
    -------
    list of float
        One PLV value per band, in the band order listed above.
    """
    freq_bands = [(4, 8), (8, 12), (12, 24), (24, 35), (35, 45)]
    plv = []
    for band in freq_bands:
        # Always filter the ORIGINAL inputs. Reassigning signal1/signal2 here
        # would feed each band the output of the previous band's filter.
        band1 = filtersignal(signal1, band)
        band2 = filtersignal(signal2, band)

        # Instantaneous phase from the analytic signal.
        phase1 = np.angle(hilbert(band1))
        phase2 = np.angle(hilbert(band2))

        # Unit phasors of the phase difference; the length of their mean is the PLV.
        phasors = np.exp(1j * (phase1 - phase2))
        plv.append(np.abs(np.mean(phasors)))

    return plv

###################################################################
#Coherence
def coherence_signals(x, y):
    """Return the mean magnitude-squared coherence of ``x`` and ``y`` in five bands.

    Coherence is estimated with ``scipy.signal.coherence`` (fs=200, Hann
    window, 200-sample segments, 100-sample overlap) and then averaged over
    the frequency bins falling in each half-open band ``[lo, hi)``:
    4-8, 8-12, 12-24, 24-35 and 35-45 Hz.

    Returns a list of five values, in that band order.
    """
    band_edges = [(4, 8), (8, 12), (12, 24), (24, 35), (35, 45)]
    freqs, Cxy = coherence(x, y, fs=200, noverlap=100, window='hann', nperseg=200)

    band_means = []
    for lo, hi in band_edges:
        in_band = np.logical_and(freqs >= lo, freqs < hi)
        band_means.append(np.mean(Cxy[in_band]))
    return band_means

#######################################################################
#PSD
def power_spectral_density(x):
    """Return the band power of ``x`` in five bands, relative to the 4-45 Hz mean.

    The PSD is estimated with ``scipy.signal.welch`` (fs=200, Hann window,
    200-sample segments, 100-sample overlap). For each half-open band
    ``[lo, hi)`` among 4-8, 8-12, 12-24, 24-35 and 35-45 Hz, the mean PSD in
    the band is divided by the mean PSD over ``[4, 45)`` Hz.

    Returns a list of five ratios, in that band order.
    """
    freqs, Pxx = welch(x, fs=200, noverlap=100, window='hann', nperseg=200)

    def _mean_power(lo, hi):
        # Mean PSD over the bins with lo <= freq < hi.
        return np.mean(Pxx[np.logical_and(freqs >= lo, freqs < hi)])

    band_edges = [(4, 8), (8, 12), (12, 24), (24, 35), (35, 45)]
    return [_mean_power(lo, hi) / _mean_power(4, 45) for lo, hi in band_edges]

In [11]:
def data_processing(filepath, filename,
                    label_path='/home/desktop/Desktop/22104412_Docs/EEG-COGMusic/EEG-Emotion/Datasets/SEED/Data/label.mat',
                    output_dir='./datasets/SEED/Zscore_clipped/'):
    """Extract windowed connectivity/PSD features from one recording and save them.

    Steps:
      1. Load every variable whose name contains ``'eeg'`` from ``filepath``
         and keep samples 60 s .. 180 s of each (at 200 Hz).
      2. Select 32 channels and z-score each channel of each trial over time.
      3. Cut each trial into 3 s windows with a 0.5 s stride.
      4. One-hot encode the per-trial labels (-1 / 0 / 1) for every window.
      5. For every window compute pairwise coherence and PLI (lower triangle,
         ``j < i``) and per-channel relative band power.
      6. Save everything to ``output_dir + filename`` with ``scipy.io.savemat``.

    Parameters
    ----------
    filepath : str
        Path of the input ``.mat`` file.
    filename : str
        File name used for the saved output.
    label_path : str, optional
        Path of the ``.mat`` file holding the ``'label'`` array. Defaults to
        the path that was previously hard-coded.
    output_dir : str, optional
        Directory the result is written to. Defaults to the previously
        hard-coded relative directory.

    Returns
    -------
    None
    """
    sampling_rate = 200
    window_length = 3     # seconds
    window_stride = 0.5   # seconds
    n_bands = 5           # length of the lists returned by the feature functions

    mat = sio.loadmat(filepath)
    label = sio.loadmat(label_path)
    labels = label['label'].transpose()
    l_kfold = label['label'][0]

    # Keep the stretch from 60 s to 180 s of every trial (2 minutes of data).
    data = np.array([v[:, 60 * sampling_rate:3 * 60 * sampling_rate]
                     for k, v in mat.items() if 'eeg' in k])

    # 1-based channel numbers, converted to 0-based indices.
    # NOTE(review): presumably chosen to match the DEAP 32-channel layout — confirm.
    deap_consistent_indexs = [1,4,8,6,16,18,26,24,34,36,44,42,53,59,60,46,3,5,10,12,14,22,20,28,30,32,40,38,48,50,55,61]
    deap_consistent_indexs = np.array(deap_consistent_indexs) - 1
    data = data[:, deap_consistent_indexs, :]  # 32 channels

    # Z-score normalisation along the time axis, per trial and channel.
    data = (data - np.mean(data, 2)[:, :, np.newaxis]) / np.std(data, 2)[:, :, np.newaxis]

    # Row indices of trials labelled -1 or 1 (the two-class subset).
    ni = np.where(np.logical_or(labels == -1, labels == 1))[0]

    n_trials, n_channels, n_samples = data.shape
    win = window_length * sampling_rate
    step = int(window_stride * sampling_rate)
    segments = int((n_samples - win) / step + 1)

    # Sliding-window segmentation: (trial, segment, channel, sample).
    segmented = np.zeros((n_trials, segments, n_channels, win))
    for trial in range(n_trials):
        print(f'Dict Key {trial} processed')
        for k in range(segments):
            segmented[trial, k, :, :] = data[trial, :, k * step:k * step + win]

    # Every window inherits the label of its trial: (trial, segment, 1).
    labels_segmented = np.array([np.repeat(labels[trial:trial + 1, :], segments, axis=0)
                                 for trial in range(n_trials)])

    print(segmented.shape, labels_segmented.shape)

    dt = segmented

    twoclass = []
    multiclass = []
    for fold in labels_segmented:
        tc = []
        mc = []
        for sample in fold:
            value = int(sample[0])
            if value == -1:
                tc.append([1, 0])
                mc.append([1, 0, 0])
            elif value == 1:
                tc.append([0, 1])
                mc.append([0, 0, 1])
            elif value == 0:
                # Label 0 has no two-class encoding; ``ni`` lets callers drop it.
                tc.append([0, 0])
                mc.append([0, 1, 0])
        twoclass.append(tc)
        multiclass.append(mc)
    twoclass = np.array(twoclass)
    multiclass = np.array(multiclass)

    feature_pli = np.zeros((n_trials, segments, n_channels, n_channels, n_bands))
    feature_coh = np.zeros((n_trials, segments, n_channels, n_channels, n_bands))
    feature_psd = np.zeros((n_trials, segments, n_channels, n_bands))
    for fold in range(n_trials):
        for k in range(segments):
            window = dt[fold, k]
            for i in range(n_channels):
                # Only the lower triangle (j < i) is filled; the rest stays zero.
                for j in range(i):
                    # Coherence goes into feature_coh and PLI into feature_pli
                    # (these two assignments used to be swapped).
                    feature_coh[fold, k, i, j, :] = coherence_signals(window[i], window[j])
                    feature_pli[fold, k, i, j, :] = phase_lag_index(window[i], window[j])
                feature_psd[fold, k, i, :] = power_spectral_density(window[i])
        print(f'Processed {fold} trial')

    print(feature_coh.shape, feature_pli.shape, feature_psd.shape, dt.shape, twoclass.shape, multiclass.shape)

    sio.savemat(os.path.join(output_dir, filename), {'coh': feature_coh,
                                                     'pli': feature_pli,
                                                     'psd': feature_psd,
                                                     'labels_kfold': l_kfold,
                                                     'twoclass': twoclass,
                                                     'multiclass': multiclass,
                                                     'EEGNet': dt,
                                                     'ni': ni})


In [12]:
# folder_path = '/home/desktop/Desktop/22104412_Docs/EEG-COGMusic/EEG-Emotion/Datasets/SEED/Data/Preprocessed_EEG'
# for file in os.listdir(folder_path):
#     filepath = folder_path+os.path.sep+file
#     filename = file
#     data_processing(filepath, filename)

In [15]:
# Process every .mat recording in folder_path that does not yet have an
# output file of the same name in dest_path.
folder_path = '/home/desktop/Desktop/22104412_Docs/EEG-COGMusic/EEG-Emotion/Datasets/SEED/Data/Preprocessed_EEG'
dest_path = '/home/desktop/Desktop/22104412_Docs/EEG-COGMusic/DA-AFNet/datasets/SEED/Zscore_clipped'
list_files = os.listdir(dest_path)
for file in os.listdir(folder_path):
    # Already processed in an earlier run: skip it.
    if file in list_files:
        print(f'{file} Exists')
        continue
    # Anything that is not a .mat file cannot be loaded by data_processing.
    if not file.endswith('.mat'):
        print(f'Not Processable {file}')
        continue
    filepath = os.path.join(folder_path, file)
    filename = file
    data_processing(filepath, filename)

8_20140521.mat Exists
10_20131211.mat Exists
15_20130709.mat Exists
15_20131105.mat Exists
5_20140411.mat Exists
9_20140704.mat Exists
6_20131113.mat Exists
14_20140601.mat Exists
7_20131030.mat Exists
7_20131106.mat Exists
3_20140603.mat Exists
9_20140620.mat Exists
13_20140603.mat Exists
2_20140404.mat Exists
4_20140705.mat Exists
3_20140629.mat Exists
10_20131204.mat Exists
7_20131027.mat Exists
11_20140618.mat Exists
14_20140627.mat Exists
11_20140630.mat Exists
5_20140418.mat Exists
8_20140511.mat Exists
12_20131207.mat Exists
8_20140514.mat Exists
2_20140413.mat Exists
2_20140419.mat Exists
1_20131030.mat Exists
6_20131016.mat Exists
15_20131016.mat Exists
5_20140506.mat Exists
4_20140621.mat Exists
3_20140611.mat Exists
4_20140702.mat Exists
9_20140627.mat Exists
12_20131201.mat Exists
1_20131107.mat Exists
Dict Key 0 processed
Dict Key 1 processed
Dict Key 2 processed
Dict Key 3 processed
Dict Key 4 processed
Dict Key 5 processed
Dict Key 6 processed
Dict Key 7 processed
Dict K