<a href="https://colab.research.google.com/github/DreamSki/DEAP--emotion-classifier/blob/main/lstm_lstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import tensorflow as tf
import os
import numpy as np
import _pickle as cPickle
import tensorflow_probability as tfp


import matplotlib.pyplot as plt
from tqdm import tqdm

In [3]:
tf.test.gpu_device_name()

'/device:GPU:0'

In [4]:
gpus = tf.config.list_physical_devices("GPU")
 
if gpus:
    gpu0 = gpus[0] #如果有多个GPU，仅使用第0个GPU
    tf.config.experimental.set_memory_growth(gpu0, True) #设置GPU显存用量按需使用
    # 或者也可以设置GPU显存为固定使用量(例如：4G)
    #tf.config.experimental.set_virtual_device_configuration(gpu0,
    #    [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)]) 
    tf.config.set_visible_devices([gpu0],"GPU") 

In [5]:
assert(tf.test.gpu_device_name())

tf.keras.backend.clear_session()
tf.config.optimizer.set_jit(False)

In [6]:
deap_dir = "/content/drive/MyDrive/data_preprocessed_python"
channel = [1,2,3,4,6,11,13,17,19,20,21,25,29,31]

In [7]:
def load_DEAP(data_dir, n_subjects = 32, only_phys = False, only_EEG = True):
    # get all files name to a list
    filenames = os.listdir(data_dir)
    filepaths = []
    for i in filenames:
        filepath = data_dir + "/" + i
        filepaths.append(filepath)
    all_data = []
    all_labels = []
    if n_subjects < 16:
        filepaths = filepaths[:n_subjects]
    else:
        filepaths = filepaths[-n_subjects:]
    print(len(filepaths))
    for filepath in filepaths:
        loaddata = cPickle.load(open(filepath, 'rb'), encoding="latin1",)
        labels = loaddata['labels']
        new_data = loaddata['data'].astype(np.float32)
        if only_phys:
            new_data = new_data[:, 32:, :]
        elif only_EEG:
            #print(new_data.head())
            new_data = new_data[:, channel, :]
        all_labels.append(labels)
        all_data.append(new_data)
    all_labels = np.array(all_labels)
    all_data = np.array(all_data)

    all_data = all_data.reshape(-1, 14, all_data.shape[-1])
    all_data = all_data[:, :, 3*128:]
    all_labels = all_labels.reshape(-1, all_labels.shape[-1])
    print("data shape: ", all_data.shape)

    # all_data = [all_data[:, 128*i: 128*(i+10)] for i in range(6)]
    # all_data = np.concatenate((all_data[0], all_data[1], all_data[2], all_data[3], all_data[4], all_data[5]), axis = 0)
    # # print(all_labels.shape)
    # print(all_data.shape)

    return all_data.astype(np.float16), all_labels

In [8]:
def feature_extraction(all_data, labels, label_type = 'valence', task = "R", C = 14, N = 10, K = 8, L = 2):
    if label_type == "valence":
            labels = labels[:, 0].squeeze()
    elif label_type == 'arousal':
        label = labels[:, 1].squeeze()
    # drop data which has label == 5
    hl_indices = np.where(labels != 5)
    labels = labels[hl_indices]
    all_data = all_data[hl_indices]
    n_samples = 8*32

    all_segmented_data = []
    for r in range(0, len(all_data), n_samples):
        print(r + n_samples, len(all_data))
        if r + n_samples < len(all_data):
            data = all_data[r: r+n_samples]
        else:
            data = all_data[r:]
        data = data[:, :, :int(data.shape[-1]/L)*L]
        
        # reshape: original shape: (_, C, 8064) -> (_, C, t * N * K, L) 前后两段
        data = data.reshape(data.shape[0], data.shape[1], -1, L)
        
        # calculate single channel feature (it's the mean value)
        data = np.mean(data, axis = -1).squeeze()
        
        # reshape : original shape: (_, C, N*K, 1) -> (_, C, t * N, K)
        data = data.reshape(data.shape[0], data.shape[1], -1, K)
        # segmenting ovelap: overlap ratio = (N-1)/N
        segmented_data = []
        for i in range(0, data.shape[-2] - N, 1):  # ( _, N , C , K)
            segmented_data.append(np.transpose(data[:, :, i:i+N, :], (0, 2, 1, 3)))
        # reshape to calculate cov matrix
        segmented_data = np.array(segmented_data).reshape(-1, C, K)
        print(segmented_data.shape)
        # calculate pearson corvariance matrix
        segmented_corr = tfp.stats.correlation(
            segmented_data, y = None, sample_axis=-1, event_axis=-2, keepdims=False, name=None
        )
        # output = np.array(output).reshape(-1, C*C)
        all_segmented_data.append(np.array(segmented_corr))

    all_segmented_data = np.concatenate(all_segmented_data, axis = 0 )
    upper_indices = np.triu_indices_from(np.ndarray((C, C)), k=1)
    sample_matrix = np.arange(0, C*C).reshape((C, C))
    upper_values  = list(sample_matrix[upper_indices])
    all_segmented_data = all_segmented_data.reshape(-1, N, C*C)
    output = all_segmented_data[:, :, upper_values]
    print(output.shape)
    # output = []

    # for i in range(len(all_segmented_data)):
    #     # get only the upper triangular of the covariance matrix)
    #     cov_matrix = all_segmented_data[i]
    #     output.append(list(cov_matrix[upper_indices]))

    # output = np.array(output).reshape(int(len(all_segmented_data)/N), N, -1)

    if task == 'C':
        labels = np.where(labels > 5, 1, 0)
    segmented_labels = np.repeat(labels, repeats = int(output.shape[0]/len(labels)), axis = 0)
    print(segmented_labels)
    return output, np.array(segmented_labels)

In [9]:
import tensorflow_datasets as tfds
!pip install tensorflow-addons
import tensorflow as tf
import tensorflow_addons as tfa
import numpy as np
from tensorflow.keras.layers import Dense, Flatten, LSTM, Dropout, Input
from tensorflow.keras import Sequential, Model
from sklearn.model_selection import train_test_split
#from dataset_prepare import load_DEAP, feature_extraction
import matplotlib.pyplot as plt

Collecting tensorflow-addons
  Downloading tensorflow_addons-0.14.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)
[?25l[K     |▎                               | 10 kB 34.0 MB/s eta 0:00:01[K     |▋                               | 20 kB 34.5 MB/s eta 0:00:01[K     |▉                               | 30 kB 18.7 MB/s eta 0:00:01[K     |█▏                              | 40 kB 16.0 MB/s eta 0:00:01[K     |█▌                              | 51 kB 8.5 MB/s eta 0:00:01[K     |█▊                              | 61 kB 8.5 MB/s eta 0:00:01[K     |██                              | 71 kB 8.7 MB/s eta 0:00:01[K     |██▍                             | 81 kB 9.7 MB/s eta 0:00:01[K     |██▋                             | 92 kB 10.2 MB/s eta 0:00:01[K     |███                             | 102 kB 8.2 MB/s eta 0:00:01[K     |███▎                            | 112 kB 8.2 MB/s eta 0:00:01[K     |███▌                            | 122 kB 8.2 MB/s eta 0:00:01[K     |█

In [23]:

def generate_model():
  model = tf.keras.models.Sequential([
    tf.keras.layers.Input((10,91)),
    tf.keras.layers.LSTM(units = 256 ,activation= 'relu', kernel_initializer=tf.keras.initializers.VarianceScaling(), return_sequences= True),
    #tf.keras.layers.Dropout(0.5),
    tf.keras.layers.LSTM(units = 256 ,activation= 'relu',kernel_initializer=tf.keras.initializers.VarianceScaling(), return_sequences= False),
    # Softmax should be done in float32 for numeric stability.
    #tf.keras.layers.Activation('sigmoid', dtype='float32'),
    tf.keras.layers.Dense(1, activation = "sigmoid", kernel_initializer = tf.keras.initializers.GlorotNormal())
  ])

  return model

In [97]:
data.shape

(1280, 14, 7680)

In [11]:
task = "C"
data, labels = load_DEAP(deap_dir, n_subjects = 32)
output, out_labels = feature_extraction(data, labels, label_type = 'valence', task = task, L = 8)

32
data shape:  (1280, 14, 7680)
256 1264
(281600, 14, 8)
512 1264
(281600, 14, 8)
768 1264
(281600, 14, 8)
1024 1264
(281600, 14, 8)
1280 1264
(264000, 14, 8)
(139040, 10, 91)
[1 1 1 ... 1 1 1]


In [12]:
X_train, X_test, y_train, y_test = train_test_split(output, out_labels, stratify = out_labels, test_size = 0.2, shuffle = True, random_state = 42)

In [18]:
def compile_model(model):
  opt = tf.keras.optimizers.Adam(learning_rate = 1e-3)
  model.compile(loss = "binary_crossentropy",metrics = ["accuracy"], optimizer = opt)
  return model


def train_model(model, x_train, y_train, x_test, y_test, epochs=50):
  model.fit(x_train, y_train, batch_size=256, epochs=epochs, validation_data=(x_test, y_test), shuffle=True)

def warmup(model, x_train, y_train, x_test, y_test):
  # Warm up the JIT, we do not wish to measure the compilation time.
  initial_weights = model.get_weights()
  train_model(model, x_train, y_train, x_test, y_test, epochs=1)
  model.set_weights(initial_weights)



In [24]:
tf.keras.backend.clear_session()
tf.config.optimizer.set_jit(True) # Enable XLA.
model = compile_model(generate_model())



In [25]:
assert(tf.test.gpu_device_name())

In [None]:
warmup(model, X_train, y_train, X_test, y_test)
%time train_model(model, X_train, y_train, X_test, y_test)

In [27]:
import pandas as pd

In [108]:
from scipy import signal

In [109]:
def bandpass_filter(data, fs=128, low=4, high=45): #no reference for freq    
    nyq = 0.5 * fs
    low = low / nyq
    high = high / nyq
    
    try:
        assert len(data.shape) == 2
    except:
        print("Error: please check data shape, it should be 2D array of a raw signal (nsamples, 3000).")
        raise ValueError

    order = 2
    b, a = signal.butter(order, [low, high], btype='band')
    tmp = signal.filtfilt(b, a, np.concatenate(data))
    
    return tmp.reshape((data.shape[0], data.shape[1]))

In [110]:
my_data = []
for i in range(12):
  mydata = pd.read_excel("/content/drive/MyDrive/EEG Raw Data/Trail"+str(i+1)+".xlsx",header=1)
  a = bandpass_filter(np.array(mydata[ch])[:7428]).tolist()
  
  my_data.append(a)


In [44]:
ch = ['EEG.AF3','EEG.F3','EEG.F7','EEG.FC5','EEG.T7','EEG.P7','EEG.O1','EEG.AF4','EEG.F4','EEG.F8','EEG.FC6','EEG.T8','EEG.P8','EEG.O2']

In [111]:
me_data = np.array(my_data)

In [88]:
z=pd.read_csv("/content/drive/MyDrive/selfassessmentmanikin/data/selfassessmentmanikin_raw_2021.08.18_2021-08-18-06-40-12-878.iqdat",sep="\t")

In [89]:
z.columns

Index(['build', 'computer.platform', 'date', 'time', 'subject', 'group',
       'sessionid', 'blockcode', 'blocknum', 'trialcode', 'trialnum',
       'trialCount', 'targetIndex', 'target', 'valenceSelected',
       'arousalSelected', 'rt'],
      dtype='object')

In [94]:
label = np.array(z[["valenceSelected","arousalSelected"]])

In [114]:
output, out_labels = feature_extraction(me_data1, label, label_type = 'valence', task = task, L = 8)

256 10
(10600, 14, 8)
(1060, 10, 91)
[0 0 0 ... 0 0 0]


In [113]:
me_data1 = me_data.transpose(0,2,1)

In [103]:
output.shape

(1060, 10, 91)

In [104]:
out_labels.shape

(1060,)

In [115]:
model.evaluate(output,out_labels)



[6.240941524505615, 0.5896226763725281]