### Edgar Moises Hernandez-Gonzalez
#### 26/06/20
#### Clasificación de cálculo mental en EEG con CNN + LSTM
##### Características = Espectrogramas STFT (3D)
##### Resize proporcional de (45 x n_canales) x 27 [1440x27]
##### Búsqueda en cuadrícula

In [1]:
from google.colab import drive

In [2]:
# Mount Google Drive at /content/drive (the dataset CSV files are read from there)
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
from scipy.signal import spectrogram
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import (TimeDistributed, Conv2D, MaxPool2D, Flatten, LSTM,
                          Dense, Dropout)
from keras.optimizers import Adam
from keras.wrappers.scikit_learn import KerasClassifier
from keras import backend as K

Using TensorFlow backend.


In [2]:
# This notebook only works when the backend reports 'channels_last';
# it does not work with 'channels_first'
K.image_data_format()

'channels_last'

In [3]:
# EEG signals of all channels: one trial per row, channels concatenated along the columns
x = pd.read_csv("/content/drive/My Drive/MentalCalculation/Datos/MeCa_car_15_3.csv", header=None)

# class labels, one per trial, converted to a NumPy array
y = pd.read_csv("/content/drive/My Drive/MentalCalculation/Datos/EtiquetasMeCa15RMFH-03.csv", header=None).values

In [4]:
# Show the shape of the signals and of the labels, one per line
print(x.shape, y.shape, sep="\n")

(304, 43008)
(304, 1)


In [5]:
# Number of trials; if the test labels are not available, the test features can be used instead
n_samples = y.shape[0]

print("n_samples:", n_samples)

n_samples: 304


In [6]:
# Number of distinct labels; y may be either the train or the test labels
n_classes = np.unique(y).size

print("n_classes:", n_classes)

n_classes: 2


In [7]:
# Stack the STFT spectrograms of every channel vertically (no resizing is applied)
def unir_espectrogramas_vertical(data, n_canales=32, n_muestras=1344, fs=512,
                                 filas=(16, 61)):
  """Build one vertically stacked spectrogram image per trial.

  Each row of `data` holds the signals of all channels concatenated along
  the columns (channel j occupies columns j*n_muestras:(j+1)*n_muestras).
  For every channel the STFT spectrogram is computed (Hann window,
  nperseg=512, noverlap=480, nfft=1024, scaling='spectrum'), the frequency
  rows filas[0]:filas[1] are kept, and the channel blocks are stacked on
  top of each other.

  Args:
    data: 2D DataFrame or array with at least n_canales * n_muestras
      columns; extra trailing columns are ignored.
    n_canales: number of channels per trial.
    n_muestras: number of time samples per channel (must be >= 512).
    fs: sampling frequency passed to the spectrogram.
    filas: (start, stop) frequency-bin rows to keep. With fs=512 and
      nfft=1024 the bins are 0.5 Hz apart, so the default (16, 61)
      keeps 8 Hz to 30 Hz.

  Returns:
    float64 array of shape (n_trials, n_canales * (stop - start), n_frames);
    (n_trials, 1440, 27) with the default arguments.

  Raises:
    ValueError: if `data` is not 2D, has too few columns, the signal is
      shorter than one STFT window, or `filas` is out of range.
  """
  nperseg, noverlap, nfft = 512, 480, 1024
  fila_ini, fila_fin = filas

  senales = np.asarray(data)
  if senales.ndim != 2:
    raise ValueError("data must be 2D (trials x concatenated channels)")
  n_columnas = n_canales * n_muestras
  if senales.shape[1] < n_columnas:
    raise ValueError("data has %d columns, expected at least %d"
                     % (senales.shape[1], n_columnas))
  if n_muestras < nperseg:
    raise ValueError("n_muestras must be at least %d" % nperseg)
  if not 0 <= fila_ini < fila_fin <= nfft // 2 + 1:
    raise ValueError("filas must satisfy 0 <= start < stop <= %d"
                     % (nfft // 2 + 1))

  n_ensayos = senales.shape[0]
  n_filas = fila_fin - fila_ini
  # number of STFT frames produced without padding or boundary extension
  n_tiempos = (n_muestras - noverlap) // (nperseg - noverlap)

  datos = np.zeros((n_ensayos, n_canales * n_filas, n_tiempos))
  for i in range(n_ensayos):
    # one row per channel, so a single call processes every channel of the trial
    canales = senales[i, :n_columnas].reshape(n_canales, n_muestras)
    _, _, Sxx = spectrogram(canales, fs=fs, window='hann', nperseg=nperseg,
                            noverlap=noverlap, nfft=nfft, scaling='spectrum')
    # Sxx is (channel, frequency, time): keep the band and stack the channels
    datos[i] = Sxx[:, fila_ini:fila_fin, :].reshape(n_canales * n_filas,
                                                    n_tiempos)
    if i % 100 == 0: # progress indicator only
      print(i)
  return datos

In [8]:
# llamar a unir_espectrogramas_vertical
inicio = time.time()

x = unir_espectrogramas_vertical(x)

fin = time.time()
print("Tiempo:", fin - inicio)

0
100
200
300
Tiempo: 9.753453731536865


In [9]:
# Shape after the conversion: (trials, stacked frequency rows, time frames)
print(x.shape)

(304, 1440, 27)


In [10]:
# Value range of the spectrograms before scaling: max, min and mean, one per line
print(np.max(x), np.min(x), np.mean(x), sep="\n")

215.89295406746834
1.4105608913200777e-08
0.7601762349884014


In [11]:
# single-precision floats
x = x.astype('float32')

# scale into the 0-1 range by dividing by the global maximum rounded up
x /= np.ceil(x.max())

In [12]:
# Value range after scaling: max, min and mean, one per line
print(np.max(x), np.min(x), np.mean(x), sep="\n")

0.99950445
6.530375e-11
0.0035193355


In [13]:
# 3D -> 5D: insert a length-1 time-step axis after the trial axis
# and a trailing length-1 channel axis
x = x[:, np.newaxis, :, :, np.newaxis]

print(x.shape)

(304, 1, 1440, 27, 1)


In [14]:
def CNN_LSTM(n_unidades):
  """Build and compile the CNN + LSTM classifier.

  Two TimeDistributed Conv2D/MaxPool2D stages (2 filters, 3x3 kernels,
  2x2 pooling) are flattened per time step and fed to an LSTM, followed by
  a softmax output layer.

  Reads the notebook-level globals `x` (input shape is x.shape[1:]) and
  `n_classes` (size of the output layer).

  Args:
    n_unidades: number of units of the LSTM layer.

  Returns:
    The compiled Sequential model (Adam, lr=1e-4, sparse categorical
    cross-entropy, accuracy metric).
  """
  capas = [
      TimeDistributed(Conv2D(2, (3,3), activation='relu', padding='same'),
                      input_shape=x.shape[1:]),
      TimeDistributed(MaxPool2D((2,2))),
      TimeDistributed(Conv2D(2, (3,3), activation='relu', padding='same')),
      TimeDistributed(MaxPool2D((2,2))),
      TimeDistributed(Flatten()),
      LSTM(n_unidades, activation='tanh', dropout=0.5),
      Dense(n_classes, activation='softmax'),
  ]
  red = Sequential(capas)
  red.compile(optimizer=Adam(lr=1e-4),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
  return red

In [14]:
def CNN_LSTM_2(n_unidades, n_neuronas):
  """Build and compile the CNN + LSTM classifier with a hidden dense layer.

  Two TimeDistributed Conv2D/MaxPool2D stages (2 filters, 3x3 kernels,
  2x2 pooling) are flattened per time step and fed to an LSTM, followed by
  a ReLU dense layer and a softmax output layer.

  Reads the notebook-level globals `x` (input shape is x.shape[1:]) and
  `n_classes` (size of the output layer).

  Args:
    n_unidades: number of units of the LSTM layer.
    n_neuronas: number of neurons of the hidden dense layer.

  Returns:
    The compiled Sequential model (Adam, lr=1e-4, sparse categorical
    cross-entropy, accuracy metric).
  """
  capas = [
      TimeDistributed(Conv2D(2, (3,3), activation='relu', padding='same'),
                      input_shape=x.shape[1:]),
      TimeDistributed(MaxPool2D((2,2))),
      TimeDistributed(Conv2D(2, (3,3), activation='relu', padding='same')),
      TimeDistributed(MaxPool2D((2,2))),
      TimeDistributed(Flatten()),
      LSTM(n_unidades, activation='tanh', dropout=0.5),
      Dense(n_neuronas, activation='relu'),
      Dense(n_classes, activation='softmax'),
  ]
  red = Sequential(capas)
  red.compile(optimizer=Adam(lr=1e-4),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
  return red

In [15]:
# scikit-learn compatible wrapper around the model builder, trained for 400 epochs
# with batches of 20 and no per-epoch logging.
# Alternative without the hidden dense layer:
#clf = KerasClassifier(build_fn = CNN_LSTM, epochs=400, batch_size=20, verbose=0)
clf = KerasClassifier(build_fn = CNN_LSTM_2, epochs=400, batch_size=20, verbose=0)

In [16]:
# Earlier single-parameter sweep, kept for reference:
#   n_unidades = [2, 4, 8, 16, 32, 64]
#   hiperparametros = dict(n_unidades = n_unidades)

# Candidate values: LSTM units and neurons of the hidden dense layer
n_unidades = [4, 8]
n_neuronas = [16, 32]
hiperparametros = {'n_unidades': n_unidades, 'n_neuronas': n_neuronas}

In [17]:
# Timing notes from earlier runs:
# 3 networks = 460 s
# 6 networks = 862 s
# NOTE(review): the grid defined in this notebook has 2 x 2 = 4 combinations, so this
# product looks like a leftover from an earlier 3 x 2 grid — confirm
3*2

6

In [18]:
# 5-fold cross-validated grid search over `hiperparametros`, scored with macro-averaged F1;
# n_jobs=-1 asks scikit-learn to use all available processors
grid = GridSearchCV(clf, hiperparametros, scoring='f1_macro', n_jobs=-1, cv=5)

In [19]:
# Run the grid search and report the elapsed wall-clock time in seconds
inicio = time.time()

grid.fit(x, y)

fin = time.time()
print("Tiempo:", fin - inicio)



Tiempo: 909.1452016830444


In [20]:
# Best hyper-parameter combination and its mean cross-validated score, one per line
print(grid.best_params_, grid.best_score_, sep="\n")

{'n_neuronas': 16, 'n_unidades': 4}
0.8451064893953047


In [21]:
# Mean score and two standard deviations for every evaluated configuration
means = grid.cv_results_['mean_test_score']
stds = grid.cv_results_['std_test_score']
for idx, params in enumerate(grid.cv_results_['params']):
    mean, std = means[idx], stds[idx]
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

0.845 (+/-0.065) for {'n_neuronas': 16, 'n_unidades': 4}
0.834 (+/-0.102) for {'n_neuronas': 16, 'n_unidades': 8}
0.837 (+/-0.098) for {'n_neuronas': 32, 'n_unidades': 4}
0.838 (+/-0.069) for {'n_neuronas': 32, 'n_unidades': 8}


In [30]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime → "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Fri Jun 26 16:15:35 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.36.06    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   47C    P0    33W / 250W |   1358MiB / 16280MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces