In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy as sp
from scipy import signal as sp_signal
import biosignalsnotebooks as bsnb
from statistics import mode
import pickle

In [2]:
%matplotlib inline
pd.set_option("display.max.columns", None)

In [3]:
archivo = 'Izquierda - Derecha - Cerrado 1.txt'
data = pd.read_csv(archivo, sep='\t', header=None)
data.columns = ['Time', 'C 1', 'C 2', 'C 3', 'C 4', 'Timestamp', 'Lecture code', 'Dummy']
del data['Dummy']
del data['C 2']
del data['C 4']

In [4]:
data

Unnamed: 0,Time,C 1,C 3,Timestamp,Lecture code
0,0.000000,-17.213388,-3.959528,1.600423e+12,0
1,0.003906,-1.411259,-5.499511,1.600423e+12,0
2,0.007812,43.181385,-0.561774,1.600423e+12,0
3,0.011719,57.539547,3.926640,1.600423e+12,0
4,0.015625,3.823523,1.838967,1.600423e+12,0
...,...,...,...,...,...
116579,455.386719,2.641475,-1.816089,1.600424e+12,0
116580,455.390625,2.179395,-1.122937,1.600424e+12,0
116581,455.394531,1.176698,-0.354741,1.600424e+12,0
116582,455.398438,-1.783906,0.820132,1.600424e+12,0


In [7]:
class SignalProcessing:

    def __init__(self, df, columnas):
        self.df = df
        self.columnas = columnas
        self.matriz = None
        self.periods = None
        self.psd_matriz = None
        self.signals = None

    def get_periodos(self): # List[List[int]]
        lecture_codes = self.df['Lecture code'].values
        times = self.df['Time'].values
        periods = []
        size = len(times)
        active_signal = False
        current_period = []
        for i in range(size):
            code = int(lecture_codes[i])
            time = int(times[i])

            if code>=100 and code <=110:
                active_signal = True

            if active_signal and len(current_period)==0:
                current_period.append(i) # time

            if code >= 200 and active_signal:
                active_signal = False
                current_period.append(i) # time
                periods.append(tuple(current_period))
                current_period = []
        self.periods = periods
        return periods

    def get_periodos_segundos(self): # List[List[int]]
        lecture_codes = self.df['Lecture code'].values
        times = self.df['Time'].values
        periods = []
        size = len(times)
        active_signal = False
        current_period = []
        for i in range(size):
            code = int(lecture_codes[i])
            time = int(times[i])

            if code>=100 and code <=110:
                active_signal = True

            if active_signal and len(current_period)==0:
                current_period.append(time) # time

            if code >= 200 and active_signal:
                active_signal = False
                current_period.append(time) # time
                periods.append(tuple(current_period))
                current_period = []
        return periods
    
    def get_all_periods(self, periodos):
        last_end = -1
        new_periodos = []
        for i in range(len(periodos)): #asumiendo que nunca empezamos en 0
            init,end = periodos[i]
            new_period = [last_end+1, init]
            last_end = end
            new_periodos.append(new_period)
            new_periodos.append(periodos[i])
        return new_periodos

    def get_elements_per_window(self, periodos = None): # int
        if not periodos:
            periodos = self.get_periodos()
        difs = []
        for init,end in periodos:
            diferencia = end - init
            difs.append(diferencia)
        return mode(difs)
    
    def get_elements_per_second(self, window_size = 15):
        return int(self.get_elements_per_window()*window_size/15)

    def get_matriz(self, size_ventana = 15):
        if self.matriz:
            return self.matriz
        self.create_matriz(size_ventana)
        return self.matriz

    def create_matriz(self, size_ventana = 15):
        df = self.df
        columnas = self.columnas
        lecture_codes = self.df['Lecture code'].values
        periodos = self.get_periodos() #periodos con codigos.. 101 102 5000 9000
        elements_per_second = self.get_elements_per_window(periodos)
        matriz = {} # signal_code: columnas x time x epochs
        elementos_por_ventana = int(elements_per_second*size_ventana/15)
        all_periods = self.get_all_periods(periodos)# [5000 9000] [0 5000, 5000 9000, 90001 ...]
#         print(all_periods)
        for nombre_columna in columnas:
            columna = df[nombre_columna].values
            epochs = []
            epochs_per_signal = {}
            for init,end in periodos: # TODO periodos tendran que ser TODOS los periodos
                current_epoch = []
                current_signal = lecture_codes[init]
#                 print('soy un periodo', init,end, current_signal)
                for i in range(init,end):
                    freq = columna[i]
                    current_epoch.append(freq)
                    if len(current_epoch) == elementos_por_ventana:
                        if current_signal not in epochs_per_signal:
                            epochs_per_signal[current_signal] = []
                        epochs_per_signal[current_signal].append(current_epoch)
                        epochs.append(current_epoch)
                        current_epoch = []
#             At this point we have epochs_per_signal like this, for just 1 chanel
#             101: [[],[]]
#             102: [[],[]]
            for signal,epochs in epochs_per_signal.items():
                if signal not in matriz:
                    matriz[signal] = []
                matriz[signal].append(epochs) #agregamos los epochs del canal de la señal
        self.matriz = matriz

    def apply_emg_filter(self):
        for columna in self.columnas:   
            self.df[columna], _, _ = self.filter_emg(self.df[columna])

    def filter_emg(self, emg, low_pass=10, sfreq = 256, high_band = 4, low_band = 40):
        # Normalize cut off freq to sampling freq
        high_band = high_band/(sfreq/2)
        low_band = low_band/(sfreq/2)

        # Create bandpass filter for emg
        b1, a1 = sp.signal.butter(4, [high_band, low_band], btype='bandpass')

        # process emg signal: filter emg
        emg_filtered = sp.signal.filtfilt(b1,a1,emg)

        # Process emg signal recify
        emg_rectified = abs(emg_filtered)

        # low pass filter and apply to rectified signal to get emg envelope
        low_pass = low_pass/(sfreq/2)
        b2, a2 = sp.signal.butter(4, low_pass, btype='lowpass')
        emg_envelope= sp.signal.filtfilt(b2, a2, emg_rectified)
        return emg_filtered, emg_rectified, emg_envelope

    
    def plot_df(self, canales, init = None, end = None):
        self.df.iloc[init:end].plot(x='Time', y=canales, figsize=(20,8))
    
    def plot_epoch(self, epoch, canales):
        init, end = self.periods[epoch]
        self.df.iloc[init:end].plot(x='Time', y=canales, figsize=(20,8))

    def plot_all_epochs(self, canal, inicio = None, fin = None):
        total_rows = len(self.df[self.columnas[0]].values)
        temp_arr = []
        for i in range(total_rows):
            temp_arr.append(0)
        periodos = None
        if self.periods:
            periodos = self.periods
        else:
            periodos = self.get_periodos()
        for init,end in periodos:
            for i in range(init,end):
                temp_arr[i] = 30
        temp_df = self.df.copy()
        temp_df['Epochs'] = temp_arr
        canales = [canal, 'Epochs']
        inicio = inicio if inicio else 0
        fin = fin if fin else total_rows
        temp_df.iloc[inicio:fin].plot(x='Time', y=canales, figsize=(20,8))
    
    def generate_psd_matrix(self):
        psd_matriz = {}
        for signal,canales in self.matriz.items():
            current_matriz = []
            for canal in canales:
                cnl = []
                for epoch in canal:
                    freqs, psd = sp_signal.periodogram(epoch, 256) # fs
#                     freqs, psd = sp_signal.welch(epoch)
#                     print(freqs,psd)
                    cnl.append([freqs,psd])
                current_matriz.append(cnl)
            # current matriz tiene la info de una señan
            psd_matriz[signal] = current_matriz
        self.psd_matriz = psd_matriz
    
    def plot_epoch_psd(self, signal, canal, epoch):
        freqs = self.psd_matriz[signal][canal][epoch][0]
        psd = self.psd_matriz[signal][canal][epoch][1]
        plt.figure(figsize=(5, 4))
        plt.semilogx(freqs, psd)
        plt.title('PSD: power spectral density')
        plt.xlabel('Frequency')
        plt.ylabel('Power')
        plt.tight_layout()
    
    def plot_signal_promedio(self, signal, canal):
        
        frecuencias = np.copy(self.psd_matriz[signal][canal][0])[0] #primer epoch y sumo los demas
        psds = np.copy(self.psd_matriz[signal][canal][0])[1] #primer epoch y sumo los demas
    
        for pos in range(1,len(self.psd_matriz[signal][canal])):
            current_epoch = np.copy(self.psd_matriz[signal][canal][pos])
            frecuencias+= current_epoch[0]
            psds += current_epoch[1]
        frecuencias /= len(self.psd_matriz[signal][canal])
        psds /= len(self.psd_matriz[signal][canal])        
        plt.figure(figsize=(5, 4))
        plt.semilogx(frecuencias, psds)
        plt.title('PSD: power spectral density')
        plt.xlabel('Frequency')
        plt.ylabel('Power')
        plt.tight_layout()
    
    def create_df(self):
        rows_matriz = []
        for signal,canales in self.psd_matriz.items():
            for i in range(len(canales[0])):
                # tomar los x epochs en i de los x canales
                #iterar en canales
                epochs_per_chanel = []
                for canal in canales:
                    epochs_per_chanel.extend(canal[i][1]) #1 es psd
                epochs_per_chanel.append(signal)
                rows_matriz.append(epochs_per_chanel)  
        #         df['column_name']=pd.Series(canal)
        self.df_psd = pd.DataFrame.from_records(rows_matriz)
    
    def create_mlp_classifier(self, size = 15):
        self.apply_emg_filter()
        self.create_matriz(size)
        self.generate_psd_matrix()
        self.create_df()

        X = self.df_psd.iloc[:,:-1].values
        y = [element[0] for element in self.df_psd.iloc[:,-1:].values]

        # Create classifier
        clf = MLPClassifier(max_iter=1000)
        clf.fit(X, y)
        filename = 'signal_clf.sav'
        pickle.dump(clf, open(filename, 'wb'))

In [9]:
# canales = ['C 1', 'C 3']

# signal_processing = SignalProcessing(data, canales)
# signal_processing.create_mlp_classifier(.2)

In [None]:
canales = ['C 1', 'C 3']
signal_processing = SignalProcessing(data, canales)
signal_processing.apply_emg_filter()
signal_processing.create_matriz(1)

In [None]:
canales = ['C 1', 'C 3']
signal_processing = SignalProcessing(data, canales)
signal_processing.apply_emg_filter()
signal_processing.create_matriz(.2)
signal_processing.generate_psd_matrix()
signal_processing.create_df()

In [None]:
signal_processing.df.head()

In [None]:
# signal_processing.plot_df('C 1', 14000, 15000)

In [None]:
# signal_processing.apply_emg_filter()

In [None]:
# signal_processing.plot_df('C 1', 14000, 15000)

In [None]:
# signal_processing.create_matriz(.5)

In [None]:
# print(len(matriz), len(matriz[101]), len(matriz[101][0]),len(matriz[101][0][0]))

In [None]:
# signal_processing.get_elements_per_window()

In [None]:
# signal_processing.get_periodos_segundos()

In [None]:
# signal_processing.plot_all_epochs('C 3')

In [None]:
# signal_processing.generate_psd_matrix()

In [None]:
# signal_processing.psd_matriz[101][0][0]

In [None]:
# signal_processing.plot_epoch_psd(101,0,0) # signal, canal, epoch

In [None]:
# signal_processing.plot_epoch_psd(102,1,2) # signal, canal, epoch todo freq

In [None]:
# signal_processing.plot_signal_promedio(101,0) # señal 101 canal 0

In [None]:
# signal_processing.plot_signal_promedio(101,1) # señal 101 canal 1

In [None]:
# signal_processing.plot_signal_promedio(102,0) # señal 102 canal 0

In [None]:
# signal_processing.plot_signal_promedio(102,1) # señal 101 canal 0

In [None]:
# len(signal_processing.psd_matriz[101][0])

In [None]:
# Preparar datos para clasificador

In [None]:
# signal_processing.psd_matriz.keys()

In [None]:
""" 
Formato deseado
df
canal1, canal2, señal
1,       2,      101 epoch 1 del 101
1,       2,      101 epoch 2 del 101
....
2,       4,      102 epoch 1 del 102
2,       4,      102 epoch 2 del 102
"""

In [None]:
# Classify time

In [10]:
import pandas as pd
from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
from sklearn.model_selection import cross_val_score
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.decomposition import PCA

In [None]:
ventanas = [.05,.1, .2, .5, 1, 2, 5, 10, 15]
modelos = ['SVC linear .25', 'SVC', 'KNeighbors', 'Decision Tree', 'Multi layer perceptron', 'Naive Bayes']
canales = ['C 1', 'C 3']
resultados_ventanas = []

for ventana in ventanas:
    print(ventana)
    # set up matriz and create format for classification
    signal_processing = SignalProcessing(data, canales)
    signal_processing.apply_emg_filter()
    signal_processing.create_matriz(ventana)
    signal_processing.generate_psd_matrix()
    signal_processing.create_df()

#     Prepare data
    df = signal_processing.df
    X = df.iloc[:,:-1].values
    y = [element[0] for element in df.iloc[:,-1:].values]
#     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    pca = PCA(.95)
    pca.fit(X)
    X_pca = pca.transform(X)
#   Classify
    models = [
                svm.SVC(kernel='linear', C=.025, random_state=42), 
                svm.SVC(), 
                KNeighborsClassifier(n_neighbors=len(np.unique(y))), 
                DecisionTreeClassifier(random_state=0),
                MLPClassifier(max_iter=1000),
                GaussianNB()
            ]
    classifiers_scores = []
    for clf in models:
        scores = cross_val_score(clf, X_pca, y, cv=5)
        classifiers_scores.append(scores.mean())
    resultados_ventanas.append(classifiers_scores)

In [None]:
pd.DataFrame(   data=resultados_ventanas,    # values
                index=ventanas,    # 1st column as index
                columns=modelos)

In [None]:
# Escalar los datos?

### Parametros para MLP finding

In [None]:
from sklearn.model_selection import GridSearchCV

signal_processing = SignalProcessing(data, canales)
signal_processing.apply_emg_filter()
signal_processing.create_matriz(2)
signal_processing.generate_psd_matrix()
signal_processing.create_df()

#     Prepare data
df = signal_processing.df
X = df.iloc[:,:-1].values
y = [element[0] for element in df.iloc[:,-1:].values]

param_grid = [
        {
            'activation' : ['identity', 'logistic', 'tanh', 'relu'],
            'solver' : ['lbfgs', 'sgd', 'adam'],
            'hidden_layer_sizes': [
             (1,),(2,),(3,),(4,),(5,),(6,),(7,),(8,),(9,),(10,),(11,), (12,),(13,),(14,),(15,),(16,),(17,),(18,),(19,),(20,),(21,)
             ]
        }
       ]

# clf = GridSearchCV(MLPClassifier(max_iter=1000), param_grid, cv=3,
#                            scoring='accuracy')
# clf.fit(X,y)


# print("Best parameters set found on development set:")
# print(clf.best_params_)

clf = svm.SVC(kernel='linear', C=.025, random_state=42)
scores = cross_val_score(clf, X, y, cv=5)
scores.mean()

### Sin PCA parece que funciona bien, veamos

In [None]:
ventanas = [.05,.1, .2, .5, 1, 2, 5, 10, 15]
modelos = ['SVC linear .25', 'SVC', 'KNeighbors', 'Decision Tree', 'Multi layer perceptron', 'Naive Bayes']
canales = ['C 1', 'C 3']
resultados_ventanas = []

for ventana in ventanas:
    # set up matriz and create format for classification
    signal_processing = SignalProcessing(data, canales)
    signal_processing.apply_emg_filter()
    signal_processing.create_matriz(ventana)
    signal_processing.generate_psd_matrix()
    signal_processing.create_df()

#     Prepare data
    df = signal_processing.df
    X = df.iloc[:,:-1].values
    y = [element[0] for element in df.iloc[:,-1:].values]
#     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
#   Classify
    models = [
                svm.SVC(kernel='linear', C=.025, random_state=42), 
                svm.SVC(), 
                KNeighborsClassifier(n_neighbors=len(np.unique(y))), 
                DecisionTreeClassifier(random_state=0),
                MLPClassifier(max_iter=1000),
                GaussianNB()
            ]
    classifiers_scores = []
    for clf in models:
        scores = cross_val_score(clf, X, y, cv=5)
        classifiers_scores.append(scores.mean())
    resultados_ventanas.append(classifiers_scores)

In [None]:
pd.DataFrame(   data=resultados_ventanas,    # values
                index=ventanas,    # 1st column as index
                columns=modelos)

In [None]:
for i in range(len(modelos)):
    modelo = modelos[i]
    y = []
    for resultados in resultados_ventanas:
        y.append(resultados[i])

    plt.plot(ventanas, y)
    plt.title(modelo)
    plt.xlabel('Ventana')
    plt.ylabel('Score')
    plt.show()

### Unos mejoran y otros empeoran, usaré un multi layer perceptron con ventana de .1

In [11]:
archivo = 'Izquierda - Derecha - Cerrado 1.txt'
data = pd.read_csv(archivo, sep='\t', header=None)
data.columns = ['Time', 'C 1', 'C 2', 'C 3', 'C 4', 'Timestamp', 'Lecture code', 'Dummy']
del data['Dummy']

signal_processing = SignalProcessing(data, canales)
signal_processing.apply_emg_filter()
signal_processing.create_matriz(.1)
signal_processing.generate_psd_matrix()
signal_processing.create_df()

#     Prepare data
df = signal_processing.df
X = df.iloc[:,:-1].values
y = [element[0] for element in df.iloc[:,-1:].values]

# Create classifier
clf = MLPClassifier(max_iter=1000)
clf.fit(X, y)
# clf.score(X_test, y_test)

MLPClassifier(max_iter=1000)

### Open new file

In [12]:
archivo = 'Izquierda - Derecha - Cerrado 2.txt'
data = pd.read_csv(archivo, sep='\t', header=None)
data.columns = ['Time', 'C 1', 'C 2', 'C 3', 'C 4', 'Timestamp', 'Lecture code', 'Dummy']
del data['Dummy']

signal_processing = SignalProcessing(data, canales)
signal_processing.apply_emg_filter()
signal_processing.create_matriz(.1)
signal_processing.generate_psd_matrix()
signal_processing.create_df()

In [13]:
df = signal_processing.df
X = df.iloc[:,:-1].values
y = [element[0] for element in df.iloc[:,-1:].values]

In [14]:
scores = cross_val_score(clf, X, y, cv=5)
scores.mean()

0.9992278654770075

### Let's try with abierto cerrado

In [None]:
archivo = 'Abierto - Cerrado - Normal 1.txt'
data = pd.read_csv(archivo, sep='\t', header=None)
data.columns = ['Time', 'C 1', 'C 2', 'C 3', 'C 4', 'Timestamp', 'Lecture code', 'Dummy']
del data['Dummy']
del data['C 2']
del data['C 4']

signal_processing = SignalProcessing(data, canales)
signal_processing.create_mlp_classifier(.2)
# signal_processing.apply_emg_filter()
# signal_processing.create_matriz(.1)
# signal_processing.generate_psd_matrix()
# signal_processing.create_df()

# #     Prepare data
# df = signal_processing.df
# X = df.iloc[:,:-1].values
# y = [element[0] for element in df.iloc[:,-1:].values]

# # Create classifier
# clf = MLPClassifier(max_iter=1000)
# clf.fit(X, y)
# create_mlp_classifier

### Let's test with abierto cerrado normal 2

In [None]:
archivo = 'Abierto - Cerrado - Normal 2.txt'
data = pd.read_csv(archivo, sep='\t', header=None)
data.columns = ['Time', 'C 1', 'C 2', 'C 3', 'C 4', 'Timestamp', 'Lecture code', 'Dummy']
del data['Dummy']
del data['C 2']
del data['C 4']

signal_processing = SignalProcessing(data, canales)
signal_processing.apply_emg_filter()
signal_processing.create_matriz(.1)
signal_processing.generate_psd_matrix()
signal_processing.create_df()

#     Prepare data
df = signal_processing.df
X = df.iloc[:,:-1].values
y = [element[0] for element in df.iloc[:,-1:].values]

scores = cross_val_score(clf, X, y, cv=5)
scores.mean()

### Pseudo online

In [None]:
archivo = 'Izquierda - Derecha - Cerrado 1.txt'
data = pd.read_csv(archivo, sep='\t', header=None)
data.columns = ['Time', 'C 1', 'C 2', 'C 3', 'C 4', 'Timestamp', 'Lecture code', 'Dummy']
del data['Dummy']
del data['C 2']
del data['C 4']

signal_processing = SignalProcessing(data, canales)
signal_processing.apply_emg_filter()
signal_processing.create_matriz(.2)
signal_processing.generate_psd_matrix()
signal_processing.create_df()

#     Prepare data
df = signal_processing.df
X = df.iloc[:,:-1].values
y = [element[0] for element in df.iloc[:,-1:].values]

# Create classifier
clf = MLPClassifier(max_iter=1000)
clf.fit(X, y)
# clf.score(X_test, y_test)