In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy as sp
from scipy import signal as sp_signal
import biosignalsnotebooks as bsnb
from statistics import mode
import pickle

In [2]:
from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
from sklearn.model_selection import cross_val_score
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.decomposition import PCA

In [3]:
def open_file(archivo, canales):
    df = pd.read_csv(archivo, sep='\t', header=None)
    df.columns = ['Time', 'C 1', 'C 2', 'C 3', 'C 4', 'Timestamp', 'Lecture code', 'Dummy']
    unwanted = ['C 2', 'C 4']
    del df['Dummy']
    for column in unwanted:
        del df[column]
    return df

In [4]:
archivo = 'Izquierda - Derecha - Cerrado 1.txt'
data = pd.read_csv(archivo, sep='\t', header=None)
data.columns = ['Time', 'C 1', 'C 2', 'C 3', 'C 4', 'Timestamp', 'Lecture code', 'Dummy']
del data['Dummy']
del data['C 2']
del data['C 4']
data.head()

Unnamed: 0,Time,C 1,C 3,Timestamp,Lecture code
0,0.0,-17.213388,-3.959528,1600423000000.0,0
1,0.003906,-1.411259,-5.499511,1600423000000.0,0
2,0.007812,43.181385,-0.561774,1600423000000.0,0
3,0.011719,57.539547,3.92664,1600423000000.0,0
4,0.015625,3.823523,1.838967,1600423000000.0,0


In [5]:
class SignalProcessing:
    def __init__(self, df, columnas):
        self.df = df
        self.columnas = columnas
        self.matriz = None
        self.periodos = None
        self.psd_matriz = None
        self.signals = None

    def apply_emg_filter(self):
        for columna in self.columnas:   
            self.df[columna], _, _ = self.filter_emg(self.df[columna])

    def filter_emg(self, emg, low_pass=10, sfreq = 256, high_band = 4, low_band = 40):
        # Normalize cut off freq to sampling freq
        high_band = high_band/(sfreq/2)
        low_band = low_band/(sfreq/2)

        # Create bandpass filter for emg
        b1, a1 = sp.signal.butter(4, [high_band, low_band], btype='bandpass')

        # process emg signal: filter emg
        emg_filtered = sp.signal.filtfilt(b1,a1,emg)

        # Process emg signal recify
        emg_rectified = abs(emg_filtered)

        # low pass filter and apply to rectified signal to get emg envelope
        low_pass = low_pass/(sfreq/2)
        b2, a2 = sp.signal.butter(4, low_pass, btype='lowpass')
        emg_envelope= sp.signal.filtfilt(b2, a2, emg_rectified)
        return emg_filtered, emg_rectified, emg_envelope

    def get_periodos(self, per_time = False): # List[Tuple[int]]
        lecture_codes = self.df['Lecture code'].values
        times = self.df['Time'].values
        periodos = []
        size = len(times)
        active_signal = False
        current_period = []
        for i in range(size):
            code = int(lecture_codes[i])
            time = int(times[i])
            append = i if not per_time else times[i]

            if code>=100 and code <=110:
                active_signal = True

            if active_signal and len(current_period)==0:
                current_period.append(append) # time

            if code >= 200 and active_signal:
                active_signal = False
                current_period.append(append) # time
                periodos.append(tuple(current_period))
                current_period = []
        self.periodos = periodos
        return self.periodos
    
    def get_elements_per_window(self): # int
        if  self.periodos:
            self.get_periodos()
        periodos = self.periodos
        difs = []
        for init,end in periodos:
            diferencia = end - init
            difs.append(diferencia)
        return mode(difs)

    def create_matriz(self, size_ventana = 15):
        df = self.df
        columnas = self.columnas
        lecture_codes = self.df['Lecture code'].values
        periodos = self.get_periodos() #periodos con codigos.. 101 102 5000 9000
        elements_per_second = self.get_elements_per_window()
        matriz = {} # signal_code: columnas x time x epochs
        elementos_por_ventana = int(elements_per_second*size_ventana/15)
#         print(all_periods)
        for nombre_columna in columnas:
            columna = df[nombre_columna].values
            epochs = []
            epochs_per_signal = {}
            for init,end in periodos: # TODO periodos tendran que ser TODOS los periodos
                current_epoch = []
                current_signal = lecture_codes[init]
#                 print('soy un periodo', init,end, current_signal)
                for i in range(init,end):
                    freq = columna[i]
                    current_epoch.append(freq)
                    if len(current_epoch) == elementos_por_ventana:
                        if current_signal not in epochs_per_signal:
                            epochs_per_signal[current_signal] = []
                        epochs_per_signal[current_signal].append(current_epoch)
                        epochs.append(current_epoch)
                        current_epoch = []
#             At this point we have epochs_per_signal like this, for just 1 chanel
#             101: [[],[]]
#             102: [[],[]]
            for signal,epochs in epochs_per_signal.items():
                if signal not in matriz:
                    matriz[signal] = []
                matriz[signal].append(epochs) #agregamos los epochs del canal de la señal
        self.matriz = matriz
    
    def generate_psd_matrix(self):
        psd_matriz = {}
        for signal,canales in self.matriz.items():
            current_matriz = []
            for canal in canales:
                cnl = []
                for epoch in canal:
                    freqs, psd = sp_signal.periodogram(epoch, 256) # fs
#                     freqs, psd = sp_signal.welch(epoch)
#                     print(freqs,psd)
                    cnl.append(psd)
                current_matriz.append(cnl)
            # current matriz tiene la info de una señan
            psd_matriz[signal] = current_matriz
        self.psd_matriz = psd_matriz

    def create_df(self):
        rows_matriz = []
        for signal,canales in self.psd_matriz.items():
            current_epoch = 0
            total_epochs = len(canales[0])
            while(current_epoch<total_epochs):
                epoch_per_signal = []
                for canal in canales:
                    epoch_per_signal.extend(canal[current_epoch])
                epoch_per_signal.append(signal)
                rows_matriz.append(epoch_per_signal)  
                current_epoch+=1
#         return pd.DataFrame.from_records(rows_matriz)
        self.df_psd = pd.DataFrame.from_records(rows_matriz)


In [6]:
data.head()

Unnamed: 0,Time,C 1,C 3,Timestamp,Lecture code
0,0.0,-17.213388,-3.959528,1600423000000.0,0
1,0.003906,-1.411259,-5.499511,1600423000000.0,0
2,0.007812,43.181385,-0.561774,1600423000000.0,0
3,0.011719,57.539547,3.92664,1600423000000.0,0
4,0.015625,3.823523,1.838967,1600423000000.0,0


In [7]:
canales = ['C 1', 'C 3']
signal_processing = SignalProcessing(data, canales)
signal_processing.apply_emg_filter()
signal_processing.create_matriz(.2)
signal_processing.generate_psd_matrix()
signal_processing.create_df()

In [8]:
signal_processing.df.head()

Unnamed: 0,Time,C 1,C 3,Timestamp,Lecture code
0,0.0,0.18042,-0.536715,1600423000000.0,0
1,0.003906,36.072495,1.873235,1600423000000.0,0
2,0.007812,49.487203,3.505922,1600423000000.0,0
3,0.011719,34.852487,3.64157,1600423000000.0,0
4,0.015625,8.28647,2.019936,1600423000000.0,0


In [9]:
signal_processing.get_periodos()

[(2568, 6408),
 (8976, 12816),
 (15376, 19224),
 (21784, 25632),
 (28192, 32032),
 (34600, 38432),
 (41000, 44848),
 (47408, 51248),
 (53816, 57656),
 (60216, 64064),
 (66624, 70464),
 (73032, 76872),
 (79440, 83280),
 (85840, 89680),
 (92240, 96088),
 (98648, 102488),
 (105048, 108888),
 (111464, 115296)]

In [10]:
len(signal_processing.get_periodos())

18

In [11]:
signal_processing.get_elements_per_window()

3840

In [12]:
matriz = signal_processing.matriz

In [13]:
matriz.keys()

dict_keys([101, 103, 102])

In [14]:
len(matriz[101]),len(matriz[101][0]),len(matriz[101][0][0]),

(2, 450, 51)

In [15]:
len(matriz[102]),len(matriz[102][0]),len(matriz[102][0][0]),

(2, 450, 51)

In [16]:
len(matriz[103]),len(matriz[103][0]),len(matriz[103][0][0]),

(2, 450, 51)

In [17]:
psd_matriz = signal_processing.psd_matriz

In [18]:
psd_matriz.keys()

dict_keys([101, 103, 102])

In [19]:
len(psd_matriz[101]),len(psd_matriz[101][0]),len(psd_matriz[101][0][0]),

(2, 450, 26)

In [20]:
len(psd_matriz[102]),len(psd_matriz[102][0]),len(psd_matriz[102][0][0]),

(2, 450, 26)

In [21]:
df_psd = signal_processing.df_psd

In [22]:
df_psd.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,43,44,45,46,47,48,49,50,51,52
0,1.142341e-34,0.021134,0.042248,0.01872,0.153239,0.056538,0.012819,0.039911,0.021316,0.004667,...,0.000299,0.000278,0.000262,0.000249,0.000239,0.000231,0.000225,0.000222,0.00022,101
1,1.510533e-35,0.511206,0.135747,0.214426,0.057022,0.09769,0.025796,0.20194,0.02928,0.002306,...,0.00042,0.000393,0.000372,0.000354,0.000341,0.00033,0.000323,0.000318,0.000316,101
2,0.0,0.341286,0.28089,0.224551,0.36513,0.134873,0.055329,0.002446,0.003636,0.001134,...,0.000331,0.000284,0.000249,0.000224,0.000205,0.000191,0.000182,0.000176,0.000173,101
3,3.776333e-36,0.096983,0.009131,0.041878,0.025238,0.08739,0.004583,0.082856,0.038574,0.007825,...,0.000138,0.000126,0.000117,0.00011,0.000105,0.000101,9.8e-05,9.6e-05,9.5e-05,101
4,1.720592e-34,0.095673,0.527565,0.010894,0.113393,0.058851,0.180729,0.052457,0.013758,0.00069,...,0.000109,9.6e-05,8.6e-05,7.8e-05,7.3e-05,6.9e-05,6.6e-05,6.4e-05,6.4e-05,101


In [23]:
df_psd.shape

(1350, 53)

In [24]:
type(df_psd.iloc[:,-1])

pandas.core.series.Series

In [25]:
dic = {101:0,102:0,103:0}

for element in signal_processing.df_psd.iloc[:,-1]:
    dic[element]+=1
dic

{101: 450, 102: 450, 103: 450}

In [26]:
archivo = 'Izquierda - Derecha - Cerrado 1.txt'
data = pd.read_csv(archivo, sep='\t', header=None)
data.columns = ['Time', 'C 1', 'C 2', 'C 3', 'C 4', 'Timestamp', 'Lecture code', 'Dummy']
del data['Dummy']
del data['C 2']
del data['C 4']

In [27]:
ventanas = [.1, .2, .5, 1, 2, 5, 10, 15]
modelos = ['SVC linear .25', 'SVC', 'KNeighbors', 'Decision Tree', 'Multi layer perceptron', 'Naive Bayes']
resultados_ventanas = []

for ventana in ventanas:
    signal_processing = SignalProcessing(data, canales)
    signal_processing.apply_emg_filter()
    signal_processing.create_matriz(ventana)
    signal_processing.generate_psd_matrix()
    signal_processing.create_df()
    df_psd = signal_processing.df_psd
    X = df_psd.iloc[:,:-1].values
    y = [element[0] for element in df_psd.iloc[:,-1:].values]

#   Classify
    models = [
                svm.SVC(kernel='linear', C=.025, random_state=42), 
                svm.SVC(), 
                KNeighborsClassifier(n_neighbors=len(np.unique(y))), 
                DecisionTreeClassifier(random_state=0),
                MLPClassifier(max_iter=1000),
                GaussianNB()
            ]
    classifiers_scores = []
    for clf in models:
        scores = cross_val_score(clf, X, y, cv=5)
        classifiers_scores.append(scores.mean())
    resultados_ventanas.append(classifiers_scores)

In [28]:
pd.DataFrame(   data=resultados_ventanas,    # values
                index=ventanas,    # 1st column as index
                columns=modelos)

Unnamed: 0,SVC linear .25,SVC,KNeighbors,Decision Tree,Multi layer perceptron,Naive Bayes
0.1,0.967325,0.97168,0.951713,0.954614,0.986203,0.965512
0.2,0.984444,0.981481,0.955556,0.927407,0.98963,0.97037
0.5,0.988872,0.983316,0.961059,0.935029,0.988872,0.981395
1.0,0.985185,0.985185,0.962893,0.932914,0.974074,0.985045
2.0,1.0,0.992,0.928923,0.920308,0.976923,1.0
5.0,1.0,1.0,0.830909,0.749091,0.945455,0.96
10.0,1.0,0.95,0.683333,1.0,0.5,0.8
15.0,0.95,1.0,0.7,0.6,0.483333,0.866667


In [29]:
archivo = 'Izquierda - Derecha - Cerrado 1.txt'
data = pd.read_csv(archivo, sep='\t', header=None)
data.columns = ['Time', 'C 1', 'C 2', 'C 3', 'C 4', 'Timestamp', 'Lecture code', 'Dummy']
del data['Dummy']
del data['C 2']
del data['C 4']

signal_processing = SignalProcessing(data, canales)
signal_processing.apply_emg_filter()
signal_processing.create_matriz(.2)
signal_processing.generate_psd_matrix()
signal_processing.create_df()
df_psd = signal_processing.df_psd
X = df_psd.iloc[:,:-1].values
y = [element[0] for element in df_psd.iloc[:,-1:].values]

In [30]:
dic = {101:0,102:0,103:0}

for element in y:
    dic[element]+=1
dic

{101: 450, 102: 450, 103: 450}

In [31]:
clf = MLPClassifier(max_iter=1000)
score = cross_val_score(clf, X, y, cv=5)
np.mean(score)

0.9896296296296295

In [32]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [33]:
clf = MLPClassifier(max_iter=1000)
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)
np.mean(score)

1.0

In [34]:
clf = MLPClassifier(max_iter=1000)
clf.fit(X_train, y_train)
total = len(X_test)
correct = 0
for i in range(0,total):
    prediction = clf.predict([X_test[i]])
    if prediction == y_test[i]:
        correct+=1
print(correct*100/total)

100.0


In [35]:
clf = MLPClassifier(max_iter=1000)
clf.fit(X, y)

MLPClassifier(max_iter=1000)

In [36]:
archivo = 'Izquierda - Derecha - Cerrado 2.txt'
data = pd.read_csv(archivo, sep='\t', header=None)
data.columns = ['Time', 'C 1', 'C 2', 'C 3', 'C 4', 'Timestamp', 'Lecture code', 'Dummy']
del data['Dummy']
del data['C 2']
del data['C 4']

signal_processing = SignalProcessing(data, canales)
signal_processing.apply_emg_filter()
signal_processing.create_matriz(.2)
signal_processing.generate_psd_matrix()
signal_processing.create_df()
df_psd = signal_processing.df_psd
X = df_psd.iloc[:,:-1].values
y = [element[0] for element in df_psd.iloc[:,-1:].values]

In [37]:
total = len(X)
correct = 0
for i in range(0,total):
    prediction = clf.predict([X[i]])
    if prediction == y[i]:
        correct+=1
print(correct*100/total, correct, 'over', total)

99.33333333333333 1341 over 1350


### Lets do pseudo-online

In [38]:
class SignalProcessingOnline:

    def __init__(self, df, columnas):
        self.df = df
        self.columnas = columnas

    def apply_emg_filter(self):
        for columna in self.columnas:   
            self.df[columna], _, _ = self.filter_emg(self.df[columna])

    def filter_emg(self, emg, low_pass=10, sfreq = 256, high_band = 4, low_band = 40):
        high_band = high_band/(sfreq/2)
        low_band = low_band/(sfreq/2)
        b1, a1 = sp.signal.butter(4, [high_band, low_band], btype='bandpass')
        emg_filtered = sp.signal.filtfilt(b1,a1,emg)
        emg_rectified = abs(emg_filtered)
        low_pass = low_pass/(sfreq/2)
        b2, a2 = sp.signal.butter(4, low_pass, btype='lowpass')
        emg_envelope= sp.signal.filtfilt(b2, a2, emg_rectified)
        return emg_filtered, emg_rectified, emg_envelope

    def get_periodos(self, per_time = False):
        lecture_codes = self.df['Lecture code'].values
        times = self.df['Time'].values
        periodos = []
        size = len(times)
        active_signal = False
        current_period = []
        for i in range(size):
            code = int(lecture_codes[i])
            time = int(times[i])
            append = i if not per_time else times[i]

            if code>=100 and code <=110:
                active_signal = True

            if active_signal and len(current_period)==0:
                current_period.append(append) # time

            if code >= 200 and active_signal:
                active_signal = False
                current_period.append(append) # time
                periodos.append(tuple(current_period))
                current_period = []
        self.periodos = periodos
        return self.periodos

    def process_data(self):
        data = []
        new_canales = []
        for canal in self.columnas:
            arr = self.df[canal].values.tolist()
            data.extend(arr)

        for canal in self.columnas:
            arr = self.df[canal].values.tolist()
            freqs, psd = sp_signal.periodogram(arr, 256)
            new_canales.extend(psd)
            
#         print(data)
#         print(data)
        freqs, psd = sp_signal.periodogram(data, 256)
        freqs2, psd2 = sp_signal.periodogram(new_canales, 256)
#         print(len(psd))
#         print(psd)
#         print('//')
#         print(len(psd2))
#         print(psd2)
        self.psd = [psd] #.reshape(-1,1)
    
    

In [39]:
pd.options.mode.chained_assignment = None  # default='warn'

In [40]:
archivo = 'Izquierda - Derecha - Cerrado 1.txt'
data = pd.read_csv(archivo, sep='\t', header=None)
data.columns = ['Time', 'C 1', 'C 2', 'C 3', 'C 4', 'Timestamp', 'Lecture code', 'Dummy']
del data['Dummy']
del data['C 2']
del data['C 4']

signal_processing = SignalProcessing(data, canales)
signal_processing.apply_emg_filter()
signal_processing.create_matriz(.2)
signal_processing.generate_psd_matrix()
signal_processing.create_df()
df_psd = signal_processing.df_psd
X = df_psd.iloc[:,:-1].values
y = [element[0] for element in df_psd.iloc[:,-1:].values]

clf = MLPClassifier(max_iter=1000)
clf.fit(X, y)

MLPClassifier(max_iter=1000)

In [41]:
size_window = .2
items_per_window = int(3840 * size_window/15)
archivo = 'Izquierda - Derecha - Cerrado 2.txt'
canales = ['C 1', 'C 3']
df = open_file(archivo, canales)
# fill df with actual values

In [42]:
lecture_codes = df['Lecture code'].values
new_lecture_codes = []
expected = {101:0,102:0,103:0, 0:0}
size = len(lecture_codes)
active_signal = False
last_signal = 0
for i in range(size):
    code = int(lecture_codes[i])

    if code>=100 and code <=110:
        expected[code]+=1
        active_signal = True
        last_signal = code

    elif code >= 200 and active_signal:
        active_signal = False
        last_signal = 0
    new_lecture_codes.append(last_signal)
df['Lecture code'] = new_lecture_codes

In [43]:
df.shape[0], items_per_window, expected

(116560, 51, {101: 6, 102: 6, 103: 6, 0: 0})

In [44]:
for key,val in expected.items():
    print(key, 'aprox', val*items_per_window)

101 aprox 306
102 aprox 306
103 aprox 306
0 aprox 0


In [45]:
dic = {}
vals = np.unique(df['Lecture code'].values)
for val in vals:
    dic[val] = {}
    for rep_val in vals:
        dic[val][rep_val] = 0

current_items = 0
initial = 0

for i in range(df.shape[0]):
    if current_items < items_per_window:
        current_items +=1
    else: #current element, ya es parte del nuevo
        current_df = df.iloc[initial:i, :-1]
        current_actual = df.iloc[initial:initial+1, -1].to_numpy()[0]
        signal_processing = SignalProcessingOnline(current_df, canales)
        signal_processing.apply_emg_filter()
        signal_processing.process_data()
        predicted = clf.predict(signal_processing.psd)[0]
        dic[current_actual][predicted]+=1
#         print(current_actual, predicted)
        current_items = 0
        initial=i
        current_items+=1

In [46]:
rows = []
for key,val in dic.items():
    row = []
    for k,v in val.items():
        row.append(v)
    rows.append(row)
df_results = pd.DataFrame(rows, index=vals, columns=vals)
df_results

Unnamed: 0,0,101,102,103
0,0,570,360,0
101,0,432,18,0
102,0,0,451,0
103,0,0,454,0


In [47]:
df_results = pd.DataFrame(rows, index=vals, columns=vals)

In [48]:
dic

{0: {0: 0, 101: 570, 102: 360, 103: 0},
 101: {0: 0, 101: 432, 102: 18, 103: 0},
 102: {0: 0, 101: 0, 102: 451, 103: 0},
 103: {0: 0, 101: 0, 102: 454, 103: 0}}

In [49]:
df_results # el que es -> el que se predijo

Unnamed: 0,0,101,102,103
0,0,570,360,0
101,0,432,18,0
102,0,0,451,0
103,0,0,454,0


### Por que no predecimos 103?

In [50]:
archivo = 'Izquierda - Derecha - Cerrado 1.txt'
df = open_file(archivo, canales)

signal_processing = SignalProcessing(data, canales)
signal_processing.apply_emg_filter()
signal_processing.create_matriz(.2)
signal_processing.generate_psd_matrix()
signal_processing.create_df()
df_psd = signal_processing.df_psd
X = df_psd.iloc[:,:-1].values
y = [element[0] for element in df_psd.iloc[:,-1:].values]

# clf = MLPClassifier(max_iter=1000)
# clf.fit(X, y)

In [51]:
df_psd

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,43,44,45,46,47,48,49,50,51,52
0,3.776333e-36,0.005698,0.045069,0.017938,0.147490,0.053072,0.009207,0.032899,0.006309,0.000423,...,0.000208,0.000195,0.000184,0.000175,0.000169,0.000164,0.000160,0.000158,0.000156,101
1,3.776333e-34,0.423771,0.130209,0.207765,0.051267,0.091826,0.019561,0.120557,0.009963,0.000701,...,0.000531,0.000496,0.000469,0.000446,0.000429,0.000416,0.000406,0.000400,0.000397,101
2,1.510533e-35,0.403248,0.262753,0.214964,0.355648,0.142109,0.045229,0.003291,0.001728,0.000069,...,0.000422,0.000374,0.000337,0.000310,0.000290,0.000275,0.000264,0.000257,0.000254,101
3,3.398700e-35,0.072276,0.008411,0.043142,0.024827,0.086455,0.005744,0.043517,0.018495,0.003812,...,0.000129,0.000118,0.000110,0.000104,0.000099,0.000095,0.000092,0.000091,0.000090,101
4,3.988752e-35,0.061281,0.547886,0.009341,0.112313,0.055645,0.164605,0.030041,0.005738,0.000463,...,0.000056,0.000049,0.000044,0.000040,0.000037,0.000035,0.000034,0.000033,0.000033,101
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1345,2.664581e-32,0.367963,0.481413,1.103113,0.010149,1.428832,4.422120,19.977472,0.073639,0.078420,...,0.001702,0.001563,0.001456,0.001372,0.001307,0.001259,0.001224,0.001201,0.001191,102
1346,6.042133e-33,0.003174,0.854397,0.229049,1.901573,3.797868,7.615096,0.883128,0.231851,0.051759,...,0.001726,0.001575,0.001458,0.001369,0.001300,0.001249,0.001212,0.001189,0.001178,102
1347,2.960645e-33,0.737420,0.143587,1.178977,1.162092,0.350245,1.908359,3.562173,0.425687,0.099909,...,0.001413,0.001312,0.001231,0.001168,0.001119,0.001081,0.001054,0.001037,0.001028,102
1348,5.437920e-34,0.186893,0.690969,2.523649,4.604972,2.772378,1.539438,1.566272,0.031911,0.065341,...,0.000086,0.000065,0.000050,0.000039,0.000031,0.000025,0.000022,0.000019,0.000018,102


In [52]:
df_psd[df_psd[52]==103]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,43,44,45,46,47,48,49,50,51,52
450,2.416853e-34,0.042389,1.085341,1.274262,0.692661,0.641395,1.599308,0.121032,0.101325,0.035597,...,0.001296,0.001116,0.000985,0.000889,0.000819,0.000768,0.000732,0.000710,0.000699,103
451,6.042133e-35,0.004499,0.140678,0.079313,0.129862,0.636304,0.257948,0.135004,0.212969,0.015867,...,0.000919,0.000847,0.000790,0.000746,0.000712,0.000687,0.000668,0.000656,0.000651,103
452,9.667413e-34,0.110433,0.162583,1.287705,1.580436,0.992751,5.311230,0.309964,0.049739,0.002877,...,0.000427,0.000346,0.000289,0.000248,0.000219,0.000198,0.000184,0.000175,0.000170,103
453,0.000000e+00,0.163074,0.563854,2.839565,0.881028,0.511309,4.102357,0.390121,0.100039,0.073312,...,0.012890,0.011809,0.010970,0.010320,0.009820,0.009445,0.009178,0.009006,0.008921,103
454,1.359480e-34,0.219686,0.179213,1.598798,0.242739,0.348052,1.723487,0.296930,0.039751,0.048248,...,0.000405,0.000286,0.000203,0.000144,0.000103,0.000074,0.000055,0.000043,0.000037,103
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
895,1.510533e-33,0.050580,1.005463,0.482324,0.542986,0.513163,1.803344,2.323786,0.177108,0.027721,...,0.029022,0.026691,0.024870,0.023450,0.022355,0.021531,0.020942,0.020562,0.020376,103
896,6.042133e-35,0.362404,0.394104,0.676869,0.588015,0.399932,0.352485,0.068002,0.380282,0.037832,...,0.000412,0.000361,0.000322,0.000294,0.000272,0.000256,0.000245,0.000238,0.000235,103
897,1.510533e-33,0.378125,0.070465,1.433291,2.319717,0.014033,0.435983,1.005683,0.361797,0.016046,...,0.012435,0.011414,0.010618,0.009999,0.009521,0.009163,0.008907,0.008741,0.008660,103
898,9.667413e-34,0.289740,0.080393,1.306972,1.011859,6.542231,0.040114,0.152457,0.135493,0.080790,...,0.000116,0.000080,0.000055,0.000037,0.000024,0.000014,0.000008,0.000004,0.000002,103


### Lets do pseudo code but count until we have valid code

In [53]:
size_window = .2
items_per_window = int(3840 * size_window/15)
archivo = 'Izquierda - Derecha - Cerrado 2.txt'
canales = ['C 1', 'C 3']
df = open_file(archivo, canales)

In [54]:
dic = {}
vals = [101,102,103,0]
for val in vals:
    dic[val] = {}
    for rep_val in vals:
        dic[val][rep_val] = 0

lecture_codes = df['Lecture code'].values

size = len(lecture_codes)
active_signal = False
last_signal = 0

current_count = 0
initial_pos = 0
total = 0
for i in range(size):
    code = int(lecture_codes[i])

    if code>=100 and code <=110 and not active_signal:
        initial_pos = i
        current_count+=1
        active_signal = True
        last_signal = code
    elif code<200 and active_signal and current_count==items_per_window:
        current_df = df.iloc[initial_pos:i, :-1]
        current_actual = df.iloc[initial_pos:initial_pos+1, -1].to_numpy()[0]
        signal_processing = SignalProcessingOnline(current_df, canales)
        signal_processing.apply_emg_filter()
        signal_processing.process_data()
        predicted = clf.predict(signal_processing.psd)[0]
        dic[last_signal][predicted]+=1
#         print('actual vs pred, act', last_signal, predicted, current_actual)
        initial_pos = i
        current_count = 1
        total+=1
    elif code<200 and active_signal:
        current_count+=1
    elif code >= 200 and active_signal:
        active_signal = False
        initial_pos = i
        last_signal = 0
        current_count = 0


In [55]:
dic

{101: {101: 431, 102: 19, 103: 0, 0: 0},
 102: {101: 0, 102: 450, 103: 0, 0: 0},
 103: {101: 0, 102: 450, 103: 0, 0: 0},
 0: {101: 0, 102: 0, 103: 0, 0: 0}}

In [56]:
rows = []
for key,val in dic.items():
    row = []
    for k,v in val.items():
        row.append(v)
    rows.append(row)
df_results = pd.DataFrame(rows, index=vals, columns=vals)
df_results

Unnamed: 0,101,102,103,0
101,431,19,0,0
102,0,450,0,0
103,0,450,0,0
0,0,0,0,0


In [None]:
archivo = 'Izquierda - Derecha - Cerrado 1.txt'
df = open_file(archivo, canales)

signal_processing = SignalProcessing(data, canales)
signal_processing.apply_emg_filter()
signal_processing.create_matriz(.2)
signal_processing.generate_psd_matrix()
signal_processing.create_df()
df_psd = signal_processing.df_psd
X = df_psd.iloc[:,:-1].values
y = [element[0] for element in df_psd.iloc[:,-1:].values]

# clf = MLPClassifier(max_iter=1000)
# clf.fit(X, y)

In [57]:
archivo = 'Izquierda - Derecha - Cerrado 2.txt'
data = pd.read_csv(archivo, sep='\t', header=None)
data.columns = ['Time', 'C 1', 'C 2', 'C 3', 'C 4', 'Timestamp', 'Lecture code', 'Dummy']
del data['Dummy']
del data['C 2']
del data['C 4']

signal_processing = SignalProcessing(data, canales)
signal_processing.apply_emg_filter()
signal_processing.create_matriz(.2)
signal_processing.generate_psd_matrix()
signal_processing.create_df()
df_psd = signal_processing.df_psd
X = df_psd.iloc[:,:-1].values
y = [element[0] for element in df_psd.iloc[:,-1:].values]

dic = {}
vals = [101,102,103,0]
for val in vals:
    dic[val] = {}
    for rep_val in vals:
        dic[val][rep_val] = 0

total = len(X)
correct = 0
for i in range(0,total):
    prediction = clf.predict([X[i]])[0]
    actual = int(y[i])
    dic[actual][prediction]+=1
    if prediction == y[i]:
        correct+=1
print(correct*100/total, correct, 'over', total)

99.33333333333333 1341 over 1350


In [58]:
rows = []
for key,val in dic.items():
    row = []
    for k,v in val.items():
        row.append(v)
    rows.append(row)
df_results = pd.DataFrame(rows, index=vals, columns=vals)
df_results

Unnamed: 0,101,102,103,0
101,450,0,0,0
102,3,447,0,0
103,0,6,444,0
0,0,0,0,0


### Parece estar bien si lo hago con signal processing

In [59]:
size_window = .2
items_per_window = int(3840 * size_window/15)
archivo = 'Izquierda - Derecha - Cerrado 2.txt'
canales = ['C 1', 'C 3']
df = open_file(archivo, canales)

In [60]:
lecture_codes = df['Lecture code'].values
new_lecture_codes = []
expected = {101:0,102:0,103:0, 0:0}
size = len(lecture_codes)
active_signal = False
last_signal = 0
for i in range(size):
    code = int(lecture_codes[i])

    if code>=100 and code <=110:
        expected[code]+=1
        active_signal = True
        last_signal = code

    elif code >= 200 and active_signal:
        active_signal = False
        last_signal = 0
    new_lecture_codes.append(last_signal)
df['Lecture code'] = new_lecture_codes

In [61]:
df[df['Lecture code']==103]

Unnamed: 0,Time,C 1,C 3,Timestamp,Lecture code
8968,35.031250,3.681093,3.738633,1.600424e+12,103
8969,35.035156,-1.410116,19.274263,1.600424e+12,103
8970,35.039062,-1.345897,34.137524,1.600424e+12,103
8971,35.042969,14.263177,15.115622,1.600424e+12,103
8972,35.046875,26.993279,-25.756941,1.600424e+12,103
...,...,...,...,...,...
108883,425.324219,-6.457112,-2.883539,1.600424e+12,103
108884,425.328125,-1.587267,-1.080359,1.600424e+12,103
108885,425.332031,9.740116,-4.414288,1.600424e+12,103
108886,425.335938,11.458064,-6.047953,1.600424e+12,103


### Lets try svc

In [62]:
class SignalProcessingOnline:

    def __init__(self, df, columnas):
        self.df = df
        self.columnas = columnas

    def apply_emg_filter(self):
        for columna in self.columnas:   
            self.df[columna], _, _ = self.filter_emg(self.df[columna])

    def filter_emg(self, emg, low_pass=10, sfreq = 256, high_band = 4, low_band = 40):
        high_band = high_band/(sfreq/2)
        low_band = low_band/(sfreq/2)
        b1, a1 = sp.signal.butter(4, [high_band, low_band], btype='bandpass')
        emg_filtered = sp.signal.filtfilt(b1,a1,emg)
        emg_rectified = abs(emg_filtered)
        low_pass = low_pass/(sfreq/2)
        b2, a2 = sp.signal.butter(4, low_pass, btype='lowpass')
        emg_envelope= sp.signal.filtfilt(b2, a2, emg_rectified)
        return emg_filtered, emg_rectified, emg_envelope

    def get_periodos(self, per_time = False):
        lecture_codes = self.df['Lecture code'].values
        times = self.df['Time'].values
        periodos = []
        size = len(times)
        active_signal = False
        current_period = []
        for i in range(size):
            code = int(lecture_codes[i])
            time = int(times[i])
            append = i if not per_time else times[i]

            if code>=100 and code <=110:
                active_signal = True

            if active_signal and len(current_period)==0:
                current_period.append(append) # time

            if code >= 200 and active_signal:
                active_signal = False
                current_period.append(append) # time
                periodos.append(tuple(current_period))
                current_period = []
        self.periodos = periodos
        return self.periodos

    def process_data(self):
        data = []
        new_epochs = []
        for canal in self.columnas:
            arr = self.df[canal].values.tolist()
            data.extend(arr)

        for canal in self.columnas:
            arr = self.df[canal].values.tolist()
            freqs, psd = sp_signal.periodogram(arr, 256)
            new_epochs.append(psd)
#             print('before, after psd', len(arr), len(psd))
        new_data = []
        for epoch in new_epochs:
            new_data.extend(epoch)
#         print('new data', len(new_data))
#         print('data vs new epochs', len(data), len(new_epochs), len(new_epochs[0]), len(new_epochs[1]))
#         print(data)
        freqs, psd = sp_signal.periodogram(data, 256)
        freqs2, psd2 = sp_signal.periodogram(new_data, 256)

        self.psd = [new_data] #.reshape(-1,1)
    
    

In [63]:
archivo = 'Izquierda - Derecha - Cerrado 1.txt'
data = pd.read_csv(archivo, sep='\t', header=None)
data.columns = ['Time', 'C 1', 'C 2', 'C 3', 'C 4', 'Timestamp', 'Lecture code', 'Dummy']
del data['Dummy']
del data['C 2']
del data['C 4']

size_window = .2
signal_processing = SignalProcessing(data, canales)
signal_processing.apply_emg_filter()
signal_processing.create_matriz(size_window)
signal_processing.generate_psd_matrix()
signal_processing.create_df()
df_psd = signal_processing.df_psd
X = df_psd.iloc[:,:-1].values
y = [element[0] for element in df_psd.iloc[:,-1:].values]

clf = svm.SVC()
clf.fit(X, y)

SVC()

In [64]:
len(signal_processing.psd_matriz[101][0][0])

26

In [65]:
len(signal_processing.psd_matriz[101][1][0])

26

In [66]:
len(signal_processing.psd_matriz[101][2][0])

IndexError: list index out of range

In [67]:
signal_processing.df_psd.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,43,44,45,46,47,48,49,50,51,52
0,1.142341e-34,0.021134,0.042248,0.01872,0.153239,0.056538,0.012819,0.039911,0.021316,0.004667,...,0.000299,0.000278,0.000262,0.000249,0.000239,0.000231,0.000225,0.000222,0.00022,101
1,1.510533e-35,0.511206,0.135747,0.214426,0.057022,0.09769,0.025796,0.20194,0.02928,0.002306,...,0.00042,0.000393,0.000372,0.000354,0.000341,0.00033,0.000323,0.000318,0.000316,101
2,0.0,0.341286,0.28089,0.224551,0.36513,0.134873,0.055329,0.002446,0.003636,0.001134,...,0.000331,0.000284,0.000249,0.000224,0.000205,0.000191,0.000182,0.000176,0.000173,101
3,3.776333e-36,0.096983,0.009131,0.041878,0.025238,0.08739,0.004583,0.082856,0.038574,0.007825,...,0.000138,0.000126,0.000117,0.00011,0.000105,0.000101,9.8e-05,9.6e-05,9.5e-05,101
4,1.720592e-34,0.095673,0.527565,0.010894,0.113393,0.058851,0.180729,0.052457,0.013758,0.00069,...,0.000109,9.6e-05,8.6e-05,7.8e-05,7.3e-05,6.9e-05,6.6e-05,6.4e-05,6.4e-05,101


In [68]:
size_window = .2
items_per_window = int(3840 * size_window/15)
print(items_per_window)
archivo = 'Izquierda - Derecha - Cerrado 2.txt'
canales = ['C 1', 'C 3']
df = open_file(archivo, canales)

51


In [69]:
lecture_codes = df['Lecture code'].values
new_lecture_codes = []
expected = {101:0,102:0,103:0, 0:0}
size = len(lecture_codes)
active_signal = False
last_signal = 0
for i in range(size):
    code = int(lecture_codes[i])

    if code>=100 and code <=110:
        expected[code]+=1
        active_signal = True
        last_signal = code

    elif code >= 200 and active_signal:
        active_signal = False
        last_signal = 0
    new_lecture_codes.append(last_signal)
df['Lecture code'] = new_lecture_codes

In [70]:
dic = {}
vals = np.unique(df['Lecture code'].values)
for val in vals:
    dic[val] = {}
    for rep_val in vals:
        dic[val][rep_val] = 0

current_items = 0
initial = 0

for i in range(df.shape[0]):
    if current_items < items_per_window:
        current_items +=1
    else: #current element, ya es parte del nuevo
        current_df = df.iloc[initial:i, :-1]
        current_actual = df.iloc[initial:initial+1, -1].to_numpy()[0]
        signal_processing = SignalProcessingOnline(current_df, canales)
        signal_processing.apply_emg_filter()
        signal_processing.process_data()
        predicted = clf.predict(signal_processing.psd)[0]
        dic[current_actual][predicted]+=1
#         print(current_actual, predicted)
        current_items = 0
        initial=i
        current_items+=1

rows = []
for key,val in dic.items():
    row = []
    for k,v in val.items():
        row.append(v)
    rows.append(row)
df_results = pd.DataFrame(rows, index=vals, columns=vals)
df_results

In [71]:
rows = []
for key,val in dic.items():
    row = []
    for k,v in val.items():
        row.append(v)
    rows.append(row)
df_results = pd.DataFrame(rows, index=vals, columns=vals)
df_results

Unnamed: 0,0,101,102,103
0,0,646,212,72
101,0,449,1,0
102,0,4,447,0
103,0,2,36,416


### Markov time :D

In [72]:
from hmmlearn import hmm


In [73]:
archivo = 'Izquierda - Derecha - Cerrado 1.txt'
data = pd.read_csv(archivo, sep='\t', header=None)
data.columns = ['Time', 'C 1', 'C 2', 'C 3', 'C 4', 'Timestamp', 'Lecture code', 'Dummy']
del data['Dummy']
del data['C 2']
del data['C 4']

size_window = .2
signal_processing = SignalProcessing(data, canales)
signal_processing.apply_emg_filter()
signal_processing.create_matriz(size_window)
signal_processing.generate_psd_matrix()
signal_processing.create_df()
df_psd = signal_processing.df_psd
X = df_psd.iloc[:,:-1].values
y = [element[0] for element in df_psd.iloc[:,-1:].values]

In [74]:
codes = [101,102,103]
hmm_models = []
last_index = df_psd.shape[1]-1
for code in codes:
    current_df = df_psd[df_psd[last_index]==code]
    current_X = current_df.iloc[:,:-1].values
    current_X = np.nan_to_num(current_X)
#     for i in range(len(current_X)):
#         for j in range(len(current_X[i])):
#             current_X[i][j] = float(current_X[i][j])
#     print(current_X.shape)
#     print(current_X[0][0])
#     print(type(float(current_X[0][0])))
#     print(float(current_X[0][0]))
#     print(current_df.iloc[:,:])
#     print(np.isfinite(current_X).all())
    clf = hmm.GaussianHMM(n_components=1,covariance_type='diag',n_iter=1000)
    clf.fit(current_X)
    hmm_models.append((clf, code))
#     break
#     break
#     hmm_trainer.train(X)
#     hmm_models.append((hmm_trainer, label))

In [75]:
hmm_models[0][0].transmat_

array([[1.]])

In [76]:
size_window = .2
items_per_window = int(3840 * size_window/15)
print(items_per_window)
archivo = 'Izquierda - Derecha - Cerrado 2.txt'
canales = ['C 1', 'C 3']
df = open_file(archivo, canales)

51


In [77]:
lecture_codes = df['Lecture code'].values
new_lecture_codes = []
expected = {101:0,102:0,103:0, 0:0}
size = len(lecture_codes)
active_signal = False
last_signal = 0
for i in range(size):
    code = int(lecture_codes[i])

    if code>=100 and code <=110:
        expected[code]+=1
        active_signal = True
        last_signal = code

    elif code >= 200 and active_signal:
        active_signal = False
        last_signal = 0
    new_lecture_codes.append(last_signal)
df['Lecture code'] = new_lecture_codes

In [78]:
dic = {}
vals = np.unique(df['Lecture code'].values)
for val in vals:
    dic[val] = {}
    for rep_val in vals:
        dic[val][rep_val] = 0

current_items = 0
initial = 0

for i in range(df.shape[0]):
    if current_items < items_per_window:
        current_items +=1
    else: #current element, ya es parte del nuevo
        current_df = df.iloc[initial:i, :-1]
        current_actual = df.iloc[initial:initial+1, -1].to_numpy()[0]
        signal_processing = SignalProcessingOnline(current_df, canales)
        signal_processing.apply_emg_filter()
        signal_processing.process_data()
        max_score = -9999999999999999999
        output_label = None
        for item in hmm_models:
            hmm_model, label = item
#             print(label)
            score = hmm_model.score(signal_processing.psd)
#             print(score)
            if score > max_score:
                max_score = score
                output_label = label
        predicted = output_label
#         predicted = clf.predict(signal_processing.psd)[0]
        dic[current_actual][predicted]+=1
#         print(current_actual, predicted)
        current_items = 0
        initial=i
        current_items+=1


In [79]:
rows = []
for key,val in dic.items():
    row = []
    for k,v in val.items():
        row.append(v)
    rows.append(row)
df_results = pd.DataFrame(rows, index=vals, columns=vals)
df_results

Unnamed: 0,0,101,102,103
0,0,554,217,159
101,0,439,7,4
102,0,0,451,0
103,0,0,22,432


### Final review

In [80]:
archivo = 'Izquierda - Derecha - Cerrado 1.txt'
data = pd.read_csv(archivo, sep='\t', header=None)
data.columns = ['Time', 'C 1', 'C 2', 'C 3', 'C 4', 'Timestamp', 'Lecture code', 'Dummy']
del data['Dummy']
del data['C 2']
del data['C 4']

size_window = .2
signal_processing = SignalProcessing(data, canales)
signal_processing.apply_emg_filter()
signal_processing.create_matriz(size_window)
signal_processing.generate_psd_matrix()
signal_processing.create_df()
df_psd = signal_processing.df_psd
X = df_psd.iloc[:,:-1].values
y = [element[0] for element in df_psd.iloc[:,-1:].values]

clf_svc = svm.SVC()
clf_svc.fit(X, y)

clf_mlp = MLPClassifier(max_iter=1000)
clf_mlp.fit(X_train, y_train)

codes = [101,102,103]
hmm_models = []
last_index = df_psd.shape[1]-1
for code in codes:
    current_df = df_psd[df_psd[last_index]==code]
    current_X = current_df.iloc[:,:-1].values
    current_X = np.nan_to_num(current_X)
    clf = hmm.GaussianHMM(n_components=1,covariance_type='diag',n_iter=1000)
    clf.fit(current_X)
    hmm_models.append((clf, code))

In [83]:
models = [clf_svc, clf_mlp]

In [81]:
size_window = .2
items_per_window = int(3840 * size_window/15)
print(items_per_window)
archivo = 'Izquierda - Derecha - Cerrado 2.txt'
canales = ['C 1', 'C 3']
df = open_file(archivo, canales)

51


In [82]:
lecture_codes = df['Lecture code'].values
new_lecture_codes = []
expected = {101:0,102:0,103:0, 0:0}
size = len(lecture_codes)
active_signal = False
last_signal = 0
for i in range(size):
    code = int(lecture_codes[i])

    if code>=100 and code <=110:
        expected[code]+=1
        active_signal = True
        last_signal = code

    elif code >= 200 and active_signal:
        active_signal = False
        last_signal = 0
    new_lecture_codes.append(last_signal)
df['Lecture code'] = new_lecture_codes

In [95]:
dics = {
    'svc' : {},
    'mlp': {},
    'markov': {}
}


vals = np.unique(df['Lecture code'].values)
for key,obj in dics.items():
    dic = {}
    for val in vals:
        dic[val] = {}
        for rep_val in vals:
            dic[val][rep_val] = 0
    dics[key] = dic

current_items = 0
initial = 0

for i in range(df.shape[0]):
    if current_items < items_per_window:
        current_items +=1
    else: #current element, ya es parte del nuevo
        current_df = df.iloc[initial:i, :-1]
        current_actual = df.iloc[initial:initial+1, -1].to_numpy()[0]
        signal_processing = SignalProcessingOnline(current_df, canales)
        signal_processing.apply_emg_filter()
        signal_processing.process_data()
        predicted_svc = clf_svc.predict(signal_processing.psd)[0]
        predicted_mlp = clf_mlp.predict(signal_processing.psd)[0]
        max_score = -9999999999999999999
        predicted_markov = None
        for item in hmm_models:
            hmm_model, label = item
#             print(label)
            score = hmm_model.score(signal_processing.psd)
#             print(score)
            if score > max_score:
                max_score = score
                predicted_markov = label

        dics['svc'][current_actual][predicted_svc]+=1
        dics['mlp'][current_actual][predicted_mlp]+=1
        dics['markov'][current_actual][predicted_markov]+=1
#         print(current_actual, predicted)
        current_items = 0
        initial=i
        current_items+=1

In [96]:
dfs = []
for name,content in dics.items():
    rows = []
    for key,val in content.items():
        row = []
        for k,v in val.items():
            row.append(v)
        rows.append(row)
    df_results = pd.DataFrame(rows, index=vals, columns=vals)
    dfs.append([df_results, name])

In [97]:
print(dfs[0][1])
dfs[0][0]

svc


Unnamed: 0,0,101,102,103
0,0,646,212,72
101,0,449,1,0
102,0,4,447,0
103,0,2,36,416


In [98]:
print(dfs[1][1])
dfs[1][0]

mlp


Unnamed: 0,0,101,102,103
0,0,661,170,99
101,0,450,0,0
102,0,3,448,0
103,0,20,11,423


In [99]:
print(dfs[2][1])
dfs[2][0]

markov


Unnamed: 0,0,101,102,103
0,0,554,217,159
101,0,439,7,4
102,0,0,451,0
103,0,0,22,432
