In [None]:
%matplotlib inline
import numpy as np
import scipy as sp
from scipy import signal
import scipy.io as io
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

from os import path
from sklearn import metrics, svm, preprocessing, datasets, linear_model
from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit, cross_val_score, train_test_split

In [None]:
# generamos un electrodo promedio por cada persona 
# (promediando los 5 electrodos del enunciado que se encuentran juntos)
# y los guardamos en disco. De esta manera a partir de ahora vamos a trabajar solo con esos archivos que son 
# mucho más livianos y tiene toda la información que necesitamos, 894 epochs por cada paciente.

electrodos_a_promediar = [7, 43, 79, 130, 184]

for t in ['P', 'S']:
    for i in range(1,11): 
        try:
            data_file = path.join('./data/',t) + "%02d" % (i) + '.mat'
            elec_prom_file = './dumps/elec_prom_' + t + "%02d" % (i) + '.dmp'
            data = io.loadmat(data_file)['data']
            electrodo_promedio = data[:,electrodos_a_promediar,:].mean(axis=1)
            electrodo_promedio.dump(elec_prom_file)
        except Exception as exception:
            print(str(exception))
            pass

In [None]:
def calculate_welchs(data):
    welchs_by_epoch = np.empty((len(data),2,101))

    for i, epoch in enumerate(data):
        f,p = signal.welch(x=epoch, fs=250)
        welchs_by_epoch[i] = np.array([f,p])
    
    return welchs_by_epoch

In [None]:

welchs = []

for i_type, t in enumerate(['P', 'S']):
    for i in range(1,11):
        try:
            file = './dumps/elec_prom_' + t + "%02d" % (i) + '.dmp'
            print(file)
            data = np.load(file)
            welchs.append(calculate_welchs(data)) 
            #print(data.shape)
            #print(repr(calculate_welchs(data)))
        except Exception as exception:
            print(str(exception))
            pass

In [None]:
def calculate_epochs_power_mean(welchs_by_epoch, frequency_range):
    freqs = welchs_by_epoch[0,0,:]
    freqs_idxs = np.where((freqs>=frequency_range[0]) * (freqs<frequency_range[1]))
    return welchs_by_epoch[:,1,freqs_idxs].mean(axis=2)



frequencies_ranges = {"delta": (0,4), "theta": (4,8), "alpha": (8,13), "beta": (13,30), "gamma": (30,125)}
# matriz [paciente,banda_freq,epochs,potencia]

potencias_mean_std = np.zeros((20,5,2))
for i in range(20):
    for freq_idx,frequency_range in enumerate(frequencies_ranges.values()):
        frequency_epochs_power_means = calculate_epochs_power_mean(welchs[i],frequency_range)
        potencias_mean_std[i,freq_idx,0] = frequency_epochs_power_means.mean()
        potencias_mean_std[i,freq_idx,1] = frequency_epochs_power_means.std()

        #frequency_power_means.dump(name + 'power_means.dmp')

In [None]:
column_names_mean = list(map(lambda x: x+"_mean",frequencies_ranges.keys()))
df_mean = pd.DataFrame(preprocessing.scale(potencias_mean_std[:,:,0]),columns=column_names_mean)
column_names_std = list(map(lambda x: x+"_std",frequencies_ranges.keys()))
df_std = pd.DataFrame(preprocessing.scale(potencias_mean_std[:,:,1]),columns=column_names_std)

labels = list("P"*10 + "S"*10)
df_labels = pd.DataFrame({"labels":labels})
tabla = pd.concat([df_mean,df_std,df_labels],axis=1)

tabla

In [None]:
def graficar_roc(table, values_column_name, labels_column_name,positivo):

    fpr, tpr, thresholds = metrics.roc_curve( \
        table[labels_column_name].values, \
        table[values_column_name].values, \
        pos_label=positivo, drop_intermediate=False)
   
    roc_auc = metrics.auc(fpr, tpr)
   
    plt.figure()
    lw = 2
    plt.plot(fpr, tpr, color='darkorange',
             lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC for {0}'.format(values_column_name))
    plt.legend(loc="lower right")
    plt.show()


In [None]:
for label in (column_names_mean + column_names_std):
    graficar_roc(tabla,label, 'labels', 'S')


Logistic Regression

In [106]:
X = tabla.iloc[:,0]
print(X)
Y = tabla.iloc[:, -1]
print(Y)

clf = svm.SVC()

cross_val_score(clf, X,Y)

0     0.398470
1     0.743224
2     2.341435
3     0.555134
4     1.673580
5     0.902070
6     0.970275
7    -0.635269
8     0.099139
9     0.987717
10   -0.805729
11   -0.885972
12   -0.837870
13   -0.859230
14    0.024437
15   -0.464624
16   -1.165436
17   -0.958932
18   -1.131717
19   -0.950703
Name: delta_mean, dtype: float64
0     P
1     P
2     P
3     P
4     P
5     P
6     P
7     P
8     P
9     P
10    S
11    S
12    S
13    S
14    S
15    S
16    S
17    S
18    S
19    S
Name: labels, dtype: object




ValueError: Found input variables with inconsistent numbers of samples: [1, 12]

In [None]:
X = np.array(labels)
y = np.array(labels)
sss = StratifiedShuffleSplit(n_splits=2, test_size=0.1, random_state=0)

for train_index, test_index in sss.split(X, y):
    X_train, X_test, y_train, y_test = train_test_split(tabla['delta_mean'][train_index], tabla['labels'][train_index], test_size=0.4, random_state=0)

    clf = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)
    print(clf.score(X_test, y_test))
#    X_train = tabla['delta_mean'][train_index]
#    Y_train = tabla['labels'][train_index]
#    lasso = linear_model.Lasso()
#    print(cross_val_score(lasso, X_train, Y_train))
    print(X_train)
    print(Y_train)




Support Vector Machine

In [107]:

X = np.array(labels)
y = np.array(labels)
sss = StratifiedShuffleSplit(n_splits=2, test_size=0.1, random_state=0)



for train_index, test_index in sss.split(X, y):
    X_train = tabla.iloc[train_index, :-1]
    X_test = tabla.iloc[test_index, :-1]
    y = tabla.iloc[train_index, -1]
    y_test = tabla.iloc[test_index, -1]
    clf = svm.SVC()
    clf.fit(X_train, y)

    print("TRA:", train_index)
    print("TEST:", test_index)
    print(clf.predict(X_test))
    print(clf.score(X_test,y_test))
#    print("Tra: ", X_train)
    print("X_test: ", X_test)
    print("Y_test: ", y_test)
    #    print("X_train: ", X_train)
#    print("y: ", y)
    print(type(X_train))
#    print(type(y))
#    print(clf.predict(tabla.iloc[test_index, :-1]))

TRA: [ 7 16 15  4 17 19 11  2  9  1 12 10 13  6  8  0 18  3]
TEST: [ 5 14]
['P' 'S']
1.0
X_test:      delta_mean  theta_mean  alpha_mean  beta_mean  gamma_mean  delta_std  \
5     0.902070    0.076607   -0.236513  -0.437895   -0.640285   1.018425   
14    0.024437    2.519334    2.417702   2.797602   -0.344844  -0.063760   

    theta_std  alpha_std  beta_std  gamma_std  
5    0.152077  -0.239414 -0.084829  -0.801907  
14   2.320447   2.850395  3.043605  -0.632244  
Y_test:  5     P
14    S
Name: labels, dtype: object
<class 'pandas.core.frame.DataFrame'>
TRA: [10 12  4  6 13 15  7 11 19  8 16  9  0 18  1  5 17  2]
TEST: [ 3 14]
['P' 'S']
1.0
X_test:      delta_mean  theta_mean  alpha_mean  beta_mean  gamma_mean  delta_std  \
3     0.555134    1.280443   -0.416504  -0.691420   -0.595026   0.680326   
14    0.024437    2.519334    2.417702   2.797602   -0.344844  -0.063760   

    theta_std  alpha_std  beta_std  gamma_std  
3    1.055690  -0.376569 -0.649470  -0.630276  
14   2.320447  

In [109]:


diabetes = datasets.load_diabetes()
X = diabetes.data[:150]
y = diabetes.target[:150]
lasso = linear_model.Lasso()
print(X[0])
print(y[0])
print(cross_val_score(lasso, X, y))  

[ 0.03807591  0.05068012  0.06169621  0.02187235 -0.0442235  -0.03482076
 -0.04340085 -0.00259226  0.01990842 -0.01764613]
151.0
[ 0.33150734  0.08022311  0.03531764]
