In [1]:
#packages for data analysis
import numpy as np
import pandas as pd

import scipy.io
from sklearn import svm

#visual your data
import matplotlib.pyplot as plt
import seaborn as sns; sns.set(font_scale=1.2)

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

%matplotlib inline

In [2]:
# get data
data_complete = scipy.io.loadmat('FeatureMatrices.mat')
labels = data_complete['FMinfo']
test = data_complete['WFM_1']
clas = test[:, 129]

x = np.delete(test, 129, axis=1)
y = clas

print(x)

[[ 1.16435401e-01 -1.08077103e-01 -7.96778474e-04 ...  2.41995733e-01
   1.06491996e+00 -1.65087150e+01]
 [ 8.21688818e-02  9.03154376e-02 -6.85819650e-02 ...  2.41110796e-01
   1.06419912e+00 -1.69006336e+01]
 [ 1.33964552e-01  3.67458529e-02  1.24008526e-01 ...  2.47017470e-01
   1.03231275e+00 -2.79617826e+01]
 ...
 [-1.54932255e-03 -9.86014464e-02  6.91419771e-02 ... -3.90329885e-02
   1.05517372e+00 -4.61726713e+00]
 [ 1.66201959e-01  1.95997625e-02  8.57325449e-03 ... -1.72576560e-02
   1.03076338e+00 -5.24105714e+00]
 [ 4.92604832e-02 -8.13697169e-02 -5.27883007e-02 ... -3.17909040e-02
   1.04372491e+00 -4.80904507e+00]]


In [5]:
# Variables
num_features = 10
dict_features = {}
times = 10

# Create dictionary
for i in range (1, num_features+1):
    dict_features[i] = 0

# Calculating the best model 10 times
for j in range (1, times+1):
    best_accuracy = 0
    best_kernel = ''
    best_features = 0
    for i in range(1, num_features+1):
        # Las k mejores características
        X_new = SelectKBest(chi2, k=num_features).fit_transform(abs(x), y)

        # Split the data for train and test
        X_train, X_test, y_train, y_test = train_test_split(X_new, y, test_size=0.3) 
        #print(X_train)

        # RBF, Polynomial and Linear Kernel
        rbf = svm.SVC(kernel='rbf', gamma=0.6, C=1).fit(X_train, y_train)
        poly = svm.SVC(kernel='poly', degree=3, C=1).fit(X_train, y_train)
        linear = svm.SVC(kernel='linear').fit(X_train, y_train)

#         poly_pred_test = poly.predict(X_test)
#         rbf_pred_test = rbf.predict(X_test)
#         linear_pred_test = linear.predict(X_test)

        poly_pred_test = cross_val_predict(poly, X_test, y_test, cv = 5)
        rbf_pred_test = cross_val_predict(rbf, X_test, y_test, cv = 5)
        linear_pred_test = cross_val_predict(linear, X_test, y_test, cv = 5)
        #print(linear_pred_test)

        # Accuracy test
        poly_accuracy = accuracy_score(y_test, poly_pred_test)
        poly_f1 = f1_score(y_test, poly_pred_test, average='weighted')

        rbf_accuracy = accuracy_score(y_test, rbf_pred_test)
        rbf_f1 = f1_score(y_test, rbf_pred_test, average='weighted')

        linear_accuracy = accuracy_score(y_test, linear_pred_test)
        linear_f1 = f1_score(y_test, linear_pred_test, average='weighted')

        if poly_accuracy > rbf_accuracy and poly_accuracy > linear_accuracy and poly_accuracy > best_accuracy:
            best_accuracy = poly_accuracy
            best_kernel = 'Poly'
            best_features = i
        elif rbf_accuracy > poly_accuracy and rbf_accuracy > linear_accuracy and rbf_accuracy > best_accuracy:
            best_accuracy = rbf_accuracy
            best_kernel = 'RBF'
            best_features = i
        elif linear_accuracy > poly_accuracy and linear_accuracy > rbf_accuracy and linear_accuracy > best_accuracy:
            best_accuracy = linear_accuracy
            best_kernel = 'Linear'
            best_features = i
    dict_features[best_features] = dict_features[best_features] + 1
    print(dict_features)
        
    print('Best Accuracy: ', "%.2f" % (best_accuracy*100))
    print('Best Kernel: ', best_kernel)
    print('Number of features: ', best_features)
print('----- Final results -----')
print(dict_features)

{1: 0, 2: 0, 3: 0, 4: 1, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0, 10: 0}
Best Accuracy:  82.02
Best Kernel:  RBF
Number of features:  4
{1: 0, 2: 0, 3: 0, 4: 2, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0, 10: 0}
Best Accuracy:  81.14
Best Kernel:  RBF
Number of features:  4
{1: 0, 2: 0, 3: 0, 4: 2, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0, 10: 1}
Best Accuracy:  81.58
Best Kernel:  RBF
Number of features:  10
{1: 0, 2: 1, 3: 0, 4: 2, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0, 10: 1}
Best Accuracy:  82.02
Best Kernel:  RBF
Number of features:  2
{1: 0, 2: 1, 3: 0, 4: 2, 5: 0, 6: 1, 7: 0, 8: 0, 9: 0, 10: 1}
Best Accuracy:  82.89
Best Kernel:  RBF
Number of features:  6
{1: 1, 2: 1, 3: 0, 4: 2, 5: 0, 6: 1, 7: 0, 8: 0, 9: 0, 10: 1}
Best Accuracy:  79.82
Best Kernel:  RBF
Number of features:  1
{1: 1, 2: 2, 3: 0, 4: 2, 5: 0, 6: 1, 7: 0, 8: 0, 9: 0, 10: 1}
Best Accuracy:  82.89
Best Kernel:  RBF
Number of features:  2
{1: 1, 2: 2, 3: 0, 4: 2, 5: 0, 6: 1, 7: 0, 8: 0, 9: 0, 10: 2}
Best Accuracy:  82.89
Best Kernel:  RBF
Number of features:  1

In [4]:
# Las k mejores características
X_new = SelectKBest(chi2, k=8).fit_transform(abs(x), y)

# Split the data for train and test
X_train, X_test, y_train, y_test = train_test_split(X_new, y, test_size=0.3) 
print(X_train)

[[2.02486463e-01 2.54274970e-01 2.32483726e-01 ... 5.35495340e-02
  1.03899548e+00 3.89400325e+00]
 [1.09183171e+00 1.22429057e+00 1.45242849e+00 ... 7.02608291e-01
  1.13383250e+00 9.98963754e+00]
 [6.52342141e-02 1.75066378e-02 1.94286851e-02 ... 1.00987616e-01
  1.10628601e+00 1.88318980e+01]
 ...
 [7.54837281e-02 9.42670386e-02 3.74402538e-02 ... 4.27003335e-02
  1.09163226e+00 6.08398634e+00]
 [2.16719276e-02 3.84664378e-02 3.53518681e-02 ... 4.83768706e-02
  1.18592487e+00 5.12516779e+00]
 [1.10729672e-01 1.55616327e-01 7.69010666e-02 ... 8.49812801e-02
  1.19725471e+00 2.93480021e+01]]


In [5]:
# RBF, Polynomial and Linear Kernel
rbf = svm.SVC(kernel='rbf', gamma=0.6, C=1).fit(X_train, y_train)
poly = svm.SVC(kernel='poly', degree=3, C=1).fit(X_train, y_train)
linear = svm.SVC(kernel='linear').fit(X_train, y_train)

In [6]:
# Predict values
# poly_pred_test = poly.predict(X_test)
# rbf_pred_test = rbf.predict(X_test)
# linear_pred_test = linear.predict(X_test)

# poly_pred_train = poly.predict(X_train)
# rbf_pred_train = rbf.predict(X_train)
# linear_pred_train = linear.predict(X_train)

poly_pred_test = cross_val_predict(poly, X_test, y_test, cv = 5)
rbf_pred_test = cross_val_predict(rbf, X_test, y_test, cv = 5)
linear_pred_test = cross_val_predict(linear, X_test, y_test, cv = 5)

poly_pred_train = cross_val_predict(poly, X_train, y_train, cv = 5)
rbf_pred_train = cross_val_predict(rbf, X_train, y_train, cv = 5)
linear_pred_train = cross_val_predict(linear, X_train, y_train, cv = 5)

print(linear_pred_test)

[2. 2. 2. 2. 2. 2. 3. 2. 2. 2. 2. 2. 3. 2. 2. 2. 2. 2. 2. 3. 2. 3. 2. 2.
 3. 2. 2. 2. 2. 2. 3. 2. 2. 2. 2. 2. 2. 2. 2. 2. 3. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 3. 2. 2. 2. 2. 3. 2. 2.
 3. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 3. 2.
 3. 2. 2. 2. 3. 2. 2. 2. 3. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 3. 2. 2. 3. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 3. 2. 3. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 3. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 3. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 3. 2. 2. 2. 2. 2. 3. 2. 2. 2. 3. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 3. 2. 2. 3. 2. 2. 2. 3.]


In [7]:
# Accuracy test
print("-----Accuracy Test-----")
poly_accuracy = accuracy_score(y_test, poly_pred_test)
poly_f1 = f1_score(y_test, poly_pred_test, average='weighted')
print('Accuracy (Polynomial Kernel): ', "%.2f" % (poly_accuracy*100))
print('F1 (Polynomial Kernel): ', "%.2f" % (poly_f1*100), '\n')

rbf_accuracy = accuracy_score(y_test, rbf_pred_test)
rbf_f1 = f1_score(y_test, rbf_pred_test, average='weighted')
print('Accuracy (RBF Kernel): ', "%.2f" % (rbf_accuracy*100))
print('F1 (RBF Kernel): ', "%.2f" % (rbf_f1*100), '\n')

linear_accuracy = accuracy_score(y_test, linear_pred_test)
linear_f1 = f1_score(y_test, linear_pred_test, average='weighted')
print('Accuracy (Linear Kernel): ', "%.2f" % (linear_accuracy*100))
print('F1 (Linear Kernel): ', "%.2f" % (linear_f1*100), '\n')

# Accuracy train
print("-----Accuracy Train-----")
poly_accuracy = accuracy_score(y_train, poly_pred_train)
poly_f1 = f1_score(y_train, poly_pred_train, average='weighted')
print('Accuracy (Polynomial Kernel): ', "%.2f" % (poly_accuracy*100))
print('F1 (Polynomial Kernel): ', "%.2f" % (poly_f1*100), '\n')

rbf_accuracy = accuracy_score(y_train, rbf_pred_train)
rbf_f1 = f1_score(y_train, rbf_pred_train, average='weighted')
print('Accuracy (RBF Kernel): ', "%.2f" % (rbf_accuracy*100))
print('F1 (RBF Kernel): ', "%.2f" % (rbf_f1*100), '\n')

linear_accuracy = accuracy_score(y_train, linear_pred_train)
linear_f1 = f1_score(y_train, linear_pred_train, average='weighted')
print('Accuracy (Linear Kernel): ', "%.2f" % (linear_accuracy*100))
print('F1 (Linear Kernel): ', "%.2f" % (linear_f1*100))

-----Accuracy Test-----
Accuracy (Polynomial Kernel):  60.53
F1 (Polynomial Kernel):  45.64 

Accuracy (RBF Kernel):  75.88
F1 (RBF Kernel):  70.81 

Accuracy (Linear Kernel):  71.93
F1 (Linear Kernel):  65.42 

-----Accuracy Train-----
Accuracy (Polynomial Kernel):  65.16
F1 (Polynomial Kernel):  51.61 

Accuracy (RBF Kernel):  79.66
F1 (RBF Kernel):  75.49 

Accuracy (Linear Kernel):  75.52
F1 (Linear Kernel):  69.80


In [9]:
# get the separating hyperplane
w = linear.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(30, 60)
yy = a * xx - (linear.intercept_[0]) / w[1]

# plot the parallels to the separating hyperplane that pass through the support vectors
b = linear.support_vectors_[0]
yy_down = a * xx + (b[1] - a * b[0])
b = linear.support_vectors_[-1]
yy_up = a * xx + (b[1] - a * b[0])

In [12]:
# plot data
sns.lmplot('Features', 'Clas', data=test, hue='Clas', palette='Set1', fit_reg=False, scatter_kws={"s": 70});
plt.plot(xx, yy, linewidth=2, color='black')
plt.plot(xx, yy_down, 'k--')
plt.plot(xx, yy_up, 'k--')

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices