In [4]:
import pandas as pd
import math
from sklearn import model_selection
from sklearn import svm
import pyswarms as ps
import numpy as np
from sklearn.metrics import mean_squared_error

In [5]:
data = pd.read_csv('parkinsons.csv', delimiter=',')

In [110]:
x = data.drop(columns=['name', 'status']).to_numpy()
y = data['status'].to_numpy()

In [111]:
# four best features as per mid sems
x = data[['HNR','RPDE','DFA','PPE']]
x.shape

# set 1 as per correlation coeff -> 83.58% training, 78.46% testing
# x = data[['MDVP:Jitter(Abs)','MDVP:RAP','MDVP:PPQ','Jitter:DDP']]
# x.shape

(195, 4)

In [112]:
# 60% training set and 40% testing set
x_train, x_test, y_train, y_test = model_selection.train_test_split(x, y, test_size=0.4, random_state=0)

In [113]:
# SVM PARAMETER TUNING WITH C=2 
model_svm = svm.SVC(kernel='linear', C=0.87, gamma=0.61, degree=1000).fit(x_train, y_train)

In [114]:
model_svm.score(x_test, y_test)

0.782051282051282

In [115]:
# max_score = 0
# best_c = 1
# for x in range(1,100):
#     model = svm.SVC(C=x, kernel='linear', degree=3).fit(x_train, y_train)
#     score = model.score(x_test, y_test)
#     print(score)
#     if(score > max_score):
#         max_score = score
#         best_c = x

# print(max_score)
# print(best_c)

# 92.30769230769231% at c=2

In [128]:
def fitness_function(position):
    svmClassifier = svm.SVC(kernel='linear', gamma=position[0], C=position[1])
    svmClassifier.fit(x_train, y_train)
    y_train_pred = svmClassifier.predict(x_train)
    y_test_pred = svmClassifier.predict(x_test)
    mse_test = mean_squared_error(y_test, y_test_pred)
    rmse_test = math.sqrt(mse_test)
    print('Optimizing the Parameters ..... C = {c}, Gamma = {gamma}'.format(c=position[1], gamma=position[0]))
    mse_train = mean_squared_error(y_train, y_train_pred)
    rmse_train = math.sqrt(mse_train)
    # list(range(mse_f_train, rmse_f_train))
    retVal= [rmse_train, rmse_test] #Return a vector instead of single value. you can use the rmse_test and ignore rmse_train
    return retVal

In [131]:
def f(x):
    n_particles = x.shape[0]
    j = [fitness_function(x[i]) for i in range(n_particles)]
    return np.array(j)

In [132]:
options = {'c1': 0.5, 'c2': 0.3, 'w':0.9}
optimizer = ps.single.GlobalBestPSO(n_particles=100, dimensions=2, options=options)
gamma_opt, C_opt = optimizer.optimize(f, iters=1000)

2020-04-23 16:41:38,996 - pyswarms.single.global_best - INFO - Optimize for 1000 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
pyswarms.single.global_best:   0%|          |0/1000

Optimizing the Parameters ..... C = 0.19586036677931107, Gamma = 0.961182655730494
Optimizing the Parameters ..... C = 0.7555127779223627, Gamma = 0.7542624403055378
Optimizing the Parameters ..... C = 0.41217698678911174, Gamma = 0.6390273330593582
Optimizing the Parameters ..... C = 0.5643065793245308, Gamma = 0.004194827345897378
Optimizing the Parameters ..... C = 0.9290278959435665, Gamma = 0.6112985538237612
Optimizing the Parameters ..... C = 0.7834853058239132, Gamma = 0.24247660581564368
Optimizing the Parameters ..... C = 0.24652096984551575, Gamma = 0.877697139681332
Optimizing the Parameters ..... C = 0.07736766966031117, Gamma = 0.7102829933984856
Optimizing the Parameters ..... C = 0.3501225239270095, Gamma = 0.5878982615577072
Optimizing the Parameters ..... C = 0.04921613875334818, Gamma = 0.004383196328264338
Optimizing the Parameters ..... C = 0.051170919522069425, Gamma = 0.8149420563491104
Optimizing the Parameters ..... C = 0.18706495904623976, Gamma = 0.7729390138

pyswarms.single.global_best:   0%|          |0/1000

Optimizing the Parameters ..... C = 0.8549216855365214, Gamma = 0.11339924483159358
Optimizing the Parameters ..... C = 0.9640667167211848, Gamma = 0.9053293683271366
Optimizing the Parameters ..... C = 0.2721164597319151, Gamma = 0.06316648818508053
Optimizing the Parameters ..... C = 0.8862296477654785, Gamma = 0.7606312939527624
Optimizing the Parameters ..... C = 0.90395416651939, Gamma = 0.5333005161234853
Optimizing the Parameters ..... C = 0.7196601376189734, Gamma = 0.051264086010061716
Optimizing the Parameters ..... C = 0.5704820005462596, Gamma = 0.1870452356315634
Optimizing the Parameters ..... C = 0.24597772148154384, Gamma = 0.22537437045168407
Optimizing the Parameters ..... C = 0.9743265479814442, Gamma = 0.5765105726400291
Optimizing the Parameters ..... C = 0.8696311997140574, Gamma = 0.511159362641093
Optimizing the Parameters ..... C = 0.03446647004937664, Gamma = 0.8133001684442294
Optimizing the Parameters ..... C = 0.8109076602600024, Gamma = 0.11946058251084057




ValueError: operands could not be broadcast together with shapes (100,2) (100,) 

In [79]:
# PARAMETER TUNING USING GRID SEARCH CV
from sklearn.model_selection import GridSearchCV

In [80]:
model2 = svm.SVC(C=10, kernel='rbf', gamma="auto").fit(x_train, y_train)
model2.score(x_test, y_test)

ValueError: bad input shape (117, 2)

In [None]:
parameters = {'kernel':('rbf','linear'), 'C':range(3,100)}
svc = svm.SVC(gamma="auto")
clf = GridSearchCV(svc, parameters, cv=5)

In [None]:
#solo
parameters = {'C':range(1,1000)}
svc = svm.SVC(gamma="auto", kernel='linear')
clf = GridSearchCV(svc, parameters, cv=5)

In [None]:
clf.fit(x_train, y_train)

In [None]:
sorted(clf.cv_results_.keys())

In [None]:
y_pred = clf.predict(x_test)

In [None]:
x_test.shape

In [None]:
clf.best_params_

In [None]:
clf.predict

In [6]:
# LOGISTIC REGRESSION
from sklearn.linear_model import LogisticRegression

In [7]:
x = data.drop(columns=['name', 'status'])
y = data['status']
data.columns

Index(['name', 'MDVP:Fo(Hz)', 'MDVP:Fhi(Hz)', 'MDVP:Flo(Hz)', 'MDVP:Jitter(%)',
       'MDVP:Jitter(Abs)', 'MDVP:RAP', 'MDVP:PPQ', 'Jitter:DDP',
       'MDVP:Shimmer', 'MDVP:Shimmer(dB)', 'Shimmer:APQ3', 'Shimmer:APQ5',
       'MDVP:APQ', 'Shimmer:DDA', 'NHR', 'HNR', 'status', 'RPDE', 'DFA',
       'spread1', 'spread2', 'D2', 'PPE'],
      dtype='object')

In [None]:
# four best features as per mid sems
x = data[['HNR','RPDE','DFA','PPE']]
x.shape

# set 1 as per correlation coeff -> 83.58% training, 78.46% testing
x = data[['MDVP:PPQ','MDVP:Shimmer', 'MDVP:Shimmer(dB)',
          'Shimmer:APQ3', 'Shimmer:APQ5','MDVP:APQ','Shimmer:DDA']]
x.shape

In [8]:
# 60% training set and 40% testing set
x_train, x_test, y_train, y_test = model_selection.train_test_split (x, y, random_state=0)

In [None]:
clf = LogisticRegression(random_state=0).fit(x_train, y_train)

In [None]:
clf.score(x_test, y_test)