In [1]:
import pandas as pd
import math
from sklearn import model_selection
from sklearn import svm
import pyswarms as ps
import numpy as np
from sklearn.metrics import mean_squared_error

In [2]:
data = pd.read_csv('parkinsons.csv', delimiter=',')

In [3]:
x = data.drop(columns=['name', 'status']).to_numpy()
y = data['status'].to_numpy()

In [47]:
# four best features as per mid sems
x = data[['HNR','RPDE','DFA','PPE']]
x.shape

# set 1 as per correlation coeff -> 83.58% training, 78.46% testing
# x = data[['MDVP:Jitter(Abs)','MDVP:RAP','MDVP:PPQ','Jitter:DDP']]
# x.shape

(195, 4)

In [4]:
# 60% training set and 40% testing set
x_train, x_test, y_train, y_test = model_selection.train_test_split(x, y, test_size=0.4, random_state=0)

In [7]:
# SVM PARAMETER TUNING WITH C=2 
model_svm = svm.SVC(kernel='linear', C=2).fit(x_train, y_train)

In [8]:
model_svm.score(x_test, y_test)

0.9230769230769231

In [None]:
# max_score = 0
# best_c = 1
# for x in range(1,100):
#     model = svm.SVC(C=x, kernel='linear', degree=3).fit(x_train, y_train)
#     score = model.score(x_test, y_test)
#     print(score)
#     if(score > max_score):
#         max_score = score
#         best_c = x

# print(max_score)
# print(best_c)

# 92.30769230769231% at c=2

In [9]:
def fitness_function(position):
    svmClassifier = svm.SVC(kernel='linear', gamma=position[0], C=position[1])
    svmClassifier.fit(x_train, y_train)
    y_train_pred = svmClassifier.predict(x_train)
    y_test_pred = svmClassifier.predict(x_test)
    mse_test = mean_squared_error(y_test, y_test_pred)
    rmse_test = math.sqrt(mse_test)
    print('Optimizing the Parameters ..... C = {c}, Gamma = {gamma}'.format(c=position[1], gamma=position[0]))
    mse_train = mean_squared_error(y_train, y_train_pred)
    rmse_train = math.sqrt(mse_train)
    # list(range(mse_f_train, rmse_f_train))
    retVal= [rmse_train, rmse_test] #Return a vector instead of single value. you can use the rmse_test and ignore rmse_train
    return retVal

In [10]:
def f(x):
    n_particles = x.shape[0]
    j = [fitness_function(x[i]) for i in range(n_particles)]
    return np.array(j)

In [11]:
options = {'c1': 0.5, 'c2': 0.3, 'w':0.9}
optimizer = ps.single.GlobalBestPSO(n_particles=100, dimensions=2, options=options)
gamma_opt, C_opt = optimizer.optimize(f, iters=1000)

2020-04-23 16:13:44,529 - pyswarms.single.global_best - INFO - Optimize for 1000 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
pyswarms.single.global_best:   0%|          |0/1000

Optimizing the Parameters ..... C = 0.9610848336148076, Gamma = 0.3701037820345876
Optimizing the Parameters ..... C = 0.4960609344116881, Gamma = 0.7281990121643953
Optimizing the Parameters ..... C = 0.05752631812694209, Gamma = 0.5444878093867797
Optimizing the Parameters ..... C = 0.6658042765745453, Gamma = 0.54676916706783
Optimizing the Parameters ..... C = 0.7029016131182849, Gamma = 0.4476800516074869
Optimizing the Parameters ..... C = 0.8544701688034184, Gamma = 0.4381313108744742
Optimizing the Parameters ..... C = 0.031090394225949614, Gamma = 0.6529844954131887
Optimizing the Parameters ..... C = 0.39823883780608527, Gamma = 0.19140822638773314
Optimizing the Parameters ..... C = 0.5896823671360459, Gamma = 0.43311291067216195
Optimizing the Parameters ..... C = 0.4053776899469176, Gamma = 0.6800049368814871
Optimizing the Parameters ..... C = 0.2959415144834391, Gamma = 0.6542338329951698
Optimizing the Parameters ..... C = 0.24100811034939706, Gamma = 0.8188724808549329

pyswarms.single.global_best:   0%|          |0/1000

Optimizing the Parameters ..... C = 0.6959809879160533, Gamma = 0.2121806418417559
Optimizing the Parameters ..... C = 0.5031410780847151, Gamma = 0.4046648371098269
Optimizing the Parameters ..... C = 0.046907140464804487, Gamma = 0.5529995815886469
Optimizing the Parameters ..... C = 0.9940730732753108, Gamma = 0.015690724670518463





ValueError: operands could not be broadcast together with shapes (100,2) (100,) 

In [41]:
# PARAMETER TUNING USING GRID SEARCH CV
from sklearn.model_selection import GridSearchCV

In [40]:
model2 = svm.SVC(C=10, kernel='rbf', gamma="auto").fit(x_train, y_train)
model2.score(x_test, y_test)

0.8076923076923077

In [39]:
parameters = {'kernel':('rbf','linear'), 'C':range(3,100)}
svc = svm.SVC(gamma="auto")
clf = GridSearchCV(svc, parameters, cv=5)

In [63]:
#solo
parameters = {'C':range(1,1000)}
svc = svm.SVC(gamma="auto", kernel='linear')
clf = GridSearchCV(svc, parameters, cv=5)

In [None]:
clf.fit(x_train, y_train)

In [58]:
sorted(clf.cv_results_.keys())

['mean_fit_time',
 'mean_score_time',
 'mean_test_score',
 'param_C',
 'params',
 'rank_test_score',
 'split0_test_score',
 'split1_test_score',
 'split2_test_score',
 'split3_test_score',
 'split4_test_score',
 'std_fit_time',
 'std_score_time',
 'std_test_score']

In [59]:
y_pred = clf.predict(x_test)

In [60]:
x_test.shape

(78, 22)

In [61]:
clf.best_params_

{'C': 1}

In [62]:
clf.predict

<function sklearn.model_selection._search.BaseSearchCV.predict(self, X)>

In [42]:
# LOGISTIC REGRESSION
from sklearn.linear_model import LogisticRegression

In [5]:
x = data.drop(columns=['name', 'status'])
y = data['status']
data.columns

Index(['name', 'MDVP:Fo(Hz)', 'MDVP:Fhi(Hz)', 'MDVP:Flo(Hz)', 'MDVP:Jitter(%)',
       'MDVP:Jitter(Abs)', 'MDVP:RAP', 'MDVP:PPQ', 'Jitter:DDP',
       'MDVP:Shimmer', 'MDVP:Shimmer(dB)', 'Shimmer:APQ3', 'Shimmer:APQ5',
       'MDVP:APQ', 'Shimmer:DDA', 'NHR', 'HNR', 'status', 'RPDE', 'DFA',
       'spread1', 'spread2', 'D2', 'PPE'],
      dtype='object')

In [230]:
# four best features as per mid sems
x = data[['HNR','RPDE','DFA','PPE']]
x.shape

# set 1 as per correlation coeff -> 83.58% training, 78.46% testing
x = data[['MDVP:PPQ','MDVP:Shimmer', 'MDVP:Shimmer(dB)',
          'Shimmer:APQ3', 'Shimmer:APQ5','MDVP:APQ','Shimmer:DDA']]
x.shape

(195, 7)

In [14]:
# 60% training set and 40% testing set
x_train, x_test, y_train, y_test = model_selection.train_test_split (x, y, test_size=0.33, random_state=0)

In [7]:
clf = LogisticRegression(random_state=0).fit(x_train, y_train)



In [8]:
clf.score(x_test, y_test)

0.8923076923076924