In [37]:
import pandas as pd
import numpy as np
import seaborn as sns

Read the dataset

In [44]:
dataset=pd.read_csv('/content/parkinsons.data')
print(dataset.head())

             name  MDVP:Fo(Hz)  MDVP:Fhi(Hz)  MDVP:Flo(Hz)  MDVP:Jitter(%)  \
0  phon_R01_S01_1      119.992       157.302        74.997         0.00784   
1  phon_R01_S01_2      122.400       148.650       113.819         0.00968   
2  phon_R01_S01_3      116.682       131.111       111.555         0.01050   
3  phon_R01_S01_4      116.676       137.871       111.366         0.00997   
4  phon_R01_S01_5      116.014       141.781       110.655         0.01284   

   MDVP:Jitter(Abs)  MDVP:RAP  MDVP:PPQ  Jitter:DDP  MDVP:Shimmer  ...  \
0           0.00007   0.00370   0.00554     0.01109       0.04374  ...   
1           0.00008   0.00465   0.00696     0.01394       0.06134  ...   
2           0.00009   0.00544   0.00781     0.01633       0.05233  ...   
3           0.00009   0.00502   0.00698     0.01505       0.05492  ...   
4           0.00011   0.00655   0.00908     0.01966       0.06425  ...   

   Shimmer:DDA      NHR     HNR  status      RPDE       DFA   spread1  \
0      0.0654

Split the data into Features and target

In [70]:
features=dataset.loc[:,dataset.columns!='status'].values[:,1:]
target=dataset.loc[:,'status'].values

Check for null values

In [46]:
print(dataset.isnull().sum())

name                0
MDVP:Fo(Hz)         0
MDVP:Fhi(Hz)        0
MDVP:Flo(Hz)        0
MDVP:Jitter(%)      0
MDVP:Jitter(Abs)    0
MDVP:RAP            0
MDVP:PPQ            0
Jitter:DDP          0
MDVP:Shimmer        0
MDVP:Shimmer(dB)    0
Shimmer:APQ3        0
Shimmer:APQ5        0
MDVP:APQ            0
Shimmer:DDA         0
NHR                 0
HNR                 0
status              0
RPDE                0
DFA                 0
spread1             0
spread2             0
D2                  0
PPE                 0
dtype: int64


Scale values to preprocess the data for the model

In [73]:
from sklearn.preprocessing import StandardScaler
sc= StandardScaler()
features[:,:]=sc.fit_transform(features[:,:])
print(features)

[[-0.8292996539542079 -0.4361645634265588 -0.9520372925373122 ...
  0.4804768558730098 -0.21053081792189243 0.8688857486179907]
 [-0.7709716899104764 -0.5309740898946587 -0.05772055860053462 ...
  1.3111854578611926 0.2750771165961853 1.8036050267239274]
 [-0.9094763819379087 -0.7231682975604014 -0.10987482900476059 ...
  1.017682362203013 -0.10362861007657276 1.4026614087408522]
 ...
 [0.49557838646769314 0.4701036135835282 -0.968393092884927 ...
  -0.818079310767341 0.7803384839405688 -0.8324101371666857]
 [1.0787611365992884 2.190043978392486 -0.9541796720194646 ...
  -0.22906570588768993 -0.6370029768822506 -0.9261045642103445]
 [1.4548166356769172 0.6922463228485908 -0.8834811491084356 ...
  -0.430852840540344 0.45480231214611866 -0.6450546579643281]]


Split the data into training and test set

In [72]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train, y_test=train_test_split(features, target, test_size=0.2,random_state=1 )

Fit the training set to the model and run

In [74]:
from xgboost import XGBClassifier
model = XGBClassifier()
model=model.fit(x_train,y_train)

In [75]:
y_pred=model.predict(x_test)
from sklearn.metrics import confusion_matrix,accuracy_score
cm=confusion_matrix(y_test,y_pred)
print(cm)
print(accuracy_score(y_test,y_pred))
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator = model, X = x_train, y = y_train, cv = 10)
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))

[[ 8  2]
 [ 1 28]]
0.9230769230769231
Accuracy: 92.38 %
Standard Deviation: 7.27 %


In [76]:
from sklearn.svm import SVC
best_params = {'C': 1, 'gamma': 0.2, 'kernel': 'rbf'}
classifier = SVC(**best_params, random_state=0)
classifier.fit(x_train, y_train)

In [77]:
y_pred=model.predict(x_test)
from sklearn.metrics import confusion_matrix,accuracy_score
cm=confusion_matrix(y_test,y_pred)
print(cm)
print(accuracy_score(y_test,y_pred))
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator = classifier, X = x_train, y = y_train, cv = 10)
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))

[[ 8  2]
 [ 1 28]]
0.9230769230769231
Accuracy: 93.67 %
Standard Deviation: 7.40 %
