In [35]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.metrics import accuracy_score

#### Data Collection dan Data Pre-Processing

In [36]:
pd.set_option('display.max_columns', 30)

In [37]:
data = pd.read_csv("Dataset/parkinsons.csv")
data.head()

Unnamed: 0,name,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,MDVP:Shimmer(dB),Shimmer:APQ3,Shimmer:APQ5,MDVP:APQ,Shimmer:DDA,NHR,HNR,status,RPDE,DFA,spread1,spread2,D2,PPE
0,phon_R01_S01_1,119.992,157.302,74.997,0.00784,7e-05,0.0037,0.00554,0.01109,0.04374,0.426,0.02182,0.0313,0.02971,0.06545,0.02211,21.033,1,0.414783,0.815285,-4.813031,0.266482,2.301442,0.284654
1,phon_R01_S01_2,122.4,148.65,113.819,0.00968,8e-05,0.00465,0.00696,0.01394,0.06134,0.626,0.03134,0.04518,0.04368,0.09403,0.01929,19.085,1,0.458359,0.819521,-4.075192,0.33559,2.486855,0.368674
2,phon_R01_S01_3,116.682,131.111,111.555,0.0105,9e-05,0.00544,0.00781,0.01633,0.05233,0.482,0.02757,0.03858,0.0359,0.0827,0.01309,20.651,1,0.429895,0.825288,-4.443179,0.311173,2.342259,0.332634
3,phon_R01_S01_4,116.676,137.871,111.366,0.00997,9e-05,0.00502,0.00698,0.01505,0.05492,0.517,0.02924,0.04005,0.03772,0.08771,0.01353,20.644,1,0.434969,0.819235,-4.117501,0.334147,2.405554,0.368975
4,phon_R01_S01_5,116.014,141.781,110.655,0.01284,0.00011,0.00655,0.00908,0.01966,0.06425,0.584,0.0349,0.04825,0.04465,0.1047,0.01767,19.649,1,0.417356,0.823484,-3.747787,0.234513,2.33218,0.410335


In [38]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195 entries, 0 to 194
Data columns (total 24 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   name              195 non-null    object 
 1   MDVP:Fo(Hz)       195 non-null    float64
 2   MDVP:Fhi(Hz)      195 non-null    float64
 3   MDVP:Flo(Hz)      195 non-null    float64
 4   MDVP:Jitter(%)    195 non-null    float64
 5   MDVP:Jitter(Abs)  195 non-null    float64
 6   MDVP:RAP          195 non-null    float64
 7   MDVP:PPQ          195 non-null    float64
 8   Jitter:DDP        195 non-null    float64
 9   MDVP:Shimmer      195 non-null    float64
 10  MDVP:Shimmer(dB)  195 non-null    float64
 11  Shimmer:APQ3      195 non-null    float64
 12  Shimmer:APQ5      195 non-null    float64
 13  MDVP:APQ          195 non-null    float64
 14  Shimmer:DDA       195 non-null    float64
 15  NHR               195 non-null    float64
 16  HNR               195 non-null    float64
 1

In [39]:
data = data.rename(columns={"MDVP:Fo(Hz)": "mdvp_fo_hz",
                            "MDVP:Fhi(Hz)": "mdvp_fhi_hz",
                            "MDVP:Flo(Hz)": "mdvp_flo_hz",
                            "MDVP:Jitter(%)": "mdvp_jitter_percent",
                            "MDVP:Jitter(Abs)": "mdvp_jitter_abs",
                            "MDVP:RAP": "mdvp_rap",
                            "MDVP:PPQ": "mdvp_ppq",
                            "Jitter:DDP": "jitter_ddp",
                            "MDVP:Shimmer": "mdvp_shimmer",
                            "MDVP:Shimmer(dB)": "mdvp_shimmer_db",
                            "Shimmer:APQ3": "shimmer_apq3",
                            "Shimmer:APQ5": "shimmer_apq5",
                            "MDVP:APQ": "mdvp_apq",
                            "Shimmer:DDA": "shimmer_dda",
                            "NHR": "nhr",
                            "HNR": "hnr",
                            "RPDE": "rpde",
                            "DFA": "dfa",
                            "spread1": "spread1",
                            "spread2": "spread2",
                            "D2": "d2",
                            "PPE": "ppe"})


In [63]:
data.sample(3)

Unnamed: 0,name,mdvp_fo_hz,mdvp_fhi_hz,mdvp_flo_hz,mdvp_jitter_percent,mdvp_jitter_abs,mdvp_rap,mdvp_ppq,jitter_ddp,mdvp_shimmer,mdvp_shimmer_db,shimmer_apq3,shimmer_apq5,mdvp_apq,shimmer_dda,nhr,hnr,status,rpde,dfa,spread1,spread2,d2,ppe
43,phon_R01_S10_2,241.404,248.834,232.483,0.00281,1e-05,0.00157,0.00173,0.0047,0.0176,0.154,0.01006,0.01038,0.01251,0.03017,0.00675,23.145,0,0.457702,0.634267,-6.793547,0.158266,2.256699,0.117399
1,phon_R01_S01_2,122.4,148.65,113.819,0.00968,8e-05,0.00465,0.00696,0.01394,0.06134,0.626,0.03134,0.04518,0.04368,0.09403,0.01929,19.085,1,0.458359,0.819521,-4.075192,0.33559,2.486855,0.368674
58,phon_R01_S16_5,116.879,131.897,108.153,0.00788,7e-05,0.00334,0.00493,0.01003,0.02645,0.265,0.01394,0.01625,0.02137,0.04183,0.00786,22.603,1,0.540049,0.813432,-4.476755,0.262633,1.827012,0.326197


In [41]:
data.shape

(195, 24)

In [42]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195 entries, 0 to 194
Data columns (total 24 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   name                 195 non-null    object 
 1   mdvp_fo_hz           195 non-null    float64
 2   mdvp_fhi_hz          195 non-null    float64
 3   mdvp_flo_hz          195 non-null    float64
 4   mdvp_jitter_percent  195 non-null    float64
 5   mdvp_jitter_abs      195 non-null    float64
 6   mdvp_rap             195 non-null    float64
 7   mdvp_ppq             195 non-null    float64
 8   jitter_ddp           195 non-null    float64
 9   mdvp_shimmer         195 non-null    float64
 10  mdvp_shimmer_db      195 non-null    float64
 11  shimmer_apq3         195 non-null    float64
 12  shimmer_apq5         195 non-null    float64
 13  mdvp_apq             195 non-null    float64
 14  shimmer_dda          195 non-null    float64
 15  nhr                  195 non-null    flo

In [43]:
data.isnull().sum()

name                   0
mdvp_fo_hz             0
mdvp_fhi_hz            0
mdvp_flo_hz            0
mdvp_jitter_percent    0
mdvp_jitter_abs        0
mdvp_rap               0
mdvp_ppq               0
jitter_ddp             0
mdvp_shimmer           0
mdvp_shimmer_db        0
shimmer_apq3           0
shimmer_apq5           0
mdvp_apq               0
shimmer_dda            0
nhr                    0
hnr                    0
status                 0
rpde                   0
dfa                    0
spread1                0
spread2                0
d2                     0
ppe                    0
dtype: int64

In [44]:
data.describe()

Unnamed: 0,mdvp_fo_hz,mdvp_fhi_hz,mdvp_flo_hz,mdvp_jitter_percent,mdvp_jitter_abs,mdvp_rap,mdvp_ppq,jitter_ddp,mdvp_shimmer,mdvp_shimmer_db,shimmer_apq3,shimmer_apq5,mdvp_apq,shimmer_dda,nhr,hnr,status,rpde,dfa,spread1,spread2,d2,ppe
count,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0
mean,154.228641,197.104918,116.324631,0.00622,4.4e-05,0.003306,0.003446,0.00992,0.029709,0.282251,0.015664,0.017878,0.024081,0.046993,0.024847,21.885974,0.753846,0.498536,0.718099,-5.684397,0.22651,2.381826,0.206552
std,41.390065,91.491548,43.521413,0.004848,3.5e-05,0.002968,0.002759,0.008903,0.018857,0.194877,0.010153,0.012024,0.016947,0.030459,0.040418,4.425764,0.431878,0.103942,0.055336,1.090208,0.083406,0.382799,0.090119
min,88.333,102.145,65.476,0.00168,7e-06,0.00068,0.00092,0.00204,0.00954,0.085,0.00455,0.0057,0.00719,0.01364,0.00065,8.441,0.0,0.25657,0.574282,-7.964984,0.006274,1.423287,0.044539
25%,117.572,134.8625,84.291,0.00346,2e-05,0.00166,0.00186,0.004985,0.016505,0.1485,0.008245,0.00958,0.01308,0.024735,0.005925,19.198,1.0,0.421306,0.674758,-6.450096,0.174351,2.099125,0.137451
50%,148.79,175.829,104.315,0.00494,3e-05,0.0025,0.00269,0.00749,0.02297,0.221,0.01279,0.01347,0.01826,0.03836,0.01166,22.085,1.0,0.495954,0.722254,-5.720868,0.218885,2.361532,0.194052
75%,182.769,224.2055,140.0185,0.007365,6e-05,0.003835,0.003955,0.011505,0.037885,0.35,0.020265,0.02238,0.0294,0.060795,0.02564,25.0755,1.0,0.587562,0.761881,-5.046192,0.279234,2.636456,0.25298
max,260.105,592.03,239.17,0.03316,0.00026,0.02144,0.01958,0.06433,0.11908,1.302,0.05647,0.0794,0.13778,0.16942,0.31482,33.047,1.0,0.685151,0.825288,-2.434031,0.450493,3.671155,0.527367


In [45]:
data.status.value_counts()

1    147
0     48
Name: status, dtype: int64

1 --> Parkinson's Positive<br>
<br>
0 --> Healthy 

In [46]:
# grouping the data bas3ed on the target variable
data.groupby('status').mean()

  data.groupby('status').mean()


Unnamed: 0_level_0,mdvp_fo_hz,mdvp_fhi_hz,mdvp_flo_hz,mdvp_jitter_percent,mdvp_jitter_abs,mdvp_rap,mdvp_ppq,jitter_ddp,mdvp_shimmer,mdvp_shimmer_db,shimmer_apq3,shimmer_apq5,mdvp_apq,shimmer_dda,nhr,hnr,rpde,dfa,spread1,spread2,d2,ppe
status,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,181.937771,223.63675,145.207292,0.003866,2.3e-05,0.001925,0.002056,0.005776,0.017615,0.162958,0.009504,0.010509,0.013305,0.028511,0.011483,24.67875,0.442552,0.695716,-6.759264,0.160292,2.154491,0.123017
1,145.180762,188.441463,106.893558,0.006989,5.1e-05,0.003757,0.0039,0.011273,0.033658,0.321204,0.017676,0.020285,0.0276,0.053027,0.029211,20.974048,0.516816,0.725408,-5.33342,0.248133,2.456058,0.233828


### Dataset Splitting

In [47]:
X = data.drop(columns=['name','status'], axis=1)
Y = data['status']

In [48]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)

In [49]:
print(X.shape, X_train.shape, X_test.shape)

(195, 22) (156, 22) (39, 22)


In [50]:
print(Y.shape, Y_train.shape, Y_test.shape)

(195,) (156,) (39,)


### Data Normalization

In [51]:
scaler = StandardScaler()

In [52]:
scaler.fit(X_train)

In [53]:
X_train = scaler.transform(X_train)

X_test = scaler.transform(X_test)

### Training Model

In [54]:
model = svm.SVC(kernel='linear')

In [55]:
# training the SVM model with training data
model.fit(X_train, Y_train)

### Evaluasi

In [56]:
# accuracy score on training data
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

In [57]:
print('Accuracy score of training data : ', training_data_accuracy)

Accuracy score of training data :  0.8974358974358975


In [58]:
# accuracy score on test data
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

In [59]:
print('Accuracy score of training data : ', test_data_accuracy)

Accuracy score of training data :  0.8974358974358975


In [60]:
# input_data = (197.07600,206.89600,192.05500,0.00289,0.00001,0.00166,0.00168,0.00498,0.01098,0.09700,0.00563,0.00680,0.00802,0.01689,0.00339,26.77500,0.422229,0.741367,-7.348300,0.177551,1.743867,0.085569)

input_data = (217.11600,233.48100,93.97800,0.00404,0.00002,0.00127,0.00128,0.00381,0.01299,0.12400,0.00679,0.00631,0.01075,0.02038,0.00681,24.58100,0.462516,0.582710,-5.517173,0.389295,2.925862,0.220657)

# changing input data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the numpy array
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)

# standardize the data
std_data = scaler.transform(input_data_reshaped)

prediction = model.predict(std_data)
print(prediction)


if (prediction[0] == 0):
  print("The Person does not have Parkinsons Disease")

else:
  print("The Person has Parkinsons")


[1]
The Person has Parkinsons




In [61]:
import joblib

In [62]:
# Simpan scaler dan model
joblib.dump(scaler, "scaler_parkinson.pkl")
joblib.dump(model, "model_parkinson.pkl")


['model_parkinson.pkl']