Importing the Dependencies

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.metrics import accuracy_score

Data Collection and Analysis 💹

In [2]:
# -- load the data from csv file to pandas DataFrame
pd_dataset = pd.read_csv('./pd_speech_features.csv')

In [3]:
# -- 1st 5 rows
pd_dataset.head()

Unnamed: 0,id,gender,PPE,DFA,RPDE,numPulses,numPeriodsPulses,meanPeriodPulses,stdDevPeriodPulses,locPctJitter,...,tqwt_kurtosisValue_dec_28,tqwt_kurtosisValue_dec_29,tqwt_kurtosisValue_dec_30,tqwt_kurtosisValue_dec_31,tqwt_kurtosisValue_dec_32,tqwt_kurtosisValue_dec_33,tqwt_kurtosisValue_dec_34,tqwt_kurtosisValue_dec_35,tqwt_kurtosisValue_dec_36,class
0,0,1,0.85247,0.71826,0.57227,240,239,0.008064,8.7e-05,0.00218,...,1.562,2.6445,3.8686,4.2105,5.1221,4.4625,2.6202,3.0004,18.9405,1
1,0,1,0.76686,0.69481,0.53966,234,233,0.008258,7.3e-05,0.00195,...,1.5589,3.6107,23.5155,14.1962,11.0261,9.5082,6.5245,6.3431,45.178,1
2,0,1,0.85083,0.67604,0.58982,232,231,0.00834,6e-05,0.00176,...,1.5643,2.3308,9.4959,10.7458,11.0177,4.8066,2.9199,3.1495,4.7666,1
3,1,0,0.41121,0.79672,0.59257,178,177,0.010858,0.000183,0.00419,...,3.7805,3.5664,5.2558,14.0403,4.2235,4.6857,4.846,6.265,4.0603,1
4,1,0,0.3279,0.79782,0.53028,236,235,0.008162,0.002669,0.00535,...,6.1727,5.8416,6.0805,5.7621,7.7817,11.6891,8.2103,5.0559,6.1164,1


In [4]:
pd_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 756 entries, 0 to 755
Columns: 755 entries, id to class
dtypes: float64(749), int64(6)
memory usage: 4.4 MB


In [6]:
pd_dataset.shape

(756, 755)

In [5]:
# -- check for the nullity of values
pd_dataset.isnull().sum()

id                           0
gender                       0
PPE                          0
DFA                          0
RPDE                         0
                            ..
tqwt_kurtosisValue_dec_33    0
tqwt_kurtosisValue_dec_34    0
tqwt_kurtosisValue_dec_35    0
tqwt_kurtosisValue_dec_36    0
class                        0
Length: 755, dtype: int64

In [7]:
# -- statistical measures about the data
pd_dataset.describe()

Unnamed: 0,id,gender,PPE,DFA,RPDE,numPulses,numPeriodsPulses,meanPeriodPulses,stdDevPeriodPulses,locPctJitter,...,tqwt_kurtosisValue_dec_28,tqwt_kurtosisValue_dec_29,tqwt_kurtosisValue_dec_30,tqwt_kurtosisValue_dec_31,tqwt_kurtosisValue_dec_32,tqwt_kurtosisValue_dec_33,tqwt_kurtosisValue_dec_34,tqwt_kurtosisValue_dec_35,tqwt_kurtosisValue_dec_36,class
count,756.0,756.0,756.0,756.0,756.0,756.0,756.0,756.0,756.0,756.0,...,756.0,756.0,756.0,756.0,756.0,756.0,756.0,756.0,756.0,756.0
mean,125.5,0.515873,0.746284,0.700414,0.489058,323.972222,322.678571,0.00636,0.000383,0.002324,...,26.237251,22.840337,18.587888,13.872018,12.218953,12.375335,14.79923,14.751559,31.48111,0.746032
std,72.793721,0.500079,0.169294,0.069718,0.137442,99.219059,99.402499,0.001826,0.000728,0.002628,...,42.220693,32.626464,25.537464,20.046029,17.783642,16.341665,15.722502,14.432979,34.230991,0.435568
min,0.0,0.0,0.041551,0.5435,0.1543,2.0,1.0,0.002107,1.1e-05,0.00021,...,1.5098,1.5317,1.5829,1.7472,1.7895,1.6287,1.8617,1.9559,2.364,0.0
25%,62.75,0.0,0.762833,0.647053,0.386537,251.0,250.0,0.005003,4.9e-05,0.00097,...,2.408675,3.4528,3.354825,3.07745,2.937025,3.114375,3.665925,3.741275,3.94875,0.0
50%,125.5,1.0,0.809655,0.700525,0.484355,317.0,316.0,0.006048,7.7e-05,0.001495,...,5.5863,7.06275,6.0774,4.77085,4.30045,4.74145,6.7257,7.33425,10.63725,1.0
75%,188.25,1.0,0.834315,0.754985,0.586515,384.25,383.25,0.007528,0.000171,0.00252,...,28.958075,29.83085,21.94405,13.188,10.87615,12.201325,21.92205,22.495175,61.125325,1.0
max,251.0,1.0,0.90766,0.85264,0.87123,907.0,905.0,0.012966,0.003483,0.02775,...,239.7888,203.3113,121.5429,102.207,85.5717,73.5322,62.0073,57.5443,156.4237,1.0


In [8]:
# -- distribution of target variable 
pd_dataset["class"].value_counts()

1    564
0    192
Name: class, dtype: int64

1 ➡️ Parkinson's Positive <br />
0 ➡️ Healthy

In [9]:
# -- grouping the data based on the target variable 
pd_dataset.groupby("class").mean()

Unnamed: 0_level_0,id,gender,PPE,DFA,RPDE,numPulses,numPeriodsPulses,meanPeriodPulses,stdDevPeriodPulses,locPctJitter,...,tqwt_kurtosisValue_dec_27,tqwt_kurtosisValue_dec_28,tqwt_kurtosisValue_dec_29,tqwt_kurtosisValue_dec_30,tqwt_kurtosisValue_dec_31,tqwt_kurtosisValue_dec_32,tqwt_kurtosisValue_dec_33,tqwt_kurtosisValue_dec_34,tqwt_kurtosisValue_dec_35,tqwt_kurtosisValue_dec_36
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,139.421875,0.359375,0.767434,0.663866,0.430807,372.244792,371.03125,0.005699,0.00029,0.001405,...,47.053772,45.754404,32.006881,21.592319,14.797452,10.726868,9.021939,9.619535,9.326543,15.07483
1,120.760638,0.569149,0.739084,0.712856,0.508888,307.539007,306.218085,0.006585,0.000414,0.002637,...,12.165554,19.593113,19.719812,17.565102,13.556976,12.726897,13.516916,16.562531,16.598373,37.066227


Data Pre-Processing

In [10]:
# -- separating the features and target 
X = pd_dataset.drop(columns=["class"])
y = pd_dataset["class"]

In [12]:
X

Unnamed: 0,id,gender,PPE,DFA,RPDE,numPulses,numPeriodsPulses,meanPeriodPulses,stdDevPeriodPulses,locPctJitter,...,tqwt_kurtosisValue_dec_27,tqwt_kurtosisValue_dec_28,tqwt_kurtosisValue_dec_29,tqwt_kurtosisValue_dec_30,tqwt_kurtosisValue_dec_31,tqwt_kurtosisValue_dec_32,tqwt_kurtosisValue_dec_33,tqwt_kurtosisValue_dec_34,tqwt_kurtosisValue_dec_35,tqwt_kurtosisValue_dec_36
0,0,1,0.85247,0.71826,0.57227,240,239,0.008064,0.000087,0.00218,...,1.5466,1.5620,2.6445,3.8686,4.2105,5.1221,4.4625,2.6202,3.0004,18.9405
1,0,1,0.76686,0.69481,0.53966,234,233,0.008258,0.000073,0.00195,...,1.5530,1.5589,3.6107,23.5155,14.1962,11.0261,9.5082,6.5245,6.3431,45.1780
2,0,1,0.85083,0.67604,0.58982,232,231,0.008340,0.000060,0.00176,...,1.5399,1.5643,2.3308,9.4959,10.7458,11.0177,4.8066,2.9199,3.1495,4.7666
3,1,0,0.41121,0.79672,0.59257,178,177,0.010858,0.000183,0.00419,...,6.9761,3.7805,3.5664,5.2558,14.0403,4.2235,4.6857,4.8460,6.2650,4.0603
4,1,0,0.32790,0.79782,0.53028,236,235,0.008162,0.002669,0.00535,...,7.8832,6.1727,5.8416,6.0805,5.7621,7.7817,11.6891,8.2103,5.0559,6.1164
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
751,250,0,0.80903,0.56355,0.28385,417,416,0.004627,0.000052,0.00064,...,3.4394,3.0706,3.0190,3.1212,2.4921,3.5844,3.5400,3.3805,3.2003,6.8671
752,250,0,0.16084,0.56499,0.59194,415,413,0.004550,0.000220,0.00143,...,19.6733,1.9704,1.7451,1.8277,2.4976,5.2981,4.2616,6.3042,10.9058,28.4170
753,251,0,0.88389,0.72335,0.46815,381,380,0.005069,0.000103,0.00076,...,137.3126,51.5607,44.4641,26.1586,6.3076,2.8601,2.5361,3.5377,3.3545,5.0424
754,251,0,0.83782,0.74890,0.49823,340,339,0.005679,0.000055,0.00092,...,5.4425,19.1607,12.8312,8.9434,2.2044,1.9496,1.9664,2.6801,2.8332,3.7131


In [14]:
y

0      1
1      1
2      1
3      1
4      1
      ..
751    0
752    0
753    0
754    0
755    0
Name: class, Length: 756, dtype: int64

Split the Data into Training and Test Data

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

In [16]:
print("X Dim \nX Original : {} \nX_train : {} \nX_test : {}".format(X.shape, X_train.shape, X_test.shape))

X Dim 
X Original : (756, 754) 
X_train : (604, 754) 
X_test : (152, 754)


In [17]:
print("y Dim \ny Original : {} \ny_train : {} \ny_test : {}".format(y.shape, y_train.shape, y_test.shape))

y Dim 
y Original : (756,) 
y_train : (604,) 
y_test : (152,)


Data Standardization

In [18]:
scaler = StandardScaler()

In [19]:
scaler.fit(X_train)

StandardScaler()

In [21]:
X_train = scaler.transform(X_train)

X_test = scaler.transform(X_test)



In [22]:
X_train

array([[ -1.70428383,   0.99008258,  -2.49131266, ...,  -0.94941002,
         -1.00777536,  -0.8654935 ],
       [ -1.71217334,  -3.00996128,  -0.94782195, ...,  -0.96731224,
         -1.03559569,  -0.88110488],
       [ -1.68981973,  -3.00996128, -17.30705548, ...,  -0.85790031,
         -1.00945612,  -0.92518718],
       ...,
       [ -1.69244956,   0.99008258,  -1.86792074, ...,  -0.89864343,
         -1.009217  ,  -0.88044041],
       [ -1.69038326,  -3.00996128,  -0.88748131, ...,  -0.79413827,
         -0.86234447,  -0.85442551],
       [ -1.71273687,   0.99008258,  -3.16774285, ...,  -0.8619434 ,
         -0.92899005,  -0.8682637 ]])

Model Training

In [23]:
model = svm.SVC(kernel="linear")

In [24]:
model.fit(X_train, y_train)

SVC(kernel='linear')

Model Evaluation

<small>Accuracy Score</small>

In [27]:
# -- accuracy score on training data 
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(y_train, X_train_prediction)
print("Accuracy Score of Training Data : {}".format(training_data_accuracy))

Accuracy Score of Training Data : 0.7450331125827815


In [28]:
# -- accuracy score on test data 
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(y_test, X_test_prediction)
print("Accuracy Score of Test Data : {}".format(test_data_accuracy))

Accuracy Score of Test Data : 0.75


Building a Predictive System

In [32]:
input_data = (0,1,0.85247,0.71826,0.57227,240,239,0.00806353,8.68E-05,0.00218,1.76E-05,0.00067,0.00129,0.002,0.05883,0.517,0.03011,0.03496,0.04828,0.09034,0.970805,0.036223,18.995,69.9974958,76.08804597,72.46551224,539.3427351,1031.84904,2447.162183,3655.054806,101.0922179,83.14744022,255.21483,396.6436311,0.77778,11.7245,2.8277,1.173,0.26512,0.083127,1200445.612,1.5347,3.0152,0.00046263,0.00025997,0.83621,209.6062,455.9654,1.2825,1.3305,51.6843,5.7784,23.261,0.2685,5.8573,0.20023,9.1817,11.4283,8.4781,2.4874,0.02292,0.55527,-1.6334,-1.5144,0.39725,0.61775,0.091821,0.54775,-0.0068247,-0.67274,-0.001013,-7.50E-05,-0.0033637,-0.00019208,0.0013567,-0.00089961,-0.003002,0.0015484,0.0051873,-0.00020542,-0.0047367,0.0024652,0.0025138,-0.0011875,-0.00017942,-0.00082784,-0.00049845,0.0013855,0.00017264,-0.00028585,0.00042833,-4.41E-05,-0.00033822,0.00043047,3.83E-05,-0.00035615,-7.62E-05,2.66E-05,0.34158,1.0279,0.65735,0.78036,0.30141,0.35418,0.19127,0.22892,0.3355,0.17101,0.28111,0.21511,0.16605,0.17762,0.049933,0.17226,0.089754,0.14627,0.040231,0.060808,0.030284,0.02883,0.042253,0.028115,0.04718,0.035775,0.020827,0.023057,0.016392,0.0788,0.043847,0.06641,0.021565,0.029594,0.015351,0.014642,0.019681,0.012829,0.021703,0.017089,0.010043,0.01213,99.9996,1.53E-07,8.86E-07,7.43E-06,2.10E-05,0.00010224,1.31E-05,4.06E-05,2.69E-05,1.04E-05,0.00020186,1.7166,5.5625,0.04894,-72.2192,-866.9373,-54.5335,-319.9468,-165.257,-42.9705,-2336.3644,-779.1377,-290.208,-78.3447,-10.7944,10.5089,-46.3816,-35.9347,-46.8515,-31.2923,-18.7456,0.0024859,0.02815,0.32455,1.5948,7.7828,2.3013,5.6628,3.5683,2.0302,44.7582,0.005012,0.046228,0.68635,3.5072,26.1213,3.7358,15.283,9.2028,4.7887,89.767,-34415952.86,-41505780.29,-54991273,-79779216.69,-129684181.7,-232253303.7,-459058057.8,-902118385.7,-1886558836,-3937753319,1116.2921,672.8105,445.4876,322.8542,262.2272,234.6861,231.969,227.8718,238.269,248.6663,572.1636,2023.8169,6477.7714,18923.0837,49173.8993,115701.9285,245931.5887,524800.1114,1049747.089,2101636.245,4181.8549,11092.1222,27917.7341,67151.7571,151463.1285,326874.1892,672089.5253,1381934.183,2777808.879,5547195.189,100,6.59E-09,3.86E-08,3.25E-07,9.05E-07,4.39E-06,5.71E-07,1.73E-06,1.15E-06,4.50E-07,8.71E-06,0.00033833,0.0016911,0.011168,0.026503,0.095478,0.01607,0.039222,0.029105,0.012719,0.14851,-1820.192,-871.5514,-446.3217,-261.8311,-182.3583,-210.734,-190.3697,-192.1166,-175.6679,-163.4186,1.62E-07,1.85E-06,2.15E-05,0.00010395,0.00050042,0.00015082,0.00036353,0.00022942,0.00013184,0.0029043,3.25E-07,3.04E-06,4.55E-05,0.00022608,0.0016888,0.00024467,0.00098152,0.0005974,0.00031087,0.0058211,-19278.0371,-25711.8622,-36938.137,-57264.6625,-98433.1856,-184901.7535,-381059.351,-776445.2329,-1676725.978,-3601122.613,414.6434,276.485,198.5803,153.8978,132.2489,124.1971,127.9812,130.3804,140.7776,151.1748,0.86121,3.0487,9.7825,28.5949,74.3411,174.9214,371.7296,793.068,1586.1824,3173.0448,6.299,16.7003,42.0762,101.0889,228.8489,493.8563,1015.7707,2091.946,4188.2456,8373.9278,1.11E-05,3.98E-06,4.92E-06,3.54E-06,7.19E-06,3.87E-05,0.00016363,0.0003761,0.00039246,0.0004061,0.00041458,0.00023899,0.0019048,0.012066,0.029439,0.037555,0.089055,0.17728,0.13122,0.051588,0.019785,0.05609,0.12525,0.057908,0.01007,0.057575,0.10807,0.032743,0.00025665,1.33E-05,6.75E-06,2.75E-06,2.04E-06,1.67E-06,1.11E-06,5.35E-05,0.20497,0.076626,0.091515,0.066105,0.12716,0.59587,2.1832,4.5851,4.6877,4.8635,4.884,2.818,18.8625,89.7525,174.692,189.5,360.7126,541.6719,422.8302,225.6461,99.3648,214.7895,337.8141,201.3841,53.7314,178.7287,230.2402,111.0346,2.2372,0.15901,0.083824,0.036109,0.027414,0.023151,0.015562,0.55112,-2037542.225,-1802551.782,-1485424.634,-1245979.172,-991083.9631,-745188.5092,-560498.2903,-432705.197,-352797.2473,-281439.269,-229943.2967,-201985.0408,-127736.2777,-83962.4084,-62201.7084,-52209.7866,-34671.0477,-23650.0278,-20345.6618,-18507.9939,-19238.3429,-11545.4036,-7369.2484,-7030.0263,-8442.5303,-4459.3054,-2800.8383,-3160.3491,-6399.2202,-7011.7391,-6228.1902,-5536.5796,-4540.083,-3726.2679,-3190.1752,-2528.9188,1.61E-07,7.56E-08,1.35E-07,9.89E-08,1.67E-07,1.13E-06,6.78E-06,2.44E-05,2.95E-05,3.58E-05,5.03E-05,2.33E-05,0.00012787,0.0017165,0.0069193,0.008643,0.023558,0.074782,0.064414,0.030886,0.010913,0.032924,0.13939,0.069247,0.00481,0.07756,0.25704,0.073123,0.00040142,4.43E-05,2.98E-05,1.30E-05,1.22E-05,1.26E-05,1.01E-05,1.44E-05,9.97E-07,4.14E-07,5.22E-07,6.11E-07,8.58E-07,2.53E-06,1.50E-05,4.83E-05,5.90E-05,5.87E-05,7.36E-05,5.90E-05,0.00018979,0.0012918,0.0065317,0.013296,0.02762,0.071125,0.063262,0.020787,0.01234,0.027799,0.080709,0.032363,0.0092097,0.024016,0.050924,0.017784,0.0004321,6.20E-05,4.36E-05,2.30E-05,1.77E-05,1.23E-05,9.74E-06,0.00011957,8.19E-07,2.37E-07,5.77E-20,2.84E-07,0,9.30E-07,-1.27E-07,-1.41E-18,9.16E-06,-1.71E-06,-3.10E-05,-9.93E-19,-5.56E-18,0.00056006,1.03E-05,0,0,-6.10E-17,-0.00038921,0,3.81E-17,6.15E-17,6.74E-17,0,0,-2.22E-17,-0.013372,0.056236,0.0004446,2.85E-19,0.00014027,-8.36E-05,-0.00016867,-0.00020162,-4.27E-05,-0.01432,-2.89E-21,-3.77E-21,1.70E-22,-9.18E-22,-3.98E-22,-2.86E-21,-3.52E-22,1.06E-21,1.23E-20,-1.36E-20,-1.63E-20,-2.63E-21,1.84E-21,1.34E-19,-2.65E-20,1.82E-21,9.60E-20,-7.50E-20,-1.06E-18,7.54E-21,-6.33E-20,-1.25E-18,-1.90E-18,-2.42E-19,9.38E-20,-7.21E-19,1.84E-18,-1.85E-18,-7.29E-21,2.04E-20,0,6.53E-20,-2.02E-20,7.96E-20,1.87E-20,-0.01461,0.00034139,0.00022424,0.00027317,0.00025393,0.00039625,0.0010075,0.0022682,0.003767,0.0042153,0.0046972,0.0051989,0.0043241,0.013373,0.036869,0.063087,0.078064,0.13169,0.20354,0.1918,0.13177,0.089375,0.16487,0.26991,0.20099,0.091819,0.24065,0.36097,0.21776,0.021112,0.0052758,0.0041096,0.0028762,0.0027106,0.0026957,0.0024093,0.0029658,-0.0096443,-0.005025,-0.0064911,-0.006014,-0.0085182,-0.0092406,-0.016506,-0.026006,-0.026273,-0.02416,-0.026321,-0.024286,-0.048924,-0.10839,-0.19432,-0.29603,-0.46639,-0.61778,-0.596,-0.32867,-0.26301,-0.44541,-0.66075,-0.45846,-0.2454,-0.45883,-0.65524,-0.36007,-0.060063,-0.017824,-0.013598,-0.012346,-0.009196,-0.0069326,-0.006569,-0.035389,0.01432,0.0066623,0.0064911,0.0074746,0.0085182,0.0094429,0.01609,0.026006,0.028117,0.025222,0.022796,0.024286,0.048924,0.10575,0.20486,0.29603,0.46639,0.61778,0.59309,0.32867,0.26301,0.44541,0.66075,0.45846,0.2454,0.45883,0.64375,0.39877,0.056849,0.017824,0.014505,0.0084926,0.009728,0.0067535,0.0060726,-0.0090458,0.86453,0.43538,1.11E-15,0.36745,0,-0.0043099,-0.0053825,-1.11E-17,-0.00082879,0.0016047,-0.00036202,1.01E-15,-5.85E-17,-0.0031426,0.0074332,1.51E-17,2.21E-17,-9.37E-18,0.0007342,-4.33E-18,-4.47E-17,-1.39E-17,-3.50E-17,-4.29E-19,-9.66E-18,-1.66E-17,0.00011457,-0.1029,0.030756,7.07E-17,-0.086911,-0.28507,-0.17362,0.071728,0.010352,-2.7303,66.5007,36.9934,26.3508,51.9577,21.5451,8.2488,8.0024,6.9635,6.9189,5.0622,4.6068,7.7218,2.7224,2.4171,2.9383,4.2077,3.1541,2.8531,2.7496,2.155,2.9457,2.1993,1.983,1.8314,2.0062,1.6058,1.5466,1.562,2.6445,3.8686,4.2105,5.1221,4.4625,2.6202,3.0004,18.9405,)

# -- change input data to a numpy array 
input_data_as_np_array = np.asarray(input_data)

# -- reshape the numpy array 
input_data_reshaped = input_data_as_np_array.reshape(1, -1)

# -- standardize the data 
std_data = scaler.transform(input_data_reshaped)

prediction = model.predict(std_data)

if(prediction[0] == 0):
    print("The Person is HEALTHY")
else:
    print("The Person has Parkinson's Disease")

[1]
The Person has Parkinson's Disease


