In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

In [2]:
# Load the Parkinson's dataset from a CSV file into a pandas DataFrame.
# The path is written as a raw string: in a regular literal "\M", "\d" and "\p"
# are invalid escape sequences (SyntaxWarning on Python 3.12+), even though they
# currently resolve to the same characters.
# NOTE(review): this is an absolute, machine-specific location; adjust it when
# running the notebook on another machine.
parkinsons_data = pd.read_csv(r'E:\Multiple disease prediction system\dataset-20230622T140539Z-001\parkinsons (13).csv')

In [3]:
# preview the first 5 rows of the dataframe
parkinsons_data.head(n=5)

Unnamed: 0,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),NHR,HNR,status,RPDE,DFA,spread1,spread2
0,119.992,157.302,74.997,0.02211,21.033,1,0.414783,0.815285,-4.813031,0.266482
1,122.4,148.65,113.819,0.01929,19.085,1,0.458359,0.819521,-4.075192,0.33559
2,116.682,131.111,111.555,0.01309,20.651,1,0.429895,0.825288,-4.443179,0.311173
3,116.676,137.871,111.366,0.01353,20.644,1,0.434969,0.819235,-4.117501,0.334147
4,116.014,141.781,110.655,0.01767,19.649,1,0.417356,0.823484,-3.747787,0.234513


In [4]:
# size of the dataframe as a (rows, columns) tuple
parkinsons_data.shape

(195, 10)

In [5]:
# structural summary of the dataframe: index range, column names,
# non-null counts, dtypes and memory usage
parkinsons_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195 entries, 0 to 194
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   MDVP:Fo(Hz)   195 non-null    float64
 1   MDVP:Fhi(Hz)  195 non-null    float64
 2   MDVP:Flo(Hz)  195 non-null    float64
 3   NHR           195 non-null    float64
 4   HNR           195 non-null    float64
 5   status        195 non-null    int64  
 6   RPDE          195 non-null    float64
 7   DFA           195 non-null    float64
 8   spread1       195 non-null    float64
 9   spread2       195 non-null    float64
dtypes: float64(9), int64(1)
memory usage: 15.4 KB


In [6]:
# count the missing values in each column
parkinsons_data.isna().sum()

MDVP:Fo(Hz)     0
MDVP:Fhi(Hz)    0
MDVP:Flo(Hz)    0
NHR             0
HNR             0
status          0
RPDE            0
DFA             0
spread1         0
spread2         0
dtype: int64

In [7]:
# descriptive statistics per column: count, mean, std, min, quartiles and max
parkinsons_data.describe()

Unnamed: 0,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),NHR,HNR,status,RPDE,DFA,spread1,spread2
count,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0
mean,154.228641,197.104918,116.324631,0.024847,21.885974,0.753846,0.498536,0.718099,-5.684397,0.22651
std,41.390065,91.491548,43.521413,0.040418,4.425764,0.431878,0.103942,0.055336,1.090208,0.083406
min,88.333,102.145,65.476,0.00065,8.441,0.0,0.25657,0.574282,-7.964984,0.006274
25%,117.572,134.8625,84.291,0.005925,19.198,1.0,0.421306,0.674758,-6.450096,0.174351
50%,148.79,175.829,104.315,0.01166,22.085,1.0,0.495954,0.722254,-5.720868,0.218885
75%,182.769,224.2055,140.0185,0.02564,25.0755,1.0,0.587562,0.761881,-5.046192,0.279234
max,260.105,592.03,239.17,0.31482,33.047,1.0,0.685151,0.825288,-2.434031,0.450493


In [8]:
# distribution of the target variable: number of rows per 'status' value
# (in the recorded output the classes are imbalanced: 147 rows with 1 vs 48 with 0)
parkinsons_data['status'].value_counts()

1    147
0     48
Name: status, dtype: int64

In [9]:
# separate the label column from the features:
# Y is the 'status' column, X is every remaining column
target_column = 'status'
Y = parkinsons_data[target_column]
X = parkinsons_data.drop(columns=[target_column])

In [10]:
# hold out 20% of the rows as a test set; the fixed random_state makes the split repeatable
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)

In [11]:
# sanity-check the split: shapes of the full, training and test feature matrices
print(*(features.shape for features in (X, X_train, X_test)))

(195, 9) (156, 9) (39, 9)


In [12]:
# support vector classifier with a linear kernel; every other hyperparameter
# is left at the library default
model = svm.SVC(kernel='linear')

In [13]:
# fit the SVM classifier on the training split (features X_train, labels Y_train)
model.fit(X_train, Y_train)

In [14]:
# accuracy of the fitted model on the rows it was trained on
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(
    y_true=Y_train,
    y_pred=X_train_prediction,
)
print('Accuracy score of training data : ', training_data_accuracy)

Accuracy score of training data :  0.8846153846153846


In [15]:
# accuracy of the fitted model on the held-out test data
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(
    y_true=Y_test,
    y_pred=X_test_prediction,
)
print('Accuracy score of test data : ', test_data_accuracy)

Accuracy score of test data :  0.8717948717948718


In [16]:
# One sample to classify; the values follow the order of the feature columns.
input_data = (119.992,157.302,74.997,0.02211,21.033,0.414783,0.815285,-4.813031,0.266482)
# changing input data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the numpy array to a single row (1 sample x n features)
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)

# The model was fitted on a DataFrame, so predict on a DataFrame that carries the
# same column names. Passing the bare array makes scikit-learn warn that
# "X does not have valid feature names".
input_data_frame = pd.DataFrame(input_data_reshaped, columns=X_train.columns)

prediction = model.predict(input_data_frame)
print(prediction)

# status 0 means no Parkinson's disease; any other label is reported as positive
if (prediction[0] == 0):
  print("The Person does not have Parkinsons Disease")

else:
  print("The Person has Parkinsons")


[1]
The Person has Parkinsons




In [17]:
import pickle

In [18]:
# Persist the trained model to disk with pickle.
# The context manager closes (and flushes) the file as soon as the dump finishes;
# a bare open() call would leave the handle open until garbage collection.
filename = 'parkinsons_model.sav'
with open(filename, 'wb') as model_file:
  pickle.dump(model, model_file)

In [19]:
# loading the saved model
# The context manager closes the file handle right after reading.
# NOTE: pickle.load can execute arbitrary code during deserialization; only load
# model files that you created yourself or otherwise trust.
with open('parkinsons_model.sav', 'rb') as model_file:
  loaded_model = pickle.load(model_file)

In [20]:
# list the feature column names of X, one per line, in column order
for column in X.columns.tolist():
  print(column)

MDVP:Fo(Hz)
MDVP:Fhi(Hz)
MDVP:Flo(Hz)
NHR
HNR
RPDE
DFA
spread1
spread2
