### Parkinson's prediction

#### Step 1: Importing the libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

#### Data Collection and Analysis


In [None]:
# Load the voice-measurement dataset from the working directory into a DataFrame.
pk = pd.read_csv(filepath_or_buffer="parkinsons.csv")

#### Dataset Description: Below is a brief description of the features present in the dataset:
- name: ASCII subject name and recording number	
- MDVP:Fo(Hz): Average vocal fundamental frequency
- MDVP:Fhi(Hz): Maximum vocal fundamental frequency	
- MDVP:Flo(Hz): Minimum vocal fundamental frequency
- Five measures of variation in Frequency
- MDVP:Jitter(%): Percentage of cycle-to-cycle variability of the period duration
- MDVP:Jitter(Abs): Absolute value of cycle-to-cycle variability of the period duration
- MDVP:RAP: Relative measure of the pitch disturbance
- MDVP:PPQ: Pitch perturbation quotient
- Jitter:DDP: Average absolute difference of differences between jitter cycles	
- Six measures of variation in amplitude
- MDVP:Shimmer: Variations in the voice amplitude
- MDVP:Shimmer(dB): Variations in the voice amplitude in dB
- Shimmer:APQ3: Three-point amplitude perturbation quotient measured against the average of the three amplitudes
- Shimmer:APQ5: Five-point amplitude perturbation quotient measured against the average of the five amplitudes
- MDVP:APQ: Amplitude perturbation quotient from MDVP
- Shimmer:DDA: Average absolute difference between the amplitudes of consecutive periods
- Two measures of ratio of noise to tonal components in the voice
- NHR: Noise-to-harmonics ratio
- HNR: Harmonics-to-noise Ratio
- status: Health status of the subject: 1 = Parkinson's, 0 = healthy
- Two nonlinear dynamical complexity measures
- RPDE: Recurrence period density entropy
- DFA: Signal fractal scaling exponent
- Three nonlinear measures of fundamental frequency variation
- spread1: discrete probability distribution of occurrence of relative semitone variations
- spread2: Nonlinear measure of fundamental frequency variation
- D2: correlation dimension
- PPE: Entropy of the discrete probability distribution of occurrence of relative semitone variations 

In [None]:
# Preview the first five rows to sanity-check the load.
pk.head(n=5)

In [None]:
# Swap the colon/parenthesis-laden source headers for plain identifiers.
# Columns that are not keys of the mapping keep their original names, and
# the rename is applied to `pk` in place.
pk.rename(
    mapper={
        # Fundamental-frequency measures
        "MDVP:Fo(Hz)": "Fundamental_Frequency",
        "MDVP:Fhi(Hz)": "Max_Frequency",
        "MDVP:Flo(Hz)": "Min_Frequency",
        # Frequency-variation (jitter) measures
        "MDVP:Jitter(%)": "Jitter_Percent",
        "MDVP:Jitter(Abs)": "Jitter_Abs",
        "MDVP:RAP": "RAP",
        "MDVP:PPQ": "PPQ",
        "Jitter:DDP": "Jitter_DDP",
        # Amplitude-variation (shimmer) measures
        "MDVP:Shimmer": "Shimmer",
        "MDVP:Shimmer(dB)": "Shimmer_dB",
        "Shimmer:APQ3": "Shimmer_APQ3",
        "Shimmer:APQ5": "Shimmer_APQ5",
        "MDVP:APQ": "MDVP_APQ",
        "Shimmer:DDA": "Shimmer_DDA",
    },
    axis="columns",
    inplace=True,
)

In [None]:
# Dimensions of the loaded dataset as a (rows, columns) tuple.
pk.shape

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
pk.describe()

In [None]:
# Print each column's dtype and non-null count, plus overall memory usage.
pk.info()

In [None]:
# Count missing values per column.
pk.isna().sum()

In [None]:
# Class balance: number of recordings per value of the 'status' label.
pk['status'].value_counts()

In [None]:
# Per-class feature means, one row per 'status' value.
# numeric_only=True skips the string 'name' column; since pandas 2.0 the
# default no longer drops non-numeric columns, so a bare mean() raises
# TypeError on this frame.
pk.groupby('status').mean(numeric_only=True)

In [None]:
# Features: every column except the recording identifier and the label.
X = pk.drop(columns=["name", "status"])
# Target: the 'status' label column.
Y = pk["status"]

In [None]:
# Display the feature matrix.
X

In [None]:
# Display the target labels.
Y

In [None]:
# Hold out 20% of the rows for testing. Stratifying on Y keeps the class
# proportions the same in both splits, and the fixed seed makes the split
# reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    stratify=Y,
    random_state=0,
)

In [None]:
# Show each split's shape next to the shape of the full set it came from.
print(f"{X_train.shape} {X.shape}")
print(f"{Y_train.shape} {Y.shape}")
print(f"{X_test.shape} {X.shape}")
print(f"{Y_test.shape} {Y.shape}")

In [None]:
# Scaler that standardises each feature (subtract the mean, divide by the standard deviation).
scaler= StandardScaler()

In [None]:
# Learn the scaling statistics from the training split only, so that nothing
# from the test split influences the preprocessing.
scaler.fit(X_train)

In [None]:
# Apply the fitted scaler to both splits, overwriting the unscaled versions.
X_train, X_test = (
    scaler.transform(X_train),
    scaler.transform(X_test),
)



In [None]:
# Display the training features after scaling (the output of scaler.transform).
X_train

In [None]:
# Support vector classifier with a linear decision boundary.
model = svm.SVC(kernel="linear")

# Train on the scaled training split; as the cell's last expression, the
# fitted estimator is also displayed.
model.fit(X=X_train, y=Y_train)

In [None]:
# Predict on the data the model was trained on and score against the true labels.
X_train_prediction = model.predict(X_train)
# accuracy_score is documented as (y_true, y_pred). The value is the same
# either way for accuracy, but the documented order is the one that stays
# correct if this is ever swapped for an asymmetric metric.
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

In [None]:
# Fraction of training samples classified correctly.
training_data_accuracy

In [None]:
# Predict on the held-out split and score against the true labels.
X_test_prediction = model.predict(X_test)
# accuracy_score is documented as (y_true, y_pred). The value is the same
# either way for accuracy, but the documented order is the one that stays
# correct if this is ever swapped for an asymmetric metric.
testing_data_accuracy = accuracy_score(Y_test, X_test_prediction)

In [None]:
# Fraction of held-out test samples classified correctly.
testing_data_accuracy

In [None]:
# One voice recording to classify. The values must match the columns of X
# (the dataset without 'name' and 'status') in both number and order.
input_data = (122.40000,148.65000,113.81900,0.00968,0.00008,0.00465,0.00696,0.01394,0.06134,0.62600,0.03134,0.04518,0.04368,0.09403,0.01929,19.08500,0.458359,0.819521,-4.075192,0.335590,2.486855,0.368674)

# changing input data to numpy array
input_numpy = np.asarray(input_data)

# Reshape the array as we are predicting for one instance
input_reshape = input_numpy.reshape(1, -1)

# Label the single row with the training column names. The scaler was fitted
# on a DataFrame, so a bare array would make scikit-learn warn that X lacks
# valid feature names. The DataFrame constructor also raises ValueError when
# the number of values does not match the number of feature columns.
input_frame = pd.DataFrame(input_reshape, columns=X.columns)

# Standardise the sample with the statistics learned from the training split.
std_data = scaler.transform(input_frame)

print(std_data)

# The classifier was trained on the scaler's output, so it is given the same here.
prediction = model.predict(std_data)
print(prediction)

In [None]:
import pickle

# Persist the fitted artefacts so they can be reloaded for inference.
# The `with` blocks guarantee each file is flushed and closed even if
# pickling fails; the original left both handles open.

# Save trained model
with open('parkinson_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

# Save scaler
with open('parkinson_scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)
