# **PARKINSONS DISEASE PREDICTION**

### **Importing the Dependencies**

In [40]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
import pickle


### **Data Collection and Processing**

In [42]:
# Data Collection & Analysis
# Loading the data from csv file to a Pandas DataFrame
parkinsons_data = pd.read_csv('parkinsons.csv')

# Displaying the first 5 rows of the dataframe
print(parkinsons_data.head())

# Number of rows and columns in the dataframe
print(f"Shape of dataset: {parkinsons_data.shape}")

# Getting more information about the dataset
print(parkinsons_data.info())

# Checking for missing values in each column
print(parkinsons_data.isnull().sum())

# Getting some statistical measures about the data
print(parkinsons_data.describe())

# Distribution of target variable
print(parkinsons_data['status'].value_counts())




             name  MDVP:Fo(Hz)  MDVP:Fhi(Hz)  MDVP:Flo(Hz)  MDVP:Jitter(%)  \
0  phon_R01_S01_1      119.992       157.302        74.997         0.00784   
1  phon_R01_S01_2      122.400       148.650       113.819         0.00968   
2  phon_R01_S01_3      116.682       131.111       111.555         0.01050   
3  phon_R01_S01_4      116.676       137.871       111.366         0.00997   
4  phon_R01_S01_5      116.014       141.781       110.655         0.01284   

   MDVP:Jitter(Abs)  MDVP:RAP  MDVP:PPQ  Jitter:DDP  MDVP:Shimmer  ...  \
0           0.00007   0.00370   0.00554     0.01109       0.04374  ...   
1           0.00008   0.00465   0.00696     0.01394       0.06134  ...   
2           0.00009   0.00544   0.00781     0.01633       0.05233  ...   
3           0.00009   0.00502   0.00698     0.01505       0.05492  ...   
4           0.00011   0.00655   0.00908     0.01966       0.06425  ...   

   Shimmer:DDA      NHR     HNR  status      RPDE       DFA   spread1  \
0      0.0654

### **Splitting the Features and Target**

In [44]:
X = parkinsons_data.drop(columns=['name', 'status'], axis=1)
Y = parkinsons_data['status']
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)
print(f"Training data shape: {X_train.shape}")
print(f"Test data shape: {X_test.shape}")



Training data shape: (156, 22)
Test data shape: (39, 22)


### **Splitting the Data into Training Data & Test Data**

In [45]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### **Model Training and Evaluation**

In [46]:
models = {
    'Support Vector Machine': svm.SVC(kernel='linear', probability=True),
    'Random Forest': RandomForestClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier()
}

results = {}

In [47]:
for model_name, model in models.items():
    # Training the model
    model.fit(X_train, Y_train)

    # Predicting on training and test data
    train_predictions = model.predict(X_train)
    test_predictions = model.predict(X_test)
    test_probabilities = model.predict_proba(X_test)[:, 1]


In [49]:
 # Calculating metrics
accuracy = accuracy_score(Y_test, test_predictions)
precision = precision_score(Y_test, test_predictions)
recall = recall_score(Y_test, test_predictions)
f1 = f1_score(Y_test, test_predictions)
roc_auc = roc_auc_score(Y_test, test_probabilities)

# Storing results
results[model_name] = {
'Accuracy': accuracy,
'Precision': precision,
'Recall': recall,
'F1 Score': f1,
'ROC AUC': roc_auc
}

# Printing classification report
print(f"\n{model_name} Classification Report:")
print(classification_report(Y_test, test_predictions))

# Displaying the comparison of models
results_df = pd.DataFrame(results).T
print("\nModel Comparison:")
print(results_df)



Decision Tree Classification Report:
              precision    recall  f1-score   support

           0       0.44      0.88      0.58         8
           1       0.96      0.71      0.81        31

    accuracy                           0.74        39
   macro avg       0.70      0.79      0.70        39
weighted avg       0.85      0.74      0.77        39


Model Comparison:
               Accuracy  F1 Score  Precision   ROC AUC    Recall
Decision Tree   0.74359  0.814815   0.956522  0.792339  0.709677


In [51]:
# Model Training & Evaluation
models = {
    'Support Vector Machine': svm.SVC(kernel='linear', probability=True),
    'Random Forest': RandomForestClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier()
}

results = {}

for model_name, model in models.items():
    # Training the model
    model.fit(X_train, Y_train)

    # Predicting on test data
    test_predictions = model.predict(X_test)
    test_probabilities = model.predict_proba(X_test)[:, 1]

    # Calculating metrics
    accuracy = accuracy_score(Y_test, test_predictions)
    precision = precision_score(Y_test, test_predictions)
    recall = recall_score(Y_test, test_predictions)
    f1 = f1_score(Y_test, test_predictions)
    roc_auc = roc_auc_score(Y_test, test_probabilities)

    # Storing results
    results[model_name] = {
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1,
        'ROC AUC': roc_auc
    }


# Displaying the comparison of models
results_df = pd.DataFrame(results).T
print("\nModel Comparison:")
print(results_df)



Model Comparison:
                        Accuracy  Precision    Recall  F1 Score   ROC AUC
Support Vector Machine  0.871795   0.906250  0.935484  0.920635  0.818548
Random Forest           0.820513   0.900000  0.870968  0.885246  0.915323
Gradient Boosting       0.820513   0.928571  0.838710  0.881356  0.919355
K-Nearest Neighbors     0.769231   0.923077  0.774194  0.842105  0.897177
Logistic Regression     0.820513   0.900000  0.870968  0.885246  0.834677
Decision Tree           0.769231   0.958333  0.741935  0.836364  0.808468


## MODELS COMPARISON

In [54]:
# Displaying the comparison of models
results_df = pd.DataFrame(results).T
print("\nModel Comparison:")
print(results_df)


Model Comparison:
                        Accuracy  Precision    Recall  F1 Score   ROC AUC
Support Vector Machine  0.906250   0.906250  0.935484  0.920635  0.818548
Random Forest           0.820513   0.900000  0.870968  0.885246  0.915323
Gradient Boosting       0.820513   0.928571  0.838710  0.881356  0.919355
K-Nearest Neighbors     0.769231   0.923077  0.774194  0.842105  0.897177
Logistic Regression     0.820513   0.900000  0.870968  0.885246  0.834677
Decision Tree           0.769231   0.958333  0.741935  0.836364  0.808468


### **Building a Predictive System**

In [52]:
input_data = (197.07600, 206.89600, 192.05500, 0.00289, 0.00001, 0.00166, 0.00168, 0.00498, 0.01098, 0.09700, 0.00563, 0.00680, 0.00802, 0.01689, 0.00339, 26.77500, 0.422229, 0.741367, -7.348300, 0.177551, 1.743867, 0.085569)

# Changing input data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# Reshaping the numpy array
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# Using the best model (replace 'Support Vector Machine' with the best model based on your results)
best_model_name = 'Support Vector Machine'
best_model = models[best_model_name]

# Making a prediction
prediction = best_model.predict(input_data_reshaped)
print("\nPrediction for input data with", best_model_name, ":")
if prediction[0] == 0:
    print("The Person does not have Parkinson's Disease")
else:
    print("The Person has Parkinson's Disease")



Prediction for input data with Support Vector Machine :
The Person does not have Parkinson's Disease


In [53]:
# Save the trained model
filename = 'parkinsons_model.sav'
pickle.dump(model, open(filename, 'wb'))

# Load the saved model
loaded_model = pickle.load(open('parkinsons_model.sav', 'rb'))
print("\nLoaded Model Columns:")
for column in X.columns:
    print(column)



Loaded Model Columns:
MDVP:Fo(Hz)
MDVP:Fhi(Hz)
MDVP:Flo(Hz)
MDVP:Jitter(%)
MDVP:Jitter(Abs)
MDVP:RAP
MDVP:PPQ
Jitter:DDP
MDVP:Shimmer
MDVP:Shimmer(dB)
Shimmer:APQ3
Shimmer:APQ5
MDVP:APQ
Shimmer:DDA
NHR
HNR
RPDE
DFA
spread1
spread2
D2
PPE


# **Conclusion**
This script covers the entire workflow from data loading to model evaluation and comparison. You can further refine the evaluation by using cross-validation or other techniques, but this script should give you a comprehensive start for your analysis.