In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score

In [2]:
# Read the Parkinson's dataset from the CSV file into a pandas DataFrame.
pdata = pd.read_csv(filepath_or_buffer='parkinsons.csv')

In [3]:
# Inspect the class balance of the target column 'status'
# (whether the patient is diagnosed with the disease or not).
target_variable = "status"
pdata.loc[:, target_variable].value_counts()

status
1    147
0     48
Name: count, dtype: int64

In [4]:
# Split the frame into the feature matrix and the target vector.
# 'name' is dropped together with the target so that only the remaining
# columns are used as features.
X = pdata.drop(columns=['name', 'status'])
Y = pdata.loc[:, 'status']

In [7]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
# NOTE(review): the target is imbalanced (147 vs 48 in the value counts) and
# this split is not stratified -- consider passing stratify=Y.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# Standardize the features. The scaler is fitted on the training split only
# and then reused, unchanged, on the test split.
scaler = StandardScaler()
X_train_scale = scaler.fit_transform(X_train)
X_test_scale = scaler.transform(X_test)

# Resample the scaled training data with SMOTE; the test split is left untouched.
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train_scale, y_train)


In [10]:
# Show the shapes of the SMOTE-resampled training set, the original training
# split and the test split, in that order.
print(*(part.shape for part in (X_train_smote, X_train, X_test)))

(230, 22) (156, 22) (39, 22)


In [8]:
from sklearn.ensemble import RandomForestClassifier

# Fit a random forest on the SMOTE-resampled, scaled training data.
# random_state is fixed (same seed as the split and SMOTE above) so that the
# trained model -- and every metric computed from it -- is reproducible on a
# fresh "Restart & Run All"; without it each run builds a different forest.
ran_cls = RandomForestClassifier(random_state=42).fit(X_train_smote, y_train_smote)

In [11]:
# Predict labels for the held-out test rows (scaled with the training-fitted scaler).
testing_pred = ran_cls.predict(X=X_test_scale)

In [13]:
# Predict labels for the resampled training rows the forest was fitted on.
train_pred = ran_cls.predict(X=X_train_smote)

In [15]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Training accuracy: predictions on the SMOTE-resampled training set compared
# against that same set's labels.
accuracy_score(y_true=y_train_smote, y_pred=train_pred)

1.0

In [16]:
# Testing accuracy: predictions on the held-out split compared against its true labels.
accuracy_score(y_true=y_test, y_pred=testing_pred)

0.9487179487179487

In [17]:
# Confusion matrix on the held-out split (scikit-learn convention: rows are
# true labels, columns are predicted labels).
confusion_matrix(y_true=y_test, y_pred=testing_pred)

array([[ 5,  2],
       [ 0, 32]], dtype=int64)

In [22]:
# One sample to classify: 22 feature values, in the same order as the columns of X.
input_data = (197.07600,206.89600,192.05500,0.00289,0.00001,0.00166,0.00168,0.00498,0.01098,0.09700,0.00563,0.00680,0.00802,0.01689,0.00339,26.77500,0.422229,0.741367,-7.348300,0.177551,1.743867,0.085569)

# changing input data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the numpy array to a single row (1 sample, n features)
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# The scaler was fitted on a DataFrame, so it remembers the feature names.
# Passing a bare ndarray makes scikit-learn warn that "X does not have valid
# feature names" and skips the column check; labelling the sample with the
# training columns avoids the warning and raises if the value count is wrong.
input_frame = pd.DataFrame(input_data_reshaped, columns=X.columns)

# Standardize the sample with the statistics learned from the training split.
std_data = scaler.transform(input_frame)
print(std_data)

# The forest was fitted on plain arrays, so the scaled ndarray is passed as is.
prediction = ran_cls.predict(std_data)
print(prediction)


# Label 0 is reported as "no disease"; any other label as Parkinson's.
if prediction[0] == 0:
    print("The Person does not have Parkinsons Disease")
else:
    print("The Person has Parkinsons")


[[ 1.0321006   0.17037105  1.73032438 -0.61870304 -0.89559516 -0.49899799
  -0.5876078  -0.49908599 -0.96372964 -0.91708521 -0.9602884  -0.89978052
  -0.91586181 -0.96030781 -0.50372579  1.01924827 -0.69937416  0.352341
  -1.43646735 -0.56855036 -1.616282   -1.25435719]]
[0]
The Person does not have Parkinsons Disease




In [18]:
import pickle

In [19]:
# Persist the trained random forest to disk.
# NOTE(review): only the classifier is saved here; the fitted `scaler` is not,
# yet the model was trained on scaled features -- confirm it is stored elsewhere.
filename = 'randomforestmodel.sav'

# A context manager guarantees the file is flushed and closed even if
# pickling fails; a bare open() would leak the handle.
with open(filename, 'wb') as model_file:
    pickle.dump(ran_cls, model_file)

In [20]:
# Load the saved model back from disk.
# SECURITY: pickle.load can execute arbitrary code -- only load files that
# were produced by this notebook or another trusted source.
# The context manager closes the file handle as soon as loading finishes.
with open('randomforestmodel.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)