In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from google.colab import drive
drive.mount('/gdrive')
# pd.set_option('display.max_rows', None)
# pd.set_option('display.max_columns', None)

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.preprocessing import StandardScaler

In [3]:
data = pd.read_csv('/gdrive/My Drive/Parkinson_disease.csv', delimiter=",")
data.head()

Unnamed: 0,name,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,...,Shimmer:DDA,NHR,HNR,status,RPDE,DFA,spread1,spread2,D2,PPE
0,phon_R01_S01_1,119.992,157.302,74.997,0.00784,7e-05,0.0037,0.00554,0.01109,0.04374,...,0.06545,0.02211,21.033,1,0.414783,0.815285,-4.813031,0.266482,2.301442,0.284654
1,phon_R01_S01_2,122.4,148.65,113.819,0.00968,8e-05,0.00465,0.00696,0.01394,0.06134,...,0.09403,0.01929,19.085,1,0.458359,0.819521,-4.075192,0.33559,2.486855,0.368674
2,phon_R01_S01_3,116.682,131.111,111.555,0.0105,9e-05,0.00544,0.00781,0.01633,0.05233,...,0.0827,0.01309,20.651,1,0.429895,0.825288,-4.443179,0.311173,2.342259,0.332634
3,phon_R01_S01_4,116.676,137.871,111.366,0.00997,9e-05,0.00502,0.00698,0.01505,0.05492,...,0.08771,0.01353,20.644,1,0.434969,0.819235,-4.117501,0.334147,2.405554,0.368975
4,phon_R01_S01_5,116.014,141.781,110.655,0.01284,0.00011,0.00655,0.00908,0.01966,0.06425,...,0.1047,0.01767,19.649,1,0.417356,0.823484,-3.747787,0.234513,2.33218,0.410335


In [4]:
data = data.drop("name", axis=1)
data.head()

Unnamed: 0,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,MDVP:Shimmer(dB),...,Shimmer:DDA,NHR,HNR,status,RPDE,DFA,spread1,spread2,D2,PPE
0,119.992,157.302,74.997,0.00784,7e-05,0.0037,0.00554,0.01109,0.04374,0.426,...,0.06545,0.02211,21.033,1,0.414783,0.815285,-4.813031,0.266482,2.301442,0.284654
1,122.4,148.65,113.819,0.00968,8e-05,0.00465,0.00696,0.01394,0.06134,0.626,...,0.09403,0.01929,19.085,1,0.458359,0.819521,-4.075192,0.33559,2.486855,0.368674
2,116.682,131.111,111.555,0.0105,9e-05,0.00544,0.00781,0.01633,0.05233,0.482,...,0.0827,0.01309,20.651,1,0.429895,0.825288,-4.443179,0.311173,2.342259,0.332634
3,116.676,137.871,111.366,0.00997,9e-05,0.00502,0.00698,0.01505,0.05492,0.517,...,0.08771,0.01353,20.644,1,0.434969,0.819235,-4.117501,0.334147,2.405554,0.368975
4,116.014,141.781,110.655,0.01284,0.00011,0.00655,0.00908,0.01966,0.06425,0.584,...,0.1047,0.01767,19.649,1,0.417356,0.823484,-3.747787,0.234513,2.33218,0.410335


In [5]:
data.shape

(195, 23)

In [6]:
X = data.drop('status', axis=1)
y = data['status']

In [7]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [8]:
X_train = np.expand_dims(X_train, axis=2)
X_test = np.expand_dims(X_test, axis=2)

In [9]:
!pip install -U keras-tuner



In [10]:
import keras_tuner as kt

In [11]:
def create_model(optimizer='rmsprop', dropout_rate=0.5, filters=32, kernel_size=3, pool_size=2):
    model = Sequential([
        Conv1D(32, kernel_size, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])),
        MaxPooling1D(pool_size),
        Conv1D(32*2, kernel_size, activation='relu'),
        MaxPooling1D(pool_size),
        Conv1D(32*4, kernel_size, activation='relu'),
        MaxPooling1D(pool_size),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid')  # Binary classification
    ])
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

In [12]:
input_shape=(X_train.shape[1], X_train.shape[2])

In [13]:
len(input_shape)
print(input_shape)

(22, 1)


In [14]:
model = create_model()
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 20, 32)            128       
                                                                 
 max_pooling1d (MaxPooling1  (None, 10, 32)            0         
 D)                                                              
                                                                 
 conv1d_1 (Conv1D)           (None, 8, 64)             6208      
                                                                 
 max_pooling1d_1 (MaxPoolin  (None, 4, 64)             0         
 g1D)                                                            
                                                                 
 conv1d_2 (Conv1D)           (None, 2, 128)            24704     
                                                                 
 max_pooling1d_2 (MaxPoolin  (None, 1, 128)            0

In [15]:
model.fit(X_train, y_train)
y_pred=model.predict(X_test)



In [16]:
X_train.shape

(156, 22, 1)

In [17]:
X_test.shape

(39, 22, 1)

In [18]:
# Evaluate the best model on the test data
test_loss, test_accuracy = model.evaluate(X_test, y_test)

# Print the accuracy
print("Test Accuracy:", test_accuracy)


Test Accuracy: 0.8205128312110901


In [19]:
newinput = np.array([[274.688, 240.005, 274.688, 240.005, 174.287, 0.01360, 0.01008, 0.00624, 0.00564, 0.01873, 1.02308, 0.256, 0.51268, 0.01365, 0.81667, 0.63804, 0.10715, 6.883, 0.607567, 0.158453, 3.679772, 0.131728]])
newinput = newinput.reshape((1, 22, 1))

In [20]:
output = model.predict(newinput)

if output == 1.0:
  output = 1
  print("status:",output)
else:
  print("status:",output[0])

if output > 0.9:
    print("Result - Positive")

else:
    print("Result - Negative")

status: 1
Result - Positive


In [21]:
custom_input = np.array([[60.0, 20.0, 60.0, 20.0, 8.0, 0.002, 0.0015, 0.001, 0.001, 0.006, 0.5, 0.03, 0.07, 0.002, 0.4, 0.2, 0.008, 2.5, 0.3, 0.05, 1.0, 0.008]])
custom_input = custom_input.reshape((1, 22, 1))

In [22]:
output = model.predict(custom_input)
if output == 1.0:
  output = 1
  print("status:",output)
else:
  print("status:",output[0])

if output > 0.9:
    print("Result - Positive")

elif 0.7 < output < 0.9:
    print("On borderline. Needs to be checked throughly")

else:
    print("Result - Negative")

status: [0.88421]
On borderline. Needs to be checked throughly


# **Hyperparameter Tuning**

*   We find which parameters are best for our model.

*   Finding which features are important and how they may impact.

In [23]:
data.corr()['status']  # for feature importance

MDVP:Fo(Hz)        -0.383535
MDVP:Fhi(Hz)       -0.166136
MDVP:Flo(Hz)       -0.380200
MDVP:Jitter(%)      0.278220
MDVP:Jitter(Abs)    0.338653
MDVP:RAP            0.266668
MDVP:PPQ            0.288698
Jitter:DDP          0.266646
MDVP:Shimmer        0.367430
MDVP:Shimmer(dB)    0.350697
Shimmer:APQ3        0.347617
Shimmer:APQ5        0.351148
MDVP:APQ            0.364316
Shimmer:DDA         0.347608
NHR                 0.189429
HNR                -0.361515
status              1.000000
RPDE                0.308567
DFA                 0.231739
spread1             0.564838
spread2             0.454842
D2                  0.340232
PPE                 0.531039
Name: status, dtype: float64

In [24]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier()
rf_model.fit(X, y)
feature_importances = pd.DataFrame(rf_model.feature_importances_, index=X.columns, columns=['Importance'])

feature_importances = feature_importances.sort_values(by='Importance', ascending=False)

print(feature_importances)

                  Importance
PPE                 0.125090
spread1             0.117491
MDVP:Fo(Hz)         0.107116
spread2             0.067706
MDVP:Fhi(Hz)        0.056014
MDVP:Flo(Hz)        0.050057
NHR                 0.045607
D2                  0.045466
MDVP:APQ            0.037110
Jitter:DDP          0.036004
Shimmer:APQ5        0.033772
MDVP:PPQ            0.032483
RPDE                0.031398
MDVP:RAP            0.031316
HNR                 0.028700
Shimmer:DDA         0.027463
DFA                 0.024296
MDVP:Shimmer        0.023956
Shimmer:APQ3        0.022806
MDVP:Jitter(%)      0.019159
MDVP:Shimmer(dB)    0.018804
MDVP:Jitter(Abs)    0.018184


In [25]:
from sklearn.feature_selection import SelectKBest, f_classif

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Initialize SelectKBest with f_classif scoring function
selector = SelectKBest(score_func=f_classif)

# Fit selector to training data
selector.fit(x_train, y_train)

# Get indices of the selected features
selected_indices = selector.get_support(indices=True)

# Get the names of the selected features
selected_features = X.columns[selected_indices]
print(selected_features)

# Select the best features from the original dataset
x_train_selected = x_train[selected_features]
x_test_selected = x_test[selected_features]

# Train a model with the selected features
model = RandomForestClassifier()  # Example classifier
model.fit(x_train_selected, y_train)

# Evaluate the model
accuracy = model.score(x_test_selected, y_test)
print("Accuracy with selected features:", accuracy)

Index(['MDVP:Fo(Hz)', 'MDVP:Flo(Hz)', 'MDVP:Shimmer', 'Shimmer:APQ5',
       'MDVP:APQ', 'HNR', 'spread1', 'spread2', 'D2', 'PPE'],
      dtype='object')
Accuracy with selected features: 0.9487179487179487


In [26]:
X_train.shape

(156, 22, 1)

In [27]:
X_test.shape

(39, 22, 1)

### None of the columns are skewed.

In [28]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense

In [29]:
optimizer='rmsprop'
dropout_rate=0.5
filters=32
kernel_size=3
pool_size=2
model_next = Sequential([
        Conv1D(filters, kernel_size, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])),
        MaxPooling1D(pool_size),
        Conv1D(filters*2, kernel_size, activation='relu'),
        MaxPooling1D(pool_size),
        Conv1D(filters*4, kernel_size, activation='relu'),
        MaxPooling1D(pool_size),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid')  # Binary classification
    ])

In [30]:
# Define the hyperparameters search space
def build_model(hp):
    optimizer = hp.Choice('optimizer', values=['rmsprop', 'adam', 'sgd', 'adadelta', 'adagrad', 'adamax', 'nadam', 'ftrl'])
    model_next.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return model_next

# Instantiate the tuner
tuner = kt.Hyperband(build_model, objective='val_accuracy', max_epochs=100, factor=3, directory='my_dir', project_name='my_project')

tuner.search(X_train, y_train, epochs=100, validation_data=(X_test, y_test))

Trial 8 Complete [00h 00m 04s]
val_accuracy: 0.8205128312110901

Best val_accuracy So Far: 0.8974359035491943
Total elapsed time: 00h 00m 21s


In [31]:
best_model = tuner.get_best_models(num_models=1)[0]

best_model.fit(X_train, y_train, batch_size=32, epochs=100, initial_epoch=7, validation_data=(X_test, y_test))

Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
E

<keras.src.callbacks.History at 0x7863edd5e890>

In [32]:
best_hyperparameters = tuner.get_best_hyperparameters()[0]
print("Best Hyperparameters:")
print(best_hyperparameters.values)

Best Hyperparameters:
{'optimizer': 'rmsprop', 'tuner/epochs': 2, 'tuner/initial_epoch': 0, 'tuner/bracket': 4, 'tuner/round': 0}


In [33]:
# Evaluate the best model on the test data
test_loss, test_accuracy = best_model.evaluate(X_test, y_test)

# Print the accuracy
print("Test Accuracy:", test_accuracy)

Test Accuracy: 0.9230769276618958


In [34]:
custom_input = np.array([[60.0, 20.0, 60.0, 20.0, 8.0, 0.002, 0.0015, 0.001, 0.001, 0.006, 0.5, 0.03, 0.07, 0.002, 0.4, 0.2, 0.008, 2.5, 0.3, 0.05, 1.0, 0.008]])
custom_input = custom_input + 16.2
print(custom_input)

[[76.2    36.2    76.2    36.2    24.2    16.202  16.2015 16.201  16.201
  16.206  16.7    16.23   16.27   16.202  16.6    16.4    16.208  18.7
  16.5    16.25   17.2    16.208 ]]


In [35]:
custom_input = custom_input.reshape((1, 22, 1))

In [36]:
output = best_model.predict(custom_input)
print("status:",output[0])

if output > 0.9:
    print("Result - Positive")

elif 0.7 < output < 0.9:
    print("On borderline. Needs to be checked throughly")

else:
    print("Result - Negative")

status: [1.]
Result - Positive


In [37]:
newinput = np.array([[274.688, 240.005, 274.688, 240.005, 174.287, 0.01360, 0.01008, 0.00624, 0.00564, 0.01873, 1.02308, 0.256, 0.51268, 0.01365, 0.81667, 0.63804, 0.10715, 6.883, 0.607567, 0.158453, 3.679772, 0.131728]])
newinput = newinput.reshape((1, 22, 1))

In [38]:
output = best_model.predict(newinput)
print("status:",output[0])

if output > 0.9:
    print("Result - Positive")

elif 0.7 < output < 0.9:
    print("On borderline. Needs to be checked throughly")

else:
    print("Result - Negative")

status: [0.]
Result - Negative
