# Classifications with ML

In [25]:
# Import dependencies
import pandas as pd

# Read the Parkinson's CSV file into a pandas dataframe
import os

# Build the path with os.path.join so the separator is right on every OS.
# The previous literal used a backslash, which is an invalid string escape
# ("\p") and only resolves to the intended file on Windows.
path = os.path.join('Data', 'parkinsons.data')

# Using pd.read_csv() method to read the data into a pandas dataframe
classification_df = pd.read_csv(path)

# Print the first 5 rows of the dataframe
print(classification_df.head())

             name  MDVP:Fo(Hz)  MDVP:Fhi(Hz)  MDVP:Flo(Hz)  MDVP:Jitter(%)  \
0  phon_R01_S01_1      119.992       157.302        74.997         0.00784   
1  phon_R01_S01_2      122.400       148.650       113.819         0.00968   
2  phon_R01_S01_3      116.682       131.111       111.555         0.01050   
3  phon_R01_S01_4      116.676       137.871       111.366         0.00997   
4  phon_R01_S01_5      116.014       141.781       110.655         0.01284   

   MDVP:Jitter(Abs)  MDVP:RAP  MDVP:PPQ  Jitter:DDP  MDVP:Shimmer  ...  \
0           0.00007   0.00370   0.00554     0.01109       0.04374  ...   
1           0.00008   0.00465   0.00696     0.01394       0.06134  ...   
2           0.00009   0.00544   0.00781     0.01633       0.05233  ...   
3           0.00009   0.00502   0.00698     0.01505       0.05492  ...   
4           0.00011   0.00655   0.00908     0.01966       0.06425  ...   

   Shimmer:DDA      NHR     HNR  status      RPDE       DFA   spread1  \
0      0.0654

In [26]:
# Remove the 'name' column (a per-recording identifier string, not a feature).
# errors='ignore' keeps this cell idempotent: because it reassigns
# classification_df, re-running it would otherwise raise KeyError once
# 'name' has already been dropped.
classification_df = classification_df.drop(columns='name', errors='ignore')

In [27]:
# Build models that classify on classification_df.status (1 = Parkinsons, 0 = Healthy)

# Import dependencies
from sklearn.model_selection import train_test_split

# Features (X): every column except the label.
X = classification_df.drop(labels='status', axis='columns')

# Target (y): kept as a one-column DataFrame because later cells
# index it by name (y_test['status']).
y = classification_df.loc[:, ['status']]

# Show both shapes, one per line
print(X.shape, y.shape, sep='\n')

(195, 22)
(195, 1)


In [28]:
# Hold out a test set; random_state pins the shuffle so the split is repeatable
from sklearn.preprocessing import StandardScaler

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

# Learn the scaling statistics from the training rows only, so nothing
# about the test rows leaks into preprocessing.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the same fitted transform to both partitions
X_train_scaled, X_test_scaled = (
    X_scaler.transform(partition) for partition in (X_train, X_test)
)

In [29]:
# Create a LogisticRegression model
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(solver='lbfgs', random_state=1)

# Train the model.
# y_train is a one-column DataFrame; pass the 'status' column itself so the
# estimator receives a 1-D target and does not emit the
# "column-vector y was passed when a 1d array was expected" warning.
classifier.fit(X_train_scaled, y_train['status'])

# Evaluate the model on the data it was trained on
print(f'Training Data Score: {classifier.score(X_train_scaled, y_train)}')

Training Data Score: 0.8493150684931506


  y = column_or_1d(y, warn=True)


In [30]:
# Predict the held-out rows with the logistic regression model
predictions = classifier.predict(X_test_scaled)

# Pair each prediction with its true label. The label index is reset first
# so the resulting frame is numbered 0..n-1 rather than by original row id.
results = pd.DataFrame(
    {
        'Prediction': predictions,
        'Actual': y_test['status'].reset_index(drop=True),
    }
)

# Show the first 5 prediction/actual pairs
print(results.head(5))

   Prediction  Actual
0           1       1
1           1       1
2           1       1
3           1       0
4           1       1


In [31]:
# Print the accuracy score
from sklearn.metrics import accuracy_score

# Store predictions and accuracy score for logistic regression.
# The prediction variable previously held the accuracy score by mistake;
# it now holds the predicted labels, matching rf_predictions / svm_predictions.
logistic_regression_prediction = predictions
logistic_regression_accuracy = accuracy_score(y_test['status'], predictions)

# Reuse the stored value instead of recomputing it
print(logistic_regression_accuracy)

0.8571428571428571


In [32]:
# Confusion matrix for logistic regression
# (rows are the actual classes, columns are the predicted classes)
from sklearn.metrics import confusion_matrix

print(confusion_matrix(y_true=y_test['status'], y_pred=predictions))

[[ 8  4]
 [ 3 34]]


In [33]:
# Per-class precision / recall / f1 for logistic regression
from sklearn.metrics import classification_report

print(classification_report(y_true=y_test['status'], y_pred=predictions))

              precision    recall  f1-score   support

           0       0.73      0.67      0.70        12
           1       0.89      0.92      0.91        37

    accuracy                           0.86        49
   macro avg       0.81      0.79      0.80        49
weighted avg       0.85      0.86      0.85        49



In [34]:
# Create a random forest classifier
from sklearn.ensemble import RandomForestClassifier
rf_model = RandomForestClassifier(n_estimators=128, random_state=78)

# Fit the model.
# Pass the 1-D 'status' column rather than the one-column DataFrame so the
# estimator does not emit the column-vector target warning.
rf_model = rf_model.fit(X_train_scaled, y_train['status'])

  return fit_method(estimator, *args, **kwargs)


In [35]:
# Accuracy of the random forest on the training and the held-out data
print(
    f'Training Data Score: {rf_model.score(X_train_scaled, y_train)}',
    f'Testing Data Score: {rf_model.score(X_test_scaled, y_test)}',
    sep='\n',
)

Training Data Score: 1.0
Testing Data Score: 0.8979591836734694


In [36]:
# Predict the held-out rows with the random forest
predictions = rf_model.predict(X_test_scaled)

# Side-by-side table of predicted vs. actual labels, renumbered from 0
results = pd.DataFrame(
    dict(Prediction=predictions, Actual=y_test['status'])
).reset_index(drop=True)

In [37]:
# Show the first 5 prediction/actual pairs
print(results.head(5))

   Prediction  Actual
0           1       1
1           1       1
2           1       1
3           0       0
4           1       1


In [38]:
# Keep the random forest predictions and test accuracy for later comparison
rf_predictions = predictions
rf_acc_score = accuracy_score(y_true=y_test['status'], y_pred=predictions)

# Print the accuracy score that was just stored
print(rf_acc_score)

0.8979591836734694


In [39]:
# Confusion matrix for the random forest
# (rows are the actual classes, columns are the predicted classes)
print(confusion_matrix(y_true=y_test['status'], y_pred=predictions))

[[ 8  4]
 [ 1 36]]


In [40]:
# Per-class precision / recall / f1 for the random forest
print(classification_report(y_true=y_test['status'], y_pred=predictions))

              precision    recall  f1-score   support

           0       0.89      0.67      0.76        12
           1       0.90      0.97      0.94        37

    accuracy                           0.90        49
   macro avg       0.89      0.82      0.85        49
weighted avg       0.90      0.90      0.89        49



In [41]:
# Create a support vector machine classifier
from sklearn.svm import SVC

# Create the SVM model
svm_model = SVC(kernel='linear')

# Train the model.
# Pass the 1-D 'status' column rather than the one-column DataFrame so the
# estimator does not emit the column-vector target warning.
svm_model.fit(X_train_scaled, y_train['status'])

# Evaluate the model on the data it was trained on
print(f'Training Data Score: {svm_model.score(X_train_scaled, y_train)}')



Training Data Score: 0.8767123287671232


  y = column_or_1d(y, warn=True)


In [42]:
# Predict the held-out rows with the SVM
predictions = svm_model.predict(X_test_scaled)

# Build the predicted-vs-actual table, then renumber its rows from 0
results = pd.DataFrame({'Prediction': predictions, 'Actual': y_test['status']})
results = results.reset_index(drop=True)

# Show the first 5 prediction/actual pairs
print(results.head(5))

   Prediction  Actual
0           1       1
1           1       1
2           1       1
3           1       0
4           1       1


In [43]:
# Keep the SVM predictions and test accuracy for later comparison
svm_predictions = predictions
svm_acc_score = accuracy_score(y_true=y_test['status'], y_pred=predictions)

# Print the accuracy score that was just stored
print(svm_acc_score)

0.8775510204081632


In [44]:
# Confusion matrix for the SVM
# (rows are the actual classes, columns are the predicted classes)
print(confusion_matrix(y_true=y_test['status'], y_pred=predictions))

[[ 8  4]
 [ 2 35]]


In [45]:
# Per-class precision / recall / f1 for the SVM
print(classification_report(y_true=y_test['status'], y_pred=predictions))

              precision    recall  f1-score   support

           0       0.80      0.67      0.73        12
           1       0.90      0.95      0.92        37

    accuracy                           0.88        49
   macro avg       0.85      0.81      0.82        49
weighted avg       0.87      0.88      0.87        49



In [46]:
# Build a neural network model
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense




In [47]:
# Import keras tuner to use to create a model with hyperparameters

# Import dependencies
import keras_tuner as kt

In [48]:
# Use keras tuner to find the best learning rate and parameters for the model

# Define the model
def create_nn_model(hp, n_features=None):
    """Build and compile a binary-classification network for Keras Tuner.

    Args:
        hp: The Keras Tuner hyperparameter object. Each ``hp.Int`` /
            ``hp.Choice`` call below registers one value for the tuner to
            search over.
        n_features: Number of input columns. When omitted, the width of the
            notebook's ``X_train_scaled`` array is used, so the input layer
            always matches the data the model will be trained on.

    Returns:
        A compiled ``tf.keras`` Sequential model with a single sigmoid output
        unit, binary cross-entropy loss and an accuracy metric.
    """
    # The input width was previously hard-coded to 43, which does not match
    # the feature matrix built earlier in this notebook; derive it instead.
    if n_features is None:
        n_features = X_train_scaled.shape[1]

    hidden_activations = ['relu', 'tanh', 'sigmoid']

    # Create a sequential model with an explicit input layer
    nn = tf.keras.models.Sequential()
    nn.add(tf.keras.Input(shape=(n_features,)))

    # First hidden layer: the tuner picks the node count and the activation
    nn.add(tf.keras.layers.Dense(
        units=hp.Int('first_units', min_value=1, max_value=80, step=2),
        activation=hp.Choice('first_activation', hidden_activations)))

    # The tuner picks how many further hidden layers to add (1 to 5) and,
    # for each one, its node count and activation
    for i in range(hp.Int('num_layers', 1, 5)):
        nn.add(tf.keras.layers.Dense(
            units=hp.Int('units_' + str(i), min_value=1, max_value=80, step=2),
            activation=hp.Choice('activation_' + str(i), hidden_activations)))

    # Output layer. The activation is fixed to sigmoid rather than tuned:
    # binary cross-entropy expects a probability in [0, 1], which relu
    # (unbounded above) and tanh (can be negative) do not provide.
    nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

    # Compile the model; the tuner picks the Adam learning rate
    nn.compile(
        loss="binary_crossentropy",
        optimizer=tf.keras.optimizers.Adam(
            hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
        metrics=["accuracy"])

    # Return our model
    return nn

In [None]:
# Hyperband search over the models produced by create_nn_model,
# ranking trials by validation accuracy.
tuner = kt.Hyperband(
    hypermodel=create_nn_model,
    objective='val_accuracy',
    max_epochs=40,
    factor=3,
    directory='KerasTunerTrials',
    project_name='KerasTunerTrials',
)