In [1]:
# Step 1: Import necessary libraries

# Import NumPy for numerical computations
# Import pandas for data manipulation
# Import scikit-learn modules for machine learning tasks
# Import accuracy_score for evaluating the model
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Step 2: Load and preprocess the data

# Path of the dataset file, relative to the notebook's working directory.
# Point this at your own copy of the dataset if it lives somewhere else.
DATASET_PATH = 'parkinsons.data'

# Parse the file with pandas' CSV reader into a DataFrame.
data = pd.read_csv(DATASET_PATH)

In [3]:
# Split the table into the label column and the model inputs.
# 'status' is the target the classifier will learn to predict.
target = data['status']

# Everything except 'status' (the label) and 'name' (treated as not
# relevant for the model) is used as an input feature.
features = data.drop(columns=['name', 'status'])


In [4]:
# Hold out 20% of the rows for testing; the remaining 80% are used for training.
# A fixed random_state makes the shuffle, and therefore the split, repeatable.
# NOTE(review): the split is not stratified on `target` -- confirm the class
# balance in the train and test parts is acceptable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)


In [5]:
# Standardize the features to zero mean and unit variance.
# The scaling statistics are learned from the training split only and then
# applied unchanged to both splits, so the test rows do not influence them.
scaler = StandardScaler()
scaler.fit(X_train)

# From here on X_train / X_test hold the scaled values, not the raw ones.
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [6]:
# Step 3: Train the machine learning model

# Create a random forest classifier with library-default hyperparameters.
# Only random_state is pinned, so repeated runs build the same forest.
classifier = RandomForestClassifier(
    random_state=42,
)


In [7]:
# Fit the forest on the scaled training features and their labels.
classifier.fit(X=X_train, y=y_train)


In [8]:
# Step 4: Evaluate the model

# Predict a label for every row of the held-out (scaled) test features.
y_pred = classifier.predict(X=X_test)

In [9]:

# Accuracy = fraction of test rows whose predicted label equals the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9487179487179487


In [10]:
# Step 5: Make predictions

# Feature values for one new sample: a single row with one value per feature.
# The values must be listed in the same order as the columns of `features`.
# Building a DataFrame with the training column names (instead of a bare
# NumPy array) keeps the input consistent with what the scaler was fitted on,
# avoids scikit-learn's "X does not have valid feature names" warning, and
# fails immediately with a ValueError if the number of values is wrong.
new_sample = pd.DataFrame(
    [[0.027, 0.815, 0.552, 0.809, 0.413, 0.479, 0.340, 0.370, 0.754, 0.560, 2.051,
      0.347, 2.091, 0.051, 2.495, 0.095, 0.187, 0.263, 2.070, 0.142, 2.536, 0.085]],
    columns=features.columns,
)

# Apply the scaler that was fitted on the training data (do not refit it),
# so the new sample goes through the same transformation the model was trained on.
new_data = scaler.transform(new_sample)

# Make a prediction using the trained model
prediction = classifier.predict(new_data)
print("Prediction:", prediction)


Prediction: [1]


