In [305]:
import cv2
import numpy as np
import os
from math import sqrt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
import csv

Time Decorator

In [None]:
def timeit(fn):
    def wrapper(*args, **kwargs):
        start=time()
        res=fn(*args, **kwargs)
        print(fn.__name__, "took", time()-start, "seconds.")
        return res
    return wrapper

In [319]:
# Load the feature CSV file
output_csv_path = "pd_features.csv"
def load_features(file_path):
    data = np.genfromtxt(file_path, delimiter=',', skip_header=1)
    features = data[:, 1:-1]  # Extract feature columns (skip first column for file names, last column for labels)
    labels = data[:, -1]  # The last column should contain the binary labels (0 or 1)
    return features, labels.astype(int)  # Ensure labels are integers

output_csv_path = "pd_features.csv"

features, labels = load_features(output_csv_path)
    
print (labels)

[143 317 240 315 210 208 255 363 310 323 194 370 323 325 292 278 178 246
 246 292 405 262 365 188 241]


In [308]:
# Preprocess features
scaler = StandardScaler()
features = scaler.fit_transform(features)

print(scaler)


StandardScaler()


In [309]:
print (features)

[[ 1.15844938  1.16824525  1.16457344  0.96544221  0.95679163  1.04682006
   0.8560265   1.0440808   0.61159908]
 [-0.31143177 -0.30819178 -0.31050064 -0.11653894 -0.11479099 -0.11984925
   0.04007495  0.03221609  0.00683206]
 [-0.34658811 -0.38054186 -0.3173827  -0.29616704 -0.27328537 -0.31730087
  -0.4106434  -0.35100434 -0.41273042]
 [-0.44490609 -0.46260025 -0.41900972 -0.49055247 -0.47198142 -0.52479953
  -0.60035119 -0.58465453 -0.6037477 ]
 [-0.25760195 -0.26869006 -0.21435435 -0.46673447 -0.43232942 -0.51427085
  -0.66954393 -0.61398086 -0.67128743]
 [-0.78155846 -0.76444119 -0.84372301 -0.40803822 -0.39580633 -0.43731974
  -0.42331944 -0.46058092 -0.36966991]
 [-0.71769601 -0.7268095  -0.7337055  -0.4828769  -0.46468838 -0.49841761
  -0.62842613 -0.63055934 -0.59770511]
 [-0.39483148 -0.41020247 -0.36766174 -0.5271088  -0.49551541 -0.56058335
  -0.70292322 -0.64738581 -0.70389557]
 [ 0.06605052  0.01593011  0.16628963  0.10039501 -0.02829809  0.27876124
   0.7176425   0.40840

In [310]:
# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=7)

In [311]:
print(X_train)

[[-0.69076062 -0.68745016 -0.71606603 -0.59998521 -0.56091087 -0.65765519
  -0.85395179 -0.80571845 -0.87206148]
 [ 0.53693651  0.50841204  0.67182693  0.36563118  0.28677846  0.44835224
   0.72087691  0.61199781  0.83462957]
 [ 4.2496453   4.25585113  4.15623958  4.42080537  4.52854207  4.21182581
   2.80330526  3.52236497  1.68476036]
 [-0.09177276 -0.12016957 -0.0809773  -0.45404076 -0.39457478 -0.52947859
  -0.71286382 -0.62492565 -0.74311713]
 [ 1.15844938  1.16824525  1.16457344  0.96544221  0.95679163  1.04682006
   0.8560265   1.0440808   0.61159908]
 [-0.54215083 -0.55714278 -0.56052752 -0.44570504 -0.42758279 -0.47894108
  -0.52243003 -0.5210966  -0.51927992]
 [-0.71769601 -0.7268095  -0.7337055  -0.4828769  -0.46468838 -0.49841761
  -0.62842613 -0.63055934 -0.59770511]
 [-0.28187572 -0.19935454 -0.36993625 -0.44277996 -0.36670803 -0.53757098
  -0.68005798 -0.52156522 -0.80597949]
 [-0.17529527 -0.10222878 -0.25945755 -0.26798048 -0.22248485 -0.32763287
  -0.18441685 -0.15715

In [312]:
print(y_train)

[262 323 178 323 143 188 255 405 246 241 194 246 310 292 363 292 315 365
 210 278]


In [313]:
# Function to calculate false positive rate
def calculate_false_positive_rate(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    false_positives = cm[0, 1]
    true_negatives = cm[0, 0]
    return false_positives / (false_positives + true_negatives)

    


In [314]:
# List of classifiers
classifiers = {
    "Logistic Regression": LogisticRegression(),
    "Random Forest": RandomForestClassifier(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Support Vector Machine": SVC(),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss')
}

## Classification ##

In [315]:
# Splitting the dataset into positive and negative cases
pos_indices = np.where(labels == 1)[0]
neg_indices = np.where(labels == 0)[0]

# Shuffle indices to ensure randomness
np.random.shuffle(pos_indices)
np.random.shuffle(neg_indices)

# Split the data into training and testing sets
train_pos = pos_indices[:-5]
test_pos = pos_indices[-5:]
train_neg = neg_indices[:-5]
test_neg = neg_indices[-5:]

# Create training and testing datasets
train_indices = np.concatenate([train_pos, train_neg])
test_indices = np.concatenate([test_pos, test_neg])

X_train = features[train_indices]
y_train = labels[train_indices]
X_test = features[test_indices]
y_test = labels[test_indices]

print("Training set shape:", X_train.shape)
print("Testing set shape:", X_test.shape)


Training set shape: (0, 9)
Testing set shape: (0, 9)


In [316]:
def accuracy(prediction,actual):
    correct = 0
    not_correct = 0
    for i in range(len(prediction)):
        if prediction[i] == actual[i]:
            correct+=1
        else:
            not_correct+=1
    return (correct*100)/(correct+not_correct)


def metrics(prediction,actual):
    tp = 0
    tn = 0
    fp = 0
    fn = 0
    for i in range(len(prediction)):
        if prediction[i] == actual[i] and actual[i]==1:
            tp+=1
        if prediction[i] == actual[i] and actual[i]==0:
            tn+=1
        if prediction[i] != actual[i] and actual[i]==0:
            fp+=1
        if prediction[i] != actual[i] and actual[i]==1:
            fn+=1
    metrics = {'Precision':(tp/(tp+fp+tn+fn)),'Recall':(tp/(tp+fn)),'F1':(2*(tp/(tp+fp+tn+fn))*(tp/(tp+fn)))/((tp/(tp+fp+tn+fn))+(tp/(tp+fn)))}
    return (metrics)

Logistic regression

In [320]:

clf = LogisticRegression()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
false_positive_rate = calculate_false_positive_rate(y_test, y_pred)
print(f"Logistic Regression Accuracy: {accuracy * 100:.2f}%")
print(f"Logistic Regression False Positive Rate: {false_positive_rate * 100:.2f}%\n")




ValueError: Found array with 0 sample(s) (shape=(0, 9)) while a minimum of 1 is required by LogisticRegression.

Random Forest

In [None]:
clf=RandomForestClassifier()
clf.fit(train_x, train_y)
preds=clf.predict(test_x)
print('accuracy:',accuracy(test_y.tolist(), preds.tolist()), '%')
print(metrics(test_y.tolist(), preds.tolist()))

NameError: name 'train_x' is not defined

SVM

In [None]:
clf=SVC()
clf.fit(train_x, train_y)
preds=clf.predict(test_x)
print('accuracy:',accuracy(test_y.tolist(), preds.tolist()), '%')
print(metrics(test_y.tolist(), preds.tolist()))

NameError: name 'train_x' is not defined

SVM

In [None]:
clf=KNeighborsClassifier()
clf.fit(train_x, train_y)
preds=clf.predict(test_x)
print('accuracy:',accuracy(test_y.tolist(), preds.tolist()), '%')
print(metrics(test_y.tolist(), preds.tolist()))

XGBoost

In [None]:
model = XGBClassifier()
model.fit(x_train, y_train, eval_metric='error')
y_pred = model.predict(x_test)
ccr_scr = accuracy_score(y_test, y_pred)*100
print(f"Accuracy: {ccr_scr}%")

NameError: name 'x_train' is not defined

Decision Tree

In [None]:
clf=DecisionTreeClassifier()
clf.fit(train_x, train_y)
preds=clf.predict(test_x)
print('accuracy:',accuracy(test_y.tolist(), preds.tolist()), '%')
print(metrics(test_y.tolist(), preds.tolist()))

NameError: name 'DecisionTreeClassifier' is not defined

In [None]:
# Train and evaluate models
for name, clf in classifiers.items():
    print(f"Training {name}...")
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    false_positive_rate = calculate_false_positive_rate(y_test, y_pred)

    print(f"{name} Accuracy: {accuracy * 100:.2f}%")
    print(f"{name} False Positive Rate: {false_positive_rate * 100:.2f}%\n")



Training Logistic Regression...
Logistic Regression Accuracy: 0.00%
Logistic Regression False Positive Rate: nan%

Training Random Forest...
Random Forest Accuracy: 0.00%
Random Forest False Positive Rate: nan%

Training K-Nearest Neighbors...
K-Nearest Neighbors Accuracy: 0.00%
K-Nearest Neighbors False Positive Rate: nan%

Training Support Vector Machine...
Support Vector Machine Accuracy: 0.00%
Support Vector Machine False Positive Rate: nan%

Training XGBoost...


  return false_positives / (false_positives + true_negatives)
  return false_positives / (false_positives + true_negatives)
  return false_positives / (false_positives + true_negatives)
  return false_positives / (false_positives + true_negatives)


ValueError: Invalid classes inferred from unique values of `y`.  Expected: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14], got [194 208 210 240 241 246 255 262 278 292 315 323 363 365 405]

### Visualization ###

In [None]:
def plot(f, plot_func, t_id=0, x=None, y=None):
    global header_row
    df=pd.read_csv(f, sep=';', header=None, names=header_row)
    df=df[df["Test_ID"]==t_id]
    initial_timestamp=df['Timestamp'][0]
    df['Timestamp']=df['Timestamp']- initial_timestamp
    plot_func(data=df, x=x, y=y, fit_reg=False, scatter_kws={"s": 0.5})
    print(metrics(test_y.tolist(), preds.tolist()))

Pressure

In [None]:
plot(f=parkinson_file_list[35],  plot_func=sns.regplot, t_id=0, x='Timestamp', y='Pressure')

NameError: name 'plot' is not defined

Pressure (Healthy Person)

In [None]:
plot(control_file_list[1], plot_func=sns.regplot, t_id=0, x='Timestamp', y='Pressure')

NameError: name 'plot' is not defined

In [None]:
plot(f=parkinson_file_list[35],  plot_func=sns.barplot, t_id=0, x='Timestamp', y='Pressure')

NameError: name 'plot' is not defined

In [None]:
plot(control_file_list[1], plot_func=sns.regplot, t_id=0, x='Timestamp', y='Pressure')

NameError: name 'plot' is not defined