## Transform to Frequency domain

In [1]:
import numpy as np
import os
import pandas as pd
import csv

*Walking can be seen as a periodic motion, so each patient’s data can be treated as a finite sequence of equally-space samples of some function. Therefore, the vertical reaction force time series data can be approximated using the following model and turnedinto its frequency domain using discrete Fourier transform, whereN= 8000 is the length ofthe time series and i is the imaginary unit. Since the force data are real values, we will use the modulus of onlyk= 0, 1,. . ., 3999. Therefore, the frequency domain representation ofeach patient has 16×4000 real non-negative values and will be evaluated using LogisticRegression and SVM.*

In [2]:
def transform_into_frequency_domain(time_series_data):
    # Define the number of samples in the time series
    N = 8000

    # Compute the discrete Fourier transform of the time series data using np.fft.fft()
    fft_result = np.fft.fft(time_series_data)

    # Since the force data are real values, we only use the modulus of the frequencies
    mod_fft_result = np.abs(fft_result[:N//2])

    # Keep only k = 0, 1, ..., 3999 values
    mod_fft_result = mod_fft_result[:4000]

    # Reshape the result into a 16x4000 array to represent each patient's frequency domain representation
    freq_domain_data = mod_fft_result.reshape((16, 4000))
    return freq_domain_data.tolist()[0]

    
def find_label(filename):
    label = filename.split('_')[0][2:4]
    return label

In [3]:
import csv
import os
import pandas as pd

result_directory ='../dataset/processed_data/'
directory_path = '../dataset/'
cols=['time', 'l1','l2','l3','l4','l5','l6','l7','l8','r1','r2','r3','r4','r5','r6','r7','r8','total_l','total_r']
filtered_cols = ['l1','l2','l3','l4','l5','l6','l7','l8','r1','r2','r3','r4','r5','r6','r7','r8']

with open(result_directory+'Parkinson_FD.tab', 'w', newline='') as file:
    writer = csv.writer(file, delimiter='\t')
    for filename in os.listdir(directory_path):
        if filename.endswith('.txt'):
            file_path = os.path.join(directory_path, filename)
            df = pd.read_csv(file_path, delimiter='\t', header=None, names=cols)
            df_new = df[filtered_cols]
            if(len(df_new) > 8000):
                filtered_df = df_new.iloc[:8000, :]
                res =transform_into_frequency_domain(filtered_df)
                res.append(find_label(filename))
                writer.writerow(res)

### Logistic regression

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

class ParkinsonModel:
    def __init__(self, data_file):
        self.df = pd.read_csv(data_file, delimiter='\t')

    def preprocess_data(self):
        # Set up features and target variable
        X = self.df.iloc[:, :-1]
        y = self.df.iloc[:, -1]
        # split data into training and testing sets
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    def train_model(self):
        # create a logistic regression model
        self.logreg = LogisticRegression(max_iter=1000)
        # fit the model to the training data
        self.logreg.fit(self.X_train, self.y_train)

    def evaluate_model(self):
        # make predictions on the testing data
        y_pred = self.logreg.predict(self.X_test)
        # calculate the accuracy of the model
        self.accuracy = accuracy_score(self.y_test, y_pred)
        # print the accuracy score
        print("Accuracy of Logistic regression model: {:.2f}%".format(self.accuracy*100))

# Example usage
model = ParkinsonModel('../dataset/processed_data/Parkinson_FD.tab')
model.preprocess_data()
model.train_model()
model.evaluate_model()


Accuracy of Logistic regression model: 64.81%


### SVM

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load data
df = pd.read_csv('../dataset/processed_data/Parkinson_FD.tab', delimiter='\t')

# Set up features and target variable
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an SVM model with a linear kernel
svm = SVC(kernel='linear')

# Fit the model to the training data
svm.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = svm.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)

# Print the accuracy score
print("Accuracy of SVM model: {:.2f}%".format(accuracy*100))


Accuracy of SVM model: 66.67%
