In [3]:
import os
import pandas as pd
import warnings

# ignore warnings
# NOTE(review): this silences every warning for the rest of the kernel session,
# including pandas deprecation notices; consider narrowing the filter.
warnings.filterwarnings('ignore')

# define directory path and file format
dir_path = '../dataset2_filtered/'
file_format = 'txt'

# define column names to calculate coefficient of variation
col_names = ['ACC ML [g]', 'ACC AP [g]', 'ACC SI [g]', 'GYR ML [deg/s]', 'GYR AP [deg/s]', 'GYR SI [deg/s]']

# Collect one record per file and build the dataframe once after the loop.
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on each call.
cv_records = []

# iterate through files in directory (sorted so the row order is reproducible)
for file in sorted(os.listdir(dir_path)):
    if not file.endswith(file_format):
        continue
    # read file into pandas dataframe (tab-separated)
    df = pd.read_csv(os.path.join(dir_path, file), delimiter='\t')
    # coefficient of variation = standard deviation / mean, per selected column
    cv = df[col_names].std() / df[col_names].mean()
    # label the file 'Pt' when any row has the freezing event flag set to 1, else 'Co'
    label = 'Pt' if 1 in df['Freezing event [flag]'].values else 'Co'
    # get filename without file extension
    filename = os.path.splitext(file)[0]
    # Store the identifier under 'Entry', the column the result table declares.
    # The previous 'filename' key left 'Entry' empty and added an extra column.
    record = {'Entry': filename}
    record.update({name: cv[name] for name in col_names})
    record['label'] = label
    cv_records.append(record)

# explicit column list keeps the header stable even when no file matched
result_df = pd.DataFrame(cv_records, columns=['Entry'] + col_names + ['label'])

# save results to file, creating the output directory if it does not exist yet
output_path = '../dataset2_filtered/processed/Parkinson_CV_dataset2.tab'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
result_df.to_csv(output_path, sep='\t', index=False)


### Frequency domain transformation

In [4]:
import os
import pandas as pd
import numpy as np

# extension used to select the recording files, and the directory that holds them
file_format = 'txt'
dir_path = '../dataset2_filtered/'

# frequency-domain helper applied to each signal column
def frequency_domain_transformation(data):
    """Return the length-scaled squared FFT magnitude of ``data``.

    The full FFT of ``data`` is computed, each bin's magnitude is squared and
    divided by ``len(data)``, and only the first ``len(data) // 2`` bins are
    kept (bin 0 is the zero-frequency term).

    Parameters
    ----------
    data : sequence or array-like
        One-dimensional samples accepted by ``np.fft.fft``.

    Returns
    -------
    numpy.ndarray
        Array of ``len(data) // 2`` non-negative values.
    """
    n_samples = len(data)
    spectrum = np.fft.fft(data)
    # |X[k]|^2 / N for every bin, then keep the first half only
    power = np.abs(spectrum) ** 2 / n_samples
    return power[:n_samples // 2]

# signal columns to transform; each spectrum is stored under "<column>_FD"
signal_columns = ['ACC ML [g]', 'ACC AP [g]', 'ACC SI [g]', 'GYR ML [deg/s]', 'GYR AP [deg/s]', 'GYR SI [deg/s]']
# Declare the columns that are actually written. The old frame declared the raw
# signal names, which were never filled and ended up as all-NaN columns.
fd_columns = ['Entry'] + [column + '_FD' for column in signal_columns]

# Collect one row per file and build the dataframe once after the loop.
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on each call.
fd_rows = []

# iterate through files in directory (sorted so the row order is reproducible)
for filename in sorted(os.listdir(dir_path)):
    if not filename.endswith(file_format):
        continue
    # read file and select relevant columns
    file_path = os.path.join(dir_path, filename)
    data = pd.read_csv(file_path, delimiter='\t')
    data = data[signal_columns]
    # apply frequency domain transformation to each selected column
    fd_data = data.apply(frequency_domain_transformation)
    # file name without its extension identifies the row
    entry = os.path.splitext(filename)[0]
    fd_row = {'Entry': entry}
    for column in fd_data:
        # each cell holds the whole spectrum as a Python list
        fd_row[column + '_FD'] = fd_data[column].values.tolist()
    fd_rows.append(fd_row)

# explicit column list keeps the header stable even when no file matched
result = pd.DataFrame(fd_rows, columns=fd_columns)

# save results dataframe as tab-separated file, creating the output directory if needed
output_path = '../dataset2_filtered/processed/Parkinson_FD.tab'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
result.to_csv(output_path, sep='\t', index=False)
