In [1]:
import pandas as pd
import matplotlib # import for plots
import matplotlib.pyplot as plt # import for plots
import numpy as np
from scipy.signal import savgol_filter
import json
import os


# --- Configuration ---
dir_path = 'ktbl'  # folder with raw data (*.csv time series and *.lbl label files)
span = 1.6  # span (s) cut around each feature center; about 1.6 s is optimal: too short loses information, too long overlaps features
p_number = 50  # number of points per feature after interpolation (the raw cut holds about span*400 samples at 400 Hz)
# Folder for interim data (cut, filtered, resampled). Built with os.path.join so the
# separator matches the host OS; the trailing separator is kept on purpose because
# later code appends file names to this string by plain concatenation.
interim_path = os.path.join(dir_path, 'interim', '')

# --- Discover input files ---
# Keep regular files only (sub-folders such as the interim folder are ignored) and
# sort them so the processing order does not depend on the file system.
folder_content = sorted(
    entry for entry in os.listdir(dir_path)
    if os.path.isfile(os.path.join(dir_path, entry))
)

# Create the export folder only after the raw-data folder was listed successfully,
# so a missing raw-data folder still fails loudly instead of being created empty.
os.makedirs(interim_path, exist_ok=True)

# select data files by *.csv extension
list_time_series = [filename for filename in folder_content if filename.endswith('.csv')]
# select label files by *.lbl extension
list_feature_labels = [filename for filename in folder_content if filename.endswith('.lbl')]

print(list_time_series)  # print list of discovered data files
print(list_feature_labels)  # print list of discovered label files


['ktbl_1.csv', 'ktbl_10.csv', 'ktbl_11.csv', 'ktbl_12.csv', 'ktbl_13.csv', 'ktbl_14.csv', 'ktbl_15.csv', 'ktbl_2.csv', 'ktbl_3.csv', 'ktbl_4.csv', 'ktbl_5.csv', 'ktbl_6.csv', 'ktbl_7.csv', 'ktbl_8.csv', 'ktbl_9.csv']
['ktbl_1.lbl', 'ktbl_10.lbl', 'ktbl_11.lbl', 'ktbl_12.lbl', 'ktbl_13.lbl', 'ktbl_14.lbl', 'ktbl_15.lbl', 'ktbl_3.lbl', 'ktbl_4.lbl', 'ktbl_5.lbl', 'ktbl_6.lbl', 'ktbl_7.lbl', 'ktbl_8.lbl', 'ktbl_9.lbl']


In [2]:

xvals = np.linspace(0, span, p_number)  # common time axis every feature is resampled onto

# Savitzky-Golay smoothing settings shared by all signal columns
savgol_window = 101  # window length in samples
savgol_polyorder = 3  # order of the fitted polynomial
signal_columns = ['TgF', 'gFy', 'gFz']  # feature variables exported for each labelled feature

for time_series_data in list_time_series:  # loop through all time series files
    base_name = time_series_data[:-4]  # file name without the '.csv' extension
    labels = base_name + '.lbl'  # matching label file name

    if labels not in list_feature_labels:
        continue  # no label file for this recording, nothing to cut

    # os.path.join keeps the paths valid on any operating system
    time_series_df = pd.read_csv(os.path.join(dir_path, time_series_data))
    # convert to relative time starting at zero
    time_series_df['rel_t'] = time_series_df['time'] - time_series_df['time'].values[0]
    labels_df = pd.read_csv(os.path.join(dir_path, labels))

    # i is the row position of the feature in the label file; it is part of the export name
    for i, (center, label) in enumerate(zip(labels_df['time'], labels_df['label'])):
        # cut data around the feature center within the given span
        in_window = (
            (time_series_df['rel_t'] > (center - span / 2))
            & (time_series_df['rel_t'] < (center + span / 2))
        )
        cut_df = time_series_df[in_window].copy()

        # savgol_filter needs at least `savgol_window` samples; a feature labelled too
        # close to the start or end of a recording (or outside it) yields a shorter cut.
        # Skip it with a message instead of aborting the whole run.
        if len(cut_df) < savgol_window:
            print(f'skipped feature {i} ({label}) in {time_series_data}: '
                  f'only {len(cut_df)} samples in the cut window')
            continue

        # reset relative time of the cut to start from zero
        cut_df['rel_t'] = cut_df['rel_t'] - cut_df['rel_t'].values[0]
        cut_t = cut_df['rel_t'].values

        # smooth each signal, then interpolate it onto the lower-resolution axis
        export_df = pd.DataFrame({'rel_t': xvals})
        for column in signal_columns:
            filtered = savgol_filter(cut_df[column].values, savgol_window, savgol_polyorder)
            export_df[column] = np.interp(xvals, cut_t, filtered)

        # export name: original time series name, feature number, feature label
        export_name = f'{base_name}_{i}_{label}.csv'
        # export the filtered and interpolated single-feature file into the interim data folder
        export_df.to_csv(interim_path + export_name, index=False)
            
            

            
            

In [3]:


# function making a list of all files with certain feature label in the interim data folder
def select_files_by_label(selected_label, folder=None):
    """Return the names of all files in a folder that carry a given feature label.

    A file matches when the part of its name after the last underscore, with the
    file extension removed, equals ``selected_label`` (for example
    ``ktbl_1_0_swing.csv`` matches ``'swing'``, while ``swing_mean.csv`` does not).

    Parameters
    ----------
    selected_label : str
        Feature label to look for.
    folder : str, optional
        Folder to scan. When omitted, the module-level ``interim_path`` is used.

    Returns
    -------
    list of str
        Matching file names (without the folder part), in ``os.listdir`` order.
    """
    if folder is None:
        folder = interim_path  # default: the interim data folder

    selected_label_file_list = []
    for file_item in os.listdir(folder):
        # sub-folders are ignored, only regular files can match
        if not os.path.isfile(os.path.join(folder, file_item)):
            continue
        # strip the extension whatever its length, then compare the last '_' token
        stem = os.path.splitext(file_item)[0]
        if stem.split('_')[-1] == selected_label:
            selected_label_file_list.append(file_item)
    return selected_label_file_list

# gather the interim files of every feature label, one list per label
swing_files, jerk_files, snatch_files = (
    select_files_by_label(label_name) for label_name in ('swing', 'jerk', 'snatch')
)

# sanity check: print the per-label file counts followed by their total
label_counts = [len(swing_files), len(jerk_files), len(snatch_files)]
print(*label_counts, sum(label_counts))

def get_mean(selected_label):
    """Average all interim feature files of one label and save the mean curves.

    Every file returned by ``select_files_by_label(selected_label)`` is read from
    ``interim_path``; its 'TgF', 'gFy' and 'gFz' columns are averaged point by
    point across files. The result is written, together with the shared time axis
    ``xvals`` as column 'rel_t', to ``<selected_label>_mean.csv`` in ``interim_path``.

    Parameters
    ----------
    selected_label : str
        Feature label whose files are averaged.

    Returns
    -------
    int
        Always 0 (kept so existing callers see the same return value).

    Raises
    ------
    ValueError
        If no file carries the label (the original divided by zero and saved a
        file full of NaN), or if the files do not all have the same shape.
    """
    selected_label_list = select_files_by_label(selected_label)
    if not selected_label_list:
        raise ValueError(f'no interim files found for label {selected_label!r}')

    signal_columns = ['TgF', 'gFy', 'gFz']
    # one (points, signals) array per file, stacked along a new leading axis;
    # np.stack fails explicitly when a file has a different number of points
    stacked = np.stack([
        pd.read_csv(interim_path + feature_file)[signal_columns].to_numpy(dtype=float)
        for feature_file in selected_label_list
    ])
    mean_values = stacked.mean(axis=0)  # point-wise mean across files

    mean_df = pd.DataFrame(mean_values, columns=signal_columns)
    mean_df.insert(0, 'rel_t', xvals)  # shared time axis goes first, as in the feature files
    export_name = selected_label + '_' + 'mean' + '.csv'
    mean_df.to_csv(interim_path + export_name, index=False)
    print(f'saved file {export_name}')

    return 0

# compute and save the mean curves of each label; every call writes <label>_mean.csv into the interim data folder
get_mean('swing')
get_mean('snatch')
get_mean('jerk')
    



80 251 72 403
saved file swing_mean.csv
saved file snatch_mean.csv
saved file jerk_mean.csv


0

In [None]:
def DTWDistance2(s1, s2,w=10):
    """Dynamic time warping distance between two sequences, restricted to a band.

    Only pairs (i, j) with ``|i - j| <= w`` may be aligned. The local cost is the
    squared difference of the two elements; the result is the square root of the
    accumulated cost along the cheapest warping path.

    Parameters
    ----------
    s1, s2 : sequence of numbers
        Sequences to compare; they may have different lengths.
    w : int, optional
        Half-width of the warping band. It is widened automatically to the length
        difference of the inputs, otherwise no path could reach the last cell.

    Returns
    -------
    numpy.float64
        The DTW distance; ``inf`` if exactly one of the sequences is empty.
    """
    n, m = len(s1), len(s2)
    w = max(w, abs(n - m))

    # cost[i + 1, j + 1] is the accumulated cost of aligning s1[:i + 1] with s2[:j + 1];
    # row 0 and column 0 are the "before the start" border of the table.
    cost = np.full((n + 1, m + 1), np.inf)
    cost[0, 0] = 0.0

    for i in range(n):
        # The band is inclusive on both sides (|i - j| <= w). The previous upper
        # bound i + w excluded j == i + w, which made w=0 return inf and left the
        # final cell unreachable when s2 was longer than s1 by w or more.
        for j in range(max(0, i - w), min(m, i + w + 1)):
            dist = (s1[i] - s2[j]) ** 2
            cost[i + 1, j + 1] = dist + min(cost[i, j + 1], cost[i + 1, j], cost[i, j])

    return np.sqrt(cost[n, m])

