In [1]:
import pandas as pd
import numpy as np

from ipywidgets import IntProgress
from IPython.display import display

### Create a dataframe from all CSVs

In [2]:
# Recording inventories, one dict per activity.
#   'path'        - directory prefix for that activity's recordings
#   'repetitions' - one entry per sub-directory: 'number' is the sub-directory
#                   name and 'files' is how many CSVs (1.csv .. <files>.csv)
#                   getFileList will list for it
boxing_data = {
    'path': './backend_data/boxing/',
    'repetitions': [
        {'number': '5', 'files': 2},
        {'number': '10', 'files': 9},
        {'number': '15', 'files': 2},
        {'number': '20', 'files': 10},
    ],
}

jumping_jacks_data = {
    'path': './backend_data/jumping_jacks/',
    'repetitions': [
        {'number': '5', 'files': 20},
        {'number': '10', 'files': 34},
        {'number': '15', 'files': 7},
        {'number': '20', 'files': 5},
    ],
}

running_data = {
    'path': './backend_data/running/',
    'repetitions': [
        {'number': '5', 'files': 3},
        {'number': '10', 'files': 1},
        {'number': '15', 'files': 5},
        {'number': '20', 'files': 1},
    ],
}

squats_data = {
    'path': './backend_data/squats/',
    'repetitions': [
        {'number': '5', 'files': 11},
        {'number': '10', 'files': 20},
        {'number': '15', 'files': 3},
        {'number': '20', 'files': 4},
    ],
}


def getFileList(data_obj):
    """Expand one activity inventory into the list of CSV paths it describes.

    For every entry of ``data_obj['repetitions']`` with a positive ``'files'``
    count, yields ``<path><number>/<k>.csv`` for ``k = 1 .. files``, in the
    order the entries appear.

    Parameters
    ----------
    data_obj : dict
        Must provide ``'path'`` (str prefix) and ``'repetitions'`` (iterable
        of dicts with a str ``'number'`` and an int ``'files'``).

    Returns
    -------
    list of str
        The generated paths; empty when no entry has a positive file count.
    """
    return [
        "".join((data_obj['path'], group['number'], "/", str(index + 1), ".csv"))
        for group in data_obj['repetitions']
        if group['files'] > 0
        for index in range(group['files'])
    ]

# One inner list of CSV paths per activity, in the order the activities are
# listed here; flatten_list is the same paths as a single flat load order.
file_list = [
    getFileList(activity_data)
    for activity_data in (boxing_data, jumping_jacks_data, running_data, squats_data)
]

flatten_list = [item for sublist in file_list for item in sublist]

df_list = [pd.read_csv(file) for file in flatten_list]

# ignore_index=True gives the combined frame one unique 0..n-1 row index
# instead of repeating every file's own 0..k index.
dataframe = pd.concat(df_list, ignore_index=True)

# Integer codes substituted for the activity_type labels read from the CSVs.
activities = {
    'boxing': 0,
    'jumping_jacks': 1,
    'running': 2,
    'squats': 3
}

# Series.map turns any label missing from `activities` into NaN; fail here
# with the offending labels rather than later when the code is cast to int.
activity_codes = dataframe['activity_type'].map(activities)
unmapped_labels = dataframe.loc[activity_codes.isna(), 'activity_type'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(
        f"activity_type contains labels with no code in `activities`: {list(unmapped_labels)}"
    )
dataframe['activity_type'] = activity_codes

print(dataframe.shape)

dataframe.head(5)

(31115, 12)


Unnamed: 0,activity_type,repetitions,time(sec),gyroscope_x(deg/sec),gyroscope_y(deg/sec),gyroscope_z(deg/sec),accelerometer_x(g),accelerometer_y(g),accelerometer_z(g),magnetometer_x(T),magnetometer_y(T),magnetometer_z(T)
0,0,10,2022-05-21T18:06:03.792,23.231709,15.670732,21.585367,0.076355,1.04425,0.022095,3.6e-05,-3.3e-05,-5e-05
1,0,10,2022-05-21T18:06:03.841,26.585367,22.621952,25.365854,0.073059,1.047424,-0.065308,3.6e-05,-3.3e-05,-4.9e-05
2,0,10,2022-05-21T18:06:03.843,34.512196,15.853659,30.792685,0.083557,1.110962,-0.065002,3.5e-05,-3.1e-05,-4.7e-05
3,0,10,2022-05-21T18:06:03.890,38.780487,7.560976,31.95122,0.001221,1.208496,-0.148193,3.4e-05,-3.4e-05,-4.7e-05
4,0,10,2022-05-21T18:06:03.938,36.036587,-45.670734,33.841465,-0.043823,1.319031,-0.209351,3.2e-05,-3.3e-05,-4.3e-05


### Transform data to windowed features 

In [3]:
def data2features(dataframe, wl, us, ws_freq):
    """Turn a row-per-sample frame into one flat feature row per sliding window.

    Windows are cut by row position (not by timestamp).  A window is kept only
    when every sampled row carries the same label (column 0) as the window's
    first row.  Each kept window becomes
    ``[label, <columns 3..11 of sampled row 0>, <columns 3..11 of sampled row 1>, ...]``.

    NOTE(review): only the label column is checked, so a window may straddle
    two concatenated recordings of the same activity, and gaps in the time
    column are not detected -- confirm that this is acceptable.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Label in column 0 (must be convertible with ``int``); the values
        copied into the output come from columns 3..11.
    wl : int
        Window length in rows.
    us : int
        Undersampling factor: every ``us``-th row of a window is used.
    ws_freq : int
        Consecutive windows start ``wl // ws_freq`` rows apart.

    Returns
    -------
    pandas.DataFrame
        One row per kept window (default integer column names); rows
        containing NaN are dropped.

    Raises
    ------
    ValueError
        If ``wl``, ``us`` or ``ws_freq`` is not positive, or if
        ``wl // ws_freq`` is 0 (the window would never advance).
    """
    if wl <= 0 or us <= 0 or ws_freq <= 0:
        raise ValueError("wl, us and ws_freq must all be positive")

    win_length = wl      # window length in rows
    undersampling = us   # use every n-th row inside a window
    win_step = win_length // ws_freq
    if win_step == 0:
        raise ValueError("ws_freq must not exceed wl (window step would be 0)")

    # win_end is exclusive, so a window may end exactly at len(dataframe);
    # the +1 keeps that final window instead of silently dropping it.
    win_ends = range(win_length, len(dataframe) + 1, win_step)

    # Size the bar from the real iteration count so it can reach 100%.
    progress = IntProgress(min=0, max=len(win_ends))
    display(progress)

    windowed_arr = []
    if win_ends:
        # Pull the needed columns out once; positional slicing on arrays
        # replaces a per-element .iloc lookup inside the loops.
        labels = dataframe.iloc[:, 0].to_numpy()
        sensors = dataframe.iloc[:, 3:12].to_numpy()

    for win_end in win_ends:
        win_start = win_end - win_length
        sampled = slice(win_start, win_end, undersampling)
        activity = int(labels[win_start])

        # Keep the window only if every sampled row has the first row's label.
        if (labels[sampled] == activity).all():
            features = [activity]
            # Row-major flattening keeps each sampled row's values together.
            features.extend(sensors[sampled].ravel().tolist())
            windowed_arr.append(features)

        progress.value += 1

    return pd.DataFrame(windowed_arr).dropna()

In [4]:
# Windowing parameters; win_length and undersampling are also used to name
# the exported CSV.
win_length = 100     # rows per window
undersampling = 10   # every 10th row of a window is used
win_step_freq = 4    # windows start win_length // win_step_freq rows apart

windowed_df = data2features(
    dataframe,
    wl=win_length,
    us=undersampling,
    ws_freq=win_step_freq,
)

IntProgress(value=0, max=1244)

In [5]:
# Class balance: how many windows were kept per activity code (first column).
activity_column = windowed_df.iloc[:, 0]
activity_column.value_counts()

1    769
3    360
0     85
2     15
Name: 0, dtype: int64

In [6]:
# Preview the first five windowed feature rows (head() defaults to n=5).
windowed_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,81,82,83,84,85,86,87,88,89,90
0,0,23.231709,15.670732,21.585367,0.076355,1.04425,0.022095,3.6e-05,-3.3e-05,-5e-05,...,-4.3e-05,-12.865854,-60.304878,5.0,0.049377,0.928711,0.367981,3.6e-05,-3.5e-05,-4.1e-05
1,0,-41.951221,-58.963417,-17.682928,0.066284,0.838867,0.053528,3.6e-05,-3.6e-05,-3.4e-05,...,-3.8e-05,-23.04878,-6.890244,-10.426829,0.059082,1.296509,-0.15802,3.5e-05,-3.5e-05,-4.1e-05
2,0,-9.634147,101.890244,-4.390244,0.024902,1.262085,-0.118713,3.3e-05,-3.3e-05,-4.8e-05,...,-4.1e-05,-46.463417,-53.963417,-18.109756,0.052124,0.760864,0.077332,3e-05,-3.4e-05,-3.7e-05
3,0,-47.804878,-12.256098,-0.182927,0.062622,0.752625,0.041138,3e-05,-3.3e-05,-3.6e-05,...,-4.1e-05,56.036587,66.341461,0.548781,0.144348,0.941528,0.337585,3.4e-05,-3.5e-05,-3.9e-05
4,0,45.792683,83.597565,15.670732,-0.018799,0.57428,0.092041,3.5e-05,-3.6e-05,-4e-05,...,-4e-05,9.268292,-25.365854,-0.487805,-0.033447,1.066467,-0.007202,3.8e-05,-3.5e-05,-4.2e-05


In [7]:
# Persist the windowed features; the file name records the window length and
# the undersampling factor used to build them.
output_path = f'./backend_data/PPWID_{win_length}_{undersampling}.csv'
windowed_df.to_csv(output_path, index=False)