In [23]:
#imports
import pandas as pd
import numpy as np
import os
import math

In [24]:
#preparing function
def prepare_data(raw_data: np.array, activity: int):
    # accepts data from segment and converts it into
    # np.array holding 16 attributes and activity
    result = np.array([])

    #maximum_x
    result = np.append(result, np.max(raw_data[:,0]))

    #minimum_x
    result = np.append(result, np.min(raw_data[:,0]))

    #entropy_x
    a, counts = np.unique(raw_data[:,0], return_counts=True)
    probabilities = counts / len(raw_data[:,0])
    entropy = -np.sum(probabilities*np.log2(probabilities))
    result = np.append(result, entropy)

    #interquartile_range_x
    left, right = np.split(np.sort(raw_data[:,0]), 2) #segments have 30 rows, so it should not raise errors
    q1 = np.median(left)
    q2 = np.median(right)
    result = np.append(result, q2 - q1)

    #maximum_y
    result = np.append(result, np.max(raw_data[:,1]))

    #minimum_index_y
    result = np.append(result, raw_data[:,1].argmin())

    #mean_absolute_deviation_y
    MAD = np.sum(np.abs(raw_data[:,1] - np.mean(raw_data[:,1]))) / len(raw_data[:,1])
    result = np.append(result, MAD)

    #median_y
    median = np.median(raw_data[:,1])
    result = np.append(result, median)

    #skewness_y
    mean = np.mean(raw_data[:,1])
    temp = raw_data[:,1] - mean
    standard_deviation = math.sqrt(np.sum(temp * temp) / len(raw_data[:,1]))
    skewness = 3 * ((mean - median) / standard_deviation)
    result = np.append(result, skewness)

    #standart_deviation_y
    result = np.append(result, standard_deviation)

    #RMS_y
    RMS = math.sqrt(np.sum(raw_data[:,1] * raw_data[:,1]) / len(raw_data[:,1]))
    result = np.append(result, RMS)

    #skewness_z
    median = np.median(raw_data[:,2])
    mean = np.mean(raw_data[:,2])
    temp = raw_data[:,2] - mean
    standard_deviation = math.sqrt(np.sum(temp * temp) / len(raw_data[:,2]))
    skewness = 3 * ((mean - median) / standard_deviation)
    result = np.append(result, skewness)

    
    
    #activity
    result = np.append(result, activity)

    return result

In [26]:
#reading and preparing
columns = ["maximum_x", "minimum_x", "entropy_x", "interquartile_range_x", "maximum_y",
           "minimum_index_y", "mean_absolute_deviation_y", "median_y", "skewness_y", "standard_deviation_y",
           "RMS_y", "skewness_z",
           "activity"]
#does not yet include magnitudes
#or now I am not adding magnitudes. The model is suspiciously precise even without them.
prepared_data = pd.DataFrame(columns=columns)
data_dirrectory = "data/"
data_inner = ["idle", "walking", "running", "stairs"]

for inner in data_inner:
    for file in os.listdir(data_dirrectory+inner):
        segment = pd.read_csv(data_dirrectory+inner+"/"+file)
        prepared_segment = prepare_data(segment.to_numpy(), inner)
        prepared_data.loc[len(prepared_data)] = prepared_segment

prepared_data.to_csv("data.csv")

['5.09965' '-0.909797' '4.5232314287976205' '0.067037' '4.616021' '2.0'
 '0.5660105955555556' '-0.13168099999999994' '0.7884279799260282'
 '1.138146568674783' '1.1503964250373577' '-1.2376021760347788' 'idle']
['0.407014' '-0.320823' '4.123231428797621' '0.04309600000000009'
 '0.0239419999999999' '7.0' '0.028411266666666664' '-0.13168099999999994'
 '0.03040358027005305' '0.04725101409898227' '0.1394512750913379'
 '-0.8047478071987767' 'idle']
['-0.062249' '-0.124498' '3.3492553971685' '0.02394199999999999'
 '0.244209' '14.0' '0.012045448888888892' '0.225055' '-0.7729065013209537'
 '0.01610751103622848' '0.22149160219927075' '0.03353352340902187' 'idle']
['0.560243' '0.268151' '4.189898095464287' '0.05267300000000008'
 '0.086191' '28.0' '0.03788156444444444' '-0.02154799999999995'
 '-0.4465975441730409' '0.053608445259886256' '0.06120290635963838'
 '0.16264700345807748' 'idle']
['0.560243' '0.268151' '4.189898095464287' '0.05267300000000008'
 '0.086191' '26.0' '0.03681750222222221' '-0.

KeyboardInterrupt: 